-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgetGenCode.py
More file actions
39 lines (30 loc) · 1.01 KB
/
getGenCode.py
File metadata and controls
39 lines (30 loc) · 1.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import urllib.request
import gzip
import os
import shutil
# saved_set: object whose .url attribute is the link to the gzipped file
def download_unzip(saved_set):
    """Download the gzipped file at ``saved_set.url`` and store it unzipped.

    The decompressed content is written to ``cache/<name>`` (relative to the
    current working directory), where ``<name>`` is the value returned by
    ``get_unzipped_file_name(saved_set)``. The ``cache`` directory is created
    if it does not exist yet.

    Args:
        saved_set: Object exposing a ``url`` attribute that points at a
            gzip-compressed file.

    Raises:
        ValueError: Propagated from ``get_unzipped_file_name`` when no file
            name can be derived from the URL.
        urllib.error.URLError: If the URL cannot be opened.
        OSError: If the payload is not valid gzip data or the cache file
            cannot be written.
    """
    url = saved_set.url

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("cache", exist_ok=True)

    destination = "cache/" + get_unzipped_file_name(saved_set)
    # Write to a side file first so an interrupted download never leaves a
    # truncated file under the final name.
    partial = destination + ".part"

    try:
        # Context managers close the network response, the gzip reader and
        # the output file even when decompression or writing fails.
        with urllib.request.urlopen(url) as response, \
                gzip.open(response, "rb") as unzipped, \
                open(partial, "wb") as out_file:
            # Stream in chunks instead of holding the whole file in memory.
            shutil.copyfileobj(unzipped, out_file)
        os.replace(partial, destination)
    finally:
        # After a successful replace the side file is already gone; this
        # only cleans up after a failure.
        if os.path.exists(partial):
            os.remove(partial)
def get_unzipped_file_name(saved_set):
    """Return the file name under which the unzipped download is stored.

    The name is the part of the URL's last path segment that starts at
    ``"gencode."``, with a trailing ``.gz`` suffix removed when present.

    Args:
        saved_set: Object exposing a ``url`` attribute (a string).

    Returns:
        The derived file name, e.g. ``"gencode.v27.lncRNA_transcripts.fa"``
        for a URL ending in ``"gencode.v27.lncRNA_transcripts.fa.gz"``.

    Raises:
        ValueError: If the last path segment of the URL does not contain
            ``"gencode."``.
    """
    url = saved_set.url
    # Only inspect the last path segment, so a "gencode." that appears in the
    # host or in a directory name cannot leak slashes into the file name.
    basename = url.rsplit("/", 1)[-1]
    start = basename.index("gencode.")
    unzipped_file_name = basename[start:]
    # Drop the compression suffix only when it is really there, instead of
    # blindly cutting the last three characters.
    if unzipped_file_name.lower().endswith(".gz"):
        unzipped_file_name = unzipped_file_name[:-len(".gz")]
    return unzipped_file_name
def clear_cache():
    """Delete the ``cache`` directory and its contents, if it exists.

    Does nothing when there is no ``cache`` entry in the current working
    directory.
    """
    cache_dir = 'cache'
    if os.path.exists(cache_dir):
        shutil.rmtree(cache_dir)
# Example: download_unzip(saved_set), where saved_set.url is e.g.
# "ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_27/gencode.v27.lncRNA_transcripts.fa.gz"