-
Notifications
You must be signed in to change notification settings - Fork 36
/
rxnorm_download.py
64 lines (52 loc) · 1.92 KB
/
rxnorm_download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import mechanize
import zipfile
import re
import sys
import argparse
# Page that lists the RxNorm release files; opened when no explicit release
# is requested so the download link can be looked up on it.
DOWNLOADS_URL = (
    "https://www.nlm.nih.gov/research/umls/rxnorm/docs/rxnormfiles.html"
)

# Template for the zip of a specific release; the single %s placeholder is
# filled with the release version string supplied by the caller.
ZIP_URL = (
    "http://download.nlm.nih.gov/umls/kss/rxnorm/"
    "RxNorm_full_%s.zip"
)

# Number of bytes requested per read while streaming the download to disk.
CHUNK_SIZE = 1000

# Regex used to pick the full-release download link out of DOWNLOADS_URL.
# NOTE(review): the dots are unescaped, so they match any character.
LINK_PATTERN = re.compile(r"download.nlm.nih.gov.*full")
def _resolve_download_url(br, release):
    """Return the URL of the RxNorm zip to fetch.

    When *release* is truthy the URL is built from ZIP_URL; otherwise the
    downloads page is opened and the first link matching LINK_PATTERN is
    used. Exits with status 1 if no such link is found.
    """
    if release:
        return ZIP_URL % release
    br.open(DOWNLOADS_URL)
    # next() with a default avoids an uncaught StopIteration when the page
    # contains no matching link.
    link = next(br.links(url_regex=LINK_PATTERN), None)
    if link is None:
        print("Could not find a full release link on %s" % DOWNLOADS_URL)
        sys.exit(1)
    return link.url


def _save_response(response, path, total_bytes):
    """Stream *response* into *path* and return the number of bytes written.

    Reads CHUNK_SIZE bytes at a time and rewrites a one-line progress
    indicator on stdout after every chunk.
    """
    written = 0
    with open(path, "wb") as outfile:
        while written < total_bytes:
            chunk = response.read(CHUNK_SIZE)
            if not chunk:
                # Stream ended before Content-Length bytes arrived; stop
                # instead of spinning forever on empty reads.
                break
            outfile.write(chunk)
            written += len(chunk)
            # "\r" returns to the start of the line so the progress text is
            # overwritten in place; floor division keeps a whole-MB figure.
            sys.stdout.write("\rDownload: %.2f%% of %sMB" % (
                written * 100.0 / total_bytes,
                total_bytes // 1000000))
            sys.stdout.flush()
    # Terminate the progress line so later output starts on a fresh line.
    sys.stdout.write("\n")
    return written


def download_rxnorm(args):
    """Log in to UMLS, download an RxNorm release zip and extract it.

    Args:
        args: object with the attributes ``release`` (version string, or a
            falsy value to look up the link on the downloads page),
            ``username``, ``password`` and ``file`` (path the zip is
            written to).

    Side effects:
        Writes the zip to ``args.file`` and extracts its contents into the
        current working directory. Prints progress to stdout. Calls
        ``sys.exit(1)`` if no download link is found, if the response has
        no usable Content-Length, or if the download ends early.
    """
    br = mechanize.Browser()
    br.set_handle_robots(False)

    url = _resolve_download_url(br, args.release)
    # print() with a single argument behaves the same as a Python 2 print
    # statement and as the Python 3 print function.
    print("Signing in to download %s" % (url))

    # Opening the zip URL is expected to present a login form; the first
    # form on the page is filled in with the credentials and submitted.
    br.open(url)
    br.select_form(nr=0)
    br["username"] = args.username
    br["password"] = args.password
    zip_request = br.submit()

    # A missing header yields None (TypeError) and a non-numeric one raises
    # ValueError; either is reported as a failed download. Presumably this
    # means the login was rejected, as the message suggests.
    try:
        total_bytes = int(zip_request.info().get('Content-Length'))
    except (TypeError, ValueError):
        print("Failed to download file. Check your credentials.")
        sys.exit(1)

    written = _save_response(zip_request, args.file, total_bytes)
    if written < total_bytes:
        print("Download incomplete: received %d of %d bytes." % (
            written, total_bytes))
        sys.exit(1)

    print("Extracting zip")
    with zipfile.ZipFile(args.file) as zf:
        zf.extractall()
if __name__ == "__main__":
    # Command-line entry point: collect credentials and options, then hand
    # the parsed namespace straight to download_rxnorm().
    cli = argparse.ArgumentParser(description='Download RxNorm Release')

    # Both credentials are mandatory and differ only in flag and help text.
    for flag, help_text in (('--username', 'UMLS username'),
                            ('--password', 'UMLS password')):
        cli.add_argument(flag, help=help_text, required=True)

    # Optional: a specific release; when omitted the latest one is used.
    cli.add_argument(
        "--release",
        default=None,
        help="specify release version (e.g. '10052015'). Default: latest.")

    # Optional: destination path for the downloaded zip archive.
    cli.add_argument(
        '--file',
        default="rxnorm-download.zip",
        help='Where to save .zip download. Default: "rxnorm-download.zip"')

    download_rxnorm(cli.parse_args())