forked from aptnotes/tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utilities.py
79 lines (56 loc) · 2.08 KB
/
utilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import glob
import json
import os
import magic
import requests
from bs4 import BeautifulSoup
def get_download_url(page):
    """
    Parse preview page for desired elements to build download URL

    The last <script> element inside <body> is expected to begin with an
    assignment whose right-hand side is a JSON object holding a
    '/app-api/enduserapp/shared-item' entry with 'sharedName' and 'itemID'.

    :param page: markup of the shared-file preview page
    :return: download URL built from the shared name and item ID
    """
    soup = BeautifulSoup(page, 'lxml')
    scripts = soup.find('body').find_all('script')
    script_text = scripts[-1].contents[0]

    # Take everything after the first '=' and decode only the first JSON value.
    # Splitting on every ';' and '=' would cut the JSON short whenever one of
    # its string values contains either character.
    payload = script_text.split('=', 1)[1].lstrip()
    config, _ = json.JSONDecoder().raw_decode(payload)
    app_api = config['/app-api/enduserapp/shared-item']

    # Build download URL
    box_url = "https://app.box.com/index.php"
    box_args = "?rm=box_download_shared_file&shared_name={}&file_id={}"
    file_id = 'f_{}'.format(app_api['itemID'])
    file_url = box_url + box_args.format(app_api['sharedName'], file_id)

    return file_url
def load_notes(timeout=30):
    """
    Retrieve APT Note Data

    :param timeout: seconds to wait on the server before requests gives up;
                    without it a stalled connection would block forever
    :return: decoded report metadata in reversed order, or an empty list
             when the response status is not 200
    """
    github_url = "https://raw.githubusercontent.com/aptnotes/data/master/APTnotes.json"
    response = requests.get(github_url, timeout=timeout)

    if response.status_code != 200:
        return []

    # Load APT report metadata into JSON container
    # (presumably a JSON array, since it is reversed in place below)
    APT_reports = json.loads(response.text)

    # Reverse order of reports in order to download newest to oldest
    APT_reports.reverse()

    return APT_reports
# Maps a detected MIME type to the file extension added to a downloaded report
supported_filetypes = {
    "application/pdf": ".pdf",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
}
def verify_report_filetype(download_path):
    """
    Detect the MIME type of a downloaded file and append the matching extension

    The file is renamed on disk to the resulting path, which is returned.
    When the type is unsupported, or the path already ends with the right
    extension, the path is left as it was.
    """
    detected_mime = magic.from_file(download_path, mime=True)

    # Start from the original path; only a supported type with a missing
    # extension changes it
    target_path = download_path
    if detected_mime in supported_filetypes:
        suffix = supported_filetypes[detected_mime]
        if not download_path.endswith(suffix):
            target_path = download_path + suffix

    os.rename(download_path, target_path)

    return target_path
def report_already_downloaded(download_path):
    """
    Check if report is already downloaded

    A report counts as downloaded when the exact path exists, or when a file
    named "<download_path>.<anything>" exists.

    :param download_path: path of the report without a guaranteed extension
    :return: True if a matching file exists, otherwise False
    """
    # Escape glob metacharacters so that characters such as '[', '*' or '?'
    # in the path are matched literally instead of being read as a pattern
    literal_path = glob.escape(download_path)

    return bool(glob.glob(literal_path) or glob.glob("{}.*".format(literal_path)))