-
Notifications
You must be signed in to change notification settings - Fork 36
/
01_amfiindia.py
80 lines (62 loc) · 2.3 KB
/
01_amfiindia.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import csv
import itertools
import re
import sys

import requests
from bs4 import BeautifulSoup
#
# Build a dictionary of Mutual Fund families mapped to their AMFI IDs
#
# The names and IDs are read from the <option> elements of the "MFName"
# drop-down on the scheme-details page.
scheme_details_page = requests.get(
    'https://www.amfiindia.com/research-information/other-data/scheme-details',
    timeout=30,
)
# Fail loudly on an HTTP error instead of scraping an error page
scheme_details_page.raise_for_status()
response = scheme_details_page.text
soup = BeautifulSoup(response, 'lxml')
options = soup.select('select#MFName > option')
fund_id_dict = {}
for option in options:
    # Skip entries that carry no value attribute or only a blank one
    fund_id = option.get('value', '').strip()
    if fund_id:
        # get_text() yields text on both Python 2 and 3, so the Python 2 only
        # `unicode` builtin is not needed. The name is stripped because the
        # lookups made against this dictionary use stripped names.
        fund_id_dict[option.get_text().strip()] = fund_id
#
# Get latest NAV and details for all mutual fund schemes published by AMFI
#
nav_page = requests.get('http://portal.amfiindia.com/spages/NAV0.txt', timeout=30)
# An HTTP error page must not be mistaken for NAV data
nav_page.raise_for_status()
response = nav_page.text
# Drop blank / whitespace-only lines
raw_data = [line for line in response.split('\n') if line.strip()]
if not raw_data:
    # Without a first line there are no column headings to work with
    raise ValueError('AMFI NAV file is empty; cannot read column headings')
# Headings come from the first line of the data
headings = [heading.strip() for heading in raw_data[0].split(';')]
# Add scheme classification, fund family name and fund family ID as headings
headings.extend([
    u'Scheme Classification',
    u'Scheme Type',
    u'Scheme Category',
    u'Fund Family',
    u'Fund ID',
    u'Scheme Short Name',
])
#
# Turn the NAV lines into one dictionary per scheme
#
# Lines without a ';' are section headers rather than data. Two header lines
# in a row are read as "scheme classification" followed by "fund family"; a
# lone header line is read as a new fund family under the current
# classification.
#
# A classification is split into the text before "(" (scheme type) and the
# text inside "(...)" (scheme category). Compile both patterns once.
scheme_type_re = re.compile(r'(^.*)\(')
scheme_category_re = re.compile(r'\((.*)\)')
# Position of the scheme name within a data row
SCHEME_NAME_COLUMN = 3

final_data = []
check_next = False
mf_scheme_type = u''
mf_family = u''
scheme_type = u''
scheme_category = u''
for line in raw_data[1:]:
    if ';' not in line:
        if check_next:
            # The previous header was a classification, not a fund family
            mf_scheme_type = mf_family
            type_match = scheme_type_re.search(mf_scheme_type)
            category_match = scheme_category_re.search(mf_scheme_type)
            # Fall back gracefully when the header has no "(...)" part
            scheme_type = type_match.group(1) if type_match else mf_scheme_type
            scheme_category = category_match.group(1) if category_match else u''
            check_next = False
        else:
            check_next = True
        mf_family = line.strip()
        continue

    check_next = False
    row = [element.strip() for element in line.split(';')]
    # Guard against rows too short to contain a scheme name
    scheme_name = row[SCHEME_NAME_COLUMN] if len(row) > SCHEME_NAME_COLUMN else u''
    derived = [
        mf_scheme_type,
        scheme_type,
        scheme_category,
        mf_family,
        # Families missing from the ID dictionary get a blank ID instead of
        # aborting the whole run with a KeyError
        fund_id_dict.get(mf_family, u''),
        # Short name: collapse whitespace, keep the text before the first '-'
        ' '.join(scheme_name.split()).split('-')[0].strip(),
    ]
    # Pad or trim the raw fields so the derived values always line up with
    # the derived headings at the end of `headings`
    base_width = len(headings) - len(derived)
    row = (row + [u''] * base_width)[:base_width]
    # zip() instead of itertools.izip(): available on Python 2 and 3 alike
    final_data.append(dict(zip(headings, row + derived)))
# Normalise the scheme names in place
for record in final_data:
    # Collapse every run of whitespace in the scheme name into one space;
    # splitting on whitespace also removes leading and trailing blanks
    name_words = record['Scheme Name'].split()
    record['Scheme Name'] = ' '.join(name_words)
# Write the final data as CSV delimited by ;
# The csv module wants a binary file on Python 2 but a text file opened with
# newline='' on Python 3, so choose the open() arguments by interpreter version
if sys.version_info[0] >= 3:
    csv_file = open('amfi.csv', 'w', newline='', encoding='utf-8')
else:
    csv_file = open('amfi.csv', 'wb')
with csv_file as f:
    w = csv.DictWriter(f, headings, delimiter=';')
    w.writeheader()
    w.writerows(final_data)