generated from edgi-govdata-archiving/Template
-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
leg_info.py
executable file
·124 lines (112 loc) · 4.31 KB
/
leg_info.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import datetime
import json
import pdb
import sqlite3
import urllib
import urllib.error
import urllib.request
from os import path

import wget
def get_since_fields( the_date ):
    """Format a date as an English "since" phrase and also return its year.

    Args:
        the_date: A ``datetime.date`` or ``datetime.datetime`` instance.

    Returns:
        A ``(text, year)`` tuple.  ``text`` is the month name, the day of
        the month with its ordinal suffix, and the year, e.g.
        ``'January 3rd, 2019'``; ``year`` is the year as a string.
    """
    # Take the day straight from the date object.  The strftime directive
    # "%-d" (day without zero padding) is a platform extension that is not
    # available everywhere (it raises ValueError on Windows).
    day = the_date.day
    if day in ( 1, 21, 31 ):
        suffix = 'st'
    elif day in ( 2, 22 ):
        suffix = 'nd'
    elif day in ( 3, 23 ):
        suffix = 'rd'
    else:
        # Covers 4-20 (including 11, 12 and 13) and 24-30.
        suffix = 'th'
    mon = the_date.strftime( "%B" )
    year = the_date.strftime( "%Y" )
    return '{} {}{}, {}'.format( mon, day, suffix, year ), year
# Fill the ``legislators`` table of leg_info.db from the congress-legislators
# "legislators-current" JSON feed and download each member's GovTrack photo
# into CD_images/.  The table is only inserted into here, never created, so
# it must already exist in the database.
conn = sqlite3.connect( 'leg_info.db' )
cursor = conn.cursor()

legs_url = 'https://theunitedstates.io/congress-legislators/legislators-current.json'
# Use the response as a context manager so the HTTP connection is released
# as soon as the payload has been read.
with urllib.request.urlopen( legs_url ) as response:
    legs = response.read().decode()
obj = json.loads( legs )

govtrack_base = "https://govtrack.us/congress/members/"
wiki_base = "https://en.wikipedia.org/wiki/"
opensecrets_base = "https://www.opensecrets.org/members-of-congress/summary?cid="
image_url = "https://govtrack.us/static/legislator-photos/{}-200px.jpeg"

for leg in obj:
    # Called ``ids`` (not ``id``) so the builtin id() is not shadowed.
    ids = leg['id']
    bioguide_id = ids['bioguide']
    govtrack_id = ids['govtrack']
    # Not every legislator has an OpenSecrets id; fall back to ''.
    opensecrets_id = ids.get( 'opensecrets', '' )

    first_name = leg['name']['first']
    last_name = leg['name']['last']
    if 'official_full' in leg['name']:
        full_name = leg['name']['official_full']
    else:
        full_name = '{} {}'.format( first_name, last_name )

    govtrack_url = '{}{}_{}/{}'.format( govtrack_base, first_name.lower(),
                                        last_name.lower(), govtrack_id )

    wikipedia_url = ''
    if 'wikipedia' in ids:
        # Article titles use underscores in place of spaces in the URL.
        wikipedia_url = '{}{}'.format( wiki_base,
                                       ids['wikipedia'].replace( ' ', '_' ))
    else:
        print( 'No wikipedia: {}'.format( full_name ))

    opensecrets_url = ''
    if opensecrets_id:
        opensecrets_url = '{}{}'.format( opensecrets_base, opensecrets_id )
    else:
        print( 'No opensecrets: {}'.format( full_name ))

    # Walk the terms to find the current seat (chamber, state, district or
    # senate class), the party, and the earliest start date of the run of
    # terms served in that seat.
    terms = leg['terms']
    start_date = datetime.date.today()
    party = ''
    sen_rep = ''
    state = ''
    district = ''
    sen_class = ''
    official_url = ''
    image_file = ''
    for term in terms:
        if ( term['type'] != sen_rep or
                ( term['type'] == 'rep' and term['district'] != district )):
            # True the first time through, on a change rep <--> sen, and on
            # a change of House district: restart the seat from this term.
            sen_rep = term['type']
            start_date = datetime.datetime.strptime( term['start'], '%Y-%m-%d' )
            party = term['party']
            state = term['state']
            if sen_rep == 'rep':
                district = term['district']
                image_file = '{}{}_rep.jpeg'.format( state,
                                                     str( district ).zfill(2))
            else:
                sen_class = term['class']
                district = ''
                image_file = '{}_sen{}.jpeg'.format( state, sen_class )
        else:
            # Same seat as the previous term: keep the earliest start date.
            this_date = datetime.datetime.strptime( term['start'], '%Y-%m-%d' )
            if this_date < start_date:
                start_date = this_date
            elif party != term['party']:
                # They changed party after they became sen or rep
                party = term['party']
        # Remember the URL of the latest term that provides one.
        if 'url' in term:
            official_url = term['url']

    ( since_date, since_year ) = get_since_fields( start_date )

    # Senators are keyed by state alone; representatives by state plus the
    # zero-padded two-digit district number.
    if district == '':
        cd_state = state
    else:
        cd_state = state + str( district ).zfill(2)

    cursor.execute(
        'insert into legislators ( cd_state, name, party, govtrack_id, '
        'opensecrets_id, bioguide_id, sen_class, since_date, since_year, '
        'official_url, govtrack_url, opensecrets_url, wikipedia_url ) '
        'values ( ?,?,?,?,?,?,?,?,?,?,?,?,? )',
        ( cd_state, full_name, party, govtrack_id, opensecrets_id, bioguide_id,
          sen_class, since_date, since_year, official_url, govtrack_url,
          opensecrets_url, wikipedia_url ))
    # Committed per row, so rows inserted so far are kept if a later
    # iteration fails.
    conn.commit()

    if image_file != '':
        try:
            this_image_url = image_url.format( govtrack_id )
            wget.download( this_image_url,
                           'CD_images/{}'.format( image_file ))
        except urllib.error.HTTPError:
            # A missing photo is reported and skipped, not treated as fatal.
            print( "Photo for {} not available.".format( cd_state ))

# Release the database handle once every legislator has been processed.
conn.close()