-
Notifications
You must be signed in to change notification settings - Fork 0
/
Scraping the data
63 lines (49 loc) · 1.84 KB
/
Scraping the data
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from importlib.resources import path
from tokenize import Double
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver import ChromeOptions
import requests
import pandas as pd
# Run Chrome in headless mode so that no browser window is opened.
def setting_up_chrome():
    """Create and return a headless Chrome WebDriver.

    The chromedriver binary is obtained through ``ChromeDriverManager`` and
    handed to Selenium via a ``Service`` object.

    Returns:
        The started ``webdriver.Chrome`` instance. The caller is responsible
        for calling ``quit()`` on it.
    """
    # Imported locally so this function brings its own dependency into scope.
    # NOTE(review): the Service-based constructor requires Selenium 4+.
    from selenium.webdriver.chrome.service import Service

    options = ChromeOptions()
    # The ``options.headless`` setter is deprecated in recent Selenium
    # releases; passing the command-line switch works across versions.
    options.add_argument("--headless")

    # Recent Selenium releases no longer accept the driver path as the first
    # positional argument of webdriver.Chrome; it must be wrapped in a Service.
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=options)
    return driver
def get_path_objects(driver, round):
    """Load the Votium page of one round and return the chart's <path> tags.

    Args:
        driver: WebDriver used to open the page; its ``page_source`` is
            parsed right after ``get`` returns.
        round: Round number inserted into the page URL.

    Returns:
        The result of ``find_all('path')`` on the chart container, i.e. an
        indexable sequence of the <path> tags found inside it.

    Raises:
        AttributeError: If the body, its first <div>, or the chart container
            is not present in the parsed page (a lookup returned ``None``).
    """
    driver.get(f'https://llama.airforce/#/votium/rounds/{round}')
    page = BeautifulSoup(driver.page_source, 'html.parser')
    # The chart container is searched for inside the first <div> of <body>.
    chart = (
        page.find('body')
        .find('div')
        .find('div', id='apexchartsvotiumxbribexround')
    )
    return chart.find_all('path')
#mydict = {"name" : [cvx, fxs, luna], "val" : [20394, 2485792]}
# mydict["joey"] --> 1
def extract_values(path_in_graph, round):
    """Collect the series name and value of every positive-valued path tag.

    Args:
        path_in_graph: Iterable of tag-like objects. Each must offer
            ``get(attr)`` and a ``parent`` that also offers ``get(attr)``.
        round: Round identifier recorded once per collected entry.

    Returns:
        A tuple ``(allnames, allvals, allrounds)`` of three equally long
        lists: the parent's ``seriesname`` attribute, the tag's ``val``
        converted to ``float``, and ``round`` repeated for each entry.
        Tags whose ``val`` is missing or not greater than zero are left out.

    Raises:
        ValueError: If a ``val`` attribute is present but not a valid number.
    """
    allnames = []
    allvals = []
    for path_tag in path_in_graph:
        raw_val = path_tag.get('val')
        if raw_val is None:
            # A path without a 'val' attribute carries no value to record;
            # float(None) would otherwise abort the whole run with TypeError.
            continue
        val = float(raw_val)
        if val > 0:
            allnames.append(path_tag.parent.get('seriesname'))
            allvals.append(val)
    # Every collected entry belongs to the same round.
    allrounds = [round] * len(allvals)
    return allnames, allvals, allrounds
def build_df(df, allnames, allvals, allrounds):
    """Return ``df`` with one round's rows placed in front of it.

    Args:
        df: DataFrame accumulated so far (may be empty).
        allnames: Values for the ``names`` column.
        allvals: Values for the ``values`` column.
        allrounds: Values for the ``rounds`` column.

    Returns:
        A new DataFrame: the rows built from the three lists followed by the
        rows of ``df``. Each part keeps its own index labels.
    """
    columns = {'names': allnames, 'values': allvals, 'rounds': allrounds}
    # New rows go first, matching the order in which the frames are passed.
    return pd.concat([pd.DataFrame(columns), df])
# Scrape rounds 1 through 20, accumulate the results in one DataFrame and
# write them to an Excel workbook.
df = pd.DataFrame()
driver = setting_up_chrome()
try:
    for x in range(1, 21):
        path_in_graph = get_path_objects(driver, x)
        allnames, allvals, allrounds = extract_values(path_in_graph, x)
        df = build_df(df, allnames, allvals, allrounds)
finally:
    # Always shut the browser down, even when scraping a round fails;
    # otherwise the Chrome process started above is left running.
    driver.quit()
df.to_excel('Bribes.xlsx')