-
Notifications
You must be signed in to change notification settings - Fork 15
/
buildDomains.py
96 lines (78 loc) · 2.71 KB
/
buildDomains.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from bs4 import BeautifulSoup
import requests
import json
import datetime
import codecs
import time
domainHolder = {}
domainHolder['name'] = 'Pathfinder 2.0 domains list'
domainHolder['date'] = datetime.date.today().strftime("%B %d, %Y")
def get_details(link):
itemDetails = {}
res = requests.get(link)
res.raise_for_status()
soup = BeautifulSoup(res.text, 'lxml')
feat = soup.find_all("div", {'class':'main'})
detail = soup.find("span", {'id':'ctl00_MainContent_DetailedOutput'})
#print(detail.contents)
children = detail.contents
reachedBreak = False
reachedCrit = False
godHolder = []
string = ","
tagType = ""
for child in children:
stringContents = str(child)
if stringContents.startswith("<"):
if child.name == "b":
tagType = child.text
if child.name == "a":
if tagType == "Source":
itemDetails[tagType] = child.text
if child.name == "u":
if tagType == "Deities":
godHolder.append(child.text)
itemDetails['deities'] = string.join(godHolder)
return itemDetails
def get_domains(link):
items = []
multiItems = []
res2 = requests.get(link)
res2.raise_for_status()
soup2 = BeautifulSoup(res2.text, 'lxml')
table = soup2.find(lambda tag: tag.name=='table' and tag.has_attr('id') and tag['id']=="ctl00_MainContent_DomainElement")
rows = table.findAll(lambda tag: tag.name=='tr')
t = 0
for row in rows:
t += 1
#print(row)
#print("-----------------------------------")
item = {}
entries = row.find_all(lambda tag: tag.name=='td')
if entries is not None:
if len(entries) > 0:
name = entries[0].find("a").text
item['name'] = name
item['link'] = "https://2e.aonprd.com/"+entries[0].find("a")['href']
item['description'] = entries[1].text
item['domainSpell'] = entries[2].text.replace(u'\u2014', '')
item['advancedDomainSpell'] = entries[3].text
print("getting domain:",item['name'])
itemDetails = get_details(item['link'])
for key in itemDetails.keys():
item[key] = itemDetails[key]
items.append(item)
#if t > 5:
#break
return items
def get_all():
domainHolder['domains'] = get_domains("https://2e.aonprd.com/Domains.aspx")
#domainHolder['rangedWeapons'] = get
return domainHolder
#print(get_all())
json_data = json.dumps(get_all(), indent=4)
#print(json_data)
filename = "domains-pf2.json"
f = open(filename, "w")
f.write(json_data)
f.close