forked from jimbarnesrtp/pf2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
buildTraits.py
142 lines (118 loc) · 4.11 KB
/
buildTraits.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
from bs4 import BeautifulSoup
import requests
import json
import datetime
import codecs
import time
# Top-level container for the generated document: a display name and the
# generation date (formatted like "January 01, 2024"); get_all() later adds
# the scraped entries under the 'traits' key.
# NOTE(review): the label reads "Condition list" although this script collects
# traits -- confirm whether downstream consumers rely on the current text.
condHolder = {
    'name': 'Pathfinder 2.0 Condition list',
    'date': datetime.date.today().strftime("%B %d, %Y"),
}
def get_single(link, timeout=30):
    """Fetch one detail page and extract its fields.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup
    (``lxml`` parser). Only the direct children of the span whose id is
    ``ctl00_MainContent_DetailedOutput`` are inspected.

    Args:
        link: URL of the detail page.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get`` so a stalled connection cannot hang
            the whole run.

    Returns:
        A dict that always contains ``'text'`` and, when the matching markup
        is present, ``'name'`` (stripped ``<h1>`` text), ``'source'`` (text of
        an ``<a>`` whose first class is ``external-link``) and ``'actions'``
        (``alt`` attribute of an ``<img>``). If an element type occurs more
        than once, the last occurrence wins.

    Raises:
        requests.HTTPError: If the response carries an error status.
        ValueError: If the page has no ``ctl00_MainContent_DetailedOutput``
            span.
    """
    response = requests.get(link, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    detail = soup.find('span', id="ctl00_MainContent_DetailedOutput")
    if detail is None:
        raise ValueError("no detail container found at {}".format(link))

    details = {}
    fragments = []
    reached_break = False  # becomes True once the first <hr> has been seen

    for child in detail.contents:
        markup = str(child)

        if not markup.startswith("<"):
            # Plain text node: keep it unless it is whitespace only.
            if not markup.isspace():
                fragments.append(markup)
            continue

        tag = getattr(child, "name", None)
        if tag == "h1":
            details['name'] = child.text.strip()
        elif tag == "hr":
            reached_break = True
        elif tag == "a":
            # Anchors without a class attribute (or with an empty one) are
            # simply not source links; anything else should surface.
            try:
                first_class = child['class'][0]
            except (KeyError, IndexError):
                first_class = None
            if first_class == "external-link":
                details['source'] = child.text
        elif tag == "img":
            # An image without alt text must not discard the rest of the page.
            alt = child.get('alt')
            if alt is not None:
                details['actions'] = alt
        elif tag == "i" and reached_break:
            # Italic runs only count as description text after the <hr>.
            fragments.append(child.text)

    # Whitespace-only pieces and bare ", " separators are dropped from the
    # final description.
    details['text'] = "".join(
        piece for piece in fragments
        if not piece.isspace() and piece != ", "
    )
    return details
def get_links():
    """Scrape the trait index page and enrich every linked entry.

    The index at ``https://2e.aonprd.com/Traits.aspx`` is parsed and the
    direct children of its ``ctl00_MainContent_DetailedOutput`` span are
    walked in order: ``<h2>`` elements set the current trait type and each
    ``<span>`` that contains a link becomes one entry. Every entry's detail
    page is then fetched with ``get_single`` and merged into the entry.

    Returns:
        A list of dicts with ``'name'``, ``'type'`` and ``'link'``, plus the
        keys returned by ``get_single`` (which overwrite same-named keys).
        Entries whose detail page could not be fetched keep only the three
        index fields.

    Raises:
        requests.HTTPError: If the index page responds with an error status.
        ValueError: If the index page has no
            ``ctl00_MainContent_DetailedOutput`` span.
    """
    from urllib.parse import urljoin

    base_url = "https://2e.aonprd.com/"
    index_pages = ["https://2e.aonprd.com/Traits.aspx"]

    traits = []
    for page_url in index_pages:
        # A timeout keeps a stalled connection from hanging the run forever.
        response = requests.get(page_url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'lxml')
        container = soup.find('span', id="ctl00_MainContent_DetailedOutput")
        if container is None:
            raise ValueError(
                "no detail container found at {}".format(page_url)
            )

        trait_type = ""
        for child in container.contents:
            tag = getattr(child, "name", None)
            if tag is None:
                continue  # text nodes carry no index information

            if tag == "h2":
                trait_type = child.text
            # NOTE(review): entries seen before the first heading (or under an
            # empty heading) are filed as "General" -- confirm this matches
            # the intended grouping.
            if trait_type == "":
                trait_type = "General"

            if tag == "span":
                anchor = child.find("a")
                href = anchor.get("href") if anchor is not None else None
                if href is None:
                    # No detail page to follow, so there is nothing to fetch.
                    continue
                traits.append({
                    'name': child.text,
                    'type': trait_type,
                    'link': urljoin(base_url, href),
                })

    for item in traits:
        print("get Item:", item['name'],"link:", item['link'])
        try:
            item.update(get_single(item['link']))
        except Exception as err:
            # Best effort: one broken page must not abort the whole scrape,
            # but Ctrl-C / SystemExit are deliberately allowed to propagate.
            print("error on getting item", item['name'], err)
    return traits
def get_all():
    """Collect every trait and return the complete output document.

    Stores the list produced by ``get_links()`` under the ``'traits'`` key of
    the module-level ``condHolder`` dict and returns that same dict.
    """
    scraped = get_links()
    condHolder['traits'] = scraped
    return condHolder
# Run the scrape and serialise the result as pretty-printed JSON.
json_data = json.dumps(get_all(), indent=4)

# Written relative to the current working directory.
filename = "traits-pf2.json"
# `with` guarantees the handle is flushed and closed even if the write fails;
# a bare `f.close` (without parentheses) would never actually close it.
with open(filename, "w") as f:
    f.write(json_data)