This repository has been archived by the owner on Jul 11, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
122 lines (110 loc) · 4.63 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# from soup_scrape import *
import csv
import os

from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from load_data import *
from scrape import *
from urls import *
# Column headers for the scraped-vendor CSV (one row per vendor).
fieldnames = [
    "vendor",
    "about",
    "listings",
    "reviews",
    "ratings",
    "number of profiles",
    "profiles (site: link)",
    "items sold",
]
def searchWithKilo():
    """Scrape Kilos vendor pages and write one CSV row per vendor.

    For each name in the module-level ``vendors`` list, visits the vendor's
    main profile, market-profile table, and listing search results on Kilos
    (via the module-level ``driver``), then appends a row matching
    ``fieldnames`` to ``kilo_scrape.csv`` two directories above this file.
    Vendors whose pages are missing expected elements are skipped.
    """
    csv_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            "..", "..", "kilo_scrape.csv")
    # newline="" is required by the csv module for writer file objects;
    # the with-block guarantees the file is closed on any exit path
    # (the original closed it twice on KeyboardInterrupt).
    with open(csv_path, "w", newline="") as file:
        csv_file = csv.writer(file, delimiter=',')
        csv_file.writerow(fieldnames)  # add headers
        try:
            for index, vendor in enumerate(vendors):
                try:
                    # --- main profile page: about text + the four stat boxes ---
                    driver.get(KILOS_VENDOR.format(vendor))
                    # Escape real newlines so the about text stays on one CSV field.
                    about = driver.find_element(
                        By.XPATH, ".//textarea").get_attribute('value').replace(
                            '\n', '\\n')
                    stats = driver.find_elements(By.CLASS_NAME, "stat")
                    numLists = stats[0].text
                    numReviews = stats[1].text
                    avgRating = stats[2].text
                    numProfiles = stats[3].text
                    # --- market-profile table: map site name -> profile link ---
                    driver.get(KILOS_MARKET_PROFILE.format(vendor))
                    table = driver.find_element(By.XPATH, "//tbody")
                    links = {}
                    for i, row in enumerate(table.find_elements(By.XPATH, ".//tr")):
                        if i == 0:  # table header
                            continue
                        tds = row.find_elements(By.XPATH, ".//td")
                        site = tds[0].text
                        # Prefer a live link over the dead-site placeholder URL.
                        if (site not in links or links[site] ==
                                "http://dead.site.dont.visit.onion/removed_for_user_safety"):
                            links[site] = tds[1].text
                    market_profiles = "".join(
                        f"{site}: {link}\\n" for site, link in links.items())
                    # --- listing search results: names of items sold by this vendor ---
                    driver.get(KILOS_SEARCH_RESULT.format(vendor))
                    items = set()
                    for search_result in driver.find_elements(
                            By.CLASS_NAME, "searchResult"):
                        seller = search_result.find_element(
                            By.XPATH, "./div/div[2]/p[1]/a").text
                        if seller != vendor:
                            continue  # search hit belongs to another vendor
                        items.add(search_result.find_element(
                            By.XPATH, "./div/div[2]/a/h4").text)
                    itemnames = "".join(item + "\\n" for item in items)
                    # [:-2] trims the trailing literal "\n" separator.
                    csv_file.writerow([
                        vendor, about, numLists, numReviews, avgRating,
                        numProfiles, market_profiles[:-2], itemnames[:-2]
                    ])
                    file.flush()  # persist progress after every vendor
                    print(
                        f"{index}: {vendor, about, numLists, numReviews, avgRating, numProfiles, market_profiles[:-2], itemnames[:-2]}"
                    )
                except NoSuchElementException:
                    continue  # page layout not as expected -> skip this vendor
        except KeyboardInterrupt:
            print("\n\nKeyboardInterrupt!!!!!!\n\n\n")
            file.flush()
def searchWithRecon():
    """Search each vendor on Recon and print the market names found.

    For each name in the module-level ``vendors`` list, submits the Recon
    vendor-search form via the module-level ``driver``, waits up to 20s for
    result elements to render, and prints each market name. Vendors whose
    results never appear are reported and skipped.
    """
    for vendor in vendors:
        driver.get(RECON1 + "/vendor")
        searchbox = driver.find_element(By.NAME, "vendor_username")
        searchbox.send_keys(vendor)
        searchbox.send_keys(Keys.ENTER)
        try:
            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located(
                    (By.XPATH, "//div[contains(@class, 'market-name')]")))
            profiles = driver.find_elements(
                By.XPATH, "//div[contains(@class, 'market-name')]")
            for profile in profiles:
                print(profile.text)
        except TimeoutException:
            # The original printed the TimeoutException class object, which
            # tells the operator nothing; name the vendor that timed out.
            print(f"Timed out waiting for Recon results for {vendor!r}")
# print(vendors)
# Entry point: choose the target market, load its landing page, pause long
# enough for it to render, then run the matching scraper.
url = KILOS
if url == KILOS:
    load_wait, scraper = 5, searchWithKilo
elif url == RECON1:
    load_wait, scraper = 45, searchWithRecon
else:
    load_wait, scraper = None, None
if scraper is not None:
    driver.get(url)
    time.sleep(load_wait)
    scraper()