-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathyokatlascrawler.py
119 lines (85 loc) · 4.32 KB
/
yokatlascrawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from time import sleep
import yokatlasparameters
import operator
# Browser session shared by the rest of the script; the chromedriver location
# is read from the yokatlasparameters module.
driver = webdriver.Chrome(
    executable_path=yokatlasparameters.chrome_driver_path,
)

# Output CSV, opened for writing as UTF-8. newline='' is the setting the csv
# module documentation asks for on files handed to csv.writer.
file = open(
    yokatlasparameters.dosya_adi,
    mode='w',
    encoding='utf-8',
    newline='',
)
writer = csv.writer(file)

# Header row (Turkish): university name, type, department, ranking, quota.
writer.writerow(('Üniversite Adı', 'Tür', 'Bölüm', 'Sıralama', 'Kontenjan'))
def stringToInt(sNum: str) -> int:
    """Convert a dot-grouped number string such as ``"12.345"`` to an int.

    Args:
        sNum: Text whose digits may be separated by ``.`` characters.

    Returns:
        The integer obtained after removing every ``.`` from ``sNum``.

    Raises:
        ValueError: If the text left after removing the dots is not a valid
            integer literal.
    """
    # Drop the dots in one pass so that "1.234" is parsed as 1234.
    return int(sNum.replace('.', ''))
# Rows that fall inside the configured [alt_sinir, ust_sinir] window,
# accumulated across every requested department.
university_list = []

try:
    for bolum in yokatlasparameters.bolumler:
        # Go to the main page and select the wanted department from the list.
        driver.get('https://yokatlas.yok.gov.tr/lisans-anasayfa.php')
        lisans_sec = driver.find_element(By.ID, 'bolum')
        for option in lisans_sec.find_elements(By.TAG_NAME, 'option'):
            if option.text == bolum:
                option.click()
                break
        else:
            # No <option> carries this exact text, so there is no department
            # page to read; skip it instead of scraping the landing page.
            print("Department not found, skipping: {}".format(bolum))
            continue

        # Collect the detail-page URL behind every 'panel-title' entry.
        element_section = driver.find_element(By.XPATH, '/html/body/div/div[2]/div[2]/div[1]')
        urls = [
            title.find_element(By.XPATH, './/a').get_attribute('href')
            for title in element_section.find_elements(By.CLASS_NAME, 'panel-title')
        ]
        print("Found {} {} courses".format(len(urls), bolum))

        # Visit each detail page and read its data.
        for i, url in enumerate(urls, start=1):
            driver.get(url)

            while True:
                # Click the down arrows so the quota table and the
                # last-placed-person table get loaded. This is done on every
                # attempt because a refresh reloads the page, after which the
                # panels have to be opened again before the tables can show.
                driver.find_element(By.XPATH, '//*[@id="h1070"]/a/h4/span[1]').click()
                driver.find_element(By.XPATH, '//*[@id="headingEleven"]/a/h4/span[1]').click()
                try:
                    WebDriverWait(driver, 60).until(EC.visibility_of_element_located((By.XPATH, '//*[@id="icerik_1070"]/table')))
                    WebDriverWait(driver, 60).until(EC.visibility_of_element_located((By.XPATH, '//*[@id="icerik_1000_2"]/table')))
                    break
                except TimeoutException:
                    # Tables did not show up within 60 s: reload and retry.
                    driver.refresh()

            university_last_person_score = driver.find_element(By.XPATH, '//*[@id="icerik_1070"]/table/tbody/tr[6]/td[2]').text
            university_quota = driver.find_element(By.XPATH, '//*[@id="icerik_1000_2"]/table/tbody/tr[5]/td[2]').text
            university_name = driver.find_element(By.XPATH, '/html/body/div[2]/div[1]/div[1]/div/h3[1]').text
            # Keep only what follows the second space of the second heading.
            university_type = driver.find_element(By.XPATH, '/html/body/div[2]/div[1]/div[1]/div/h3[2]').text.strip().split(" ", 2)[-1]
            # Keep only what follows the first '-' of the programme heading.
            university_department = driver.find_element(By.XPATH, '/html/body/div[2]/div[1]/div[3]/div/h2').text.strip().split("-", 1)[-1].strip()

            # Progress output so the run can be followed on the console.
            print("#{}: {} / {}".format(i, university_name, university_last_person_score))

            try:
                last_person_rank = stringToInt(university_last_person_score)
            except ValueError:
                # The cell holds something other than a dot-grouped number,
                # so it cannot be compared with the limits. Skip this row
                # rather than abort the run and lose the rows gathered so far.
                continue

            if yokatlasparameters.alt_sinir <= last_person_rank <= yokatlasparameters.ust_sinir:
                # Announce in-range programmes to show the filter is working.
                print("Passed: {}".format(university_name))
                university_list.append([
                    university_name,
                    university_type,
                    university_department,
                    last_person_rank,
                    university_quota,
                ])

    # Lastly, order the rows by the numeric value in column 3 and write them
    # out in one call.
    print("Data Scraping Completed Succesfully and Now Writing Into CSV File")
    university_list.sort(key=operator.itemgetter(3))
    writer.writerows(university_list)
finally:
    # Release the output file and the browser even when scraping fails.
    file.close()
    driver.quit()