-
Notifications
You must be signed in to change notification settings - Fork 46
/
restaurants_scraper.py
50 lines (36 loc) · 1.66 KB
/
restaurants_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import sys
import csv
from selenium import webdriver
import time
# Scrape the reviews of a TripAdvisor restaurant page with Selenium and append
# them to a CSV file, one row per review: [date, rating, title, review].
#
# Usage: restaurants_scraper.py [<path_to_file> <num_page> <url>]
# Either all three arguments are given or none (the defaults below are used).

# default path to file to store data
path_to_file = "/Users/gius/Desktop/reviews.csv"

# default number of scraped pages
num_page = 10

# default tripadvisor website of restaurant
url = "https://www.tripadvisor.com/Restaurant_Review-g60763-d802686-Reviews-Hard_Rock_Cafe-New_York_City_New_York.html"

# if you pass the inputs in the command line
if len(sys.argv) == 4:
    path_to_file = sys.argv[1]
    num_page = int(sys.argv[2])  # raises ValueError if not an integer
    url = sys.argv[3]
elif len(sys.argv) != 1:
    # A partial argument list used to be ignored without any feedback; still
    # fall back to the defaults, but say why the arguments had no effect.
    print(
        "usage: restaurants_scraper.py <path_to_file> <num_page> <url> "
        "(wrong number of arguments, using defaults)",
        file=sys.stderr,
    )

# Locator strategy handed to find_element()/find_elements(). The plain string
# is used instead of the find_element(s)_by_xpath helpers, which newer
# Selenium releases no longer provide.
XPATH = "xpath"

# Import the webdriver
driver = webdriver.Safari()
try:
    driver.get(url)

    # Open the file to save the review. Append mode keeps rows from earlier
    # runs; newline="" lets the csv module control the row terminator itself.
    with open(path_to_file, 'a', encoding="utf-8", newline="") as csvFile:
        csvWriter = csv.writer(csvFile)

        # change the value of num_page to save more or less reviews
        for i in range(num_page):
            # fixed pause before touching the page (presumably to let it
            # finish loading)
            time.sleep(2)

            # expand the review; a page may have no expand link at all, so
            # look it up with the plural finder instead of failing
            expand_links = driver.find_elements(XPATH, "//span[@class='taLnk ulBlueLinks']")
            if expand_links:
                expand_links[0].click()

            containers = driver.find_elements(XPATH, ".//div[@class='review-container']")
            for review_box in containers:
                title = review_box.find_element(XPATH, ".//span[@class='noQuotes']").text
                date = review_box.find_element(
                    XPATH, ".//span[contains(@class, 'ratingDate')]"
                ).get_attribute("title")
                # The matched class contains "ui_bubble_rating bubble_<N>";
                # splitting on "_" leaves <N> at index 3 (assumes the class
                # attribute starts with "ui_bubble_rating").
                rating = review_box.find_element(
                    XPATH, ".//span[contains(@class, 'ui_bubble_rating bubble_')]"
                ).get_attribute("class").split("_")[3]
                # keep each review on a single CSV line
                review = review_box.find_element(
                    XPATH, ".//p[@class='partial_entry']"
                ).text.replace("\n", " ")

                csvWriter.writerow([date, rating, title, review])

            # change the page; stop when no enabled "next" link is found
            # (presumably the last page of reviews)
            next_links = driver.find_elements(XPATH, './/a[@class="nav next ui_button primary"]')
            if not next_links:
                break
            next_links[0].click()
finally:
    # End the whole browser session even when scraping fails part-way.
    driver.quit()