-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcity_class.py
108 lines (70 loc) · 3.05 KB
/
city_class.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 4 22:15:13 2018
@author: piyush
"""
import re
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import warnings
class city():
def __init__(self):
warnings.filterwarnings("ignore")
site = "https://in.bookmyshow.com/"
try:
page = requests.get(site)
except:
raise ConnectionError("Check your Internet Connection")
soup = BeautifulSoup(page.content, "html.parser")
body = soup.find("body")
self.popular_cities = self.fetch_popular_cities(body)
self.other_cities = self.fetch_other_cities(body)
self.city_df = pd.concat([self.popular_cities, self.other_cities], join = "inner").reset_index(drop=True)
return
def get_name(self, string):
"""
Remove any whitespace or newline
"""
return re.sub(r'\n\s*\n', r'\n\n', string.strip(), flags=re.M)
def fetch_popular_cities(self, body):
top_cities = body.find(class_ = "__top-cities")
region_list = top_cities.find_all(class_ = "region-list")
popular_cities = []
popular_cities_codes = []
for region in region_list:
popular_cities.append(self.get_name(region.a.get_text()))
onclick_attr = region.a.attrs['onclick']
city_code = onclick_attr[onclick_attr.find("(")+1 : onclick_attr.find(")")].split(',')[0].strip('\'')
popular_cities_codes.append(city_code)
popular_city_df = pd.DataFrame({"city_name" : popular_cities, "city_id" : popular_cities_codes})
return popular_city_df
def fetch_other_cities(self, body):
other_cities_soup = body.find(class_ = "others-cities-list")
other_city_names = other_cities_soup.find_all(class_ = "city-name")
other_cities = []
other_cities_codes = []
for city_name in other_city_names:
other_cities.append(self.get_name(city_name.a.get_text()))
onclick_attr = city_name.a.attrs['onclick']
city_code = onclick_attr[onclick_attr.find("(")+1 : onclick_attr.find(")")].split(',')[0].strip('\'')
other_cities_codes.append(city_code)
other_city_df = pd.DataFrame({"city_name" : other_cities, "city_id" : other_cities_codes})
return other_city_df
@property
def get_cities(self):
return self.city_df
@property
def get_popular_cities(self):
return self.popular_cities
@property
def get_other_cities(self):
return self.other_cities
if __name__ == "__main__":
    # Scrape once, show each table in turn, then save the combined table.
    scraper = city()
    combined = scraper.get_cities
    for table in (combined, scraper.get_popular_cities, scraper.get_other_cities):
        print(table)
    combined.to_csv("CITY_Table.csv", index=False, header=True)