-
Notifications
You must be signed in to change notification settings - Fork 0
/
meteor_data_crawler.py
78 lines (61 loc) · 1.84 KB
/
meteor_data_crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 7 23:44:39 2019
@author: weichi
"""
import requests as rq
from bs4 import BeautifulSoup
import numpy as np
# Station identifier sent as the `station` query parameter.
station = '467410'
# Query URL up to and including `datepicker=`; the caller appends the
# YYYY-MM-DD date to complete it.
pre_url = (
    'https://e-service.cwb.gov.tw/HistoryDataQuery/DayDataController.do'
    '?command=viewMain&station={}&stname=&datepicker='.format(station)
)
#%%
def rain_wind_crawler(month, date, year=2019, timeout=30):
    """Download one day's observation table and return its rows as dicts.

    The query URL is the module-level ``pre_url`` followed by a
    ``YYYY-MM-DD`` date. The page's first ``<tbody>`` is parsed; its first
    three ``<tr>`` rows are skipped and every remaining row yields one
    record.

    Args:
        month: Month number (int); zero-padded to two digits in the URL.
        date: Day of month (int); zero-padded to two digits in the URL.
        year: Year placed in the URL. Defaults to 2019, the value that used
            to be hard-coded.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            server cannot block the caller indefinitely.

    Returns:
        A list of dicts, one per data row, with keys ``wd``, ``precp``,
        ``month``, ``day``, ``hour`` and ``ws``. ``hour`` is the zero-based
        index of the row among the data rows; ``month`` and ``day`` echo
        the arguments.

    Raises:
        requests.HTTPError: The server replied with an error status.
        ValueError: The page contains no ``<tbody>``, or a cell that is
            expected to be numeric cannot be converted by ``float``.
    """
    # e.g. ...&datepicker=2019-08-07
    datepicker = '{}-{:02d}-{:02d}'.format(year, month, date)
    url = pre_url + datepicker

    response = rq.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as data.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, features="html.parser")
    body = soup.tbody
    if body is None:
        raise ValueError('no <tbody> found in response for ' + datepicker)

    data = []
    # The first three rows are skipped (presumably table headers -- confirm
    # against the live page); each remaining row is one record.
    for hour, row in enumerate(body.find_all('tr')[3:]):
        cells = row.find_all('td')
        wd_text = cells[7].string
        precp_text = cells[10].string
        data.append({
            # "V\xa0" in the wind-direction cell is stored as NaN
            # (presumably the site's "variable direction" flag -- confirm).
            'wd': np.nan if wd_text == "V\xa0" else float(wd_text),
            # "T\xa0" in this cell is stored as 0.05 (presumably a "trace"
            # precipitation flag -- confirm).
            'precp': 0.05 if precp_text == "T\xa0" else float(precp_text),
            'month': month,
            'day': date,
            'hour': hour,
            # Wind speed has no special-marker handling; a non-numeric
            # cell raises here.
            'ws': float(cells[6].string),
        })
    return data