-
Notifications
You must be signed in to change notification settings - Fork 0
/
htmltocsv.py
78 lines (63 loc) · 2.06 KB
/
htmltocsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# -*- coding: utf-8 -*-
"""
Created on Fri Apr 3 13:26:16 2020
@author: vishvesh
"""
import requests
import sys
import pandas as pd
from bs4 import BeautifulSoup
import os
import csv
def met_data(month, year):
    """Extract the retained table cells for one month from a saved HTML page.

    Reads ``Data/<year>/<month>.html``, collects the text of every node found
    two levels below each table selected with the class string
    ``'medias mensuales numspan'``, groups those texts into rows of 15 cells,
    discards the first and the last row, and keeps only the cells at
    positions 1, 2, 3, 4, 5, 7 and 14 of every remaining row.

    Args:
        month: Value substituted for the file name (``<month>.html``).
        year: Value substituted for the directory name under ``Data``.

    Returns:
        A list of rows, each a list of 7 strings. The list is empty when the
        page yields fewer than three complete rows.

    Raises:
        OSError: If the HTML file cannot be opened or read.
    """
    cells_per_row = 15
    # Positions kept from each 15-cell row; 0, 6 and 8-13 are dropped.
    # NOTE(review): presumably these map to named weather columns of the
    # scraped table -- confirm against the page layout.
    kept_positions = (1, 2, 3, 4, 5, 7, 14)

    # Context manager so the file handle is always released.
    with open('Data/{}/{}.html'.format(year, month), 'rb') as file_html:
        plain_text = file_html.read()

    soup = BeautifulSoup(plain_text, "lxml")

    cells = []
    for table in soup.find_all('table', {'class': 'medias mensuales numspan'}):
        for child in table:
            for cell in child:
                cells.append(cell.get_text())

    # Only complete rows are used. Floor division avoids running past the end
    # of the cell list when a trailing partial row would have been rounded up.
    complete_rows = len(cells) // cells_per_row
    rows = [
        cells[start:start + cells_per_row]
        for start in range(0, complete_rows * cells_per_row, cells_per_row)
    ]

    # Drop the first and last row (presumably header and summary rows -- TODO
    # confirm). Slicing yields an empty list instead of raising when there
    # are fewer than two rows.
    body_rows = rows[1:-1]

    return [[row[pos] for pos in kept_positions] for row in body_rows]
if __name__ == "__main__":
    # Create the output directory if needed; exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs("Data/Real-Data", exist_ok=True)

    for year in range(2015, 2020):
        final_data = []
        # newline='' is required by the csv module so that the writer controls
        # line endings itself (otherwise blank rows appear on Windows).
        with open('Data/Real-Data/real_' + str(year) + '.csv', 'w',
                  newline='') as csvfile:
            wr = csv.writer(csvfile, dialect='excel')
            # NOTE(review): this header names 6 columns, while met_data() in
            # this file returns 7 cells per row (15 cells minus 8 removed).
            # Confirm which side is intended.
            wr.writerow(
                ['T', 'TM', 'Tm', 'SLP', 'H', 'VV'])

            # Gather the rows of all twelve months of this year.
            for month in range(1, 13):
                final_data.extend(met_data(month, year))

            # Write only rows in which no cell is empty or the "-" marker.
            for row in final_data:
                if not any(elem == "" or elem == "-" for elem in row):
                    wr.writerow(row)