This repository has been archived by the owner on Mar 29, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 59
/
data.py
104 lines (81 loc) · 2.86 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import pandas as pd
from pathlib import Path
import itertools
# --- Local data files -------------------------------------------------------
# `data` directory located one level above the directory holding this module.
DATA_DIR = Path(__file__).resolve().parents[1] / 'data'

# CSV read by `load_population` (IBGE population estimates).
IBGE_POPULATION_PATH = DATA_DIR / 'ibge_population.csv'

# --- Remote data files ------------------------------------------------------
# Per-city case time series published in the wcota/covid19br repository.
COVID_19_BY_CITY_URL = (
    'https://raw.githubusercontent.com'
    '/wcota/covid19br/master/cases-brazil-cities-time.csv'
)

# Latest cases file used for `source='ms'` in `load_cases`
# (presumably a mirror of the covid.saude.gov.br figures -- confirm upstream).
COVID_SAUDE_URL = (
    'https://raw.githubusercontent.com'
    '/3778/COVID-19/master/data/latest_cases_ms.csv'
)
def load_cases(by, source='wcota'):
    '''Load cases from wcota/covid19br or covid.saude.gov.br

    The raw rows are summed per (date, region) and pivoted so that each
    region becomes a top-level column holding its 'newCases' and
    'totalCases' series.

    Args:
        by (string): either 'state' or 'city'.
        source (string): either 'wcota' (default) or 'ms'. The 'ms'
            source is only accepted together with by='state'.

    Returns:
        pandas.DataFrame: indexed by date (sorted ascending), with
        two-level columns (region, metric) where metric is 'newCases'
        or 'totalCases'. Missing (date, region) combinations are filled
        with 0 and all values are cast to int.

    Raises:
        AssertionError: if `source` or `by` is not one of the accepted
            values, or if source='ms' is combined with by='city'.

    Examples:
        >>> cases_city = load_cases('city')
        >>> cases_city['São Paulo/SP']['newCases']['2020-03-20']
        47

        >>> cases_state = load_cases('state')
        >>> cases_state['SP']['newCases']['2020-03-20']
        110

        >>> cases_ms = load_cases('state', source='ms')
        >>> cases_ms['SP']['newCases']['2020-03-20']
        110
    '''
    # Raised explicitly rather than through `assert` statements so the
    # checks still run under `python -O`, while callers keep seeing the
    # same exception type as before.
    if source not in ('ms', 'wcota'):
        raise AssertionError(
            "source must be 'ms' or 'wcota', got {!r}".format(source))
    if by not in ('state', 'city'):
        raise AssertionError(
            "by must be 'state' or 'city', got {!r}".format(by))

    if source == 'ms':
        if by != 'state':
            raise AssertionError(
                "source='ms' only supports by='state', got by={!r}".format(by))
        # This file is ';'-separated with day-first dates; its columns are
        # renamed to the names used by the wcota file so the aggregation
        # below is shared by both sources.
        df = (pd.read_csv(COVID_SAUDE_URL,
                          sep=';',
                          parse_dates=['date'],
                          dayfirst=True)
              .rename(columns={'casosNovos': 'newCases',
                               'casosAcumulados': 'totalCases',
                               'estado': 'state'}))
    else:
        # Rows whose state is 'TOTAL' are dropped before aggregating
        # (presumably nationwide totals that would double count -- confirm).
        df = (pd.read_csv(COVID_19_BY_CITY_URL, parse_dates=['date'])
              .query("state != 'TOTAL'"))

    return (df.groupby(['date', by])
            [['newCases', 'totalCases']]
            .sum()
            .unstack(by)
            .sort_index()
            # Put the region on the outer column level: cases[region][metric].
            .swaplevel(axis=1)
            .fillna(0)
            .astype(int))
def load_population(by):
    '''Load population from IBGE.

    Reads the CSV at `IBGE_POPULATION_PATH`, renames its 'uf' column to
    'state', rewrites each city name as '<city>/<state>' and sums
    'estimated_population' per group.

    Args:
        by (string): either 'state' or 'city'.

    Returns:
        pandas.Series: estimated population indexed by `by`, sorted by
        index and named 'estimated_population'.

    Raises:
        AssertionError: if `by` is neither 'state' nor 'city'.

    Examples:
        >>> load_population('state').head()
        state
        AC      881935
        AL     3337357
        AM     4144597
        AP      845731
        BA    14873064
        Name: estimated_population, dtype: int64

        >>> load_population('city').head()
        city
        Abadia de Goiás/GO          8773
        Abadia dos Dourados/MG      6989
        Abadiânia/GO               20042
        Abaetetuba/PA             157698
        Abaeté/MG                  23237
        Name: estimated_population, dtype: int64
    '''
    # Raised explicitly rather than through `assert` so the check still
    # runs under `python -O`; the exception type is unchanged for callers.
    if by not in ('state', 'city'):
        raise AssertionError(
            "by must be 'state' or 'city', got {!r}".format(by))

    return (pd.read_csv(IBGE_POPULATION_PATH)
            .rename(columns={'uf': 'state'})
            # Qualify the city name with its state, matching the
            # '<city>/<state>' labels shown in the examples above.
            .assign(city=lambda df: df.city + '/' + df.state)
            .groupby(by)
            ['estimated_population']
            .sum()
            .sort_index())