# household_distributions.py
# Forked from gcruzgar/UKsurvey.
"""
author: Gonzalo
date started: 26/10/18
This script gives a count of each household type at each wave.
Things to do:
-percentage changes in household distribution
"""
import pandas as pd
from collections import Counter
# One household-response file per wave, in wave order (a = first wave).
filelist = [wave + "_hhresp.tab" for wave in "abcdefg"]

# Column-name suffix of the variable to tabulate; household_distribution()
# prepends the wave letter to it (e.g. 'a' + '_hhtype_dv').
var_key = '_hhtype_dv'

# Code -> label lookup for the values of the variable selected by var_key.
possible_status = {
    # Single adult, no children
    1: "1 male, aged 65+, no children",
    2: "1 female, age 60+, no children",
    3: "1 adult under pensionable age, no children",
    # Single adult with children
    4: "1 adult, 1 child",
    5: "1 adult, 2 or more children",
    # Couples
    6: "Couple both under pensionable age, no children",
    8: "Couple 1 or more over pensionable age, no children",
    10: "Couple with 1 child",
    11: "Couple with 2 children",
    12: "Couple with 3 or more children",
    # Two adults who are not a couple
    16: "2 adults, not a couple, both under pensionable age, no children",
    17: "2 adults, not a couple, one or more over pensionable age, no children",
    18: "2 adults, not a couple, 1 or more children",
    # Three or more adults
    19: "3 or more adults, no children ,incl. at least one couple",
    20: "3 or more adults, 1-2 children ,incl. at least one couple",
    21: "3 or more adults, >2 children ,incl. at least one couple",
    22: "3 or more adults, no children, excl. any couples",
    23: "3 or more adults, 1 or more children, excl. any couples",
    # Negative codes: refusal / missing / inapplicable / proxy / don't know
    -2: "refusal",
    -9: "missing",
    -8: "inapplicable",
    -7: "proxy",
    -1: "don't know",
}
#var_key = '_tenure_dv'
#possible_status = {
# 1: "Owned outright",
# 2: "Owned with mortgage",
# 3: "Local authority rent",
# 4: "Housing assoc rented",
# 5: "Rented from employer",
# 6: "Rented private unfurnished",
# 7: "Rented private furnished",
# 8: "Other",
# -2: "refusal",
# -9: "missing",
# -8: "inapplicable",
# -7: "proxy",
# -1: "don't know"
#}
#var_key = '_urban_dv'
#possible_status = {
# 1: "urban area",
# 2: "rural area",
# -2: "refusal",
# -9: "missing",
# -8: "inapplicable",
# -7: "proxy",
# -1: "don't know"
#}
def household_distribution(filelist, var_key):
    """Count the values of one household variable in each wave file.

    The files are read in the order given as tab-separated tables. The
    first file is treated as wave ``a``, the second as wave ``b`` and so
    on; in each file the column named ``<wave letter> + var_key`` is
    tabulated.

    Args:
        filelist: iterable of paths to tab-separated files, in wave order.
        var_key: column-name suffix, e.g. ``'_hhtype_dv'``.

    Returns:
        dict mapping each wave letter to a ``Counter`` of the values found
        in that wave's column. Insertion order follows ``filelist``.

    Raises:
        KeyError: if a file has no ``<wave letter> + var_key`` column, or
            if more files are supplied than there are wave letters (26).
    """
    # Wave prefixes are consecutive lowercase letters; using the whole
    # alphabet avoids a hand-maintained number -> letter table that has to
    # be extended every time a new wave is added.
    wave_letters = "abcdefghijklmnopqrstuvwxyz"

    hh_dist_waves = {}
    for index, name in enumerate(filelist):
        if index >= len(wave_letters):
            # Same exception type the fixed lookup table used to raise.
            raise KeyError(
                "no wave letter available for file number %d (%r)"
                % (index + 1, name)
            )
        wave = wave_letters[index]
        df = pd.read_csv(name, sep='\t')
        # Equivalent to df[column].value_counts(), but returns a Counter.
        hh_dist_waves[wave] = Counter(df[wave + var_key])
    return hh_dist_waves
# Tabulate the variable selected by var_key for every wave file in filelist;
# the result maps wave letter -> Counter of values.
hh_dist_waves = household_distribution(filelist=filelist, var_key=var_key)