-
Notifications
You must be signed in to change notification settings - Fork 0
/
Population analysis
117 lines (72 loc) · 3.63 KB
/
Population analysis
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#Population analysis in Africa
import pandas as pd
import matplotlib.pyplot as plt
# Load the country statistics and give the arable-land column a clearer name.
df = pd.read_csv(
    'C:\\Users\\hp\\Documents\\Jupyter files\\Countries.csv'
).rename(columns={'Arable (%)': 'Arable land (%)'})
print(df)
#Characters replacement
# These columns go through a comma -> period swap, which leaves them as text.
# Convert them to real numbers as well so the later plots and ratios operate
# on numeric data instead of strings.
decimal_comma_columns = [
    'Coastline (coast/area ratio)',
    'Infant mortality (per 1000 births)',
    'Literacy (%)',
    'Phones (per 1000)',
    'Arable land (%)',
    'Net migration',
    'Pop. Density (per sq. mi.)',
    'Crops (%)',
    'Other (%)',
    'Birthrate',
    'Deathrate',
    'Agriculture',
    'Industry',
    'Service',
]
for column in decimal_comma_columns:
    # astype(str) keeps this working even when pandas already parsed the
    # column as numeric; errors='coerce' turns blank or unparseable text
    # into NaN instead of raising.
    df[column] = pd.to_numeric(
        df[column].astype(str).str.replace(',', '.', regex=False),
        errors='coerce',
    )

# Fold the source regions into the coarser groups used below. Series.replace
# matches whole values, so each key must equal the cell exactly (including
# its trailing space).
region_groups = {
    'C.W. OF IND. STATES ': 'EASTERN EUROPE',
    'ASIA (EX. NEAR EAST) ': 'MIDDLE EAST',
    'NEAR EAST ': 'MIDDLE EAST',
    'SUB-SAHARAN AFRICA ': 'AFRICA',
    'NORTHERN AFRICA ': 'AFRICA',
}
df['Region'] = df['Region'].replace(region_groups)
def capcountry(Country):
    """Return the country name in upper case.

    Non-string values (for example a missing name, which pandas represents
    as NaN) are returned unchanged instead of raising AttributeError.
    """
    if isinstance(Country, str):
        return Country.upper()
    return Country
# Capitalizing country names. apply() returns a new Series rather than
# modifying the column, so the result has to be assigned back.
df['Country'] = df['Country'].apply(capcountry)
# Creating the African table: keep only the rows whose region is AFRICA.
filt = df['Region'].eq('AFRICA')
Africa = df[filt]
Africa.head(10)  # first ten rows; the result is not stored or printed
# Exporting the data
Africa.to_csv('C:\\Users\\hp\\Documents\\Jupyter files\\Africa.csv')
# Median population across the African rows. Africa is already the filtered
# table, so it is used directly instead of indexing df with it.
Over_median = Africa['Population'].median()
# Order the African rows from largest to smallest population. sort_values
# expects the column label in `by`, not the column's values.
Tops = Africa.sort_values(by='Population', ascending=False)
# Line plot of population and area for the first 25 rows of Tops.
country = Tops['Country'].head(25) #top 25
pop = Tops['Population'].head(25)
size = Tops['Area (sq. mi.)'].head(25)
plt.plot(country, pop, color='g', linestyle='--', label='pop')
plt.plot(country, size, color='m', linestyle='--', label='size')
# Shade the gap between the two curves, split by which one is larger.
plt.fill_between(country, pop, size, where=(pop > size),
                 interpolate=True, alpha=0.25, label='Above avg')
plt.fill_between(country, pop, size, where=(pop <= size),
                 interpolate=True, alpha=0.25, label='Below avg')
# NOTE(review): the area column is in square miles and the curves are raw
# population and area, not a density -- confirm the title/labels are intended.
plt.title('Population per square kilometer')
plt.xlabel('country')
plt.ylabel('Pop/area')
# 25 country names do not fit horizontally; rotate them so they stay legible.
plt.xticks(rotation=90)
# The label= arguments above are only shown when a legend is drawn.
plt.legend()
plt.tight_layout()
plt.show()
# Histogram of infant mortality for the first 25 rows of Tops.
# The column may still hold text, so coerce it to numbers and drop missing
# values before binning against the numeric edges below.
Lit = pd.to_numeric(
    Tops['Infant mortality (per 1000 births)'].head(25),
    errors='coerce',
).dropna()
bins = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120]
plt.hist(Lit, bins=bins, edgecolor='white')
# NOTE(review): the title and axis labels look like placeholders and do not
# describe the infant-mortality data being plotted -- confirm and rename.
plt.title('StartUp')
plt.xlabel('Mine')
plt.ylabel('Gbez')
plt.tight_layout()
plt.show()
# Scatter of population density against coastline ratio for the first 30
# rows of Tops, coloured by density / coastline.
Tops.replace('NaN', 0, inplace=True)
# The replace above only matches the literal text 'NaN'; real missing values
# are handled here: coerce both columns to numbers and fill the gaps with 0.
den = pd.to_numeric(
    Tops['Pop. Density (per sq. mi.)'], errors='coerce'
).fillna(0).head(30)
Litr = pd.to_numeric(
    Tops['Coastline (coast/area ratio)'], errors='coerce'
).fillna(0).head(30)
#Checking for the relationship between population and coastline availability
# A coastline of 0 would divide by zero, so those rows get NaN as their ratio.
beach = den.div(Litr.where(Litr != 0)).tolist()
print(beach)
# Newer Matplotlib releases renamed the 'seaborn' style sheet to
# 'seaborn-v0_8'; use whichever name this installation provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
# plotnonfinite=True keeps the rows whose ratio is NaN on the chart instead of
# silently dropping them.
plt.scatter(den, Litr, s=100, c=beach, cmap='cool', edgecolor='blue',
            linewidth=1, alpha=0.75, plotnonfinite=True)
cbar = plt.colorbar()
cbar.set_label('Chillz')
plt.title('BEACHES')
plt.xlabel('Population density')
plt.ylabel('Coastline')
plt.tight_layout()
plt.show()