-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathexplore_page.py
202 lines (168 loc) · 6.63 KB
/
explore_page.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
#Function to lower the countries in data & keep only countries which have more than threshold data available
def country_adjust(categories,threshold):
categorial_map={}
for i in range(len(categories)):
if(categories.values[i] >= threshold):
categorial_map[categories.index[i]]=categories.index[i]
else:
categorial_map[categories.index[i]]="Other"
return categorial_map
#Function to clean the experience feature in the dataset
def clean_experience(x):
if(x=="Less than 1 year"):
return 0.5
elif(x=="More than 50 years"):
return 50
else:
return float(x)
#Function to clean the education feature in the dataset
def clean_education(x):
if(x=="Bachelor’s degree (B.A., B.S., B.Eng., etc.)"):
return "Bachelor’s degree"
elif(x=="Master’s degree (M.A., M.S., M.Eng., MBA, etc.)"):
return "Master’s degree"
elif(x=="Professional degree (JD, MD, etc.)" or x=="Other doctoral degree (Ph.D., Ed.D., etc.)"):
return "Post Graduation"
else:
return "Less than a Bachelor"
#Function to load the dataset and clean the dataset using above function calls
@st.cache
def load_data():
df = pd.read_csv('survey_results_public.csv')
df = df[["Country","EdLevel","YearsCodePro","Employment","ConvertedComp"]]
df = df.rename({"ConvertedComp":"Salary"},axis=1)
df = df[df["Salary"].notnull()]
df = df.dropna()
df = df[df["Employment"] == "Employed full-time"]
df = df.drop("Employment",axis=1)
#Clean the country feature
country_map = country_adjust(df["Country"].value_counts(),400)
df["Country"] = df["Country"].map(country_map)
#Removing the outliers
df = df[df["Salary"] <=150000]
df = df[df["Salary"] >= 10000]
df = df[df["Country"] != "Other"]
#Clean the experience feature
df["YearsCodePro"] = df["YearsCodePro"].apply(clean_experience)
#Clean the education feature
df["EdLevel"] = df["EdLevel"].apply(clean_education)
return df
df = load_data()
def show_explore():
st.title("Explore Software Engineer Salaries")
st.write("""
### Stack Overflow Developer Survey 2020
""")
data = df["Country"].value_counts()
fig1 , ax1 = plt.subplots()
ax1.pie(data,labels=data.index,autopct="%1.1f%%",startangle=50)
ax1.axis("equal")
st.write("#### Number of records of Data from Different Countries")
st.pyplot(fig1)
st.write("""
### Mean Salary based on Country
""")
agree = st.button("Click to see Mean Salary based on Country")
if agree:
data_salmean = df.groupby(["Country"])["Salary"].mean().sort_values(ascending=False)
st.bar_chart(data_salmean)
print("called 2")
st.write("""
### Mean Years of Experience based on Country
""")
agree = st.button("Click to see Mean Years of Experience based on Country")
if agree:
data_expmean = df.groupby(["Country"])["YearsCodePro"].mean().sort_values(ascending=False)
st.bar_chart(data_expmean)
print("called 3")
st.write("""
### Mean Salary based on Years of Experience
""")
agree = st.button("Click to see Mean Salary based on Years of Experience")
if agree:
data_expsal = df.groupby(["YearsCodePro"])["Salary"].mean().sort_values(ascending=False)
st.line_chart(data_expsal)
print("called 4")
st.write("""
### All Data of Salaries
""")
agree = st.button("Click to see All Data of Salaries")
if agree:
chart_data = pd.DataFrame(df[:], columns=["Salary"])
st.area_chart(chart_data)
print("called 5")
st.write("""
### Box Plot of Salaries to Identify Outliers
""")
agree = st.button("Click to see Box Plot of Salaries")
if agree:
fig , ax = plt.subplots(1,1, figsize=(12,7))
df.boxplot('Salary' , 'Country' , ax=ax)
plt.suptitle("Salary USD vs Country")
plt.title("")
plt.ylabel("Salary")
plt.xticks(rotation=90)
st.pyplot(fig)
print("called 6")
st.write("""
### Box Plot of Years of Experience to Identify Outliers
""")
agree = st.button("Click to see Box Plot of Years of Experience")
if agree:
fig , ax = plt.subplots(1,1, figsize=(12,7))
df.boxplot('YearsCodePro' , 'Country' , ax=ax)
plt.suptitle("Years of Experience vs Country")
plt.title("")
plt.ylabel("Experience")
plt.xticks(rotation=90)
st.pyplot(fig)
print("called 7")
st.write("""
### Gender Percentage
""")
agree = st.button("Click to see Gender Percentages")
if agree:
fig , ax = plt.subplots(figsize=(2,2))
dataframe = pd.read_csv("survey_results_public.csv")
dataframe = dataframe[["Gender"]]
myexplode = [0.2, 0]
dataframe["Gender"] = dataframe[dataframe["Gender"].isin(["Man","Woman"])]
ax.pie(dataframe["Gender"].value_counts(),labels = dataframe["Gender"].value_counts().index,explode = myexplode,shadow=True,autopct='%1.0f%%',textprops={'fontsize': 4})
plt.title("Gender Analytics")
st.pyplot(fig)
print("called 8")
st.write("""
### No. of people that reported their Work Hours Per Week
""")
agree = st.button("Click to see Work Hours Per Week")
if agree:
fig , ax = plt.subplots()
dataframe = pd.read_csv("survey_results_public.csv")
dataframe = dataframe[["WorkWeekHrs"]]
dataframe["WorkWeekHrs"] = dataframe[dataframe["WorkWeekHrs"]<=100]
dataframe["WorkWeekHrs"] = dataframe[dataframe["WorkWeekHrs"]>=30]
ax.hist(dataframe["WorkWeekHrs"],color = "skyblue", ec="skyblue")
plt.title('Number of people that voted particular Work Hours in a Week')
plt.xlabel("Hours per Week")
plt.ylabel("No. of People")
st.pyplot(fig)
print("called 9")
st.write("""
### No. of people that reported their Developer Type
""")
agree = st.button("Click to see Developer Types")
if agree:
fig , ax = plt.subplots()
dataframe = pd.read_csv("survey_results_public.csv")
dataframe = dataframe[["DevType"]]
dataframe["DevType"] = dataframe[dataframe["DevType"].isin(["Developer, full-stack","Developer, back-end","Developer, front-end"])]
plt.barh(dataframe["DevType"].value_counts().index,dataframe["DevType"].value_counts(),color="khaki")
plt.title('Number of people that voted particular DevType')
plt.xlabel("No. of People")
plt.ylabel("Devloper Type")
st.pyplot(fig)
print("called 10")