-
Notifications
You must be signed in to change notification settings - Fork 0
/
functions.py
89 lines (67 loc) · 2.39 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import streamlit as st
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.cluster import KMeans
import scipy.cluster.hierarchy as sch
from kneed import KneeLocator
import seaborn as sns
# --- Module-level clustering pipeline ---------------------------------------
# Runs once at import time and publishes `b`: the customer table with an extra
# 'Cluster' column. The page functions in this module render `b`.
df = pd.read_csv("./Mall_Customers_Data.csv")

# The features used for clustering are the columns at positions 3 and 4
# (presumably annual income and spending score -- confirm against the CSV).
X = df.iloc[:, [3, 4]].values

# Standardise the features so both contribute equally to the distances.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Elbow method: record the KMeans inertia for k = 1..14 on the scaled data.
wcss = []
for i in range(1, 15):
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=0)
    kmeans.fit(X_scaled)
    wcss.append(kmeans.inertia_)

kneedle = KneeLocator(range(1, 15), wcss, curve='convex', direction='decreasing')
elbow_point = kneedle.elbow
if elbow_point is None:
    # KneeLocator reports None when it cannot find a knee; fail with a clear
    # message instead of handing None to KMeans as the cluster count.
    raise ValueError(
        "Could not determine the number of clusters: no elbow found in the WCSS curve."
    )

# Fit the final model on the same scaled features the elbow was chosen on;
# fitting on the raw X would not match the cluster count selected above.
kmeansmodel = KMeans(n_clusters=elbow_point, init='k-means++', random_state=0)
y_kmeans = kmeansmodel.fit_predict(X_scaled)

# `kmeans` is rebound here from the last loop estimator to a one-column frame
# of labels; `dataset_1` is the input table with that column appended.
kmeans = pd.DataFrame(y_kmeans)
dataset_1 = pd.concat([df, kmeans], axis=1)

labels = kmeansmodel.labels_
b = df.copy()
b['Cluster'] = labels
@st.cache_data
def load_data():
    """Load the customer CSV and return it with a KMeans 'Cluster' column.

    The features at column positions 3 and 4 are standardised, the number of
    clusters is chosen with the elbow method over k = 1..14, and the final
    model is fit on the same standardised features.

    The result is cached with ``st.cache_data`` because it is a DataFrame:
    each caller gets its own copy rather than a shared mutable object.

    Returns:
        pandas.DataFrame: a copy of the CSV contents plus a 'Cluster' column
        holding the cluster label of each row.

    Raises:
        ValueError: if no elbow can be located in the inertia curve.
    """
    customers = pd.read_csv("./Mall_Customers_Data.csv")
    features = customers.iloc[:, [3, 4]].values
    features_scaled = StandardScaler().fit_transform(features)

    # Elbow method, computed locally so the function does not depend on a
    # module-level `elbow_point` having been set beforehand.
    candidate_ks = range(1, 15)
    wcss = [
        KMeans(n_clusters=k, init='k-means++', random_state=0)
        .fit(features_scaled)
        .inertia_
        for k in candidate_ks
    ]
    n_clusters = KneeLocator(
        candidate_ks, wcss, curve='convex', direction='decreasing'
    ).elbow
    if n_clusters is None:
        # KneeLocator reports None when it cannot find a knee.
        raise ValueError(
            "Could not determine the number of clusters: no elbow found in the WCSS curve."
        )

    # Fit on the scaled features so the model matches the data the cluster
    # count was selected on.
    model = KMeans(n_clusters=n_clusters, init='k-means++', random_state=0)
    clustered = customers.copy()
    clustered['Cluster'] = model.fit_predict(features_scaled)
    return clustered
# NOTE(review): this binds the load_data function object itself, not its
# return value. If a DataFrame was intended, this should be `load_data()` --
# confirm against the code that uses `df1` before changing it.
df1 = load_data
def Main_page():
    """Render the landing page: the banner image followed by the author line."""
    banner_path = "./images/Client-segmentation.png"
    # Read the image as raw bytes and hand those bytes to Streamlit.
    with open(banner_path, "rb") as image_file:
        banner_bytes = image_file.read()
    st.image(banner_bytes)
    st.write("By Osthailyd Bautista")
def show_dataframe():
    """Write a markdown heading followed by the module-level table `b`."""
    # Heading first, then the DataFrame, each through its own st.write call.
    for element in ("## Customer Data", b):
        st.write(element)
def analyze_data():
    """Render a profiling report of the module-level table `b`.

    Writes a markdown heading, builds a ``ProfileReport`` titled
    "Profiling Report" from `b`, and embeds it in the page with
    ``st_profile_report``.
    """
    # The profiling packages are imported inside the function (presumably so
    # their import cost is only paid when this page is opened). `st` is
    # already available from the module-level import, and no Tk matplotlib
    # backend is imported here because nothing in this function uses it and
    # it fails to import on hosts without Tk.
    from ydata_profiling import ProfileReport
    from streamlit_pandas_profiling import st_profile_report

    st.write("## Analyze the Data:")
    profile = ProfileReport(b, title="Profiling Report")
    st_profile_report(profile)
def Clustering():
    """Draw a seaborn pair plot of `b`, coloured by the 'Cluster' column.

    The plot is created first, then a markdown heading is written, and
    finally the plot object is passed to ``st.pyplot``.
    """
    pair_grid = sns.pairplot(data=b, hue="Cluster", palette="Spectral")
    st.write("## Customer Characteristics Based on Clusters")
    st.pyplot(pair_grid)