-
Notifications
You must be signed in to change notification settings - Fork 0
/
ep_uid.py
26 lines (20 loc) · 1.04 KB
/
ep_uid.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from embed import Sentence_Embedding
from cluster import Clustering_Visualization
from dataset import Load_Preprocess
# Driver script: load the dataset, embed the problems whose uid starts with
# 'EP', then show a dendrogram, the K-means cluster numbers and a K-means
# scatter plot for them.

# Path of the dataset file handed to the loader.
directory = '../Dataset_Clusterin_Problems.csv'

# Load the data and feed it straight into the preprocessing step.
load_preprocess = Load_Preprocess(directory)
df = load_preprocess.preprocess(load_preprocess.load_data())

# Computed from the preprocessed frame; not read again in this script.
df_unique_labels = load_preprocess.unique_uid(df)

# Build the embedding helper for the 'EP' uids using the 'mpnet' option,
# collect the uid/problem pairs and embed them.
embed = Sentence_Embedding(df, 'EP', 'mpnet')
uid_problem_pair = embed.uid_problem_pair()
uid_sentence_embed_pair = embed.sentence_embed(uid_problem_pair)

# Shows the dendrogram for the embedded 'EP' problems.
clustering = Clustering_Visualization(uid_sentence_embed_pair)
clustering.dendrogram_draw()

# Shows the cluster number of the extracted problems of the uids starting
# with 'EP'.
labels_out = clustering.k_means_labels(uid_problem_pair)

# Shows the scatter plot of the K-means clustering: the first item of
# labels_out is passed as the labels, the second as clf.
kmeans_labels = labels_out[0]
kmeans_clf = labels_out[1]
clustering.k_means_clustering(labels=kmeans_labels, clf=kmeans_clf)