-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathclustering_ver2.R
84 lines (60 loc) · 2.07 KB
/
clustering_ver2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# CLUSTERING IRIS.R
#
#
# Version:
#
# Date:
# Author:
#
#TOC> ==========================================================================
#TOC>
#TOC> Section Title Line
#TOC> --------------------------------------------------------------------------
#TOC> 1 Packages
#TOC> 2 Dataset exploration
#TOC> 3 Clustering
#TOC> 4 Multidimensional clustering
#TOC> ==========================================================================
# ==== 1 Packages =========================================================
# Make sure ggplot2 is installed, then attach it.
# requireNamespace() only checks availability (it does not attach), so the
# unconditional library() call below is what loads the package -- and, unlike
# require(), it stops with an error if attaching fails.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
# ==== 2 Dataset exploration ===============================================
# Work on a local copy of the built-in iris data set.
my_data <- iris

# Let's explore the dataset: column types first, then the spread of the
# petal lengths.
str(my_data)
summary(my_data$Petal.Length)

# Let's explore the distribution of petal length across the different species.
# Built with ggplot() + geom_density() because qplot() is deprecated as of
# ggplot2 3.4.0; the aesthetics are the same ones the qplot() call mapped.
ggplot(
  my_data,
  aes(x = Petal.Length, colour = Species, fill = Species)
) +
  geom_density()
# ==== 3 Clustering ========================================================
# Can we extract the different species into clusters using petal length alone?
# [[ ]] pulls the column out as a plain numeric vector, which kmeans() accepts.
my_data2 <- my_data[["Petal.Length"]]
num_clusters <- 3

# Fix the RNG seed so the randomly chosen starting centres are reproducible.
# nstart = 20 tries 20 random starts.
set.seed(1234)
result <- kmeans(my_data2, centers = num_clusters, nstart = 20)

# Let's see how the original species have been assigned into clusters
# (rows: cluster id, columns: species).
table(result$cluster, my_data$Species)
# How many measurements have been misclassified?

# Let's plot petal length per cluster: a histogram of Petal.Length with the
# bars outlined by assigned cluster. geom_histogram() is what the deprecated
# qplot() picked automatically for a single continuous variable.
ggplot(my_data, aes(x = Petal.Length, colour = factor(result$cluster))) +
  geom_histogram()
# ==== 4 Multidimensional clustering =======================================
# Can we extract the different species into clusters using 2 attributes?
# Selecting two columns keeps a data frame, which kmeans() coerces to a matrix.
my_data3 <- my_data[, c("Petal.Width", "Petal.Length")]
num_clusters <- 3

# Same seed as the one-dimensional run, so the random starts are reproducible.
set.seed(1234)
result <- kmeans(my_data3, centers = num_clusters, nstart = 20)

# Cluster id (rows) against the true species (columns).
table(result$cluster, my_data$Species)

# Let's plot petal length and petal width per cluster: a scatter plot coloured
# by assigned cluster. ggplot() + geom_point() replaces the deprecated qplot().
ggplot(
  my_data,
  aes(x = Petal.Length, y = Petal.Width, colour = factor(result$cluster))
) +
  geom_point()