-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
47 lines (45 loc) · 2.49 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
## Load needed package
## plyr supplies mapvalues(), ddply() and numcolwise(), all used below.
library(plyr)

## Read all the tables
## Every path is relative to the current working directory.
test.subjects <- read.table("test/subject_test.txt")
test.data <- read.table("test/X_test.txt")
test.labels <- read.table("test/y_test.txt")
train.subjects <- read.table("train/subject_train.txt")
train.data <- read.table("train/X_train.txt")
train.labels <- read.table("train/y_train.txt")
column.names <- read.table("features.txt")

## Merge the training and the test sets to create one data set.
## Test rows are stacked before train rows in all three pieces so that the
## subject, measurement and activity rows stay aligned after cbind().
test.train <- rbind(test.data, train.data)
subjects <- rbind(test.subjects, train.subjects)
activities <- rbind(test.labels, train.labels)
data <- cbind(subjects, test.train, activities)

## The second column of features.txt holds the measurement names.
columns <- as.vector(column.names[, 2])
## Fail early when the name list does not line up with the merged columns;
## a mismatch would otherwise leave columns mislabelled.
stopifnot(length(columns) + 2 == ncol(data))
names(data) <- c("subject", columns, "activity")

## Extract only the measurements on the mean and standard deviation for each
## measurement.
columns <- as.vector(names(data))
## Lower-case "mean" also matches the "meanFreq" columns; those are kept and
## renamed to "...Frequency" further down.
columns.mean <- as.vector(grep("mean", columns))
columns.std <- as.vector(grep("std", columns))
## Keep the first (subject) and last (activity) columns. The last position is
## taken from the data instead of being hard-coded.
col <- c(1, columns.mean, columns.std, ncol(data))
col <- sort(col, decreasing = FALSE)
data.mean.std <- data[, col]

## Use descriptive activity names to name the activities in the data set
data.mean.std$activity <- as.factor(data.mean.std$activity)
data.mean.std$activity <- mapvalues(
  data.mean.std$activity,
  from = c(1, 2, 3, 4, 5, 6),
  to = c("walking", "walking upstairs", "walking downstairs",
         "sitting", "standing", "laying")
)

# Make syntactically valid names
# Parentheses are dropped first so make.names() does not turn them into dots.
names(data.mean.std) <- gsub('\\(|\\)', "", names(data.mean.std), perl = TRUE)
names(data.mean.std) <- make.names(names(data.mean.std))

# Make clearer names
# Pattern -> replacement pairs, applied strictly in this order: "GyroJerk"
# has to be rewritten before the shorter "Gyro", and the "Freq" patterns rely
# on ".mean" already having been rewritten around them.
name.fixes <- c(
  "Acc" = "Acceleration",
  "GyroJerk" = "AngularAcceleration",
  "Gyro" = "AngularSpeed",
  "Mag" = "Magnitude",
  "^t" = "TimeDomain.",
  "^f" = "FrequencyDomain.",
  "\\.mean" = ".Mean",
  "\\.std" = ".StandardDeviation",
  "Freq\\." = "Frequency.",
  "Freq$" = "Frequency"
)
for (pattern in names(name.fixes)) {
  names(data.mean.std) <- gsub(
    pattern, name.fixes[[pattern]], names(data.mean.std)
  )
}

## Create a second, independent tidy data set with the average of each variable
## for each activity and each subject.
## numcolwise(mean) averages every numeric column within each
## subject/activity group.
tidy.data <- ddply(data.mean.std, c("subject", "activity"), numcolwise(mean))
## Spell the argument out in full: the original "row.name" only worked through
## partial argument matching.
write.table(tidy.data, "tidy.data.txt", row.names = FALSE)