-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
58 lines (46 loc) · 2.96 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
### Input file locations, given as relative paths
### (confirm every path is correct before running this script)
activity_labels.txt <- "UCI HAR Dataset/activity_labels.txt"
features.txt        <- "UCI HAR Dataset/features.txt"
subject_train.txt   <- "UCI HAR Dataset/train/subject_train.txt"
X_train.txt         <- "UCI HAR Dataset/train/X_train.txt"
y_train.txt         <- "UCI HAR Dataset/train/y_train.txt"
subject_test.txt    <- "UCI HAR Dataset/test/subject_test.txt"
X_test.txt          <- "UCI HAR Dataset/test/X_test.txt"
y_test.txt          <- "UCI HAR Dataset/test/y_test.txt"
#--------------------------------------------------------------------------------------------------------------
### Use a regular expression to locate the features that are either means
### (mean) or standard deviations (std)
# The features file is read as space-separated columns: column 2 is matched
# as the feature name, and column 1 is used later as the column position of
# that feature in the measure files.
# FALSE is spelled out because the T/F shortcuts are ordinary variables that
# can be reassigned.
all_feat <- read.table(features.txt, header = FALSE, sep = " ",
                       stringsAsFactors = FALSE)
# Matches "-mean" or "-std" immediately followed by an uppercase letter or
# by an open parenthesis (\\x28 is the hex escape for "(").
feat_pattern <- '-(mean|std)([A-Z]|\\x28)'
feat_idx <- grep(pattern = feat_pattern, x = all_feat[, 2])
# Keep every column of the matching rows.
feat <- all_feat[feat_idx, ]
### Load only the measure columns that match the features selected above
# A column class of "NULL" tells read.table to skip that column, so every
# column starts out skipped and only the selected ones are switched to
# numeric. feat[, 1] is used as the column positions of the selected features.
colClasses <- rep("NULL", nrow(all_feat))
colClasses[feat[, 1]] <- "numeric"
# FALSE is spelled out because the T/F shortcuts can be reassigned.
measures_train <- read.table(X_train.txt, header = FALSE,
                             colClasses = colClasses)
measures_test <- read.table(X_test.txt, header = FALSE,
                            colClasses = colClasses)
# Name the loaded columns after the selected features.
colnames(measures_train) <- feat[, 2]
colnames(measures_test) <- feat[, 2]
### Load activity columns and convert the numeric codes into readable
### strings via factoring
# act_lbl column 1 supplies the factor levels (the codes found in the y
# files) and column 2 supplies the label shown for each code.
# FALSE is spelled out because the T/F shortcuts can be reassigned.
act_lbl <- read.table(activity_labels.txt, header = FALSE, sep = " ")
activities_train <- read.table(y_train.txt, header = FALSE,
                               col.names = "ACTIVITY")
activities_train$ACTIVITY <- factor(activities_train$ACTIVITY,
                                    levels = act_lbl[, 1],
                                    labels = act_lbl[, 2])
activities_test <- read.table(y_test.txt, header = FALSE,
                              col.names = "ACTIVITY")
activities_test$ACTIVITY <- factor(activities_test$ACTIVITY,
                                   levels = act_lbl[, 1],
                                   labels = act_lbl[, 2])
### Load subject columns
# Each file is read into a data frame whose single column is named SUBJECT.
# FALSE is spelled out because the T/F shortcuts can be reassigned.
subjects_train <- read.table(subject_train.txt, header = FALSE,
                             col.names = "SUBJECT")
subjects_test <- read.table(subject_test.txt, header = FALSE,
                            col.names = "SUBJECT")
#--------------------------------------------------------------------------------------------------------------
### Bind subject, activity and measure columns side by side for each set,
### then stack the training rows on top of the test rows
train <- cbind(subjects_train, activities_train, measures_train)
test  <- cbind(subjects_test, activities_test, measures_test)
data  <- rbind(train, test)
### Function to calculate the measures' averages of one group
# group: data frame whose first two columns are carried through unchanged
#        and whose remaining columns are numeric measures.
# Returns a one-row data frame: columns 1-2 taken from the first row of the
# group, followed by the mean of every measure column.
getAverageMeasures <- function(group) {
  # drop = FALSE keeps a data frame even when only one measure column is
  # present; a bare vector would make colMeans() fail.
  measures <- group[, -(1:2), drop = FALSE]
  means <- colMeans(measures)
  # t() turns the named vector of means into a single row whose column names
  # are the measure names.
  cbind(group[1, 1:2], t(means))
}
### Split the data by its first two columns (subject and activity), calculate
### the averages for each group, then merge the per-group rows
# drop = TRUE skips subject/activity combinations that have no rows; without
# it each empty combination would contribute a row of NA identifiers and NaN
# averages.
data <- split(data, data[, 1:2], drop = TRUE)
data <- lapply(data, getAverageMeasures)
data <- do.call("rbind", data)
### Output the final result as a text file in the working directory
# Row names are omitted from the file. FALSE is spelled out because the T/F
# shortcuts can be reassigned.
write.table(data, file = "tidy_data_set.txt", row.names = FALSE)