-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
executable file
·77 lines (59 loc) · 3.34 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# Dataset locations. All paths are relative, so the script expects the
# "UCI HAR Dataset" folder to sit in the current working directory.
dataDir <- "UCI HAR Dataset"
testDir <- "test"
trainDir <- "train"
# Load the test and the training measurements and stack them row-wise into a
# single data set: the test rows come first, the training rows follow.
testData <- read.table(file.path(dataDir, testDir, "X_test.txt"))
trainData <- read.table(file.path(dataDir, trainDir, "X_train.txt"))
allData <- rbind(testData, trainData)
## Objective 1 fulfilled:
## 1. Merges the training and the test sets to create one data set.
# Label the columns of allData with the variable names from features.txt.
# The second column of features.txt is used as the list of names, one per
# column of allData, in file order.
features <- read.table(file.path(dataDir, "features.txt"))
colNames <- features[[2]]
colnames(allData) <- colNames
## Objective 4 fulfilled:
## 4. Appropriately labels the data set with descriptive variable names.
# Keep only the mean and standard deviation of each measurement.
# Those variables are recognised by name: the column name contains the
# literal text "-mean()" or "-std()" (the parentheses are escaped because
# they are regex metacharacters).
columns <- which(grepl(pattern = "-mean\\(\\)|-std\\(\\)", x = colNames))
allData <- allData[, columns]
## Objective 2 fulfilled:
## 2. Extracts only the measurements on the mean and standard deviation for each measurement.
# Add the activity for each measurement.
# Read the activity ids of the test and the training set and stack them in
# the same order (test first, then train) as the measurements in allData.
testActivities <- read.table(file.path(dataDir, testDir, "y_test.txt"))
trainActivities <- read.table(file.path(dataDir, trainDir, "y_train.txt"))
allActivities <- rbind(testActivities, trainActivities)
names(allActivities)[1] <- "ActivityId"
# cbind() on data frames can recycle a shorter input instead of failing, so
# make sure there is exactly one activity id per measurement row.
stopifnot(nrow(allActivities) == nrow(allData))
# Read the activity labels (column 1: id, column 2: description) and
# translate every activity id into its description.
activityLabels <- read.table(file.path(dataDir, "activity_labels.txt"))
# Look the id up in the id column instead of using it as a row index, so the
# result does not depend on the row order of activity_labels.txt.
allActivityLabels <- activityLabels[
  match(allActivities$ActivityId, activityLabels[, 1]),
  2
]
# Prepend the descriptions to the data set as the first column, "Activity".
allData <- cbind(Activity = allActivityLabels, allData)
## Objective 3 fulfilled:
## 3. Uses descriptive activity names to name the activities in the data set
## The column with activity names is labeled "Activity", according to
## objective 4.
# Save the dataset as a txt file
write.table(allData, file = "allData.txt", row.names = FALSE)
# Create a second independent tidy data set with the average of each variable
# for each activity and each subject.
# Read the subject ids of the test and the training set and stack them in the
# same order (test first, then train) as the rows of allData.
subjectTest <- read.table(file.path(dataDir, testDir, "subject_test.txt"))
subjectTrain <- read.table(file.path(dataDir, trainDir, "subject_train.txt"))
subjects <- rbind(subjectTest, subjectTrain)
# cbind() on data frames can recycle a shorter input instead of failing, so
# make sure there is exactly one subject id per measurement row.
stopifnot(nrow(subjects) == nrow(allData))
# Merge the subjects and the measurements to form a new dataset.
# Column 1 is the subject id, column 2 is "Activity", and every remaining
# column is a measurement.
subjectData <- cbind(subjects, allData)
# Average every measurement column per (subject, activity) pair.
# - The measurement columns are selected by dropping the two key columns;
#   drop = FALSE keeps a data frame (and the column name) even if only a
#   single measurement column is left.
# - Naming the grouping list labels the key columns of the result directly,
#   without depending on the auto-generated subject column name.
avgSubjectActivityData <- aggregate(
  subjectData[, -(1:2), drop = FALSE],
  by = list(Subject = subjectData[[1]], Activity = subjectData[["Activity"]]),
  FUN = mean
)
# Mark every averaged measurement column with an "Avg-" prefix.
names(avgSubjectActivityData)[-(1:2)] <- paste0(
  "Avg-", names(avgSubjectActivityData)[-(1:2)]
)
# Sort the rows by subject and, within a subject, by activity.
avgSubjectActivityData <- avgSubjectActivityData[
  order(avgSubjectActivityData$Subject, avgSubjectActivityData$Activity),
]
# Save the dataset as a txt file
write.table(avgSubjectActivityData, file = "avgSubjectActivityData.txt", row.names = FALSE)