-
Notifications
You must be signed in to change notification settings - Fork 6
/
Sentiment Analysis.R
64 lines (56 loc) · 1.84 KB
/
Sentiment Analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
library(tidyverse)
library(stringr)
library(tidytext)
library(readxl)
library(xlsx)
# Score the daily news workbooks with the AFINN lexicon and write one workbook
# per year, "Data/Sentiment Score/<year>.xlsx", holding one row per scored day
# (date label, overall sentiment score).

# Load the lexicon once; it is the same for every file.
afinn <- get_sentiments("afinn")
# Older tidytext releases name the AFINN score column `score`; newer ones
# (lexicon served through textdata) name it `value`. Use whichever exists so
# the daily total is not silently computed from a missing column.
afinn_score_col <- intersect(c("value", "score"), names(afinn))[1]
if (is.na(afinn_score_col)) {
  stop("AFINN lexicon has neither a `value` nor a `score` column",
       call. = FALSE)
}

# Returns the summed AFINN score of all non-stop-words in one day's articles.
#   path      - path of the day's .xlsx news file
#   lexicon   - AFINN lexicon table with a `word` column
#   score_col - name of the numeric score column in `lexicon`
score_news_file <- function(path, lexicon, score_col) {
  articles <- read_excel(path)
  # The article text is taken from column X3 of the workbook.
  text_tb <- tibble(text = articles$X3)
  if (nrow(text_tb) == 0L) {
    return(0)
  }
  # Tokenize every article into one word per row, in a single pass.
  words <- unnest_tokens(text_tb, word, text)
  # Remove all stop words.
  words <- anti_join(words, stop_words, by = "word")
  # Keep every occurrence of a word that has a sentiment score, so repeated
  # words count once per occurrence.
  scored <- inner_join(words, lexicon, by = "word")
  as.numeric(sum(scored[[score_col]]))
}

# NOTE(review): only days 1-15 of each month are scored, as before; confirm
# whether the full month is intended.
days_scored <- 1:15

for (year in 2012:2017) {
  # All (month, day) pairs for this year, day varying fastest.
  calendar <- expand.grid(day = days_scored, month = 1:12)
  date_labels <- paste0(calendar$month, "-", calendar$day, "-", year)
  news_paths <- paste0("Data/News/", year, "/", date_labels, ".xlsx")

  # Skip days without a news file instead of aborting the whole run.
  found <- file.exists(news_paths)
  if (!all(found)) {
    warning("Skipping ", sum(!found), " missing news file(s) for ", year,
            call. = FALSE)
  }
  if (!any(found)) {
    warning("No news files found for ", year, "; no score file written",
            call. = FALSE)
    next
  }

  day_totals <- vapply(
    news_paths[found],
    score_news_file,
    numeric(1),
    lexicon = afinn,
    score_col = afinn_score_col,
    USE.NAMES = FALSE
  )

  # Start from a fresh table each year so a year's file holds only that year.
  year_scores <- data.frame(
    date = date_labels[found],
    score = day_totals,
    stringsAsFactors = FALSE
  )
  # Row names are written (the write.xlsx default), so the sheet has three
  # columns: row number, date, score.
  write.xlsx(year_scores, paste0("Data/Sentiment Score/", year, ".xlsx"))
}
# Combine the yearly score workbooks (2012-2017) into a single table named
# `aggregate` with the columns number, date and score.
tablelabel <- c("number", "date", "score")

# Read every year's workbook first, then bind once; this avoids growing the
# table inside a loop and avoids seeding it with an all-NA placeholder row.
yearly_scores <- lapply(2012:2017, function(year) {
  scores <- read_excel(paste0("Data/Sentiment Score/", year, ".xlsx"))
  # Each workbook is expected to hold exactly three columns, in this order.
  colnames(scores) <- tablelabel
  scores
})
aggregate <- do.call(rbind, yearly_scores)