SentimentAnalysis_FoodReview_R
# Install packages if needed
install.packages("tm")           # for text mining
install.packages("SnowballC")    # for text stemming
install.packages("wordcloud2")   # word-cloud generator
install.packages("RColorBrewer") # color palettes
install.packages("syuzhet")      # sentiment analysis (emotion scores for plotting)
install.packages("sentimentr")   # sentiment analysis per sentence
install.packages("ggplot2")      # for plotting graphs
# Load the packages
library("tm")
library("SnowballC")
library("wordcloud2")
library("RColorBrewer")
library("syuzhet")
library("sentimentr")
library("ggplot2")
library("dplyr")
# Read the review data from an Excel file on the local machine
library("readxl")
text <- read_excel("FoodReviews.xlsx")
# Load the data as a corpus
TextDoc <- Corpus(VectorSource(text$Text))
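# Optional sanity check: inspect the first two documents to confirm the
# corpus loaded as expected (the indices here are illustrative)
inspect(TextDoc[1:2])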
##### Cleaning up Text Data #####################################################
# Get the default English stopword list (174 words in tm)
default_stopwords <- stopwords()
# Keep negations out of the stopword list so they survive cleaning
words_to_exclude <- c("not", "doesn't")
# Create a custom stopword list by removing the words to exclude
custom_stopwords <- setdiff(default_stopwords, words_to_exclude)
custom_stopwords
# Update the TextDoc by removing the custom stopwords
TextDoc <- tm_map(TextDoc, removeWords, custom_stopwords)
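# Quick check (optional): the excluded negations should be absent from the
# custom list, so they remain in the corpus
c("not", "doesn't") %in% custom_stopwords # expect FALSE FALSE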
#####################################################################################
# Replace "/", "@", "|", and "!" with a space
toSpace <- content_transformer(function(x, pattern) gsub(pattern, " ", x))
TextDoc <- tm_map(TextDoc, toSpace, "/")
TextDoc <- tm_map(TextDoc, toSpace, "@")
TextDoc <- tm_map(TextDoc, toSpace, "\\|")
TextDoc <- tm_map(TextDoc, toSpace, "!")
# Convert the text to lower case
TextDoc <- tm_map(TextDoc, content_transformer(tolower))
# Remove numbers
TextDoc <- tm_map(TextDoc, removeNumbers)
# Remove common English stopwords again after lowercasing, reusing the custom
# list so that "not" and "doesn't" are not stripped back out
TextDoc <- tm_map(TextDoc, removeWords, custom_stopwords)
# Remove your own stopwords - specify domain-specific words as a character vector
TextDoc <- tm_map(TextDoc, removeWords, c("service", "dogs", "food")) # example words
# Remove punctuation
TextDoc <- tm_map(TextDoc, removePunctuation)
# Eliminate extra white spaces
TextDoc <- tm_map(TextDoc, stripWhitespace)
# Text stemming - reduce words to their root form
TextDoc <- tm_map(TextDoc, stemDocument)
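# Optional: print one cleaned review to verify the transformations
# (lowercased, no numbers or punctuation, stemmed)
writeLines(as.character(TextDoc[[1]]))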
##########Building the term document matrix#######
# Build a term-document matrix
TextDoc_dtm <- TermDocumentMatrix(TextDoc)
dtm_m <- as.matrix(TextDoc_dtm)
# Sort by descending frequency
dtm_v <- sort(rowSums(dtm_m), decreasing = TRUE)
dtm_d <- data.frame(words = names(dtm_v),freq = dtm_v)
# Display the top 20 most frequent words
head(dtm_d, 20)
# Display the 10 least frequent words
# tail(dtm_d, 10)
dtm_d[1:20,]$freq
dtm_d[1:20,]$words
# Plotting the top 20 most frequent words as a bar chart is a good basic way
# to visualize the word frequency data.
# Plot the most frequent words
barplot(dtm_d[1:20,]$freq, las = 2, names.arg = dtm_d[1:20,]$words,
        col = rainbow(20), main = "Top 20 most frequent words",
        ylab = "WORD FREQUENCIES") # las = 2 draws axis labels perpendicular to the axes
# Generate the word cloud
wordcloud2(dtm_d,
size = 0.4,
shape = 'triangle',
rotateRatio = 0.9,
minSize = 1)
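# wordcloud2 returns an HTML widget rather than a base plot; to keep a copy,
# it can be saved via htmlwidgets (a wordcloud2 dependency). The file name
# below is illustrative.
wc <- wordcloud2(dtm_d, size = 0.4)
htmlwidgets::saveWidget(wc, "wordcloud.html")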
# Word association
# Find terms correlated with selected (stemmed) words
findAssocs(TextDoc_dtm, terms = c("like","flavor", "tast"), corlimit = 0.15)
# Find associations for words that occur at least 1000 times
findAssocs(TextDoc_dtm, terms = findFreqTerms(TextDoc_dtm, lowfreq = 1000), corlimit = 0.20)
# The top 10 words from dtm_d (head(dtm_d, 10)$words) can be passed directly
# as terms to findAssocs
findAssocs(TextDoc_dtm, terms = head(dtm_d, 10)$words, corlimit = 0.15)
# Sentiment scores (syuzhet was loaded above)
input <- iconv(text$Text) # normalize encoding; get_nrc_sentiment expects a character vector
sentiment <- get_nrc_sentiment(input)
head(sentiment, 20)
barplot(colSums(sentiment),
las = 2,
col = rainbow(10),
ylab = 'Count',
main = 'Sentiment Scores on Reviews')
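# Beyond the NRC emotion counts plotted above, syuzhet can also score overall
# polarity per review; a brief optional sketch using the default "syuzhet" method
syuzhet_scores <- get_sentiment(input, method = "syuzhet")
summary(syuzhet_scores)
hist(syuzhet_scores, main = "Distribution of review polarity", xlab = "Syuzhet score")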
# Plot two - share of words associated with each emotion, expressed as a percentage
barplot(
  sort(colSums(prop.table(sentiment[, 1:8]))) * 100,
  horiz = TRUE,
  cex.names = 0.7,
  las = 1,
  main = "Emotions in Text", xlab = "Percentage"
)
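# sentimentr was loaded above but not yet used; as noted at installation, it
# scores sentiment sentence by sentence. A minimal sketch on the raw reviews
# (column names follow the sentimentr documentation).
sentences <- get_sentences(input)
sentence_scores <- sentiment(sentences)
head(sentence_scores, 10) # element_id, sentence_id, word_count, sentiment
# Aggregate to one average score per review
review_scores <- sentiment_by(sentences)
head(review_scores, 10) # ave_sentiment gives the review-level polarity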