-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.r
122 lines (112 loc) · 3.74 KB
/
main.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Load required libraries, installing any that are missing first.
libraries <- c(
  "audio", "rjson", "httr", "tm",
  "SnowballC", "wordcloud", "RColorBrewer"
)
# invisible() keeps lapply()'s result list from being echoed at top level.
invisible(lapply(libraries, function(lib) {
  # requireNamespace() only checks availability; it does not attach.
  if (!requireNamespace(lib, quietly = TRUE)) {
    install.packages(lib)
  }
  # Attach after the optional install so a freshly installed package is
  # really loaded; library() raises an error if it is still unavailable
  # instead of silently returning FALSE the way require() does.
  library(lib, character.only = TRUE)
}))
# Function to record audio
#
# Records `rec_time` seconds of single-channel audio at 44100 Hz with the
# `audio` package, writes it to "temp.wav" in the working directory and
# returns that file name.
#
# Args:
#   rec_time: recording length in seconds (a single positive number).
# Returns:
#   The character string "temp.wav".
# Errors:
#   Stops when `rec_time` is not a single positive, finite number.
record_audio <- function(rec_time) {
  # Validate up front: an NA or non-positive duration (e.g. from a mistyped
  # prompt answer) would otherwise fail obscurely when sizing the buffer.
  if (!is.numeric(rec_time) || length(rec_time) != 1L ||
        is.na(rec_time) || !is.finite(rec_time) || rec_time <= 0) {
    stop("`rec_time` must be a single positive number of seconds",
         call. = FALSE)
  }

  library(audio)

  sample_rate <- 44100
  wav_path <- "temp.wav"

  # NOTE(review): NULL presumably selects the platform's default audio
  # driver -- confirm against the `audio` package documentation.
  set.audio.driver(NULL)

  # Buffer handed to record(); one slot per sample for the whole duration.
  samples <- rep(NA_real_, sample_rate * rec_time)

  print("Start speaking...")
  audio_obj <- record(samples, sample_rate, 1)
  # Pause one second longer than the requested duration before reading the
  # recorded data back.
  wait(rec_time + 1)
  print("Stop speaking")

  # save.wave() writes the file itself, so no prior file.create() is needed.
  save.wave(audio_obj$data, wav_path)
  print("Audio saved")

  wav_path
}
# Function to transcribe audio
#
# Uploads the audio file at `audio_path` to a speech-to-text HTTP endpoint,
# extracts the transcript from the JSON response, writes it to "temp.txt" in
# the working directory and returns that file name.
#
# The subscription key and endpoint URL below are placeholders that must be
# replaced before the function can succeed.
#
# Args:
#   audio_path: path of an existing audio file (sent as "audio/wav").
# Returns:
#   The character string "temp.txt".
# Errors:
#   Stops when the file does not exist, when the HTTP request returns an
#   error status, or when the response lacks the expected field.
transcribe_audio <- function(audio_path) {
  # Fail early with a clear message instead of erroring during the upload.
  if (!is.character(audio_path) || length(audio_path) != 1L ||
        is.na(audio_path) || !file.exists(audio_path)) {
    stop("`audio_path` must be the path of an existing audio file",
         call. = FALSE)
  }

  library(httr)
  library(rjson)

  headers <- c(
    `Ocp-Apim-Subscription-Key` = "[replace with your own key]",
    `Content-Type` = "audio/wav"
  )
  params <- list(language = "en-US")

  print("Transcribing audio file...")
  response <- httr::POST(
    url = "[replace with your own url]",
    httr::add_headers(.headers = headers),
    query = params,
    body = httr::upload_file(audio_path)
  )
  # Turn 4xx/5xx responses into an R error rather than parsing an error body.
  httr::stop_for_status(response)

  # Qualified call: other JSON packages also export a fromJSON().
  result <- rjson::fromJSON(
    httr::content(response, "text", encoding = "UTF-8")
  )
  print("Transcrb complete")

  txt_output <- as.data.frame(result)
  # NOTE(review): the transcript is assumed to be the 4th field of the
  # response -- confirm against the service's response schema.
  if (nrow(txt_output) < 1L || ncol(txt_output) < 4L) {
    stop("Unexpected transcription response: transcript field not found",
         call. = FALSE)
  }
  # as.character() guards against a factor or numeric column, which
  # writeLines() would reject.
  txt_input <- as.character(txt_output[1, 4])

  writeLines(txt_input, "temp.txt")
  "temp.txt"
}
# Function to generate word cloud from text
#
# Reads the text file at `txt_path`, cleans it with `tm` (separator
# characters to spaces, lower-casing, number / stop-word removal, whitespace
# squeezing, stemming), prints the ten most frequent terms and draws a word
# cloud of the term frequencies on the active graphics device.
#
# Args:
#   txt_path: path of an existing plain-text file.
# Returns:
#   The value of wordcloud(); invisible NULL when the file holds no usable
#   text (a warning is raised in that case).
# Errors:
#   Stops when `txt_path` is not the path of an existing file.
generate_wordcloud <- function(txt_path) {
  if (!is.character(txt_path) || length(txt_path) != 1L ||
        is.na(txt_path) || !file.exists(txt_path)) {
    stop("`txt_path` must be the path of an existing text file",
         call. = FALSE)
  }

  library("tm")
  library("SnowballC")
  library("wordcloud")
  library("RColorBrewer")

  # warn = FALSE: a missing final newline in a user-chosen file is harmless.
  text <- readLines(txt_path, warn = FALSE)
  if (!any(nzchar(trimws(text)))) {
    warning("No text found in ", txt_path, "; nothing to plot", call. = FALSE)
    return(invisible(NULL))
  }

  docs <- Corpus(VectorSource(text))
  inspect(docs)

  # Replace separator characters with spaces so adjoining words split apart.
  to_space <- content_transformer(function(x, pattern) gsub(pattern, " ", x))
  for (pattern in c("/", "@", "\\|")) {
    docs <- tm_map(docs, to_space, pattern)
  }

  # Convert the text to lower case
  docs <- tm_map(docs, content_transformer(tolower))
  # Remove numbers
  docs <- tm_map(docs, removeNumbers)
  # Remove english common stopwords
  docs <- tm_map(docs, removeWords, stopwords("english"))
  # Remove your own stop words (specify them as a character vector)
  docs <- tm_map(docs, removeWords, c("blaa", "blaa2"))
  # Punctuation removal is intentionally left disabled:
  #docs <- tm_map(docs, removePunctuation)
  # Eliminate extra white spaces
  docs <- tm_map(docs, stripWhitespace)
  # Text stemming
  docs <- tm_map(docs, stemDocument)

  # Term frequencies across all documents, most frequent first.
  dtm <- TermDocumentMatrix(docs)
  m <- as.matrix(dtm)
  v <- sort(rowSums(m), decreasing = TRUE)
  d <- data.frame(word = names(v), freq = v)
  if (nrow(d) == 0L) {
    warning("No terms left after cleaning; nothing to plot", call. = FALSE)
    return(invisible(NULL))
  }

  # Inside a function a bare head() is discarded; print it explicitly so the
  # top terms are actually shown.
  print(head(d, 10))

  # Fixed seed so the cloud layout is reproducible between runs.
  set.seed(1234)
  wordcloud(
    words = d$word, freq = d$freq, min.freq = 1,
    max.words = 500, random.order = FALSE, rot.per = 0.35,
    colors = brewer.pal(8, "Dark2")
  )
}
# Function to run the program
#
# Shows a four-item menu, reads the user's choice and runs the matching
# pipeline: record -> transcribe -> word cloud, text file -> word cloud,
# wav file -> transcribe -> word cloud, or exit. After an invalid answer the
# menu is shown again (interactive sessions only).
#
# Returns:
#   invisible NULL.
main <- function() {
  menu_text <- paste0(
    "1) Record audio (beta - may not work properly)\n",
    "2) Import txt file\n",
    "3) Import wav file\n",
    "4) Exit\n"
  )

  repeat {
    cat(menu_text)
    # Keep the answer as a string: switch() on a number selects by position
    # and ignores the "1".."4" names, so invalid numbers would never reach
    # the default branch.
    choice <- trimws(readline(prompt = "Enter your choice: "))

    handled <- switch(choice,
      "1" = {
        rec_time <- suppressWarnings(as.integer(
          readline(prompt = "How much audio you want to record ? (sec): ")
        ))
        if (is.na(rec_time) || rec_time <= 0L) {
          cat("Invalid duration. Please try again.\n")
          FALSE
        } else {
          audio_path <- record_audio(rec_time)
          txt_path <- transcribe_audio(audio_path)
          generate_wordcloud(txt_path)
          TRUE
        }
      },
      "2" = {
        txt_path <- file.choose()
        generate_wordcloud(txt_path)
        TRUE
      },
      "3" = {
        audio_path <- file.choose()
        txt_path <- transcribe_audio(audio_path)
        generate_wordcloud(txt_path)
        TRUE
      },
      "4" = {
        cat("Exiting...\n")
        TRUE
      },
      {
        cat("Invalid choice. Please try again.\n")
        FALSE
      }
    )

    # In a non-interactive session readline() returns "" immediately, so
    # re-prompting would loop forever; stop after a single pass there.
    if (handled || !interactive()) {
      break
    }
  }

  invisible(NULL)
}
# Run the main function: the menu is shown as soon as this script is executed.
main()