This repository has been archived by the owner on Sep 18, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspeech_text_speech.js
154 lines (133 loc) · 5.42 KB
/
speech_text_speech.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
/************************************************************************
* Based on the https://github.com/ibmtjbot/tjbot/tree/master/recipes/speech_to_text code
* Author: Sebastian Pech
*
* Original file:
* Copyright 2016 IBM Corp. All Rights Reserved.
*
* Watson Maker Kits
*
* This project is licensed under the Apache License 2.0, see LICENSE.*
*
************************************************************************
*
* Build a talking robot with Watson.
* This module uses Watson Speech to Text and Watson Text to Speech.
* To run: node speech_text_speech.js
*/
var watson = require('watson-developer-cloud'); //to connect to Watson developer cloud
var config = require("./config.js") // to get our credentials and the attention word from the config.js files
var exec = require('child_process').exec;
var fs = require('fs');
var attentionWord = config.attentionWord; //you can change the attention word in the config file
/************************************************************************
* Step #1: Configuring your Bluemix Credentials
************************************************************************
In this step we will be configuring the Bluemix Credentials for Speech to Text
and Text to Speech services.
*/
var speech_to_text = watson.speech_to_text({
username: config.STTUsername,
password: config.STTPassword,
version: 'v1'
});
var text_to_speech = watson.text_to_speech({
username: config.TTSUsername,
password: config.TTSPassword,
version: 'v1'
});
/************************************************************************
* Step #2: Configuring the Microphone
************************************************************************
In this step, we configure your microphone to collect the audio samples as you talk.
See https://www.npmjs.com/package/mic for more information on
microphone input events e.g on error, startcomplete, pause, stopcomplete etc.
*/
// Initiate Microphone Instance to Get audio samples
var mic = require('mic');
var micInstance = mic({ 'rate': '44100', 'channels': '2', 'debug': false, 'exitOnSilence': 6 });
var micInputStream = micInstance.getAudioStream();
micInputStream.on('data', function(data) {
//console.log("Recieved Input Stream: " + data.length);
});
micInputStream.on('error', function(err) {
console.log("Error in Input Stream: " + err);
});
micInputStream.on('silence', function() {
// detect silence.
});
micInstance.start();
console.log("Listening, you may speak now.");
var textStream ;
/************************************************************************
* Step #3: Converting your Speech Commands to Text
************************************************************************
In this step, the audio sample is sent (piped) to "Watson Speech to Text" to transcribe.
The service converts the audio to text and saves the returned text in "textStream"
You can also set the language model for your speech input.
The following language models are available
ar-AR_BroadbandModel
en-UK_BroadbandModel
en-UK_NarrowbandModel
en-US_BroadbandModel (the default)
en-US_NarrowbandModel
es-ES_BroadbandModel
es-ES_NarrowbandModel
fr-FR_BroadbandModel
ja-JP_BroadbandModel
ja-JP_NarrowbandModel
pt-BR_BroadbandModel
pt-BR_NarrowbandModel
zh-CN_BroadbandModel
zh-CN_NarrowbandModel
*/
var recognizeparams = {
content_type: 'audio/l16; rate=44100; channels=2',
interim_results: true,
keywords: [attentionWord],
smart_formatting: true,
keywords_threshold: 0.5,
model: 'en-US_BroadbandModel' // Specify your language model here
};
textStream = micInputStream.pipe(speech_to_text.createRecognizeStream(recognizeparams));
textStream.setEncoding('utf8');
/*********************************************************************
* Step #4: Parsing the Text and create a response
*********************************************************************
In this step, we parse the text to look for attention word and send that sentence
to watson speech to text. You can change it to something else if needed.
Once the attention word is detected,the text is sent for processing.
*/
textStream.setEncoding('utf8');
textStream.on('data', function(str) {
console.log(' ===== Speech to Text ===== : ' + str); // print the text once received
if (str.toLowerCase().indexOf(attentionWord.toLowerCase()) >= 0) {
var res = str.toLowerCase().replace(attentionWord.toLowerCase(), "");
console.log("msg sent:" ,res);
var params = {
text: res,
voice: config.voice,
accept: 'audio/wav'
};
/*********************************************************************
Step #5: Speak out the response
*********************************************************************
In this step, we text is sent out to Watsons Text to Speech service and result is piped to wave file.
Wave files are then played using alsa (native audio) tool.
*/
tempStream = text_to_speech.synthesize(params).pipe(fs.createWriteStream('output.wav')).on('close', function() {
var create_audio = exec('aplay output.wav', function (error, stdout, stderr) {
if (error !== null) {
console.log('exec error: ' + error);
}
});
});
} else {
console.log("Waiting to hear", attentionWord);
}
});
textStream.on('error', function(err) {
console.log(' === Watson Speech to Text : An Error has occurred =====') ; // handle errors
console.log(err) ;
console.log("Press <ctrl>+C to exit.") ;
});