perf: set all defaults to the WebAudio ones in JavaScript

ReadAlongs · Jun 1, 2022 · acf016d · acf016d
1 parent 1d2b971
commit acf016d
Show file tree

Hide file tree

Showing 6 changed files with 63 additions and 12 deletions.
diff --git a/docs/source/config_params.rst b/docs/source/config_params.rst
@@ -31,8 +31,8 @@ parameters, may change in a subsequent release of SoundSwallower.
    :keyword float beam: Beam width applied to every frame in Viterbi search (smaller values mean wider beam), defaults to ``1e-48``
    :keyword float wbeam: Beam width applied to word exits, defaults to ``7e-29``
    :keyword float pbeam: Beam width applied to phone transitions, defaults to ``1e-48``
-   :keyword float samprate: Sampling rate, defaults to ``16000.0``
-   :keyword int nfft: Size of FFT, defaults to ``512``
+   :keyword float samprate: Sampling rate, defaults to ``16000.0`` in C and Python and ``44100.0`` in JavaScript
+   :keyword int nfft: Size of FFT, defaults to ``512`` in C and Python and ``2048`` in JavaScript
    :keyword str featparams: File containing feature extraction parameters.
    :keyword str mdef: Model definition input file
    :keyword str senmgau: Senone to codebook mapping input file (usually not needed)
@@ -80,6 +80,7 @@ parameters, may change in a subsequent release of SoundSwallower.
    :keyword int ncep: Number of cep coefficients, defaults to ``13``
    :keyword bool doublebw: Use double bandwidth filters (same center freq), defaults to ``False``
    :keyword int lifter: Length of sin-curve for liftering, or 0 for no liftering, defaults to ``0``
+   :keyword bool input_float32: Input is 32-bit floating point in [-1.0, 1.0], defaults to ``False`` in C and Python and ``True`` in JavaScript
    :keyword str input_endian: Endianness of input data, big or little, ignored if NIST or MS Wav, defaults to ``little``
    :keyword str warp_type: Warping function type (or shape), defaults to ``inverse_linear``
    :keyword str warp_params: Parameters defining the warping function

diff --git a/docs/source/soundswallower.js.rst b/docs/source/soundswallower.js.rst
@@ -65,11 +65,13 @@ Using SoundSwallower under Node.js
 
 Using SoundSwallower-JS in Node.js is mostly straightforward.  Here is
 a fairly minimal example.  First you can record yourself saying some
-digits:
+digits (note that we record in 32-bit floating-point at 44.1kHz, which
+is the default format for WebAudio and thus the default in
+SoundSwallower-JS as well):
 
 .. code-block:: console
 
-   sox -c 1 -r 16000 -b 32 -e floating-point -d digits.raw
+   sox -c 1 -r 44100 -b 32 -e floating-point -d digits.raw
 
 Now run this with ``node``:
 

diff --git a/include/soundswallower/fe.h b/include/soundswallower/fe.h
@@ -79,7 +79,11 @@ extern "C" {
 #endif    
 
 /** Default number of samples per second. */
+#ifdef __EMSCRIPTEN__
+#define DEFAULT_SAMPLING_RATE 44100
+#else
 #define DEFAULT_SAMPLING_RATE 16000
+#endif
 /** Default number of frames per second. */
 #define DEFAULT_FRAME_RATE 100
 /** Default spacing between frame starts (equal to
@@ -88,7 +92,11 @@ extern "C" {
 /** Default size of each frame (410 samples @ 16000Hz). */
 #define DEFAULT_WINDOW_LENGTH 0.025625 
 /** Default number of FFT points. */
+#ifdef __EMSCRIPTEN__
+#define DEFAULT_FFT_SIZE 2048
+#else
 #define DEFAULT_FFT_SIZE 512
+#endif
 /** Default number of MFCC coefficients in output. */
 #define DEFAULT_NUM_CEPSTRA 13
 /** Default number of filter bands used to generate MFCCs. */

diff --git a/js/README.md b/js/README.md
@@ -144,10 +144,12 @@ below for an example.
 
 Okay, let's wreck a nice beach!  Record yourself saying something,
 preferably the sentence "go forward ten meters", using SoX, for
-example:
+example.  Note that we record at 44.1kHz in 32-bit floating point
+format as this is the default under JavaScript (due to WebAudio
+limitations).
 
 ```sh
-sox -c 1 -r 16000 -b 32 -e floating-point -d goforward.raw
+sox -c 1 -r 44100 -b 32 -e floating-point -d goforward.raw
 ```
 
 Now you can load it and recognize it with:

diff --git a/js/test.js b/js/test.js
@@ -72,7 +72,8 @@
     describe("Test decoding", () => {
 	it('Should recognize "go forward ten meters"', async () => {
 	    let decoder = new ssjs.Decoder({
-		fsg: "goforward.fsg"
+		fsg: "goforward.fsg",
+		samprate: 16000
 	    });
 	    await decoder.initialize();
 	    let pcm = await fs.readFile("../tests/data/goforward-float32.raw");
@@ -93,7 +94,8 @@
 	});
 	it('Should accept Float32Array as well as UInt8Array', async () => {
 	    let decoder = new ssjs.Decoder({
-		fsg: "goforward.fsg"
+		fsg: "goforward.fsg",
+		samprate: 16000
 	    });
 	    await decoder.initialize();
 	    let pcm = await fs.readFile("../tests/data/goforward-float32.raw");
@@ -106,7 +108,7 @@
     });
     describe("Test dictionary and FSG", () => {
 	it('Should recognize "_go _forward _ten _meters"', async () => {
-	    let decoder = new ssjs.Decoder();
+	    let decoder = new ssjs.Decoder({samprate: 16000});
 	    await decoder.initialize();
 	    await decoder.add_word("_go", "G OW", false);
 	    await decoder.add_word("_forward", "F AO R W ER D", false);
@@ -130,7 +132,7 @@
     });
     describe("Test loading model for other language", () => {
 	it('Should recognize "avance de dix mètres"', async () => {
-	    let decoder = new ssjs.Decoder({hmm: "fr-fr"});
+	    let decoder = new ssjs.Decoder({hmm: "fr-fr", samprate: 16000});
 	    await decoder.initialize();
 	    let fsg = decoder.create_fsg("goforward", 0, 4, [
 		{from: 0, to: 1, prob: 0.5, word: "avance"},
@@ -164,6 +166,7 @@
 	it('Should recognize "yo gimme four large all dressed pizzas"', async () => {
 	    let decoder = new ssjs.Decoder({
 		jsgf: "pizza.gram",
+		samprate: 16000
 	    });
 	    await decoder.initialize();
 	    let pcm = await fs.readFile("../tests/data/pizza-float32.raw");
@@ -176,7 +179,7 @@
     });
     describe("Test JSGF string", () => {
 	it('Should recognize "yo gimme four large all dressed pizzas"', async () => {
-	    let decoder = new ssjs.Decoder();
+	    let decoder = new ssjs.Decoder({samprate: 16000});
 	    await decoder.initialize();
 	    let fsg = decoder.parse_jsgf(`#JSGF V1.0;
 grammar pizza;
@@ -200,4 +203,38 @@ public <order> = [<greeting>] [<want>] [<quantity>] [<size>] [<style>]
 	    decoder.delete();
 	});
     });
+    describe("Test reinitialize_audio", () => {
+	it('Should recognize "go forward ten meters"', async () => {
+	    let decoder = new ssjs.Decoder({
+		fsg: "goforward.fsg",
+		samprate: 11025
+	    });
+	    await decoder.initialize();
+	    let pcm = await fs.readFile("../tests/data/goforward-float32.raw");
+	    decoder.config.set("samprate", 16000);
+	    await decoder.reinitialize_audio();
+	    await decoder.start();
+	    await decoder.process(pcm, false, true);
+	    await decoder.stop();
+	    assert.equal("go forward ten meters", decoder.get_hyp());
+	    let hypseg = decoder.get_hypseg();
+	    let hypseg_words = []
+	    for (const seg of hypseg) {
+		assert.ok(seg.end >= seg.start);
+		hypseg_words.push(seg.word);
+	    }
+	    assert.deepStrictEqual(hypseg_words,
+				   ["<sil>", "go", "forward",
+				    "(NULL)", "ten", "meters"]);
+	    decoder.delete();
+	});
+    });
+    describe("Test dictionary lookup", () => {
+	it('Should return "W AH N"', async () => {
+	    let decoder = new ssjs.Decoder();
+	    await decoder.initialize();
+	    const phones = decoder.lookup_word("one");
+	    assert.equal("W AH N", phones);
+	});
+    });
 })();
diff --git a/js/test_custom_load.js b/js/test_custom_load.js
@@ -25,7 +25,8 @@
 		// Need to specify this since we made defaultModel null
 		hmm: "en-us",
 		fsg: "goforward.fsg",
-		backtrace: true
+		backtrace: true,
+		samprate: 16000
 	    });
 	    await decoder.initialize();
 	    let pcm = await fs.readFile("../tests/data/goforward-float32.raw");