diff --git a/Makefile.am b/Makefile.am index 487f018c4..8c3d4b05f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -111,6 +111,8 @@ noinst_PROGRAMS = celt/tests/test_unit_cwrs32 \ celt/tests/test_unit_types \ opus_compare \ opus_demo \ + lpcnet_demo \ + dump_data \ repacketizer_demo \ silk/tests/test_unit_LPC_inv_pred_gain \ tests/test_opus_api \ @@ -239,6 +241,12 @@ opus_custom_demo_LDADD = libopus.la $(LIBM) endif endif +lpcnet_demo_SOURCES = dnn/lpcnet_demo.c +lpcnet_demo_LDADD = $(LPCNET_OBJ) $(CELT_OBJ) $(LIBM) + +dump_data_SOURCES = dnn/dump_data.c +dump_data_LDADD = $(LPCNET_OBJ) $(CELT_OBJ) $(LIBM) + EXTRA_DIST = opus.pc.in \ opus-uninstalled.pc.in \ opus.m4 \ diff --git a/dnn/dump_data.c b/dnn/dump_data.c index cc272993b..7a0da10be 100644 --- a/dnn/dump_data.c +++ b/dnn/dump_data.c @@ -55,7 +55,7 @@ static void biquad(float *y, float mem[2], const float *x, const float *b, const } } -static float uni_rand() { +static float uni_rand(void) { return rand()/(double)RAND_MAX-.5; } @@ -135,9 +135,6 @@ int main(int argc, char **argv) { LPCNetEncState *st; float noise_std=0; int training = -1; - int encode = 0; - int decode = 0; - int quantize = 0; int burg = 0; srand(getpid()); st = lpcnet_encoder_create(); @@ -151,24 +148,7 @@ int main(int argc, char **argv) { training = 0; } if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1; - if (argc == 5 && strcmp(argv[1], "-qtrain")==0) { - training = 1; - quantize = 1; - } if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0; - if (argc == 4 && strcmp(argv[1], "-qtest")==0) { - training = 0; - quantize = 1; - } - if (argc == 4 && strcmp(argv[1], "-encode")==0) { - training = 0; - quantize = 1; - encode = 1; - } - if (argc == 4 && strcmp(argv[1], "-decode")==0) { - training = 0; - decode = 1; - } if (training == -1) { fprintf(stderr, "usage: %s -train \n", argv0); fprintf(stderr, " or %s -test \n", argv0); @@ -184,23 +164,6 @@ int main(int argc, char **argv) { fprintf(stderr,"Error opening output feature file: %s\n", argv[3]); exit(1); } - if (decode) { - float vq_mem[NB_BANDS] = {0}; - while (1) { - int ret; - unsigned char buf[8]; - float features[4][NB_TOTAL_FEATURES]; - /*int c0_id, main_pitch, modulation, corr_id, vq_end[3], vq_mid, interp_id;*/ - /*ret = fscanf(f1, "%d %d %d %d %d %d %d %d %d\n", &c0_id, &main_pitch, &modulation, &corr_id, &vq_end[0], &vq_end[1], &vq_end[2], &vq_mid, &interp_id);*/ - ret = fread(buf, 1, 8, f1); - if (ret != 8) break; - decode_packet(features, vq_mem, buf); - for (i=0;i<4;i++) { - fwrite(features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat); - } - } - return 0; - } if (training) { fpcm = fopen(argv[4], "wb"); if (fpcm == NULL) { @@ -279,18 +242,11 @@ int main(int argc, char **argv) { compute_noise(&noisebuf[st->pcount*FRAME_SIZE], noise_std); } - if (!quantize) { - process_single_frame(st, ffeat); - if (fpcm) write_audio(st, pcm, &noisebuf[st->pcount*FRAME_SIZE], fpcm, 1); - } + process_single_frame(st, ffeat); + if (fpcm) write_audio(st, pcm, &noisebuf[st->pcount*FRAME_SIZE], fpcm, 1); st->pcount++; /* Running on groups of 4 frames. */ if (st->pcount == 4) { - if (quantize) { - unsigned char buf[8]; - process_superframe(st, buf, ffeat, encode, quantize); - if (fpcm) write_audio(st, pcmbuf, noisebuf, fpcm, 4); - } st->pcount = 0; } /*if (fpcm) fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);*/ diff --git a/dnn/lpcnet.c b/dnn/lpcnet.c index eb179215f..82e27e1ca 100644 --- a/dnn/lpcnet.c +++ b/dnn/lpcnet.c @@ -279,43 +279,3 @@ void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, short *o LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, short *output, int N) { lpcnet_synthesize_impl(lpcnet, features, output, N, 0); } - -#ifndef OPUS_BUILD - -LPCNET_EXPORT int lpcnet_decoder_get_size() -{ - return sizeof(LPCNetDecState); -} - -LPCNET_EXPORT int lpcnet_decoder_init(LPCNetDecState *st) -{ - memset(st, 0, lpcnet_decoder_get_size()); - lpcnet_init(&st->lpcnet_state); - return 0; -} - -LPCNET_EXPORT LPCNetDecState *lpcnet_decoder_create() -{ - LPCNetDecState *st; - st = malloc(lpcnet_decoder_get_size()); - lpcnet_decoder_init(st); - return st; -} - -LPCNET_EXPORT void lpcnet_decoder_destroy(LPCNetDecState *st) -{ - free(st); -} - -LPCNET_EXPORT int lpcnet_decode(LPCNetDecState *st, const unsigned char *buf, short *pcm) -{ - int k; - float features[4][NB_TOTAL_FEATURES]; - decode_packet(features, st->vq_mem, buf); - for (k=0;k<4;k++) { - lpcnet_synthesize(&st->lpcnet_state, features[k], &pcm[k*FRAME_SIZE], FRAME_SIZE); - } - return 0; -} - -#endif diff --git a/dnn/lpcnet_dec.c b/dnn/lpcnet_dec.c deleted file mode 100644 index d8df4d1c8..000000000 --- a/dnn/lpcnet_dec.c +++ /dev/null @@ -1,156 +0,0 @@ -/* Copyright (c) 2017-2019 Mozilla */ -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include -#include -#include "kiss_fft.h" -#include "common.h" -#include -#include "freq.h" -#include "pitch.h" -#include "arch.h" -#include -#include "lpcnet_private.h" -#include "lpcnet.h" - - -typedef struct { - int byte_pos; - int bit_pos; - int max_bytes; - const unsigned char *chars; -} unpacker; - -void bits_unpacker_init(unpacker *bits, const unsigned char *buf, int size) { - bits->byte_pos = 0; - bits->bit_pos = 0; - bits->max_bytes = size; - bits->chars = buf; -} - -unsigned int bits_unpack(unpacker *bits, int nb_bits) { - unsigned int d=0; - while(nb_bits) - { - if (bits->byte_pos == bits->max_bytes) { - fprintf(stderr, "something went horribly wrong\n"); - return 0; - } - d<<=1; - d |= (bits->chars[bits->byte_pos]>>(BITS_PER_CHAR-1 - bits->bit_pos))&1; - bits->bit_pos++; - if (bits->bit_pos==BITS_PER_CHAR) - { - bits->bit_pos=0; - bits->byte_pos++; - } - nb_bits--; - } - return d; -} - -#ifndef OPUS_BUILD -void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, const unsigned char buf[8]) -{ - int c0_id; - int main_pitch; - int modulation; - int corr_id; - int vq_end[3]; - int vq_mid; - int interp_id; - - int i; - int sub; - int voiced = 1; - float frame_corr; - float sign; - unpacker bits; - - bits_unpacker_init(&bits, buf, 8); - c0_id = bits_unpack(&bits, 7); - main_pitch = bits_unpack(&bits, 6); - modulation = bits_unpack(&bits, 3); - corr_id = bits_unpack(&bits, 2); - vq_end[0] = bits_unpack(&bits, 10); - vq_end[1] = bits_unpack(&bits, 10); - vq_end[2] = bits_unpack(&bits, 10); - vq_mid = bits_unpack(&bits, 13); - interp_id = bits_unpack(&bits, 3); - /*fprintf(stdout, "%d %d %d %d %d %d %d %d %d\n", c0_id, main_pitch, modulation, corr_id, vq_end[0], vq_end[1], vq_end[2], vq_mid, interp_id);*/ - - - for (i=0;i<4;i++) RNN_CLEAR(&features[i][0], NB_TOTAL_FEATURES); - - modulation -= 4; - if (modulation==-4) { - voiced = 0; - modulation = 0; - } - if (voiced) { - frame_corr = 0.3875f + .175f*corr_id; - } else { - frame_corr = 0.0375f + .075f*corr_id; - } - for (sub=0;sub<4;sub++) { - float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD; - p *= 1.f + modulation/16.f/7.f*(2*sub-3); - p = MIN16(255, MAX16(33, p)); - features[sub][NB_BANDS] = .02f*(p-100.f); - features[sub][NB_BANDS + 1] = frame_corr-.5f; - } - - features[3][0] = (c0_id-64)/4.f; - for (i=0;i= 4096) { - vq_mid -= 4096; - sign = -1; - } - for (i=0;i \n"); - fprintf(stderr, " lpcnet_demo -decode \n"); - fprintf(stderr, " lpcnet_demo -features \n"); + fprintf(stderr, "usage: lpcnet_demo -features \n"); fprintf(stderr, " lpcnet_demo -synthesis \n"); fprintf(stderr, " lpcnet_demo -plc \n"); fprintf(stderr, " lpcnet_demo -plc_file \n"); @@ -113,9 +109,7 @@ int main(int argc, char **argv) { const char *filename = "weights_blob.bin"; #endif if (argc < 4) usage(); - if (strcmp(argv[1], "-encode") == 0) mode=MODE_ENCODE; - else if (strcmp(argv[1], "-decode") == 0) mode=MODE_DECODE; - else if (strcmp(argv[1], "-features") == 0) mode=MODE_FEATURES; + if (strcmp(argv[1], "-features") == 0) mode=MODE_FEATURES; else if (strcmp(argv[1], "-synthesis") == 0) mode=MODE_SYNTHESIS; else if (strcmp(argv[1], "-plc") == 0) { mode=MODE_PLC; @@ -160,33 +154,7 @@ int main(int argc, char **argv) { #ifdef USE_WEIGHTS_FILE data = load_blob(filename, &len); #endif - if (mode == MODE_ENCODE) { - LPCNetEncState *net; - net = lpcnet_encoder_create(); - while (1) { - unsigned char buf[LPCNET_COMPRESSED_SIZE]; - short pcm[LPCNET_PACKET_SAMPLES]; - size_t ret; - ret = fread(pcm, sizeof(pcm[0]), LPCNET_PACKET_SAMPLES, fin); - if (feof(fin) || ret != LPCNET_PACKET_SAMPLES) break; - lpcnet_encode(net, pcm, buf); - fwrite(buf, 1, LPCNET_COMPRESSED_SIZE, fout); - } - lpcnet_encoder_destroy(net); - } else if (mode == MODE_DECODE) { - LPCNetDecState *net; - net = lpcnet_decoder_create(); - while (1) { - unsigned char buf[LPCNET_COMPRESSED_SIZE]; - short pcm[LPCNET_PACKET_SAMPLES]; - size_t ret; - ret = fread(buf, sizeof(buf[0]), LPCNET_COMPRESSED_SIZE, fin); - if (feof(fin) || ret != LPCNET_COMPRESSED_SIZE) break; - lpcnet_decode(net, buf, pcm); - fwrite(pcm, sizeof(pcm[0]), LPCNET_PACKET_SAMPLES, fout); - } - lpcnet_decoder_destroy(net); - } else if (mode == MODE_FEATURES) { + if (mode == MODE_FEATURES) { LPCNetEncState *net; net = lpcnet_encoder_create(); while (1) { diff --git a/dnn/lpcnet_enc.c b/dnn/lpcnet_enc.c index 3854faf02..90fa652f9 100644 --- a/dnn/lpcnet_enc.c +++ b/dnn/lpcnet_enc.c @@ -28,9 +28,8 @@ #include "config.h" #endif -#ifdef OPUS_BUILD +/* FIXME: Use the optimized celt_pitch_xcorr() */ #define celt_pitch_xcorr celt_pitch_xcorr_c -#endif #include #include @@ -45,424 +44,6 @@ #include "lpcnet_private.h" #include "lpcnet.h" -#ifndef OPUS_BUILD - -#define SURVIVORS 5 - - -void vq_quantize_mbest(const float *codebook, int nb_entries, const float *x, int ndim, int mbest, float *dist, int *index) -{ - int i, j; - for (i=0;i=pos+1;j--) { - dist[j] = dist[j-1]; - index[j] = index[j-1]; - } - dist[pos] = d; - index[pos] = i; - } - } -} - - -int vq_quantize(const float *codebook, int nb_entries, const float *x, int ndim, float *dist_out) -{ - int i, j; - float min_dist = 1e15f; - int nearest = 0; - - for (i=0;i=pos+1;j--) { - glob_dist[j] = glob_dist[j-1]; - index2[j][0] = index2[j-1][0]; - index2[j][1] = index2[j-1][1]; - } - glob_dist[pos] = curr_dist[m]; - index2[pos][0] = index1[k][0]; - index2[pos][1] = curr_index[m]; - m++; - } - } - } - } - for (k=0;k=pos+1;j--) { - glob_dist[j] = glob_dist[j-1]; - index3[j][0] = index3[j-1][0]; - index3[j][1] = index3[j-1][1]; - index3[j][2] = index3[j-1][2]; - } - glob_dist[pos] = curr_dist[m]; - index3[pos][0] = index2[k][0]; - index3[pos][1] = index2[k][1]; - index3[pos][2] = curr_index[m]; - m++; - } - } - } - } - entry[0] = id = index3[0][0]; - entry[1] = id2 = index3[0][1]; - entry[2] = id3 = index3[0][2]; - /*printf("%f ", glob_dist[0]);*/ - for (i=0;i= 1<= FORBIDDEN_INTERP); -} - - -void perform_interp_relaxation(float features[4][NB_TOTAL_FEATURES], const float *mem) { - int id0, id1; - int best_id; - int i; - float count, count_1; - best_id = double_interp_search(features, mem); - best_id += (best_id >= FORBIDDEN_INTERP); - id0 = best_id / 3; - id1 = best_id % 3; - count = 1; - if (id0 != 1) { - float t = (id0==0) ? .5f : 1.f; - for (i=0;ibyte_pos = 0; - bits->bit_pos = 0; - bits->max_bytes = size; - bits->chars = buf; - RNN_CLEAR(buf, size); -} - -void bits_pack(packer *bits, unsigned int data, int nb_bits) { - while(nb_bits) - { - int bit; - if (bits->byte_pos == bits->max_bytes) { - fprintf(stderr, "something went horribly wrong\n"); - return; - } - bit = (data>>(nb_bits-1))&1; - bits->chars[bits->byte_pos] |= bit<<(BITS_PER_CHAR-1-bits->bit_pos); - bits->bit_pos++; - - if (bits->bit_pos==BITS_PER_CHAR) - { - bits->bit_pos=0; - bits->byte_pos++; - if (bits->byte_pos < bits->max_bytes) bits->chars[bits->byte_pos] = 0; - } - nb_bits--; - } -} - -#endif LPCNET_EXPORT int lpcnet_encoder_get_size() { return sizeof(LPCNetEncState); @@ -576,241 +157,6 @@ void compute_frame_features(LPCNetEncState *st, const float *in) { } } -void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int encode, int quantize) { - int i; - int sub; - int best_i; - int best[10]; - int pitch_prev[8][PITCH_MAX_PERIOD]; - float best_a=0; - float best_b=0; - float w; - float sx=0, sxx=0, sxy=0, sy=0, sw=0; - float frame_corr; - int voiced; - float frame_weight_sum = 1e-15f; - float center_pitch; - int main_pitch; - int modulation; - int corr_id = 0; -#ifndef OPUS_BUILD - int c0_id=0; - int vq_end[3]={0}; - int vq_mid=0; - int interp_id=0; -#endif - for(sub=0;sub<8;sub++) frame_weight_sum += st->frame_weight[2+sub]; - for(sub=0;sub<8;sub++) st->frame_weight[2+sub] *= (8.f/frame_weight_sum); - for(sub=0;sub<8;sub++) { - float max_path_all = -1e15f; - best_i = 0; - for (i=0;ixc[2+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+sub][(PITCH_MAX_PERIOD+i-1)/2]); - if (st->xc[2+sub][i] < xc_half*1.1f) st->xc[2+sub][i] *= .8f; - } - for (i=0;ipitch_max_path_all - 6.f; - pitch_prev[sub][i] = st->best_i; - for (j=IMAX(-4, -i);j<=4 && i+jpitch_max_path[0][i+j] - .02f*abs(j)*abs(j) > max_prev) { - max_prev = st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j); - pitch_prev[sub][i] = i+j; - } - } - st->pitch_max_path[1][i] = max_prev + st->frame_weight[2+sub]*st->xc[2+sub][i]; - if (st->pitch_max_path[1][i] > max_path_all) { - max_path_all = st->pitch_max_path[1][i]; - best_i = i; - } - } - /* Renormalize. */ - for (i=0;ipitch_max_path[1][i] -= max_path_all; - /*for (i=0;ipitch_max_path[1][i]); - printf("\n");*/ - RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD); - st->pitch_max_path_all = max_path_all; - st->best_i = best_i; - } - best_i = st->best_i; - frame_corr = 0; - /* Backward pass. */ - for (sub=7;sub>=0;sub--) { - best[2+sub] = PITCH_MAX_PERIOD-best_i; - frame_corr += st->frame_weight[2+sub]*st->xc[2+sub][best_i]; - best_i = pitch_prev[sub][best_i]; - } - frame_corr /= 8; - if (quantize && frame_corr < 0) frame_corr = 0; - for (sub=0;sub<8;sub++) { - /*printf("%d %f\n", best[2+sub], frame_corr);*/ - } - /*printf("\n");*/ - for (sub=2;sub<10;sub++) { - w = st->frame_weight[sub]; - sw += w; - sx += w*sub; - sxx += w*sub*sub; - sxy += w*sub*best[sub]; - sy += w*best[sub]; - } - voiced = frame_corr >= .3; - /* Linear regression to figure out the pitch contour. */ - best_a = (sw*sxy - sx*sy)/(sw*sxx - sx*sx); - if (voiced) { - float max_a; - float mean_pitch = sy/sw; - /* Allow a relative variation of up to 1/4 over 8 sub-frames. */ - max_a = mean_pitch/32; - best_a = MIN16(max_a, MAX16(-max_a, best_a)); - corr_id = (int)floor((frame_corr-.3f)/.175f); - if (quantize) frame_corr = 0.3875f + .175f*corr_id; - } else { - best_a = 0; - corr_id = (int)floor(frame_corr/.075f); - if (quantize) frame_corr = 0.0375f + .075f*corr_id; - } - /*best_b = (sxx*sy - sx*sxy)/(sw*sxx - sx*sx);*/ - best_b = (sy - best_a*sx)/sw; - /* Quantizing the pitch as "main" pitch + slope. */ - center_pitch = best_b+5.5f*best_a; - main_pitch = (int)floor(.5 + 21.*1.442695041*log(center_pitch/PITCH_MIN_PERIOD)); - main_pitch = IMAX(0, IMIN(63, main_pitch)); - modulation = (int)floor(.5 + 16*7*best_a/center_pitch); - modulation = IMAX(-3, IMIN(3, modulation)); - /*printf("%d %d\n", main_pitch, modulation);*/ - /*printf("%f %f\n", best_a/center_pitch, best_corr);*/ - /*for (sub=2;sub<10;sub++) printf("%f %d %f\n", best_b + sub*best_a, best[sub], best_corr);*/ - for (sub=0;sub<4;sub++) { - if (quantize) { - float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD; - p *= 1.f + modulation/16.f/7.f*(2*sub-3); - p = MIN16(255, MAX16(33, p)); - st->features[sub][NB_BANDS] = .02f*(p-100); - st->features[sub][NB_BANDS + 1] = frame_corr-.5f; - } else { - st->features[sub][NB_BANDS] = .01f*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200); - st->features[sub][NB_BANDS + 1] = frame_corr-.5f; - } - /*printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr);*/ - } - /*printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr);*/ - RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD); - RNN_COPY(&st->xc[1][0], &st->xc[9][0], PITCH_MAX_PERIOD); -#ifndef OPUS_BUILD - if (quantize) { - /*printf("%f\n", st->features[3][0]);*/ - c0_id = (int)floor(.5 + st->features[3][0]*4); - c0_id = IMAX(-64, IMIN(63, c0_id)); - st->features[3][0] = c0_id/4.f; - quantize_3stage_mbest(&st->features[3][1], vq_end); - /*perform_interp_relaxation(st->features, st->vq_mem);*/ - quantize_diff(&st->features[1][0], st->vq_mem, &st->features[3][0], ceps_codebook_diff4, 12, 1, &vq_mid); - interp_id = double_interp_search(st->features, st->vq_mem); - perform_double_interp(st->features, st->vq_mem, interp_id); - } -#endif - for (sub=0;sub<4;sub++) { - lpc_from_cepstrum(st->lpc, st->features[sub]); - for (i=0;ifeatures[sub][NB_BANDS+2+i] = st->lpc[i]; - } - /*printf("\n");*/ - RNN_COPY(st->vq_mem, &st->features[3][0], NB_BANDS); - if (encode) { -#ifndef OPUS_BUILD - packer bits; - /*fprintf(stdout, "%d %d %d %d %d %d %d %d %d\n", c0_id+64, main_pitch, voiced ? modulation+4 : 0, corr_id, vq_end[0], vq_end[1], vq_end[2], vq_mid, interp_id);*/ - bits_packer_init(&bits, buf, 8); - bits_pack(&bits, c0_id+64, 7); - bits_pack(&bits, main_pitch, 6); - bits_pack(&bits, voiced ? modulation+4 : 0, 3); - bits_pack(&bits, corr_id, 2); - bits_pack(&bits, vq_end[0], 10); - bits_pack(&bits, vq_end[1], 10); - bits_pack(&bits, vq_end[2], 10); - bits_pack(&bits, vq_mid, 13); - bits_pack(&bits, interp_id, 3); - if (ffeat) fwrite(buf, 1, 8, ffeat); -#else - (void)buf; -#endif - } else if (ffeat) { - for (i=0;i<4;i++) { - fwrite(st->features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat); - } - } -} - - -void process_multi_frame(LPCNetEncState *st, FILE *ffeat) { - int i; - int sub; - int best_i; - int best[10]; - int pitch_prev[8][PITCH_MAX_PERIOD]; - float frame_corr; - float frame_weight_sum = 1e-15f; - for(sub=0;sub<8;sub++) frame_weight_sum += st->frame_weight[2+sub]; - for(sub=0;sub<8;sub++) st->frame_weight[2+sub] *= (8.f/frame_weight_sum); - for(sub=0;sub<8;sub++) { - float max_path_all = -1e15f; - best_i = 0; - for (i=0;ixc[2+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+sub][(PITCH_MAX_PERIOD+i-1)/2]); - if (st->xc[2+sub][i] < xc_half*1.1) st->xc[2+sub][i] *= .8f; - } - for (i=0;ipitch_max_path_all - 6.f; - pitch_prev[sub][i] = st->best_i; - for (j=IMAX(-4, -i);j<=4 && i+jpitch_max_path[0][i+j] - .02f*abs(j)*abs(j) > max_prev) { - max_prev = st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j); - pitch_prev[sub][i] = i+j; - } - } - st->pitch_max_path[1][i] = max_prev + st->frame_weight[2+sub]*st->xc[2+sub][i]; - if (st->pitch_max_path[1][i] > max_path_all) { - max_path_all = st->pitch_max_path[1][i]; - best_i = i; - } - } - /* Renormalize. */ - for (i=0;ipitch_max_path[1][i] -= max_path_all; - /*for (i=0;ipitch_max_path[1][i]); - printf("\n");*/ - RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD); - st->pitch_max_path_all = max_path_all; - st->best_i = best_i; - } - best_i = st->best_i; - frame_corr = 0; - /* Backward pass. */ - for (sub=7;sub>=0;sub--) { - best[2+sub] = PITCH_MAX_PERIOD-best_i; - frame_corr += st->frame_weight[2+sub]*st->xc[2+sub][best_i]; - best_i = pitch_prev[sub][best_i]; - } - frame_corr /= 8; - for (sub=0;sub<4;sub++) { - st->features[sub][NB_BANDS] = .01f*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200); - st->features[sub][NB_BANDS + 1] = frame_corr-.5f; - /*printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr);*/ - } - /*printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr);*/ - RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD); - RNN_COPY(&st->xc[1][0], &st->xc[9][0], PITCH_MAX_PERIOD); - /*printf("\n");*/ - RNN_COPY(st->vq_mem, &st->features[3][0], NB_BANDS); - if (ffeat) { - for (i=0;i<4;i++) { - fwrite(st->features[i], sizeof(float), NB_TOTAL_FEATURES, ffeat); - } - } -} - void process_single_frame(LPCNetEncState *st, FILE *ffeat) { int i; int sub; @@ -879,35 +225,6 @@ void preemphasis(float *y, float *mem, const float *x, float coef, int N) { } } -LPCNET_EXPORT int lpcnet_encode(LPCNetEncState *st, const short *pcm, unsigned char *buf) { - int i, k; - for (k=0;k<4;k++) { - float x[FRAME_SIZE]; - for (i=0;imem_preemph, x, PREEMPHASIS, FRAME_SIZE); - st->pcount = k; - compute_frame_features(st, x); - } - process_superframe(st, buf, NULL, 1, 1); - return 0; -} - -LPCNET_EXPORT int lpcnet_compute_features(LPCNetEncState *st, const short *pcm, float features[4][NB_TOTAL_FEATURES]) { - int i, k; - for (k=0;k<4;k++) { - float x[FRAME_SIZE]; - for (i=0;imem_preemph, x, PREEMPHASIS, FRAME_SIZE); - st->pcount = k; - compute_frame_features(st, x); - } - process_superframe(st, NULL, NULL, 0, 0); - for (k=0;k<4;k++) { - RNN_COPY(&features[k][0], &st->features[k][0], NB_TOTAL_FEATURES); - } - return 0; -} - static int lpcnet_compute_single_frame_features_impl(LPCNetEncState *st, float *x, float features[NB_TOTAL_FEATURES]) { preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE); compute_frame_features(st, x); diff --git a/dnn/lpcnet_private.h b/dnn/lpcnet_private.h index 3a85847e7..e6fb57ed4 100644 --- a/dnn/lpcnet_private.h +++ b/dnn/lpcnet_private.h @@ -105,19 +105,10 @@ struct LPCNetPLCState { short queued_samples[FRAME_SIZE]; }; -#ifndef OPUS_BUILD -extern float ceps_codebook1[]; -extern float ceps_codebook2[]; -extern float ceps_codebook3[]; -extern float ceps_codebook_diff4[]; -#endif - void preemphasis(float *y, float *mem, const float *x, float coef, int N); void perform_double_interp(float features[4][NB_TOTAL_FEATURES], const float *mem, int best_id); -void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int encode, int quantize); - void compute_frame_features(LPCNetEncState *st, const float *in); void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, const unsigned char buf[8]); diff --git a/lpcnet_sources.mk b/lpcnet_sources.mk index 778f60dc1..f029d5a74 100644 --- a/lpcnet_sources.mk +++ b/lpcnet_sources.mk @@ -4,7 +4,6 @@ dnn/common.c \ dnn/freq.c \ dnn/kiss99.c \ dnn/lpcnet.c \ -dnn/lpcnet_dec.c \ dnn/lpcnet_enc.c \ dnn/lpcnet_plc.c \ dnn/lpcnet_tables.c \