From 6cf192e21d550c8963784e9a20c6a68de091e7a9 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Mon, 3 Jun 2024 00:20:25 -0400 Subject: [PATCH 1/2] chore: Update version to 0.0.9 and whispercpp to 1.6.2 --- buildspec.json | 2 +- cmake/BuildWhispercpp.cmake | 22 ++--- src/whisper-utils/whisper-processing.cpp | 106 ++++++++++++----------- 3 files changed, 66 insertions(+), 64 deletions(-) diff --git a/buildspec.json b/buildspec.json index cbe3023..6a8ba75 100644 --- a/buildspec.json +++ b/buildspec.json @@ -45,7 +45,7 @@ } }, "name": "obs-cleanstream", - "version": "0.0.8", + "version": "0.0.9", "author": "Roy Shilkrot", "website": "https://github.com/occ-ai/obs-cleanstream/", "email": "roy.shil@gmail.com", diff --git a/cmake/BuildWhispercpp.cmake b/cmake/BuildWhispercpp.cmake index 284fa11..90d04e4 100644 --- a/cmake/BuildWhispercpp.cmake +++ b/cmake/BuildWhispercpp.cmake @@ -14,12 +14,12 @@ if(APPLE) endif(NOT DEFINED ENV{MACOS_ARCH}) set(WHISPER_CPP_URL - "https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.2/whispercpp-macos-$ENV{MACOS_ARCH}-0.0.2.tar.gz" + "https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.3/whispercpp-macos-$ENV{MACOS_ARCH}-0.0.3.tar.gz" ) if($ENV{MACOS_ARCH} STREQUAL "x86_64") - set(WHISPER_CPP_HASH "00C308AF0BFFF7619934403A8080CC9AFC4EDAA328D7587E617150A2C6A33313") + set(WHISPER_CPP_HASH "94DB35C69E958C8A59F0F331734F4F4F45F4BB13D2F54D3C838457E8590874C4") elseif($ENV{MACOS_ARCH} STREQUAL "arm64") - set(WHISPER_CPP_HASH "0478E2079E07FA81BEE77506101003F4A4C8F0DF9E23757BD7E1D25DCBD1DB30") + set(WHISPER_CPP_HASH "ACA1DF8F34F4946B56FEED89B7548C9AD56D1DD89615C96BDEB6E4734A946451") else() message( FATAL_ERROR @@ -59,16 +59,16 @@ elseif(WIN32) endif() set(WHISPER_CPP_URL - "https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.2/whispercpp-windows-${CUDA_PREFIX}-0.0.2.zip" + "https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.3/whispercpp-windows-${ARCH_PREFIX}-0.0.3.zip" ) if($ENV{CPU_OR_CUDA} STREQUAL "cpu") - set(WHISPER_CPP_HASH "6DE628A51B9352624A1EC397231591FA3370E6BB42D9364F4F91F11DD18F77D2") + set(WHISPER_CPP_HASH "A7243E649E0B6D080AA6D2210DB0AC08C597FA11B88C3068B8A60083AD9E62EF") elseif($ENV{CPU_OR_CUDA} STREQUAL "clblast") - set(WHISPER_CPP_HASH "97BF58520F1818B7C9F4E996197F3097934E5E0BBA92B0B016C6B28BE9FF1642") + set(WHISPER_CPP_HASH "7957AC76A0E6517C95951B3BECCB554CD992E30DAF8716681B40F375590F69F1") elseif($ENV{CPU_OR_CUDA} STREQUAL "12.2.0") - set(WHISPER_CPP_HASH "48C059A3364E0AAD9FB0D4194BA554865928D22A27ECE5E3C116DC672D5D6EDE") + set(WHISPER_CPP_HASH "0F6BC1F91C573A867D6972554FC29C3D8EAFD7994FA0FEBBEAFCF945DC8A9F41") elseif($ENV{CPU_OR_CUDA} STREQUAL "11.8.0") - set(WHISPER_CPP_HASH "29A5530E83896DE207F0199535CBBB24DF0D63B1373BA66139AD240BA67120EB") + set(WHISPER_CPP_HASH "51CB6750ADDF96F38106E4E88212FCC06500999E568E5A4EDC6D42CA6D7CA99D") else() message( FATAL_ERROR @@ -106,11 +106,11 @@ elseif(WIN32) install(FILES ${WHISPER_DLLS} DESTINATION "obs-plugins/64bit") else() - set(Whispercpp_Build_GIT_TAG "v1.5.5") + set(Whispercpp_Build_GIT_TAG "v1.6.2") set(WHISPER_EXTRA_CXX_FLAGS "-fPIC") set(WHISPER_ADDITIONAL_CMAKE_ARGS -DWHISPER_BLAS=OFF -DWHISPER_CUBLAS=OFF -DWHISPER_OPENBLAS=OFF) - # On Linux and MacOS build a static Whisper library + # On Linux build a static Whisper library ExternalProject_Add( Whispercpp_Build DOWNLOAD_EXTRACT_TIMESTAMP true @@ -131,7 +131,7 @@ else() ExternalProject_Get_Property(Whispercpp_Build INSTALL_DIR) - # on Linux and MacOS add the static Whisper library to the link line + # add the static Whisper library to the link line add_library(Whispercpp::Whisper STATIC IMPORTED) set_target_properties( Whispercpp::Whisper diff --git a/src/whisper-utils/whisper-processing.cpp b/src/whisper-utils/whisper-processing.cpp index ecbab6e..80391d8 100644 --- a/src/whisper-utils/whisper-processing.cpp +++ b/src/whisper-utils/whisper-processing.cpp @@ -66,6 +66,7 @@ struct whisper_context *init_whisper_context(const std::string &model_path_in, cparams.use_gpu = false; obs_log(LOG_INFO, "Using CPU for inference"); #endif + cparams.flash_attn = false; struct whisper_context *ctx = nullptr; try { @@ -136,77 +137,78 @@ int run_whisper_inference(struct cleanstream_data *gf, const float *pcm32f_data, int(pcm32f_size), float(pcm32f_size) / WHISPER_SAMPLE_RATE, gf->whisper_params.n_threads); + std::string text_preproc; + float sentence_p = 0.0f; + int64_t t0; + int64_t t1; + // run the inference - int whisper_full_result = -1; try { gf->whisper_params.duration_ms = (int)((float)pcm32f_size / WHISPER_SAMPLE_RATE * 1000.0f); - whisper_full_result = whisper_full(gf->whisper_context, gf->whisper_params, - pcm32f_data, (int)pcm32f_size); - } catch (const std::exception &e) { - obs_log(LOG_ERROR, "Whisper exception: %s. Filter restart is required", e.what()); - whisper_free(gf->whisper_context); - gf->whisper_context = nullptr; - return DETECTION_RESULT_UNKNOWN; - } + int whisper_full_result = whisper_full(gf->whisper_context, gf->whisper_params, + pcm32f_data, (int)pcm32f_size); - if (whisper_full_result != 0) { - obs_log(LOG_WARNING, "failed to process audio, error %d", whisper_full_result); - return DETECTION_RESULT_UNKNOWN; - } else { + if (whisper_full_result != 0) { + obs_log(LOG_WARNING, "failed to process audio, error %d", + whisper_full_result); + return DETECTION_RESULT_UNKNOWN; + } + if (whisper_full_n_segments(gf->whisper_context) == 0) { + return DETECTION_RESULT_SILENCE; + } const int n_segment = 0; - const char *text = whisper_full_get_segment_text(gf->whisper_context, n_segment); - const int64_t t0 = whisper_full_get_segment_t0(gf->whisper_context, n_segment); - const int64_t t1 = whisper_full_get_segment_t1(gf->whisper_context, n_segment); + text_preproc = whisper_full_get_segment_text(gf->whisper_context, n_segment); + t0 = whisper_full_get_segment_t0(gf->whisper_context, n_segment); + t1 = whisper_full_get_segment_t1(gf->whisper_context, n_segment); - float sentence_p = 0.0f; const int n_tokens = whisper_full_n_tokens(gf->whisper_context, n_segment); for (int j = 0; j < n_tokens; ++j) { sentence_p += whisper_full_get_token_p(gf->whisper_context, n_segment, j); } sentence_p /= (float)n_tokens; + } catch (const std::exception &e) { + obs_log(LOG_ERROR, "Whisper exception: %s. Filter restart is required", e.what()); + whisper_free(gf->whisper_context); + gf->whisper_context = nullptr; + return DETECTION_RESULT_UNKNOWN; + } - std::string text_preproc = text; - - if (text_preproc.empty()) { - return DETECTION_RESULT_SILENCE; - } + if (text_preproc.empty()) { + return DETECTION_RESULT_SILENCE; + } - // if language is en convert text to lowercase - if (strcmp(gf->whisper_params.language, "en") == 0) { - std::string text_lower; - std::transform(text_preproc.begin(), text_preproc.end(), text_lower.begin(), - ::tolower); - text_preproc = text_lower; - // remove leading and trailing non-alphanumeric characters - text_preproc = remove_leading_trailing_nonalpha(text_preproc); - } else { - // fix UTF8 encoding - std::string text_fixed = fix_utf8(text); - text_preproc = text_fixed; - } + // if language is en convert text to lowercase + if (strcmp(gf->whisper_params.language, "en") == 0) { + std::transform(text_preproc.begin(), text_preproc.end(), text_preproc.begin(), + ::tolower); + // remove leading and trailing non-alphanumeric characters + text_preproc = remove_leading_trailing_nonalpha(text_preproc); + } else { + // fix UTF8 encoding + text_preproc = fix_utf8(text_preproc); + } - if (gf->log_words) { - obs_log(LOG_INFO, "[%s --> %s] (%.3f) %s", to_timestamp(t0).c_str(), - to_timestamp(t1).c_str(), sentence_p, text_preproc.c_str()); - } + if (gf->log_words) { + obs_log(LOG_INFO, "[%s --> %s] (%.3f) %s", to_timestamp(t0).c_str(), + to_timestamp(t1).c_str(), sentence_p, text_preproc.c_str()); + } - if (text_preproc.empty()) { - return DETECTION_RESULT_SILENCE; - } + if (text_preproc.empty()) { + return DETECTION_RESULT_SILENCE; + } - // use a regular expression to detect filler words with a word boundary - try { - if (gf->detect_regex != nullptr && strlen(gf->detect_regex) > 0) { - std::regex filler_regex(gf->detect_regex); - if (std::regex_search(text_preproc, filler_regex, - std::regex_constants::match_any)) { - return DETECTION_RESULT_BEEP; - } + // use a regular expression to detect filler words with a word boundary + try { + if (gf->detect_regex != nullptr && strlen(gf->detect_regex) > 0) { + std::regex filler_regex(gf->detect_regex); + if (std::regex_search(text_preproc, filler_regex, + std::regex_constants::match_any)) { + return DETECTION_RESULT_BEEP; } - } catch (const std::regex_error &e) { - obs_log(LOG_ERROR, "Regex error: %s", e.what()); } + } catch (const std::regex_error &e) { + obs_log(LOG_ERROR, "Regex error: %s", e.what()); } return DETECTION_RESULT_SPEECH; From 74ebd0b4410a169826d95116eb6bdbc756380635 Mon Sep 17 00:00:00 2001 From: Roy Shilkrot Date: Mon, 3 Jun 2024 00:24:49 -0400 Subject: [PATCH 2/2] chore: Update whispercpp URL to use CUDA prefix in BuildWhispercpp.cmake --- cmake/BuildWhispercpp.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/BuildWhispercpp.cmake b/cmake/BuildWhispercpp.cmake index 90d04e4..756c01e 100644 --- a/cmake/BuildWhispercpp.cmake +++ b/cmake/BuildWhispercpp.cmake @@ -59,7 +59,7 @@ elseif(WIN32) endif() set(WHISPER_CPP_URL - "https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.3/whispercpp-windows-${ARCH_PREFIX}-0.0.3.zip" + "https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.3/whispercpp-windows-${CUDA_PREFIX}-0.0.3.zip" ) if($ENV{CPU_OR_CUDA} STREQUAL "cpu") set(WHISPER_CPP_HASH "A7243E649E0B6D080AA6D2210DB0AC08C597FA11B88C3068B8A60083AD9E62EF")