Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Update version to 0.0.9 and whispercpp to 1.6.2 #21

Merged
merged 2 commits on Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion buildspec.json
Original file line number | Diff line number | Diff line change
Expand Up @@ -45,7 +45,7 @@
}
},
"name": "obs-cleanstream",
"version": "0.0.8",
"version": "0.0.9",
"author": "Roy Shilkrot",
"website": "https://github.com/occ-ai/obs-cleanstream/",
"email": "roy.shil@gmail.com",
Expand Down
22 changes: 11 additions & 11 deletions cmake/BuildWhispercpp.cmake
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,12 +14,12 @@ if(APPLE)
endif(NOT DEFINED ENV{MACOS_ARCH})

set(WHISPER_CPP_URL
"https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.2/whispercpp-macos-$ENV{MACOS_ARCH}-0.0.2.tar.gz"
"https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.3/whispercpp-macos-$ENV{MACOS_ARCH}-0.0.3.tar.gz"
)
if($ENV{MACOS_ARCH} STREQUAL "x86_64")
set(WHISPER_CPP_HASH "00C308AF0BFFF7619934403A8080CC9AFC4EDAA328D7587E617150A2C6A33313")
set(WHISPER_CPP_HASH "94DB35C69E958C8A59F0F331734F4F4F45F4BB13D2F54D3C838457E8590874C4")
elseif($ENV{MACOS_ARCH} STREQUAL "arm64")
set(WHISPER_CPP_HASH "0478E2079E07FA81BEE77506101003F4A4C8F0DF9E23757BD7E1D25DCBD1DB30")
set(WHISPER_CPP_HASH "ACA1DF8F34F4946B56FEED89B7548C9AD56D1DD89615C96BDEB6E4734A946451")
else()
message(
FATAL_ERROR
Expand Down Expand Up @@ -59,16 +59,16 @@ elseif(WIN32)
endif()

set(WHISPER_CPP_URL
"https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.2/whispercpp-windows-${CUDA_PREFIX}-0.0.2.zip"
"https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.3/whispercpp-windows-${CUDA_PREFIX}-0.0.3.zip"
)
if($ENV{CPU_OR_CUDA} STREQUAL "cpu")
set(WHISPER_CPP_HASH "6DE628A51B9352624A1EC397231591FA3370E6BB42D9364F4F91F11DD18F77D2")
set(WHISPER_CPP_HASH "A7243E649E0B6D080AA6D2210DB0AC08C597FA11B88C3068B8A60083AD9E62EF")
elseif($ENV{CPU_OR_CUDA} STREQUAL "clblast")
set(WHISPER_CPP_HASH "97BF58520F1818B7C9F4E996197F3097934E5E0BBA92B0B016C6B28BE9FF1642")
set(WHISPER_CPP_HASH "7957AC76A0E6517C95951B3BECCB554CD992E30DAF8716681B40F375590F69F1")
elseif($ENV{CPU_OR_CUDA} STREQUAL "12.2.0")
set(WHISPER_CPP_HASH "48C059A3364E0AAD9FB0D4194BA554865928D22A27ECE5E3C116DC672D5D6EDE")
set(WHISPER_CPP_HASH "0F6BC1F91C573A867D6972554FC29C3D8EAFD7994FA0FEBBEAFCF945DC8A9F41")
elseif($ENV{CPU_OR_CUDA} STREQUAL "11.8.0")
set(WHISPER_CPP_HASH "29A5530E83896DE207F0199535CBBB24DF0D63B1373BA66139AD240BA67120EB")
set(WHISPER_CPP_HASH "51CB6750ADDF96F38106E4E88212FCC06500999E568E5A4EDC6D42CA6D7CA99D")
else()
message(
FATAL_ERROR
Expand Down Expand Up @@ -106,11 +106,11 @@ elseif(WIN32)
install(FILES ${WHISPER_DLLS} DESTINATION "obs-plugins/64bit")

else()
set(Whispercpp_Build_GIT_TAG "v1.5.5")
set(Whispercpp_Build_GIT_TAG "v1.6.2")
set(WHISPER_EXTRA_CXX_FLAGS "-fPIC")
set(WHISPER_ADDITIONAL_CMAKE_ARGS -DWHISPER_BLAS=OFF -DWHISPER_CUBLAS=OFF -DWHISPER_OPENBLAS=OFF)

# On Linux and MacOS build a static Whisper library
# On Linux build a static Whisper library
ExternalProject_Add(
Whispercpp_Build
DOWNLOAD_EXTRACT_TIMESTAMP true
Expand All @@ -131,7 +131,7 @@ else()

ExternalProject_Get_Property(Whispercpp_Build INSTALL_DIR)

# on Linux and MacOS add the static Whisper library to the link line
# add the static Whisper library to the link line
add_library(Whispercpp::Whisper STATIC IMPORTED)
set_target_properties(
Whispercpp::Whisper
Expand Down
106 changes: 54 additions & 52 deletions src/whisper-utils/whisper-processing.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -66,6 +66,7 @@ struct whisper_context *init_whisper_context(const std::string &model_path_in,
cparams.use_gpu = false;
obs_log(LOG_INFO, "Using CPU for inference");
#endif
cparams.flash_attn = false;

struct whisper_context *ctx = nullptr;
try {
Expand Down Expand Up @@ -136,77 +137,78 @@ int run_whisper_inference(struct cleanstream_data *gf, const float *pcm32f_data,
int(pcm32f_size), float(pcm32f_size) / WHISPER_SAMPLE_RATE,
gf->whisper_params.n_threads);

std::string text_preproc;
float sentence_p = 0.0f;
int64_t t0;
int64_t t1;

// run the inference
int whisper_full_result = -1;
try {
gf->whisper_params.duration_ms =
(int)((float)pcm32f_size / WHISPER_SAMPLE_RATE * 1000.0f);
whisper_full_result = whisper_full(gf->whisper_context, gf->whisper_params,
pcm32f_data, (int)pcm32f_size);
} catch (const std::exception &e) {
obs_log(LOG_ERROR, "Whisper exception: %s. Filter restart is required", e.what());
whisper_free(gf->whisper_context);
gf->whisper_context = nullptr;
return DETECTION_RESULT_UNKNOWN;
}
int whisper_full_result = whisper_full(gf->whisper_context, gf->whisper_params,
pcm32f_data, (int)pcm32f_size);

if (whisper_full_result != 0) {
obs_log(LOG_WARNING, "failed to process audio, error %d", whisper_full_result);
return DETECTION_RESULT_UNKNOWN;
} else {
if (whisper_full_result != 0) {
obs_log(LOG_WARNING, "failed to process audio, error %d",
whisper_full_result);
return DETECTION_RESULT_UNKNOWN;
}
if (whisper_full_n_segments(gf->whisper_context) == 0) {
return DETECTION_RESULT_SILENCE;
}
const int n_segment = 0;
const char *text = whisper_full_get_segment_text(gf->whisper_context, n_segment);
const int64_t t0 = whisper_full_get_segment_t0(gf->whisper_context, n_segment);
const int64_t t1 = whisper_full_get_segment_t1(gf->whisper_context, n_segment);
text_preproc = whisper_full_get_segment_text(gf->whisper_context, n_segment);
t0 = whisper_full_get_segment_t0(gf->whisper_context, n_segment);
t1 = whisper_full_get_segment_t1(gf->whisper_context, n_segment);

float sentence_p = 0.0f;
const int n_tokens = whisper_full_n_tokens(gf->whisper_context, n_segment);
for (int j = 0; j < n_tokens; ++j) {
sentence_p += whisper_full_get_token_p(gf->whisper_context, n_segment, j);
}
sentence_p /= (float)n_tokens;
} catch (const std::exception &e) {
obs_log(LOG_ERROR, "Whisper exception: %s. Filter restart is required", e.what());
whisper_free(gf->whisper_context);
gf->whisper_context = nullptr;
return DETECTION_RESULT_UNKNOWN;
}

std::string text_preproc = text;

if (text_preproc.empty()) {
return DETECTION_RESULT_SILENCE;
}
if (text_preproc.empty()) {
return DETECTION_RESULT_SILENCE;
}

// if language is en convert text to lowercase
if (strcmp(gf->whisper_params.language, "en") == 0) {
std::string text_lower;
std::transform(text_preproc.begin(), text_preproc.end(), text_lower.begin(),
::tolower);
text_preproc = text_lower;
// remove leading and trailing non-alphanumeric characters
text_preproc = remove_leading_trailing_nonalpha(text_preproc);
} else {
// fix UTF8 encoding
std::string text_fixed = fix_utf8(text);
text_preproc = text_fixed;
}
// if language is en convert text to lowercase
if (strcmp(gf->whisper_params.language, "en") == 0) {
std::transform(text_preproc.begin(), text_preproc.end(), text_preproc.begin(),
::tolower);
// remove leading and trailing non-alphanumeric characters
text_preproc = remove_leading_trailing_nonalpha(text_preproc);
} else {
// fix UTF8 encoding
text_preproc = fix_utf8(text_preproc);
}

if (gf->log_words) {
obs_log(LOG_INFO, "[%s --> %s] (%.3f) %s", to_timestamp(t0).c_str(),
to_timestamp(t1).c_str(), sentence_p, text_preproc.c_str());
}
if (gf->log_words) {
obs_log(LOG_INFO, "[%s --> %s] (%.3f) %s", to_timestamp(t0).c_str(),
to_timestamp(t1).c_str(), sentence_p, text_preproc.c_str());
}

if (text_preproc.empty()) {
return DETECTION_RESULT_SILENCE;
}
if (text_preproc.empty()) {
return DETECTION_RESULT_SILENCE;
}

// use a regular expression to detect filler words with a word boundary
try {
if (gf->detect_regex != nullptr && strlen(gf->detect_regex) > 0) {
std::regex filler_regex(gf->detect_regex);
if (std::regex_search(text_preproc, filler_regex,
std::regex_constants::match_any)) {
return DETECTION_RESULT_BEEP;
}
// use a regular expression to detect filler words with a word boundary
try {
if (gf->detect_regex != nullptr && strlen(gf->detect_regex) > 0) {
std::regex filler_regex(gf->detect_regex);
if (std::regex_search(text_preproc, filler_regex,
std::regex_constants::match_any)) {
return DETECTION_RESULT_BEEP;
}
} catch (const std::regex_error &e) {
obs_log(LOG_ERROR, "Regex error: %s", e.what());
}
} catch (const std::regex_error &e) {
obs_log(LOG_ERROR, "Regex error: %s", e.what());
}

return DETECTION_RESULT_SPEECH;
Expand Down
Loading