diff --git a/package.json b/package.json index 683327a..9b03327 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "lycoris", "private": true, - "version": "0.9.13", + "version": "0.9.14", "type": "module", "license": "MIT", "engines": { diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index 2a717db..51f3fec 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -2780,7 +2780,7 @@ dependencies = [ [[package]] name = "lycoris" -version = "0.9.13" +version = "0.9.14" dependencies = [ "chrono", "core-graphics 0.23.1", diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 287461f..ee8085d 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lycoris" -version = "0.9.13" +version = "0.9.14" description = "Lycoris is an offline voice memo" authors = ["solaoi"] license = "MIT" @@ -24,7 +24,7 @@ unicode-segmentation = "1.9.0" once_cell = "1.13.1" crossbeam-channel = "0.5.6" chrono = "0.4.22" -hound = "3.4.0" +hound = "3.5.1" rusqlite = "*" samplerate-rs = "0.1.0" # download model and openai request diff --git a/src-tauri/migrations/001.sql b/src-tauri/migrations/001.sql index 3d50421..58bb9f3 100644 --- a/src-tauri/migrations/001.sql +++ b/src-tauri/migrations/001.sql @@ -30,6 +30,8 @@ VALUES("transcriptionAccuracy", "off"); INSERT INTO settings(setting_name, setting_status) VALUES("settingKeyOpenai", ""); INSERT INTO settings(setting_name, setting_status) +VALUES("settingKeyAmivoice", ""); +INSERT INTO settings(setting_name, setting_status) VALUES("settingLanguage", "日本語"); INSERT INTO settings(setting_name, setting_status) VALUES("settingTemplate", ""); @@ -44,6 +46,8 @@ VALUES("settingResource", ""); INSERT INTO settings(setting_name, setting_status) VALUES("settingModel", "gpt-3.5-turbo"); INSERT INTO settings(setting_name, setting_status) +VALUES("settingAmiVoiceModel", "general"); +INSERT INTO settings(setting_name, setting_status) VALUES("settingAILanguage", "None"); INSERT INTO settings(setting_name, setting_status) VALUES( diff --git a/src-tauri/src/main.rs b/src-tauri/src/main.rs index 9fa75c9..5fa36ff 100644 --- a/src-tauri/src/main.rs +++ b/src-tauri/src/main.rs @@ -35,6 +35,7 @@ use module::{ screenshot::{self, AppWindow}, transcription::{TraceCompletion, Transcription}, transcription_online::TranscriptionOnline, + transcription_amivoice::TranscriptionAmivoice, }; struct RecordState(Arc>>>); @@ -189,6 +190,12 @@ fn start_trace_command( note_id, ); transcription_online.start(stop_convert_rx, true); + } else if transcription_accuracy.starts_with("online-amivoice") { + let mut transcription_amivoice = TranscriptionAmivoice::new( + window.app_handle(), + note_id, + ); + transcription_amivoice.start(stop_convert_rx, true); } else if transcription_accuracy.starts_with("online-chat") { let mut chat_online = ChatOnline::new(window.app_handle(), speaker_language, note_id); chat_online.start(stop_convert_rx, true); diff --git a/src-tauri/src/module/mod.rs b/src-tauri/src/module/mod.rs index 11a2804..9a153f9 100644 --- a/src-tauri/src/module/mod.rs +++ b/src-tauri/src/module/mod.rs @@ -11,6 +11,7 @@ pub mod record_desktop; mod sqlite; mod transcriber; pub mod transcription; +pub mod transcription_amivoice; pub mod transcription_online; mod writer; pub mod screenshot; \ No newline at end of file diff --git a/src-tauri/src/module/record.rs b/src-tauri/src/module/record.rs index e066756..cc93451 100644 --- a/src-tauri/src/module/record.rs +++ b/src-tauri/src/module/record.rs @@ -23,8 +23,8 @@ use crossbeam_channel::{unbounded, Receiver}; use tauri::{api::path::data_dir, AppHandle, Manager}; use super::{ - chat_online, recognizer::MyRecognizer, sqlite::Sqlite, transcription, transcription_online, - writer::Writer, + chat_online, recognizer::MyRecognizer, sqlite::Sqlite, transcription, transcription_amivoice, + transcription_online, writer::Writer, }; pub struct Record { @@ -198,6 +198,16 @@ impl Record { if let Some(singleton) = lock.as_mut() { singleton.start(stop_convert_rx_clone, false); } + } else if transcription_accuracy_clone.starts_with("online-amivoice") { + transcription_amivoice::initialize_transcription_amivoice( + app_handle_clone, + note_id, + ); + let mut lock = + transcription_amivoice::SINGLETON_INSTANCE.lock().unwrap(); + if let Some(singleton) = lock.as_mut() { + singleton.start(stop_convert_rx_clone, false); + } } else if transcription_accuracy_clone.starts_with("online-chat") { chat_online::initialize_chat_online( app_handle_clone, @@ -245,6 +255,7 @@ impl Record { stop_convert_tx.send(()).unwrap(); transcription::drop_transcription(); transcription_online::drop_transcription_online(); + transcription_amivoice::drop_transcription_amivoice(); chat_online::drop_chat_online(); } else { drop(stop_convert_tx) diff --git a/src-tauri/src/module/record_desktop.rs b/src-tauri/src/module/record_desktop.rs index 81ea45a..dc4bbea 100644 --- a/src-tauri/src/module/record_desktop.rs +++ b/src-tauri/src/module/record_desktop.rs @@ -38,8 +38,8 @@ use screencapturekit::{ use vosk::Recognizer; use super::{ - chat_online, recognizer::MyRecognizer, sqlite::Sqlite, transcription, transcription_online, - writer::Writer, + chat_online, recognizer::MyRecognizer, sqlite::Sqlite, transcription, transcription_amivoice, + transcription_online, writer::Writer, }; pub struct RecordDesktop { @@ -230,6 +230,16 @@ impl RecordDesktop { if let Some(singleton) = lock.as_mut() { singleton.start(stop_convert_rx_clone, false); } + } else if transcription_accuracy_clone.starts_with("online-amivoice") { + transcription_amivoice::initialize_transcription_amivoice( + app_handle_clone, + note_id, + ); + let mut lock = + transcription_amivoice::SINGLETON_INSTANCE.lock().unwrap(); + if let Some(singleton) = lock.as_mut() { + singleton.start(stop_convert_rx_clone, false); + } } else if transcription_accuracy_clone.starts_with("online-chat") { chat_online::initialize_chat_online( app_handle_clone, @@ -281,6 +291,7 @@ impl RecordDesktop { stop_convert_tx.send(()).unwrap(); transcription::drop_transcription(); transcription_online::drop_transcription_online(); + transcription_amivoice::drop_transcription_amivoice(); chat_online::drop_chat_online(); } else { drop(stop_convert_tx) diff --git a/src-tauri/src/module/sqlite.rs b/src-tauri/src/module/sqlite.rs index 788bdcc..7e5a8be 100644 --- a/src-tauri/src/module/sqlite.rs +++ b/src-tauri/src/module/sqlite.rs @@ -84,6 +84,14 @@ impl Sqlite { ); } + pub fn select_amivoice_token(&self) -> Result { + return self.conn.query_row( + "SELECT setting_status FROM settings WHERE setting_name = \"settingKeyAmivoice\"", + params![], + |row| Ok(row.get_unwrap(0)), + ); + } + pub fn select_ai_language(&self) -> Result { return self.conn.query_row( "SELECT setting_status FROM settings WHERE setting_name = \"settingAILanguage\"", @@ -100,6 +108,14 @@ impl Sqlite { ); } + pub fn select_amivoice_model(&self) -> Result { + return self.conn.query_row( + "SELECT setting_status FROM settings WHERE setting_name = \"settingAmiVoiceModel\"", + params![], + |row| Ok(row.get_unwrap(0)), + ); + } + pub fn select_ai_resource(&self) -> Result { return self.conn.query_row( "SELECT setting_status FROM settings WHERE setting_name = \"settingResource\"", diff --git a/src-tauri/src/module/transcription_amivoice.rs b/src-tauri/src/module/transcription_amivoice.rs new file mode 100644 index 0000000..d2524f6 --- /dev/null +++ b/src-tauri/src/module/transcription_amivoice.rs @@ -0,0 +1,186 @@ +use hound::{SampleFormat, WavReader, WavSpec, WavWriter}; +use tokio::{fs::File, io::AsyncReadExt}; + +use super::sqlite::Sqlite; + +use crossbeam_channel::Receiver; + +use reqwest::{multipart, Client}; +use serde_json::Value; +use std::io::Cursor; +use std::sync::Mutex; +use tauri::{AppHandle, Manager}; + +#[derive(Debug, Clone, serde::Serialize)] +pub struct TraceCompletion {} + +pub struct TranscriptionAmivoice { + app_handle: AppHandle, + sqlite: Sqlite, + note_id: u64, + token: String, + model: String, +} + +impl TranscriptionAmivoice { + pub fn new(app_handle: AppHandle, note_id: u64) -> Self { + let sqlite = Sqlite::new(); + let token = sqlite.select_amivoice_token().unwrap(); + let model = sqlite.select_amivoice_model().unwrap(); + Self { + app_handle, + sqlite, + note_id, + token, + model, + } + } + + pub fn start(&mut self, stop_convert_rx: Receiver<()>, is_continuous: bool) { + while Self::convert(self).is_ok() { + if is_continuous { + let vosk_speech = self.sqlite.select_vosk(self.note_id); + if vosk_speech.is_err() { + self.app_handle + .clone() + .emit_all("traceCompletion", TraceCompletion {}) + .unwrap(); + break; + } + } + if stop_convert_rx.try_recv().is_ok() { + let vosk_speech = self.sqlite.select_vosk(self.note_id); + if vosk_speech.is_err() { + self.app_handle + .clone() + .emit_all("traceCompletion", TraceCompletion {}) + .unwrap(); + } else { + self.app_handle + .clone() + .emit_all("traceUnCompletion", TraceCompletion {}) + .unwrap(); + } + break; + } + } + } + + #[tokio::main] + async fn request( + file_path: String, + token: String, + model: String, + ) -> Result> { + let url = "https://acp-api.amivoice.com/v1/nolog/recognize"; + let client = Client::new(); + + let mut file = File::open(file_path).await?; + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer).await?; + + let cursor = Cursor::new(buffer); + let mut reader = WavReader::new(cursor)?; + let spec = WavSpec { + channels: 1, + sample_rate: 48000, + bits_per_sample: 16, + sample_format: SampleFormat::Int, + }; + let mut converted_buffer = Vec::new(); + { + let mut cursor = Cursor::new(&mut converted_buffer); + let mut writer = WavWriter::new(&mut cursor, spec)?; + + match reader.spec().sample_format { + SampleFormat::Int => { + for sample in reader.samples::() { + match sample { + Ok(sample) => { + let scaled_sample = (sample >> 16) as i16; + writer.write_sample(scaled_sample)?; + } + Err(e) => { + eprintln!("Error reading sample: {:?}", e); + } + } + } + } + SampleFormat::Float => { + for sample in reader.samples::() { + match sample { + Ok(sample) => { + let scaled_sample = (sample * i16::MAX as f32) + .clamp(i16::MIN as f32, i16::MAX as f32) + as i16; + writer.write_sample(scaled_sample)?; + } + Err(e) => { + eprintln!("Error reading sample: {:?}", e); + } + } + } + } + } + writer.finalize()?; + } + + let part_file = multipart::Part::bytes(converted_buffer) + .file_name("test.wav") + .mime_str("audio/wav")?; + let part_model = multipart::Part::text(format!("grammarFileNames=-a-{}", model)); + let part_token = multipart::Part::text(token.clone()); + + let form = multipart::Form::new() + .part("u", part_token) + .part("d", part_model) + .part("a", part_file); + + let response = client.post(url).multipart(form).send().await?; + + println!("Status: {}", response.status()); + let json_response: Value = response.json().await?; + println!("Response: {:?}", json_response); + let response_text = json_response["results"][0]["text"] + .as_str() + .unwrap_or("text field not found"); + + Ok(response_text.to_string()) + } + + fn convert(&mut self) -> Result<(), rusqlite::Error> { + let vosk_speech = self.sqlite.select_vosk(self.note_id); + return vosk_speech.and_then(|speech| { + let result = Self::request(speech.wav, self.token.clone(), self.model.clone()); + + if result.is_ok() { + let updated = self + .sqlite + .update_model_vosk_to_whisper(speech.id, result.unwrap()); + + self.app_handle + .clone() + .emit_all("finalTextConverted", updated.unwrap()) + .unwrap(); + } else { + println!("amivoice api is temporally failed, so skipping...") + } + + Ok(()) + }); + } +} + +pub static SINGLETON_INSTANCE: Mutex> = Mutex::new(None); + +pub fn initialize_transcription_amivoice(app_handle: AppHandle, note_id: u64) { + let mut singleton = SINGLETON_INSTANCE.lock().unwrap(); + if singleton.is_none() { + *singleton = Some(TranscriptionAmivoice::new(app_handle, note_id)); + } +} + +pub fn drop_transcription_amivoice() { + let mut singleton = SINGLETON_INSTANCE.lock().unwrap(); + *singleton = None; +} diff --git a/src-tauri/tauri.conf.json b/src-tauri/tauri.conf.json index f1765cb..dff0c38 100644 --- a/src-tauri/tauri.conf.json +++ b/src-tauri/tauri.conf.json @@ -8,7 +8,7 @@ }, "package": { "productName": "Lycoris", - "version": "0.9.13" + "version": "0.9.14" }, "tauri": { "allowlist": { diff --git a/src/components/molecules/SettingAmiVoiceModel.tsx b/src/components/molecules/SettingAmiVoiceModel.tsx new file mode 100644 index 0000000..eabd3a5 --- /dev/null +++ b/src/components/molecules/SettingAmiVoiceModel.tsx @@ -0,0 +1,42 @@ +import { ChangeEvent } from "react"; +import { useRecoilState } from 'recoil'; +import { settingKeyState } from "../../store/atoms/settingKeyState"; + +const SettingAmiVoiceModel = (): JSX.Element => { + const settingModels = ["general", "medgeneral", "bizmrreport", "bizfinance", "bizinsurance"] as const; + const modelNameMapper = (model: typeof settingModels[number])=>{ + switch(model){ + case "general": + return "汎用" + case "medgeneral": + return "医療" + case "bizmrreport": + return "製薬" + case "bizfinance": + return "金融" + case "bizinsurance": + return "保険" + } + } + const [settingKey, setSettingKey] = useRecoilState(settingKeyState("settingAmiVoiceModel")) + + const change = (e: ChangeEvent) => { + const settingKey = e.target.value + setSettingKey(settingKey) + } + + return ( +
+

利用モデル

+
+ +
+
+ ) +} + +export { SettingAmiVoiceModel } diff --git a/src/components/molecules/SpeakerLanguage.tsx b/src/components/molecules/SpeakerLanguage.tsx index 059b0f8..86f52f1 100644 --- a/src/components/molecules/SpeakerLanguage.tsx +++ b/src/components/molecules/SpeakerLanguage.tsx @@ -1,13 +1,15 @@ import { ChangeEvent, useRef, useState } from "react"; -import { useRecoilState, useRecoilValue } from 'recoil'; +import { useRecoilState, useRecoilValue, useSetRecoilState } from 'recoil'; import { speakerLanguageState } from "../../store/atoms/speakerLanguageState"; import { modelVoskDownloadedState } from "../../store/atoms/modelVoskDownloadedState"; import { recordState } from "../../store/atoms/recordState"; import { tracingState } from "../../store/atoms/tracingState"; +import { transcriptionAccuracyState } from "../../store/atoms/transcriptionAccuracyState"; const SpeakerLanguage = (): JSX.Element => { const downloadedModels = useRecoilValue(modelVoskDownloadedState) const [speakerLanguage, setSpeakerLanguage] = useRecoilState(speakerLanguageState) + const setTranscriptionAccuracy = useSetRecoilState(transcriptionAccuracyState) const isRecording = useRecoilValue(recordState) const isTracing = useRecoilValue(tracingState); const dropdownRef = useRef(null) @@ -17,6 +19,7 @@ const SpeakerLanguage = (): JSX.Element => { if (e.target.checked) { const speakerLanguage = e.target.value setSpeakerLanguage(speakerLanguage) + setTranscriptionAccuracy("off") } } diff --git a/src/components/molecules/TranscriptionAccuracy.tsx b/src/components/molecules/TranscriptionAccuracy.tsx index 7d7c953..62e4360 100644 --- a/src/components/molecules/TranscriptionAccuracy.tsx +++ b/src/components/molecules/TranscriptionAccuracy.tsx @@ -14,6 +14,7 @@ const TranscriptionAccuracy = (): JSX.Element => { const isTracing = useRecoilValue(tracingState); const speakerLanguage = useRecoilValue(speakerLanguageState) const settingKeyOpenai = useRecoilValue(settingKeyState("settingKeyOpenai")) + const settingKeyAmivoice = useRecoilValue(settingKeyState("settingKeyAmivoice")) const dropdownRef = useRef(null) @@ -46,9 +47,13 @@ const TranscriptionAccuracy = (): JSX.Element => { case "large-distil.ja": return "文字起こし:日"; case "online-transcript": - return "文字起こし:オンライン"; + return "文字起こし:WhisperAPI"; + case "online-transcript-to-en": + return "翻訳(英):WhisperAPI"; + case "online-amivoice": + return "文字起こし:AmiVoiceAPI"; case "online-chat": - return "AI:オンライン"; + return "AI:ChatGPT"; case "small-translate-to-en": return "翻訳(英):低"; case "medium-translate-to-en": @@ -62,7 +67,7 @@ const TranscriptionAccuracy = (): JSX.Element => { return (
- {((isRecording || isTracing) || downloadedModels.length === 0) ?