Commit
fix compatibility
- add a requirements file for OS other than Windows
- import the default PyAudio on OS other than Windows
- update readme
Dadangdut33 committed Dec 14, 2022
1 parent 9aed834 commit 874d517
Showing 5 changed files with 63 additions and 28 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -5,6 +5,7 @@ __pycache__/
.venv
env/
venv/
venvcpu/
ENV/
env.bak/
venv.bak/
6 changes: 4 additions & 2 deletions devSetup.py
@@ -3,13 +3,15 @@
import time

pip = "pip"
req = "requirements"
# check if not windows
if platform.system() != "Windows":
pip = "pip3"
req = "requirements_notwindows"


def install_requirements():
os.system(f"{pip} install -r requirements.txt")
os.system(f"{pip} install -r {req}.txt")


def uninstall_torch():
@@ -36,7 +38,7 @@ def install_torch():
timeStart = time.time()
# install requirements
print("-" * 100)
print("Installing from requirements.txt")
print(f"Installing from {req}.txt")
install_requirements()

if use_gpu.lower() == "y":
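The requirements-file selection in the devSetup.py change above can be sketched as a standalone helper. The function name and the explicit `system` parameter are additions for illustration and testability — devSetup.py itself reads `platform.system()` directly at module level:

```python
def install_command(system: str) -> str:
    """Build the pip invocation devSetup.py runs: Windows keeps the
    default requirements file, every other OS uses pip3 and the
    requirements_notwindows variant added by this commit."""
    if system == "Windows":
        pip, req = "pip", "requirements"
    else:
        pip, req = "pip3", "requirements_notwindows"
    return f"{pip} install -r {req}.txt"

print(install_command("Linux"))  # pip3 install -r requirements_notwindows.txt
```

Keeping the decision in one place means the later "Installing from {req}.txt" log line and the actual install always agree.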
51 changes: 33 additions & 18 deletions readme.md
@@ -19,42 +19,55 @@ A speech transcription and translation application using whisper AI model.
# Features

- Speech to text
- Translation of transcribed text
- Translation of transcribed text (Speech to translated text)
- Input from speaker, mic, and file

<details open>
<summary>Preview</summary>
<img src="https://raw.githubusercontent.com/Dadangdut33/Speech-Translate/master/assets/1.png" width="700" alt="Speech Translate Looks">
<img src="https://raw.githubusercontent.com/Dadangdut33/Speech-Translate/master/assets/2.png" width="700" alt="Speech Translate Looks">
<img src="https://raw.githubusercontent.com/Dadangdut33/Speech-Translate/master/assets/3.png" width="700" alt="Speech Translate Looks">
</details>
- <details open>
<summary>Preview</summary>
<p align="center">
<img src="https://raw.githubusercontent.com/Dadangdut33/Speech-Translate/master/assets/1.png" width="700" alt="Speech Translate Looks">
<img src="https://raw.githubusercontent.com/Dadangdut33/Speech-Translate/master/assets/2.png" width="700" alt="Speech Translate Looks">
<img src="https://raw.githubusercontent.com/Dadangdut33/Speech-Translate/master/assets/3.png" width="700" alt="Speech Translate Looks">
</p>
</details>

# User Requirements

Whisper uses vram to process the audio, so it is recommended to have a CUDA compatible GPU. If there is no compatible GPU, the application will use the CPU to process the audio (This might make it slower). For each model requirement you can check directly at the [whisper repository](https://github.com/openai/whisper) or you can hover over the model selection in the app (there will be a tooltip about the model info).
Whisper uses VRAM/GPU to process the audio, so a CUDA-compatible GPU is recommended. If there is no compatible GPU, the application will use the CPU instead, which may be slower. For each model's requirements, check the [whisper repository](https://github.com/openai/whisper) directly, or hover over the model selection in the app for a tooltip with the model info.

# Download & Installation

1. Download the latest release [here](https://github.com/Dadangdut33/Speech-Translate/releases/latest)
2. Install
3. Run the program

# Development
# General Usage

1. Select a model
2. Select mode and language
3. Click the record button
4. Stop the recording
5. (Optional) export the result to a file

# User Settings

You can change the settings by clicking the settings button on the menu bar of the app. Alternatively, press F2 to open the settings window, or edit the settings file manually at `./setting/setting.json`.
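The commit does not show the schema of `./setting/setting.json`, so the loader below is a hypothetical sketch, not the project's actual code; only the path comes from the readme. It illustrates the defensive pattern an app needs when users may hand-edit the file:

```python
import json
from pathlib import Path

def load_settings(path: str = "setting/setting.json") -> dict:
    """Read the app's JSON settings file; fall back to an empty dict
    (i.e. app defaults) if the file is missing or not valid JSON,
    so a botched hand edit cannot crash the app at startup."""
    try:
        return json.loads(Path(path).read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError):
        return {}
```

With a loader like this, editing the file by hand and restarting the app picks up the change, just like using the F2 settings window.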

---

<h1 align="center">- Development -</h1>

> **Warning** \
> As of right now (4th of November 2022), PyTorch does not appear to be compatible with Python 3.11, so you cannot use Python 3.11. I tried 3.11, but it did not work, so I rolled back to Python 3.10.8.
## Setup

> **Note** \
> It is recommended to create a virtual environment, but it is not required.
> It is recommended to create a virtual environment, but it is not required. For operating systems other than Windows, install the packages from [requirements_notwindows.txt](./requirements_notwindows.txt)
1. Create your virtual environment by running `python -m venv venv`
2. Activate your virtual environment by running `source venv/bin/activate`
3. Install the dependencies by running [`devSetup.py`](./devSetup.py) located in the **root directory**, or install the packages yourself with `pip install -r requirements.txt`
4. Run the script by typing `python Main.py`

**You must be at speech_translate directory when developing and compiling/building the project to avoid error**
4. Go to the root directory and run the script with `python Main.py`

## Using GPU for Whisper

@@ -75,19 +88,21 @@ You can use [pyinstaller](https://pyinstaller.org/) or [auto-py-to-exe](https://

This will produce an executable file in the `dist` directory.

**Note: Replace the venv with your venv name**
**Note: Replace the venv with your actual venv path**

- If you use **auto-py-to-exe** you can load the [build.json file](./build.json) located in root directory. **You will need to replace the dot (.) with the actual path of the project**. This will produce an exceutable file in the `output` directory.
- If you use **auto-py-to-exe**, you can load the [build.json file](./build.json) located in the root directory. **You will need to replace the dot (.) in the build.json file with the actual path of the project.** This will produce an executable file in the `output` directory.

You should be able to compile it on other platforms (macOS/Linux), but I have only tested it on Windows.

## Compatibility

This project should be compatible with Windows (preferrably windows 10 or later) and Linux but I haven't tested it on Mac.
This project should be compatible with Windows (preferably Windows 10 or later) and other platforms, but I have not tested it on platforms other than Windows.

---

# Contributing

Feel free to contribute to this project by forking the repository, making your changes, and submitting a pull request. You can also contribute by creating an issue if you find a bug or have a feature request.
Feel free to contribute to this project by forking the repository, making your changes, and submitting a pull request. You can also contribute by creating an issue if you find a bug or have a feature request. Also, feel free to give this project a star if you like it.

# License

10 changes: 10 additions & 0 deletions requirements_notwindows.txt
@@ -0,0 +1,10 @@
deep_translator==1.9.1
notify-py==0.3.3
Pillow==9.3.0
pystray==0.19.4
PyAudio==0.2.12
requests==2.28.1
scipy==1.9.3
sounddevice==0.4.5
soundfile==0.11.0
git+https://github.com/openai/whisper.git
23 changes: 15 additions & 8 deletions speech_translate/utils/Record.py
@@ -1,4 +1,5 @@
import os
import platform
import threading
import ast
import shlex
@@ -10,7 +11,13 @@
import whisper
import sounddevice as sd
from scipy.io.wavfile import write
import pyaudiowpatch as pyaudio

if platform.system() == "Windows":
import pyaudiowpatch as pyaudio
else:
import pyaudio # type: ignore


import wave

from speech_translate.Globals import app_icon, app_name, dir_temp, fJson, gClass
@@ -34,7 +41,7 @@ def getOutputDevices():

devices = p.get_device_count()
devices = [p.get_device_info_by_index(i) for i in range(devices)]
devices = [device for device in devices if device["maxOutputChannels"] > 0] # Filter out devices that are not output devices
devices = [device for device in devices if device["maxOutputChannels"] > 0] # type: ignore # Filter out devices that are not output devices
devices = [f"{device['name']}, {sd.query_hostapis(device['hostApi'])['name']} [ID: {device['index']}]" for device in devices] # type: ignore # Map the name

p.terminate()
@@ -48,7 +55,7 @@ def getDefaultOutputDevice():
try:
# Get default WASAPI info
wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
default_device = p.get_device_info_by_index(wasapi_info["defaultOutputDevice"])
default_device = p.get_device_info_by_index(wasapi_info["defaultOutputDevice"]) # type: ignore
sucess = True
except OSError:
print("Looks like WASAPI is not available on the system.")
@@ -352,7 +359,7 @@ def record_from_pc(audio_name: str, device: str, seconds=5) -> None:
device_detail = p.get_device_info_by_index(int(device_id)) # type: ignore

if not device_detail["isLoopbackDevice"]:
for loopback in p.get_loopback_device_info_generator():
for loopback in p.get_loopback_device_info_generator(): # type: ignore
"""
Try to find loopback device with same name(and [Loopback suffix]).
Unfortunately, this is the most adequate way at the moment.
@@ -367,7 +374,7 @@ def record_from_pc(audio_name: str, device: str, seconds=5) -> None:
logger.debug(f"Recording from: ({device_detail['index']}){device_detail['name']}")

wave_file = wave.open(audio_name, "wb")
wave_file.setnchannels(device_detail["maxInputChannels"])
wave_file.setnchannels(device_detail["maxInputChannels"]) # type: ignore
wave_file.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
wave_file.setframerate(int(device_detail["defaultSampleRate"]))

@@ -380,13 +387,13 @@ def callback(in_data, frame_count, time_info, status):

with p.open(
format=pyaudio.paInt16,
channels=device_detail["maxInputChannels"],
channels=device_detail["maxInputChannels"], # type: ignore
rate=int(device_detail["defaultSampleRate"]),
frames_per_buffer=pyaudio.get_sample_size(pyaudio.paInt16),
input=True,
input_device_index=device_detail["index"],
input_device_index=device_detail["index"], # type: ignore
stream_callback=callback,
) as stream:
) as stream: # type: ignore
"""
Open a PA stream via context manager.
After leaving the context, everything will
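The conditional import this commit adds to Record.py (pyaudiowpatch on Windows, stock PyAudio elsewhere) can be expressed without actually importing either backend, which is handy when neither package is installed. The helper below is an illustration of the decision only, not part of the project:

```python
import platform
from typing import Optional

def pyaudio_backend(system: Optional[str] = None) -> str:
    """Name of the PyAudio module Record.py would import: the
    WASAPI-patched fork on Windows (needed for loopback/speaker
    capture), stock PyAudio on every other OS."""
    system = system or platform.system()
    return "pyaudiowpatch" if system == "Windows" else "pyaudio"

print(pyaudio_backend("Windows"))  # pyaudiowpatch
```

Because pyaudiowpatch mirrors PyAudio's API and only adds the WASAPI loopback extensions, the rest of Record.py can use the imported module under the single name `pyaudio` on both branches.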
