Skip to content

Commit

Permalink
Merge pull request #108 from chenqianhe/main
Browse files Browse the repository at this point in the history
Add faster-whisper
  • Loading branch information
chenqianhe authored Oct 25, 2023
2 parents e898bcc + 64b4063 commit 638f6d8
Show file tree
Hide file tree
Showing 10 changed files with 226 additions and 31 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/ci.yml → .github/workflows/base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install .
pip install pytest black
pip install pytest
- name: Run Test
run: pytest test/
- name: Run Lint
run: black . --check
44 changes: 44 additions & 0 deletions .github/workflows/faster-whisper
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# CI workflow: install the package with the "faster" extra and run the test
# suite with WHISPER_MODE=faster on every pull request and on pushes to main.
# NOTE(review): the file path shown for this workflow has no .yml/.yaml
# extension, so GitHub will not pick it up as a workflow — presumably
# intentional while the faster-whisper run is disabled; confirm.
name: Test Faster Whisper

on:
  pull_request:
  push:
    branches:
      - main

jobs:
  lint_and_test:
    runs-on: ${{ matrix.os }}-latest
    strategy:
      matrix:
        python-version: ['3.9', '3.10']
        # macOS on M1 is not supported for now
        os: [ubuntu, windows, macos]
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      # Export a hash of the interpreter version + executable path (PY) and
      # pip's cache directory (PIP_CACHE) as step outputs for the cache step.
      - name: Set Variables
        id: set_variables
        shell: bash
        run: |
          echo "PY=$(python -c 'import hashlib, sys;print(hashlib.sha256(sys.version.encode()+sys.executable.encode()).hexdigest())')" >> $GITHUB_OUTPUT
          echo "PIP_CACHE=$(pip cache dir)" >> $GITHUB_OUTPUT
      - name: Cache PIP
        uses: actions/cache@v3
        with:
          path: ${{ steps.set_variables.outputs.PIP_CACHE }}
          key: ${{ runner.os }}-pip-${{ steps.set_variables.outputs.PY }}

      - name: Setup ffmpeg for different platforms
        uses: FedericoCarboni/setup-ffmpeg@master

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install ".[faster]"
          pip install pytest
      - name: Run Test
        # Pass the variable through `env` instead of a `VAR=value cmd` prefix:
        # the prefix form is POSIX-shell syntax and fails on the windows
        # runner, where `run` steps default to PowerShell.
        env:
          WHISPER_MODE: faster
        run: pytest test/
40 changes: 40 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Lint workflow: checks code formatting with black on every pull request
# and on pushes to the main branch.
name: Test Lint

on:
pull_request:
push:
branches:
- main

jobs:
lint:
runs-on: ${{ matrix.os }}-latest
strategy:
# Single-entry matrix: lint runs once, on ubuntu with Python 3.9.
matrix:
python-version: ['3.9']
os: [ubuntu]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
# Export two step outputs for the cache step below:
#   PY        - sha256 of the interpreter version string + executable path
#   PIP_CACHE - the directory reported by `pip cache dir`
- name: Set Variables
id: set_variables
shell: bash
run: |
echo "PY=$(python -c 'import hashlib, sys;print(hashlib.sha256(sys.version.encode()+sys.executable.encode()).hexdigest())')" >> $GITHUB_OUTPUT
echo "PIP_CACHE=$(pip cache dir)" >> $GITHUB_OUTPUT
# Cache pip's cache directory, keyed by runner OS and the interpreter hash.
- name: Cache PIP
uses: actions/cache@v3
with:
path: ${{ steps.set_variables.outputs.PIP_CACHE }}
key: ${{ runner.os }}-pip-${{ steps.set_variables.outputs.PY }}

# Only black is installed here; the package itself is not needed for linting.
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install black
# `--check` reports files that would be reformatted without rewriting them.
- name: Run Lint
run: black . --check
27 changes: 27 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,33 @@

AutoCut 对你的视频自动生成字幕。然后你选择需要保留的句子,AutoCut 将对你视频中对应的片段裁切并保存。你无需使用视频编辑软件,只需要编辑文本文件即可完成剪切。

**2023.10.14更新**:支持 faster-whisper 和指定依赖(但由于 Action 限制暂时移除了 faster-whisper 的测试运行)

```shell
# for whisper only
pip install .

# for whisper and faster-whisper
pip install '.[faster]'

# for whisper and the OpenAI API client
pip install '.[openai]'

# for all
pip install '.[all]'
```

```shell
# using faster-whisper
autocut -t xxx --whisper-mode=faster
```

```shell
# using openai api
export OPENAI_API_KEY=sk-xxx
autocut -t xxx --whisper-mode=openai --openai-rpm=3
```

**2023.8.13更新**:支持调用 Openai Whisper API
```shell
export OPENAI_API_KEY=sk-xxx
Expand Down
2 changes: 1 addition & 1 deletion autocut/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.0"
__version__ = "0.2.0"
6 changes: 6 additions & 0 deletions autocut/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ def __init__(self, args):
self.args.openai_rpm, self.sampling_rate
)
self.whisper_model.load()
elif self.args.whisper_mode == WhisperMode.FASTER.value:
self.whisper_model = whisper_model.FasterWhisperModel(
self.sampling_rate
)
self.whisper_model.load(self.args.whisper_model, self.args.device)
logging.info(f"Done Init model in {time.time() - tic:.1f} sec")

def run(self):
Expand Down Expand Up @@ -93,6 +98,7 @@ def _transcribe(
audio, speech_array_indices, self.args.lang, self.args.prompt
)
if self.args.whisper_mode == WhisperMode.WHISPER.value
or self.args.whisper_mode == WhisperMode.FASTER.value
else self.whisper_model.transcribe(
input, audio, speech_array_indices, self.args.lang, self.args.prompt
)
Expand Down
1 change: 1 addition & 0 deletions autocut/type.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ def get_values():
class WhisperMode(Enum):
WHISPER = "whisper"
OPENAI = "openai"
FASTER = "faster"

@staticmethod
def get_values():
Expand Down
87 changes: 86 additions & 1 deletion autocut/whisper_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,12 @@ def __init__(self, rpm: int, sample_rate=16000):
raise Exception("OPENAI_API_KEY is not set")

def load(self, model_name: Literal["whisper-1"] = "whisper-1"):
import openai
try:
import openai
except ImportError:
raise Exception(
"Please use openai mode(pip install '.[openai]') or all mode(pip install '.[all]')"
)
from functools import partial

self.whisper_model = partial(openai.Audio.transcribe, model=model_name)
Expand Down Expand Up @@ -303,3 +308,83 @@ def gen_srt(self, transcribe_results: List[srt.Subtitle]):
)
subs.append(subtitle)
return subs


class FasterWhisperModel(AbstractWhisperModel):
    """Whisper backend that runs transcription through ``faster-whisper``.

    The ``faster_whisper`` package is imported lazily in :meth:`load`, so it
    is only required when this backend is actually selected.
    """

    def __init__(self, sample_rate=16000):
        """Register the backend under the name ``"faster-whisper"``.

        Args:
            sample_rate: Sample rate in Hz, forwarded to the base class.
                ``gen_srt`` reads it back as ``self.sample_rate``
                (presumably stored by the base ``__init__`` — confirm).
        """
        super().__init__("faster-whisper", sample_rate)
        self.device = None

    def load(
        self,
        model_name: Literal[
            "tiny", "base", "small", "medium", "large", "large-v2"
        ] = "small",
        device: Union[Literal["cpu", "cuda"], None] = None,
    ):
        """Create the underlying ``faster_whisper.WhisperModel``.

        Args:
            model_name: Model size passed to ``WhisperModel``.
            device: Device to run on; any falsy value falls back to ``"cpu"``.

        Raises:
            Exception: If ``faster_whisper`` is not installed. The original
                ``ImportError`` is attached as the cause.
        """
        try:
            from faster_whisper import WhisperModel
        except ImportError as err:
            raise Exception(
                "Please use faster mode(pip install '.[faster]') or all mode(pip install '.[all]')"
            ) from err

        self.device = device if device else "cpu"
        self.whisper_model = WhisperModel(model_name, self.device)

    def _transcribe(self):
        """Unused by this backend; always raises ``NotImplementedError``."""
        # NotImplementedError is still an Exception subclass, so callers that
        # catch Exception keep working.
        raise NotImplementedError("Not implemented")

    def transcribe(
        self,
        audio: np.ndarray,
        speech_array_indices: List[SPEECH_ARRAY_INDEX],
        lang: LANG,
        prompt: str,
    ):
        """Transcribe each speech span of ``audio`` separately.

        Args:
            audio: Audio samples; sliced with the integer ``start``/``end``
                of every span.
            speech_array_indices: Spans to transcribe, each providing
                ``"start"`` and ``"end"`` sample indices.
            lang: Language passed to the model as ``language``.
            prompt: Text passed to the model as ``initial_prompt``.

        Returns:
            A list with one dict per span, holding ``"origin_timestamp"``
            (the span itself), ``"segments"`` (the materialized list of
            segments) and ``"info"`` (the info object returned by the model).
        """
        res = []
        for seg in speech_array_indices:
            segments, info = self.whisper_model.transcribe(
                audio[int(seg["start"]) : int(seg["end"])],
                task="transcribe",
                language=lang,
                initial_prompt=prompt,
                vad_filter=False,
            )
            segments = list(segments)  # The transcription will actually run here.
            res.append({"origin_timestamp": seg, "segments": segments, "info": info})
        return res

    def gen_srt(self, transcribe_results):
        """Convert the output of :meth:`transcribe` into ``srt.Subtitle`` items.

        Segment times are relative to their span, so each one is shifted by
        the span's start (converted from samples to seconds) and its end is
        clamped to the span's end. Segments whose shifted start lies past the
        clamped end are dropped. A ``"< No Speech >"`` subtitle is inserted
        whenever more than one second separates a segment from the previous
        subtitle's end.

        Args:
            transcribe_results: List of dicts as returned by :meth:`transcribe`.

        Returns:
            A list of ``srt.Subtitle`` objects, all created with ``index=0``.
        """
        subs = []

        def _add_sub(start, end, text):
            subs.append(
                srt.Subtitle(
                    index=0,
                    start=datetime.timedelta(seconds=start),
                    end=datetime.timedelta(seconds=end),
                    content=cc.convert(text.strip()),
                )
            )

        prev_end = 0
        for r in transcribe_results:
            origin = r["origin_timestamp"]
            # Span boundaries in seconds; invariant for every segment in r.
            offset = origin["start"] / self.sample_rate
            limit = origin["end"] / self.sample_rate
            for seg in r["segments"]:
                start = seg.start + offset
                end = min(seg.end + offset, limit)
                if start > end:
                    continue
                # mark any empty segment that is not very short
                if start > prev_end + 1.0:
                    _add_sub(prev_end, start, "< No Speech >")
                _add_sub(start, end, seg.text)
                prev_end = end

        return subs
7 changes: 6 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from setuptools import setup, find_packages

requirements = [
"ffmpeg-python",
"moviepy",
"openai",
"openai-whisper",
"opencc-python-reimplemented",
"parameterized",
Expand All @@ -16,6 +16,11 @@
setup(
name="autocut",
install_requires=requirements,
extras_require={
"all": ["openai", "faster-whisper"],
"openai": ["openai"],
"faster": ["faster-whisper"],
},
packages=find_packages(),
entry_points={
"console_scripts": [
Expand Down
39 changes: 14 additions & 25 deletions test/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,29 +43,18 @@


class TestArgs:
def __init__(
self,
encoding="utf-8",
sampling_rate=16000,
bitrate="10m",
lang="zh",
prompt="",
whisper_model="small",
device=None,
vad=False,
force=False,
whisper_mode="whisper",
openai_rpm=3,
):
def __init__(self):
self.inputs = []
self.bitrate = bitrate
self.encoding = encoding
self.sampling_rate = sampling_rate
self.lang = lang
self.prompt = prompt
self.whisper_model = whisper_model
self.device = device
self.vad = vad
self.force = force
self.whisper_mode = whisper_mode
self.openai_rpm = openai_rpm
self.bitrate = "10m"
self.encoding = "utf-8"
self.sampling_rate = 16000
self.lang = "zh"
self.prompt = ""
self.whisper_model = "small"
self.device = None
self.vad = False
self.force = False
self.whisper_mode = (
"faster" if os.environ.get("WHISPER_MODE") == "faster" else "whisper"
)
self.openai_rpm = 3

0 comments on commit 638f6d8

Please sign in to comment.