-
-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
cli.py
137 lines (114 loc) · 5.96 KB
/
cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# 废弃,请使用 api.py
# Please use api.py
import argparse
import os
import re
import subprocess
import sys
import time
from pathlib import Path
from urllib.parse import parse_qs, urlparse
import zhconv
def download_file(url):
"""Downloads a file from a URL and saves it to /content."""
if sys.platform!='linux':
raise Exception('仅在 Google Colab 上可下载文件,其他系统请传递文件绝对路径')
parsed_url = urlparse(url)
filename = None
filepath = None
Path('/content').mkdir(exist_ok=True)
# Case 1: Filename in URL path
if parsed_url.path:
potential_filename = os.path.basename(parsed_url.path)
if '.' in potential_filename:
filename = re.sub(r'[^\w\-_\.]', '', potential_filename) # Sanitize filename for Linux
filepath = os.path.join('/content', filename)
# Case 2: Filename in query parameters
if not filepath: # if no filename found in path
query_params = parse_qs(parsed_url.query)
video_audio_exts = ['mp4', 'mov', 'mkv', 'mpeg', 'avi', 'wmv', 'ts', 'wav', 'flac', 'mp3', 'm4a', 'wma']
for param_value in query_params.values(): # Check all the parameter's values
for value in param_value: # some parameter may have multiple values, we check all of them
potential_filename_with_ext = None
for ext in video_audio_exts:
if '.' + ext in value :
potential_filename_with_ext = value
break;
if potential_filename_with_ext:
filename = re.sub(r'[^\w\-_\.]', '', potential_filename_with_ext)
filepath = os.path.join('/content', filename)
break # Stop after finding the first valid filename
if filepath and filename:
try:
subprocess.run(['wget', '-O', filepath , url], check=True, capture_output=True) # Suppress output to avoid verbosity
return filepath
except subprocess.CalledProcessError as e:
print(f"Error downloading file: {e.stderr.decode()}") # Decode stderr for printing
return None
else:
print("No valid filename found in URL.")
return None
def speech_to_text(model_name='large-v2',language="auto",prompt=None,audio_file=None,device='cuda',compute_type='float16'):
from videotrans.configure import config
from videotrans.util import tools
from faster_whisper import WhisperModel
if audio_file.startswith('http'):
audio_file=download_file(audio_file)
if not audio_file or not Path(audio_file).exists():
raise Exception(f'未找到 {audio_file} ,请传递文件绝对路径或文件是否存在')
language=None if not language or language=='auto' else language[:2]
model = WhisperModel(
model_name,
device=device,
compute_type=compute_type,
download_root="./models",
local_files_only=False
)
Path(config.TEMP_DIR).mkdir(parents=True, exist_ok=True)
stem=Path(audio_file).stem
shibie_file=config.TEMP_DIR+f'/{time.time()}.wav'
tools.runffmpeg(['-y','-i',audio_file,'-ar','16000','-ac','1',shibie_file])
segments, info = model.transcribe(
shibie_file,
beam_size=5,
best_of=5,
condition_on_previous_text=False,
vad_filter=True,
vad_parameters=dict(
min_speech_duration_ms=500,
max_speech_duration_s= float('inf'),
min_silence_duration_ms=250,
speech_pad_ms=100
),
word_timestamps=True,
language=language,
initial_prompt=prompt if prompt else None
)
raws=[]
for segment in segments:
text=zhconv.convert(segment.text, 'zh-hans') if language=='zh' else segment.text
startraw=tools.ms_to_time_string(ms=segment.words[0].start*1000)
endraw=tools.ms_to_time_string(ms=segment.words[-1].end*1000)
raws.append(f'{len(raws)+1}\n{startraw} --> {endraw}\n{text.strip()}')
output=config.ROOT_DIR+'/../output'
Path(output).mkdir(parents=True, exist_ok=True)
with open(output+f'/{stem}.srt', 'w', encoding='utf-8') as f:
srts="\n\n".join(raws)
print(srts)
f.write(srts)
print(f'\n已保存到 {stem}.srt\n')
if __name__ == '__main__':
parser = argparse.ArgumentParser(prog='视频翻译pyVideoTrans', description='')
parser.add_argument('-m', '--model', default='large-v2', type=str, choices=['tiny','tiny.en','base','base.en','small','small.en','medium', 'medium.en', 'large-v1', 'large-v2', 'large-v3', 'large-v3-turbo'], help='选择使用哪个模型')
parser.add_argument('-l', '--language', default='auto', type=str, choices=['zh', 'en', 'ja','ko','ru','fr','de','es','pt','it','id','hi','hu','ms','kk','cs','nl','sv','bn','he','vi','tr','th','ar','auto'], help='选择音视频发音语言')
parser.add_argument('-f', '--file', default='', type=str, help='填写要识别创建字幕的音频或视频名称,含后缀,文件请上传到cli.py本文件同目录下, 如果名字含空格或特殊符号,请用英文双引号包括起来')
parser.add_argument('-d', '--device', default='auto', type=str,choices=['cpu','cuda','auto'], help='填写要在cpu还是cuda上运行,auto为自动')
parser.add_argument('-c', '--compute_type', default='default', type=str,choices=['default','float16','float32','int8','int8_float16','int8_float32'], help='填写数据类型,最佳为float16,需显卡支持')
parser.add_argument('-p', '--prompt', default=None, type=str, help='设置prompt,用于模型识别')
DEFAULT_ARGS = vars(parser.parse_args([]))
kw=parser.parse_args()
speech_to_text(model_name=kw.model, language=kw.language, prompt=kw.prompt, audio_file=kw.file, device=kw.device, compute_type=kw.compute_type)
"""
## explame
python cli.py -m tiny -f "c:/users/c1/videos/5s.wav"
"""