diff --git a/src/database_interactions.py b/src/database_interactions.py index 41630c03..bbb38f90 100644 --- a/src/database_interactions.py +++ b/src/database_interactions.py @@ -58,13 +58,17 @@ def initialize_vector_model(self, embedding_model_name, config_data): encode_kwargs['batch_size'] = 2 else: batch_size_mapping = { - 'sentence-t5-xxl': 1, - ('instructor-xl', 'sentence-t5-xl'): 2, - 'instructor-large': 3, - ('jina-embedding-l', 'bge-large', 'gte-large', 'roberta-large'): 4, - 'jina-embedding-s': 9, - ('bge-small', 'gte-small'): 10, - ('MiniLM',): 30, + 'instructor-xl': 2, + 'bge-large': 4, + 'instructor-large': 4, + 'gte-large': 4, + 'instructor-base': 8, + 'mpnet': 8, + 'bge-base': 8, + 'gte-base': 8, + 'bge-small': 10, + 'gte-small': 10, + 'MiniLM': 30, } for key, value in batch_size_mapping.items(): diff --git a/src/whisperst2_table.html b/src/whisperst2_table.html new file mode 100644 index 00000000..29affc00 --- /dev/null +++ b/src/whisperst2_table.html @@ -0,0 +1,177 @@ + + +
Model Size | Batch Size | Total Transcription Time (seconds) | Max VRAM Usage (MB) |
---|---|---|---|
Base | 70 | 11.71 | 14538.99 |
Base | 60 | 11.72 | 12103.62 |
Base | 90 | 12.08 | 17946.05 |
Base | 50 | 12.11 | 10327.55 |
Base | 80 | 12.50 | 16415.55 |
Base | 100 | 12.77 | 20454.18 |
Base | 40 | 13.15 | 8404.05 |
Base | 30 | 14.14 | 6618.43 |
Base | 20 | 16.94 | 4565.68 |
Small | 70 | 23.20 | 20739.93 |
Small | 60 | 24.21 | 17747.93 |
Base | 10 | 24.48 | 2713.49 |
Small | 50 | 24.98 | 15029.43 |
Small | 40 | 26.57 | 11999.93 |
Small | 30 | 28.15 | 9438.55 |
Small | 20 | 33.77 | 6662.80 |
Small | 10 | 47.61 | 3824.05 |
Medium | 50 | 54.39 | 20651.55 |
Medium | 40 | 58.67 | 16761.55 |
Medium | 30 | 60.93 | 13173.93 |
Medium | 20 | 72.14 | 9330.93 |
Medium | 10 | 97.33 | 5476.05 |