# tueit_Transkriptor/transcription.py — local (faster-whisper) and remote
# (OpenAI-compatible HTTP) speech-to-text transcription.
import asyncio
import httpx
class TranscriptionEngine:
    """Speech-to-text engine.

    Transcribes either locally via faster-whisper or remotely via an
    OpenAI-compatible ``/v1/audio/transcriptions`` endpoint when a
    ``base_url`` is supplied.
    """

    # Cached faster-whisper model and the (model_name, device) key it was
    # loaded with, so repeated calls reuse the loaded weights.
    _model = None
    _model_key = None

    def _get_model(self, model_name: str = "large-v3", device: str = "auto"):
        """Return a cached WhisperModel, (re)loading it when the requested
        model name or device differs from what is currently cached.

        Fix: the original cached the first model forever and silently
        ignored later ``model_name``/``device`` arguments.
        """
        key = (model_name, device)
        if self._model is None or self._model_key != key:
            # Heavy import — deferred until a local model is actually needed.
            from faster_whisper import WhisperModel

            if device == "auto":
                # Prefer GPU float16; fall back to CPU int8 if CUDA init fails.
                try:
                    self._model = WhisperModel(
                        model_name, device="cuda", compute_type="float16"
                    )
                except Exception:
                    self._model = WhisperModel(
                        model_name, device="cpu", compute_type="int8"
                    )
            else:
                compute = "float16" if device in ("cuda", "rocm") else "int8"
                self._model = WhisperModel(
                    model_name, device=device, compute_type=compute
                )
            self._model_key = key
        return self._model

    async def transcribe_file(
        self,
        audio_path: str,
        language: str = "de",
        model_name: str = "large-v3",
        device: str = "auto",
        base_url: str = "",
    ) -> str:
        """Transcribe *audio_path* and return the plain text.

        Routes to the remote HTTP backend when ``base_url`` is non-empty,
        otherwise runs faster-whisper locally.
        """
        if base_url:
            return await self._transcribe_remote(
                audio_path, language, model_name, base_url
            )
        return await self._transcribe_local(audio_path, language, model_name, device)

    async def _transcribe_remote(
        self, audio_path: str, language: str, model_name: str, base_url: str
    ) -> str:
        """POST the audio file to an OpenAI-compatible transcription endpoint.

        Raises ``httpx.HTTPStatusError`` on non-2xx responses. The generous
        timeout accounts for long audio files.
        """
        async with httpx.AsyncClient(timeout=300) as client:
            with open(audio_path, "rb") as f:
                r = await client.post(
                    f"{base_url}/v1/audio/transcriptions",
                    files={"file": ("audio.wav", f, "audio/wav")},
                    data={"model": model_name, "language": language},
                )
        r.raise_for_status()
        return r.json()["text"]

    async def _transcribe_local(
        self, audio_path: str, language: str, model_name: str, device: str
    ) -> str:
        """Run faster-whisper in a worker thread and return the joined text.

        Fix: ``WhisperModel.transcribe`` returns a LAZY generator — the
        actual decoding happens while iterating the segments. The original
        consumed the generator outside the executor, blocking the event
        loop for the entire transcription. Consume it inside the worker
        thread instead.
        """
        loop = asyncio.get_running_loop()
        model = self._get_model(model_name, device)

        def _run() -> str:
            segments, _info = model.transcribe(audio_path, language=language)
            # Iterating here (in the thread) is what performs the decoding.
            return "".join(seg.text for seg in segments).strip()

        return await loop.run_in_executor(None, _run)
# Module-level singleton: importers share one engine instance (and thus
# one cached faster-whisper model).
engine = TranscriptionEngine()