import asyncio

import httpx


class TranscriptionEngine:
    """Speech-to-text engine backed by faster-whisper.

    The Whisper model is loaded lazily on first use and cached per
    (model_name, device) pair.  When a ``base_url`` is supplied, audio is
    instead forwarded to a remote OpenAI-compatible transcription endpoint.
    """

    # Cached WhisperModel instance plus the (model_name, device) pair it was
    # built for, so that requesting a different model/device triggers a
    # reload instead of silently serving a stale model.
    _model = None
    _model_key = None

    def _get_model(self, model_name: str = "large-v3", device: str = "auto"):
        """Return a cached WhisperModel, (re)loading it if the config changed.

        With ``device="auto"``, CUDA/float16 is attempted first and the
        engine falls back to CPU/int8 if GPU initialization fails.
        """
        key = (model_name, device)
        # BUG FIX: the previous cache ignored model_name/device entirely, so
        # a call with a different model returned whatever was loaded first.
        if self._model is None or self._model_key != key:
            from faster_whisper import WhisperModel

            if device == "auto":
                try:
                    self._model = WhisperModel(
                        model_name, device="cuda", compute_type="float16"
                    )
                except Exception:
                    # No usable GPU (or CUDA libraries missing): CPU fallback.
                    self._model = WhisperModel(
                        model_name, device="cpu", compute_type="int8"
                    )
            else:
                compute = "float16" if device in ("cuda", "rocm") else "int8"
                self._model = WhisperModel(
                    model_name, device=device, compute_type=compute
                )
            self._model_key = key
        return self._model

    async def transcribe_file(
        self,
        audio_path: str,
        language: str = "de",
        model_name: str = "large-v3",
        device: str = "auto",
        base_url: str = "",
    ) -> str:
        """Transcribe *audio_path*; remotely if *base_url* is set, else locally.

        Returns the transcript text.  Remote errors surface as
        ``httpx.HTTPStatusError``; local errors as whatever faster-whisper raises.
        """
        if base_url:
            return await self._transcribe_remote(
                audio_path, language, model_name, base_url
            )
        return await self._transcribe_local(audio_path, language, model_name, device)

    async def _transcribe_remote(
        self, audio_path: str, language: str, model_name: str, base_url: str
    ) -> str:
        """POST the audio file to an OpenAI-compatible transcription API."""
        async with httpx.AsyncClient(timeout=300) as client:
            with open(audio_path, "rb") as f:
                r = await client.post(
                    f"{base_url}/v1/audio/transcriptions",
                    files={"file": ("audio.wav", f, "audio/wav")},
                    data={"model": model_name, "language": language},
                )
            r.raise_for_status()
            return r.json()["text"]

    async def _transcribe_local(
        self, audio_path: str, language: str, model_name: str, device: str
    ) -> str:
        """Run faster-whisper in a worker thread and join the segment texts."""
        # get_running_loop() is the correct call inside a coroutine;
        # get_event_loop() is deprecated for this use since Python 3.10.
        loop = asyncio.get_running_loop()
        model = self._get_model(model_name, device)

        def _decode() -> str:
            # BUG FIX: model.transcribe() returns a *lazy* generator — the
            # actual (CPU/GPU-heavy) decoding happens while iterating the
            # segments.  The original code iterated them on the event-loop
            # thread, blocking the loop for the whole decode; consume the
            # generator inside the executor instead.
            segments, _info = model.transcribe(audio_path, language=language)
            return "".join(seg.text for seg in segments).strip()

        return await loop.run_in_executor(None, _decode)


engine = TranscriptionEngine()