feat: add whisper.cpp ROCm backend support for AMD GPU acceleration
- transcription.py: new _transcribe_remote_whispercpp() using the /inference endpoint
- transcription.py: backend param routes to the openai or whispercpp remote path
- config.py: whisper.backend default 'openai', alt 'whispercpp'
- pipeline.py: passes backend from config to transcribe_file
- settings: backend dropdown (OpenAI-compat / whisper.cpp)
- SETUP.md: whisper.cpp ROCm build and systemd setup instructions

whisper-cpp-server running on beastix:8080 (ROCm0, gfx1030, RX 6800 XT)
This commit is contained in:
@@ -27,8 +27,13 @@ class TranscriptionEngine:
|
||||
device: str = "auto",
|
||||
base_url: str = "",
|
||||
with_segments: bool = False,
|
||||
backend: str = "openai",
|
||||
) -> Union[str, list[dict]]:
|
||||
if base_url:
|
||||
if backend == "whispercpp":
|
||||
return await self._transcribe_remote_whispercpp(
|
||||
audio_path, language, base_url, with_segments
|
||||
)
|
||||
return await self._transcribe_remote(
|
||||
audio_path, language, model_name, base_url, with_segments
|
||||
)
|
||||
@@ -67,6 +72,35 @@ class TranscriptionEngine:
|
||||
]
|
||||
return [{"start": 0.0, "end": 9999.0, "text": body["text"].strip()}]
|
||||
|
||||
async def _transcribe_remote_whispercpp(
    self,
    audio_path: str,
    language: str,
    base_url: str,
    with_segments: bool,
) -> Union[str, list[dict]]:
    """Transcribe an audio file via a whisper.cpp server's ``/inference`` endpoint.

    Args:
        audio_path: Path of the audio file to upload.
        language: Language hint forwarded to the server. An empty string is
            omitted from the request so the server can auto-detect.
        base_url: Root URL of the whisper.cpp server, without a trailing slash
            (the path ``/inference`` is appended here).
        with_segments: When True, request ``verbose_json`` and return a list of
            ``{"start", "end", "text"}`` dicts; otherwise return plain text.

    Returns:
        The stripped transcript text, or a list of segment dicts when
        ``with_segments`` is True. If the server returns no segments, a single
        catch-all segment spanning 0.0-9999.0 is synthesized (mirrors the
        OpenAI-compat remote path in this class).

    Raises:
        httpx.HTTPStatusError: If the server responds with an error status.
    """
    # Build the form first; only include "language" when one was actually
    # given — posting an empty value could override server-side auto-detect.
    form: dict = {}
    if language:
        form["language"] = language
    if with_segments:
        # verbose_json adds per-segment timing to the response body.
        form["response_format"] = "verbose_json"

    # Generous timeout: server-side transcription of long audio is slow.
    async with httpx.AsyncClient(timeout=300) as client:
        with open(audio_path, "rb") as f:
            # NOTE(review): filename and MIME type are hard-coded regardless
            # of the real container format; the server appears to sniff the
            # payload — confirm against whisper-cpp-server behavior.
            r = await client.post(
                f"{base_url}/inference",
                files={"file": ("audio.wav", f, "audio/wav")},
                data=form,
            )
        r.raise_for_status()
        body = r.json()

    if not with_segments:
        return body.get("text", "").strip()

    raw_segs = body.get("segments") or []
    if raw_segs:
        return [
            {"start": s["start"], "end": s["end"], "text": s["text"].strip()}
            for s in raw_segs
        ]
    # No segment list in the response: fall back to one whole-file segment.
    return [{"start": 0.0, "end": 9999.0, "text": body.get("text", "").strip()}]
|
||||
|
||||
async def _transcribe_local(
|
||||
self,
|
||||
audio_path: str,
|
||||
|
||||
Reference in New Issue
Block a user