feat: add whisper.cpp ROCm backend support for AMD GPU acceleration
- transcription.py: new _transcribe_remote_whispercpp() using /inference endpoint
- transcription.py: backend param routes to openai or whispercpp remote path
- config.py: whisper.backend default 'openai', alt 'whispercpp'
- pipeline.py: passes backend from config to transcribe_file
- settings: backend dropdown (OpenAI-compat / whisper.cpp)
- SETUP.md: whisper.cpp ROCm build and systemd setup instructions

whisper-cpp-server running on beastix :8080 (ROCm0, gfx1030, RX 6800 XT)
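The diff below threads a `backend` switch through config, pipeline, settings UI, and the transcription engine. As a hedged overview (not the actual implementation, which follows in the transcription.py hunks), the remote routing reduces to choosing between two endpoints; `/v1/audio/transcriptions` is the standard OpenAI-compatible path and is an assumption here, since only `/inference` appears in the diff:

```python
# Hedged sketch of the endpoint routing this commit introduces.
# The OpenAI-compatible path is assumed; only /inference is shown in the diff.
def remote_endpoint(base_url: str, backend: str) -> str:
    if backend == "whispercpp":
        return f"{base_url}/inference"            # whisper.cpp server
    return f"{base_url}/v1/audio/transcriptions"  # OpenAI-compatible server
```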
pipeline.py:

@@ -103,6 +103,7 @@ async def _run_meeting_pipeline(cfg, wav_path, output_dir, instructions, diar_cf
         model_name=cfg["whisper"]["model"],
         device=cfg["whisper"]["device"],
         base_url=cfg["whisper"].get("base_url", ""),
+        backend=cfg["whisper"].get("backend", "openai"),
         with_segments=True,
     )
 )
config.py:

@@ -13,6 +13,7 @@ DEFAULTS = {
         "language": "de",
         "device": "auto",  # "auto" = use GPU if ROCm available, else CPU
         "base_url": "",
+        "backend": "openai",  # "openai" = OpenAI-compatible API, "whispercpp" = whisper.cpp /inference
     },
     "audio": {
         "device": "",
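For reference, a hypothetical resulting `whisper` config section when pointing at the whisper.cpp server; key names come from the DEFAULTS hunk above, host and port from the commit message:

```python
# Hypothetical config for the remote whisper.cpp backend; key names from
# DEFAULTS above, host/port from the commit message.
cfg = {
    "whisper": {
        "model": "large-v3",
        "language": "de",
        "device": "auto",                   # presumably unused when base_url is set
        "base_url": "http://beastix:8080",  # non-empty -> remote transcription
        "backend": "whispercpp",            # route to /inference
    },
}
```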
SETUP.md:

@@ -20,34 +20,41 @@ settings page.
 
 ## Beastix (server setup, one-time)
 
-### 1. Install faster-whisper-server
+### 1. Build whisper.cpp with ROCm/GPU support
 
+Prerequisite: ROCm installed (Arch: `sudo pacman -S rocm-hip-sdk`).
+
 ```bash
-sudo pacman -S python-pipx  # Arch Linux
-pipx install faster-whisper-server
-pipx ensurepath
+mkdir -p ~/src && cd ~/src
+git clone https://github.com/ggml-org/whisper.cpp.git --depth=1
+cd whisper.cpp
+
+# For AMD RX 6800 XT (gfx1030); adjust the gfx target if needed
+cmake -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release -DWHISPER_BUILD_SERVER=ON
+cmake --build build -j$(nproc)
+
+# Download the large-v3 model (~2.9 GB)
+bash models/download-ggml-model.sh large-v3
 ```
 
-**Known bug in version 0.0.2**: missing `pyproject.toml` in the pipx venv:
-
-```bash
-cat > ~/.local/share/pipx/venvs/faster-whisper-server/lib/python*/site-packages/pyproject.toml << 'EOF'
-[project]
-name = "faster-whisper-server"
-version = "0.0.2"
-EOF
-```
+`gfx1030` = RX 6800 XT. For other AMD GPUs: `rocminfo | grep gfx`
 
 ### 2. Set up as a systemd user service
 
 ```bash
-cat > ~/.config/systemd/user/faster-whisper-server.service << 'EOF'
+cat > ~/.config/systemd/user/whisper-cpp-server.service << 'EOF'
 [Unit]
-Description=faster-whisper-server (OpenAI-compatible Whisper API)
+Description=whisper.cpp Server (ROCm/GPU)
 After=network.target
 
 [Service]
-ExecStart=%h/.local/bin/faster-whisper-server --host 0.0.0.0 --port 8000 --model large-v3
+ExecStart=%h/src/whisper.cpp/build/bin/whisper-server \
+  --host 0.0.0.0 \
+  --port 8080 \
+  --model %h/src/whisper.cpp/models/ggml-large-v3.bin \
+  --language de \
+  --threads 4 \
+  --convert
 Restart=on-failure
 RestartSec=5
@@ -56,9 +63,12 @@ WantedBy=default.target
 EOF
 
 systemctl --user daemon-reload
-systemctl --user enable --now faster-whisper-server.service
+systemctl --user enable --now whisper-cpp-server.service
 ```
 
+Check the logs: `journalctl --user -u whisper-cpp-server -f`
+GPU use is confirmed when the logs show: `using ROCm0 backend`
+
 ### 3. Install Ollama (if not already present)
 
 ```bash
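Once the service is up, a quick end-to-end check is to post a short WAV to `/inference`. A minimal sketch mirroring the httpx request shape used by `_transcribe_remote_whispercpp` in the transcription.py hunk below; the file name `test.wav` is a placeholder:

```python
# Minimal smoke test for the whisper.cpp server; mirrors the request shape
# used by _transcribe_remote_whispercpp. test.wav is a placeholder.
import httpx

with open("test.wav", "rb") as f:
    r = httpx.post(
        "http://beastix:8080/inference",
        files={"file": ("test.wav", f, "audio/wav")},
        data={"language": "de"},
        timeout=300,
    )
r.raise_for_status()
print(r.json().get("text", "").strip())
```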
@@ -105,7 +115,8 @@ Log in as admin → gear icon in the header → Settings:
 
 | Field | Value (example) |
 |------|-----------------|
-| Whisper Server URL | `http://beastix:8000` |
+| Whisper Backend | `whisper.cpp Server` |
+| Whisper Server URL | `http://beastix:8080` |
 | Whisper Model | `large-v3` |
 | Ollama Server URL | `http://beastix:11434` |
 | Ollama Model | `gemma3:12b` (select from the dropdown) |
settings page (HTML):

@@ -74,9 +74,16 @@
 
 <section>
   <h2>Processing</h2>
+  <div class="field">
+    <label>Whisper Backend</label>
+    <select id="whisper-backend">
+      <option value="openai">OpenAI-compatible (faster-whisper-server)</option>
+      <option value="whispercpp">whisper.cpp Server</option>
+    </select>
+  </div>
   <div class="field">
     <label>Whisper Server URL (empty = local)</label>
-    <input type="text" id="whisper-url" placeholder="http://beastix:8000">
+    <input type="text" id="whisper-url" placeholder="http://beastix:8080">
   </div>
   <div class="field">
     <label>Whisper Model</label>
settings page (JS):

@@ -53,6 +53,7 @@ async function loadConfig() {
   if (!r.ok) return;
   const cfg = await r.json();
   document.getElementById('audio-device').value = (cfg.audio && cfg.audio.device) || '';
+  document.getElementById('whisper-backend').value = (cfg.whisper && cfg.whisper.backend) || 'openai';
   document.getElementById('whisper-url').value = (cfg.whisper && cfg.whisper.base_url) || '';
   document.getElementById('whisper-model').value = (cfg.whisper && cfg.whisper.model) || 'large-v3';
   const ollamaUrl = (cfg.ollama && cfg.ollama.base_url) || 'http://localhost:11434';
@@ -96,6 +97,7 @@ document.getElementById('save-btn').addEventListener('click', async function() {
     whisper: {
       base_url: document.getElementById('whisper-url').value,
       model: document.getElementById('whisper-model').value,
+      backend: document.getElementById('whisper-backend').value,
     },
     ollama: {
       base_url: document.getElementById('ollama-url').value,
transcription.py:

@@ -27,8 +27,13 @@ class TranscriptionEngine:
         device: str = "auto",
         base_url: str = "",
         with_segments: bool = False,
+        backend: str = "openai",
     ) -> Union[str, list[dict]]:
         if base_url:
+            if backend == "whispercpp":
+                return await self._transcribe_remote_whispercpp(
+                    audio_path, language, base_url, with_segments
+                )
             return await self._transcribe_remote(
                 audio_path, language, model_name, base_url, with_segments
             )
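Putting the routing hunk together with the pipeline.py hunk above, a hypothetical call into the engine; the method name `transcribe_file` comes from the commit message, while engine construction, the positional audio argument, and the `language` keyword are assumptions:

```python
# Hypothetical usage; construction and argument order are assumptions,
# keyword names come from the pipeline.py hunk above.
engine = TranscriptionEngine()
segments = await engine.transcribe_file(
    "meeting.wav",                   # placeholder path
    language="de",
    model_name="large-v3",
    device="auto",
    base_url="http://beastix:8080",  # non-empty -> remote path
    backend="whispercpp",            # -> _transcribe_remote_whispercpp()
    with_segments=True,
)
```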
@@ -67,6 +72,35 @@ class TranscriptionEngine:
             ]
         return [{"start": 0.0, "end": 9999.0, "text": body["text"].strip()}]
 
+    async def _transcribe_remote_whispercpp(
+        self,
+        audio_path: str,
+        language: str,
+        base_url: str,
+        with_segments: bool,
+    ) -> Union[str, list[dict]]:
+        async with httpx.AsyncClient(timeout=300) as client:
+            with open(audio_path, "rb") as f:
+                data = {"language": language}
+                if with_segments:
+                    data["response_format"] = "verbose_json"
+                r = await client.post(
+                    f"{base_url}/inference",
+                    files={"file": ("audio.wav", f, "audio/wav")},
+                    data=data,
+                )
+            r.raise_for_status()
+            body = r.json()
+        if not with_segments:
+            return body.get("text", "").strip()
+        raw_segs = body.get("segments") or []
+        if raw_segs:
+            return [
+                {"start": s["start"], "end": s["end"], "text": s["text"].strip()}
+                for s in raw_segs
+            ]
+        return [{"start": 0.0, "end": 9999.0, "text": body.get("text", "").strip()}]
+
     async def _transcribe_local(
         self,
         audio_path: str,
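For reference, a sketch of the `verbose_json` response shape the parser above relies on; only `text` and each segment's `start`/`end`/`text` keys are actually read, and all values here are made up:

```python
# Illustrative /inference verbose_json response; values are fabricated,
# only the keys read by _transcribe_remote_whispercpp matter.
body = {
    "text": " Good morning everyone, let's get started.",
    "segments": [
        {"start": 0.0, "end": 3.1, "text": " Good morning everyone,"},
        {"start": 3.1, "end": 5.4, "text": " let's get started."},
    ],
}
```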