diff --git a/api/pipeline.py b/api/pipeline.py index f5d4bf8..747dcae 100644 --- a/api/pipeline.py +++ b/api/pipeline.py @@ -81,13 +81,8 @@ async def _run_solo_pipeline(cfg, wav_path, output_dir, instructions): ) dt = datetime.now() - paths = write_solo_docs(raw_text=raw_text, refined=refined, output_dir=output_dir, dt=dt) - - title = "Diktat" - for line in refined.splitlines(): - if line.startswith("# "): - title = line[2:].strip() - break + title, tldr = await client.generate_title_and_tldr(refined, model=cfg["ollama"]["model"]) + paths = write_solo_docs(raw_text=raw_text, refined=refined, output_dir=output_dir, dt=dt, title=title, tldr=tldr) await state.set_status(Status.IDLE) await broadcast({ @@ -152,6 +147,7 @@ async def _run_meeting_pipeline(cfg, wav_path, output_dir, instructions, diar_cf transcript_text = "\n\n".join(f"**{spk}:** {txt}" for spk, txt in named_aligned) summary = await client.summarize(transcript_text, model=cfg["ollama"]["model"]) + title, tldr = await client.generate_title_and_tldr(summary, model=cfg["ollama"]["model"]) dt = datetime.now() paths = write_meeting_docs( @@ -161,13 +157,15 @@ async def _run_meeting_pipeline(cfg, wav_path, output_dir, instructions, diar_cf duration_min=duration_min, output_dir=output_dir, dt=dt, + title=title, + tldr=tldr, ) await state.set_status(Status.IDLE) await broadcast({ "event": "saved", "path": paths["index"], - "title": f"Meeting {dt.strftime('%d.%m.%Y %H:%M')}", + "title": title, "meeting": True, "paths": paths, }) diff --git a/api/router.py b/api/router.py index b851ed7..57b2bbb 100644 --- a/api/router.py +++ b/api/router.py @@ -223,22 +223,26 @@ async def open_file(body: dict, user: dict = Depends(current_user)): from urllib.parse import quote cfg = load_config() vault = cfg.get("obsidian", {}).get("vault", "").strip() - # If only the index was passed, also include sibling transkript/zusammenfassung + # If only the index was passed, also include siblings from subdir all_paths = list(paths) for p in 
paths: if p.endswith("-index.md"): - base = p[: -len("-index.md")] + base = os.path.basename(p)[: -len("-index.md")] + subdir = os.path.join(os.path.dirname(p), base) for suffix in ("-transkript.md", "-zusammenfassung.md"): - sibling = base + suffix + sibling = os.path.join(subdir, base + suffix) if os.path.exists(sibling) and sibling not in all_paths: all_paths.append(sibling) open_target = all_paths[0] if vault and os.path.isdir(vault): + # Mirror directory structure: index → vault root, others → vault/{base}/ for p in all_paths: - dest = os.path.join(vault, os.path.basename(p)) + rel = os.path.relpath(p, abs_user_dir) + dest = os.path.join(vault, rel) + os.makedirs(os.path.dirname(dest), exist_ok=True) shutil.copy2(p, dest) - open_target = os.path.join(vault, os.path.basename(all_paths[0])) + open_target = os.path.join(vault, os.path.relpath(all_paths[0], abs_user_dir)) vault_name = os.path.basename(vault.rstrip("/")) if vault else "" file_name = os.path.basename(open_target) if vault_name: diff --git a/docs/plans/2026-04-01-settings-remote-whisper.md b/docs/plans/2026-04-01-settings-remote-whisper.md new file mode 100644 index 0000000..b92e162 --- /dev/null +++ b/docs/plans/2026-04-01-settings-remote-whisper.md @@ -0,0 +1,921 @@ +# Settings Page & Remote Whisper Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Add a settings page with PipeWire audio device selection and remote Whisper/Ollama URL configuration, enabling clients to offload AI processing to Beastix. + +**Architecture:** Config gains `audio.device` and `whisper.base_url`. `transcription.py` branches on `base_url`: local faster-whisper or remote OpenAI-compatible HTTP upload. A new `/settings` page (admin-only) lets users pick PipeWire sources via `pactl` and configure server URLs. PipeWire combined source is created on demand via `pactl load-module`. 
+ +**Tech Stack:** FastAPI, httpx (already in deps), pactl (PipeWire), sounddevice, faster-whisper, vanilla JS/CSS (tüit CI dark theme) + +--- + +### Task 1: Extend config defaults + +**Files:** +- Modify: `config.py` +- Test: `tests/test_config.py` + +**Step 1: Write the failing test** + +Add to `tests/test_config.py`: + +```python +def test_config_has_audio_and_whisper_base_url(): + import config + from unittest.mock import patch + import tempfile, os + with tempfile.TemporaryDirectory() as tmpdir: + cfg_path = os.path.join(tmpdir, "config.toml") + with patch("config.CONFIG_PATH", cfg_path): + cfg = config.load() + assert "audio" in cfg + assert cfg["audio"]["device"] == "" + assert cfg["whisper"]["base_url"] == "" +``` + +**Step 2: Run to verify it fails** + +```bash +pytest tests/test_config.py::test_config_has_audio_and_whisper_base_url -v +``` +Expected: FAIL — KeyError or AssertionError + +**Step 3: Update `config.py` DEFAULTS** + +```python +DEFAULTS = { + "ollama": { + "base_url": "http://localhost:11434", + "model": "gemma3:12b", + }, + "whisper": { + "model": "large-v3", + "language": "de", + "device": "auto", + "base_url": "", # empty = local, else http://beastix:8000 + }, + "audio": { + "device": "", # empty = system default + }, + "server": { + "port": 8765, + }, + "output": { + "path": os.path.expanduser("~/cloud.shron.de/Hetzner Storagebox/work"), + }, + "network": { + "host": "127.0.0.1", + }, + "pid_file": os.path.expanduser("~/.local/run/tueit-transcriber.pid"), +} +``` + +Update the fallback string writer in `_write_defaults`: + +```python +f.write('[whisper]\nmodel = "large-v3"\nlanguage = "de"\ndevice = "auto"\nbase_url = ""\n\n') +f.write('[audio]\ndevice = ""\n\n') +``` + +**Step 4: Run tests** + +```bash +pytest tests/test_config.py -v +``` +Expected: all PASS + +**Step 5: Commit** + +```bash +git add config.py tests/test_config.py +git commit -m "feat: add audio.device and whisper.base_url to config defaults" +``` + +--- + +### Task 2: 
Remote Whisper in transcription.py + +**Files:** +- Modify: `transcription.py` +- Modify: `api/pipeline.py` +- Test: `tests/test_transcription.py` + +**Step 1: Write the failing test** + +Add to `tests/test_transcription.py`: + +```python +import pytest + +@pytest.mark.asyncio +async def test_transcribe_uses_remote_when_base_url_set(tmp_path): + import wave, struct + wav = tmp_path / "test.wav" + with wave.open(str(wav), "wb") as wf: + wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(16000) + wf.writeframes(struct.pack("<100h", *([0] * 100))) + + import respx, httpx + from transcription import TranscriptionEngine + eng = TranscriptionEngine() + + with respx.mock: + respx.post("http://beastix:8000/v1/audio/transcriptions").mock( + return_value=httpx.Response(200, json={"text": "Hallo Welt"}) + ) + result = await eng.transcribe_file( + str(wav), language="de", model_name="large-v3", + device="auto", base_url="http://beastix:8000", + ) + assert result == "Hallo Welt" +``` + +**Step 2: Run to verify it fails** + +```bash +pytest tests/test_transcription.py::test_transcribe_uses_remote_when_base_url_set -v +``` +Expected: FAIL — `transcribe_file` doesn't accept `base_url` + +**Step 3: Rewrite `transcription.py`** + +```python +import asyncio +import httpx + + +class TranscriptionEngine: + _model = None + + def _get_model(self, model_name: str = "large-v3", device: str = "auto"): + if self._model is None: + from faster_whisper import WhisperModel + if device == "auto": + try: + self._model = WhisperModel(model_name, device="cuda", compute_type="float16") + except Exception: + self._model = WhisperModel(model_name, device="cpu", compute_type="int8") + else: + compute = "float16" if device in ("cuda", "rocm") else "int8" + self._model = WhisperModel(model_name, device=device, compute_type=compute) + return self._model + + async def transcribe_file( + self, + audio_path: str, + language: str = "de", + model_name: str = "large-v3", + device: str = "auto", + base_url: 
str = "", + ) -> str: + if base_url: + return await self._transcribe_remote(audio_path, language, model_name, base_url) + return await self._transcribe_local(audio_path, language, model_name, device) + + async def _transcribe_remote( + self, audio_path: str, language: str, model_name: str, base_url: str + ) -> str: + async with httpx.AsyncClient(timeout=300) as client: + with open(audio_path, "rb") as f: + r = await client.post( + f"{base_url}/v1/audio/transcriptions", + files={"file": ("audio.wav", f, "audio/wav")}, + data={"model": model_name, "language": language}, + ) + r.raise_for_status() + return r.json()["text"] + + async def _transcribe_local( + self, audio_path: str, language: str, model_name: str, device: str + ) -> str: + loop = asyncio.get_event_loop() + model = self._get_model(model_name, device) + segments, _ = await loop.run_in_executor( + None, + lambda: model.transcribe(audio_path, language=language), + ) + return "".join(seg.text for seg in segments).strip() + + +engine = TranscriptionEngine() +``` + +**Step 4: Update `api/pipeline.py` — pass base_url** + +In `run_pipeline`, update the `transcribe_file` call: + +```python +raw_text = await transcription_engine.transcribe_file( + wav_path, + language=cfg["whisper"]["language"], + model_name=cfg["whisper"]["model"], + device=cfg["whisper"]["device"], + base_url=cfg["whisper"].get("base_url", ""), +) +``` + +**Step 5: Run all transcription tests** + +```bash +pytest tests/test_transcription.py -v +``` +Expected: all PASS + +**Step 6: Commit** + +```bash +git add transcription.py api/pipeline.py tests/test_transcription.py +git commit -m "feat: remote Whisper via whisper.base_url — OpenAI-compatible upload" +``` + +--- + +### Task 3: Audio device in AudioRecorder + +**Files:** +- Modify: `audio.py` +- Modify: `api/router.py` (toggle endpoint) +- Test: `tests/test_audio.py` + +**Step 1: Write the failing test** + +Add to `tests/test_audio.py`: + +```python +def test_recorder_stores_device_param(): + 
from audio import AudioRecorder + rec = AudioRecorder(device="my-pipewire-source") + assert rec.device == "my-pipewire-source" + +def test_recorder_device_none_when_empty_string(): + from audio import AudioRecorder + rec = AudioRecorder(device="") + assert rec.device is None +``` + +**Step 2: Run to verify they fail** + +```bash +pytest tests/test_audio.py::test_recorder_stores_device_param tests/test_audio.py::test_recorder_device_none_when_empty_string -v +``` +Expected: FAIL + +**Step 3: Update `audio.py`** + +```python +import wave +import threading +import numpy as np + + +class AudioRecorder: + def __init__(self, sample_rate: int = 16000, device: str | None = None): + self.sample_rate = sample_rate + self.device = device or None # empty string becomes None = system default + self._buffer: list[np.ndarray] = [] + self._stream = None + self.is_recording = False + self._lock = threading.Lock() + + def _callback(self, indata, frames, time, status): + if self.is_recording: + with self._lock: + self._buffer.append(indata[:, 0].copy().astype(np.int16)) + + def start(self): + import sounddevice as sd + self._buffer = [] + self.is_recording = True + self._stream = sd.InputStream( + samplerate=self.sample_rate, + channels=1, + dtype="int16", + callback=self._callback, + device=self.device, + ) + self._stream.start() + + def stop(self): + self.is_recording = False + if self._stream: + self._stream.stop() + self._stream.close() + self._stream = None + + def save_wav(self, path: str) -> str: + with self._lock: + data = np.concatenate(self._buffer) if self._buffer else np.zeros(0, dtype=np.int16) + with wave.open(path, "wb") as wf: + wf.setnchannels(1) + wf.setsampwidth(2) + wf.setframerate(self.sample_rate) + wf.writeframes(data.tobytes()) + return path +``` + +**Step 4: Pass device from config in `api/router.py` toggle endpoint** + +In `toggle_recording`, update the `Status.IDLE` branch: + +```python +if state.status == Status.IDLE: + from audio import AudioRecorder + 
audio_device = cfg.get("audio", {}).get("device") or None + state._recorder = AudioRecorder(device=audio_device) + ... +``` + +Also add `cfg = load_config()` at the top of `toggle_recording`, before the `Status.IDLE` branch that reads `cfg` (`load_config` is already imported in `api/router.py`): +```python +cfg = load_config() +``` + +**Step 5: Run tests** + +```bash +pytest tests/test_audio.py -v +``` +Expected: all PASS + +**Step 6: Commit** + +```bash +git add audio.py api/router.py tests/test_audio.py +git commit -m "feat: AudioRecorder accepts device param — reads audio.device from config" +``` + +--- + +### Task 4: API — GET /audio/devices and POST /audio/combined + +**Files:** +- Modify: `api/router.py` +- Test: `tests/test_api.py` + +**Step 1: Write the failing tests** + +Add to `tests/test_api.py`: + +```python +def test_audio_devices_returns_list(monkeypatch): + import subprocess + pactl_output = ( + "1\talsa_input.pci.analog-stereo\tPipeWire\ts32le 2ch 48000Hz\tRUNNING\n" + "2\talsa_output.pci.analog-stereo.monitor\tPipeWire\ts32le 2ch 48000Hz\tIDLE\n" + ) + monkeypatch.setattr(subprocess, "check_output", lambda *a, **kw: pactl_output.encode()) + from unittest.mock import patch + with patch("api.router.current_user", + return_value={"username": "u", "output_dir": "/tmp", "is_admin": True}): + from fastapi.testclient import TestClient + from main import app + client = TestClient(app) + r = client.get("/audio/devices", headers={"Authorization": "Bearer fake"}) + assert r.status_code == 200 + devices = r.json() + assert len(devices) == 2 + assert devices[0]["name"] == "alsa_input.pci.analog-stereo" + +def test_audio_devices_forbidden_for_non_admin(): + from unittest.mock import patch + with patch("api.router.current_user", + return_value={"username": "u", "output_dir": "/tmp", "is_admin": False}): + from fastapi.testclient import TestClient + from main import app + client = TestClient(app) + r = client.get("/audio/devices", headers={"Authorization": "Bearer fake"}) + assert r.status_code == 403 +``` + +**Step 2: Run to verify they fail** + +```bash
+pytest tests/test_api.py::test_audio_devices_returns_list tests/test_api.py::test_audio_devices_forbidden_for_non_admin -v +``` +Expected: FAIL — routes don't exist + +**Step 3: Add endpoints to `api/router.py`** + +```python +@router.get("/audio/devices") +async def list_audio_devices(user: dict = Depends(current_user)): + import subprocess + if not user.get("is_admin"): + raise HTTPException(status_code=403, detail="Nur Administratoren") + try: + out = subprocess.check_output( + ["pactl", "list", "sources", "short"], + stderr=subprocess.DEVNULL, timeout=5, + ).decode() + except Exception as e: + raise HTTPException(status_code=500, detail=f"pactl fehlgeschlagen: {e}") + devices = [] + for line in out.strip().splitlines(): + parts = line.split("\t") + if len(parts) >= 2: + devices.append({ + "index": parts[0], + "name": parts[1], + "state": parts[4] if len(parts) > 4 else "", + }) + return devices + + +@router.post("/audio/combined") +async def create_combined_source(body: dict, user: dict = Depends(current_user)): + import subprocess, json, pathlib + if not user.get("is_admin"): + raise HTTPException(status_code=403, detail="Nur Administratoren") + mic = body.get("mic", "") + monitor = body.get("monitor", "") + if not mic or not monitor: + raise HTTPException(status_code=400, detail="mic und monitor erforderlich") + # Validate: names must come from pactl list — no shell injection via user input + out = subprocess.check_output( + ["pactl", "list", "sources", "short"], stderr=subprocess.DEVNULL, timeout=5 + ).decode() + known = {line.split("\t")[1] for line in out.strip().splitlines() if "\t" in line} + if mic not in known or monitor not in known: + raise HTTPException(status_code=400, detail="Unbekanntes Audio-Device") + sink_id = subprocess.check_output([ + "pactl", "load-module", "module-null-sink", + "sink_name=transkriptor-combined", + "sink_properties=device.description=Transkriptor Combined", + ], timeout=5).decode().strip() + mic_id = 
subprocess.check_output([ + "pactl", "load-module", "module-loopback", + f"source={mic}", "sink=transkriptor-combined", + ], timeout=5).decode().strip() + mon_id = subprocess.check_output([ + "pactl", "load-module", "module-loopback", + f"source={monitor}", "sink=transkriptor-combined", + ], timeout=5).decode().strip() + state_path = pathlib.Path( + os.path.expanduser("~/.config/tueit-transcriber/pipewire-modules.json") + ) + state_path.write_text(json.dumps({"ids": [int(sink_id), int(mic_id), int(mon_id)]})) + return {"device": "transkriptor-combined.monitor", "module_ids": [sink_id, mic_id, mon_id]} +``` + +**Step 4: Run tests** + +```bash +pytest tests/test_api.py::test_audio_devices_returns_list tests/test_api.py::test_audio_devices_forbidden_for_non_admin -v +``` +Expected: PASS + +**Step 5: Commit** + +```bash +git add api/router.py tests/test_api.py +git commit -m "feat: GET /audio/devices, POST /audio/combined — PipeWire source management" +``` + +--- + +### Task 5: Fix PUT /config to deep-merge + +**Files:** +- Modify: `api/router.py` +- Test: `tests/test_api.py` + +The current `put_config` does a shallow `cfg.update(body)`, which replaces each nested table wholesale: sending only `{"whisper": {"base_url": ...}}` drops `whisper.model`. It must deep-merge the request body into the existing config instead.
+ +**Step 1: Write the failing test** + +Add to `tests/test_api.py`: + +```python +def test_put_config_deep_merges(tmp_path, monkeypatch): + import config as cfg_mod + monkeypatch.setattr(cfg_mod, "CONFIG_PATH", + str(tmp_path / "config.toml")) + from unittest.mock import patch + with patch("api.router.current_user", + return_value={"username": "u", "output_dir": "/tmp", "is_admin": True}): + from fastapi.testclient import TestClient + from main import app + client = TestClient(app) + r = client.put("/config", + json={"whisper": {"base_url": "http://beastix:8000"}}, + headers={"Authorization": "Bearer fake"}) + assert r.status_code == 200 + data = r.json() + # base_url updated, model preserved + assert data["whisper"]["base_url"] == "http://beastix:8000" + assert data["whisper"]["model"] == "large-v3" +``` + +**Step 2: Run to verify it fails** + +```bash +pytest tests/test_api.py::test_put_config_deep_merges -v +``` +Expected: FAIL — shallow update loses whisper.model + +**Step 3: Fix `put_config` in `api/router.py`** + +```python +@router.put("/config") +async def put_config(body: dict, user: dict = Depends(current_user)): + if not user.get("is_admin"): + raise HTTPException(status_code=403, detail="Nur Administratoren können die Config ändern") + import tomli_w + from config import _deep_merge, CONFIG_PATH + cfg = load_config() + merged = _deep_merge(cfg, body) + os.makedirs(os.path.dirname(CONFIG_PATH), exist_ok=True) + with open(CONFIG_PATH, "wb") as f: + tomli_w.dump(merged, f) + return merged +``` + +**Step 4: Run tests** + +```bash +pytest tests/test_api.py::test_put_config_deep_merges -v +``` +Expected: PASS + +**Step 5: Commit** + +```bash +git add api/router.py tests/test_api.py +git commit -m "fix: PUT /config deep-merges nested config instead of shallow update" +``` + +--- + +### Task 6: GET /status returns is_admin + +**Files:** +- Modify: `api/router.py` +- Test: `tests/test_api.py` + +**Step 1: Write the failing test** + +Add to `tests/test_api.py`: 
+ +```python +def test_status_includes_is_admin(): + from unittest.mock import patch + with patch("api.router.current_user", + return_value={"username": "u", "output_dir": "/tmp", "is_admin": True}): + from fastapi.testclient import TestClient + from main import app + client = TestClient(app) + r = client.get("/status", headers={"Authorization": "Bearer fake"}) + assert r.status_code == 200 + assert r.json()["is_admin"] is True +``` + +**Step 2: Run to verify it fails** + +```bash +pytest tests/test_api.py::test_status_includes_is_admin -v +``` +Expected: FAIL + +**Step 3: Update `get_status` in `api/router.py`** + +```python +@router.get("/status") +async def get_status(user: dict = Depends(current_user)): + return { + "status": state.status, + "username": user["username"], + "is_admin": user.get("is_admin", False), + } +``` + +**Step 4: Add gear icon in `frontend/app.js` init block** + +In the `(async () => { ... })()` init, after `userChip.textContent = data.username`: + +```javascript + if (data.is_admin) { + const gearLink = document.createElement('a'); + gearLink.href = '/settings'; + gearLink.className = 'back-btn'; + gearLink.title = 'Einstellungen'; + gearLink.textContent = '\u2699'; // ⚙ gear symbol + document.querySelector('.header-right').prepend(gearLink); + } +``` + +**Step 5: Run tests** + +```bash +pytest tests/test_api.py::test_status_includes_is_admin -v +``` +Expected: PASS + +**Step 6: Commit** + +```bash +git add api/router.py frontend/app.js tests/test_api.py +git commit -m "feat: status includes is_admin, gear icon in header for admins" +``` + +--- + +### Task 7: Settings page HTML + JS + routes + +**Files:** +- Create: `frontend/settings.html` +- Create: `frontend/settings.js` +- Modify: `api/router.py` +- Modify: `main.py` + +**Step 1: Add GET /settings to `api/router.py`** + +```python +@router.get("/settings") +async def settings_page_route(user: dict = Depends(current_user)): + from fastapi.responses import FileResponse, RedirectResponse 
+ from pathlib import Path + if not user.get("is_admin"): + return RedirectResponse("/") + return FileResponse(str(Path(__file__).parent.parent / "frontend" / "settings.html")) +``` + +**Step 2: Add `/settings.js` route to `main.py`** + +```python +@app.get("/settings.js") +async def settingsjs(): + return FileResponse(str(FRONTEND_DIR / "settings.js")) +``` + +**Step 3: Create `frontend/settings.html`** + +```html + + +
+ + +