64 Commits

Author SHA1 Message Date
thomas.kopp 8ec9044c75 fix: whisper repetition loops, meeting transcript punctuation
- transcription: add temperature_inc=0 to whispercpp to disable fallback (prevents loops)
- pipeline: punctuate meeting transcript in one pass (parallel with summarize)
- output: write_meeting_docs accepts pre-built transcript_text
- llm: punctuate prompt preserves speaker labels

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 12:34:11 +02:00
thomas.kopp 658f9be47f fix: punctuate raw transcript, strip JSON code fences, filter null speaker names
- llm: punctuate() adds punctuation/capitalisation without changing words
- llm: _strip_code_fences() handles markdown-wrapped JSON from gemma3
- llm: filter string 'null' from identify_speakers result
- pipeline: punctuate raw_text in parallel with refine for solo recordings

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 12:23:25 +02:00
thomas.kopp d3582eaeb7 feat: tab navigation in modal (Index/Transkript/Zusammenfassung)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 12:10:50 +02:00
thomas.kopp 336628341b feat: AI-generated title+tldr, subfolder structure, backlinks in transkript/zusammenfassung
- llm: generate_title_and_tldr() returns concise title and 2-3 sentence summary
- output: index in root, transkript+zusammenfassung in {base}/ subdir with backlinks
- pipeline: call generate_title_and_tldr for both solo and meeting recordings
- router: mirror subdir structure when copying to Obsidian vault

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 12:07:54 +02:00
thomas.kopp 1cfb9c127b fix: use vault+file URI format for Obsidian, more reliable than path=
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 11:57:47 +02:00
thomas.kopp fe8b8bb125 fix: auto-include transkript/zusammenfassung siblings when copying index to Obsidian vault
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 11:47:29 +02:00
thomas.kopp ca10cbb20b fix: call obsidian binary directly instead of xdg-open for URI handling
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 11:42:23 +02:00
thomas.kopp 180fe43df7 fix: handle pyannote 4.x DiarizeOutput wrapper in diarize()
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 11:34:02 +02:00
thomas.kopp 8ee11a31a1 fix: use token= instead of use_auth_token= for pyannote Pipeline.from_pretrained
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 11:15:14 +02:00
thomas.kopp 06f7361004 feat: write 3 files per solo recording (index + transkript + zusammenfassung)
- pipeline: call write_solo_docs() instead of save_transcript(); broadcast paths dict
- router: /open accepts paths list for Obsidian mode, copies all 3 files to vault
- app.js: store _modalPaths from saved event; Obsidian button sends all paths
- tests: test_write_solo_docs_creates_three_files added

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 11:10:28 +02:00
thomas.kopp a37e09fb4e feat: copy transcript to Obsidian vault on open
Config: obsidian.vault path. On Obsidian button click, file is copied to
vault dir then opened via obsidian:// URI. Vault path configurable in settings.
2026-04-02 11:00:55 +02:00
thomas.kopp 6f718f0753 feat: add Obsidian open button; fix folder button using dolphin --select 2026-04-02 10:55:19 +02:00
thomas.kopp 348ce332c7 feat: add folder button to transcript modal 2026-04-02 10:47:08 +02:00
thomas.kopp 7e0851fc95 fix: pass whisper backend to solo pipeline transcribe_file call 2026-04-02 09:18:20 +02:00
thomas.kopp 11dee75ab3 fix: record at 48000 Hz — PipeWire virtual sinks reject 16 kHz resampling
Whisper and faster-whisper both handle arbitrary sample rates internally.
2026-04-02 09:14:34 +02:00
thomas.kopp b4e7e08918 fix: update audio devices test to mock sounddevice instead of pactl 2026-04-02 07:52:34 +02:00
thomas.kopp 04b655e664 fix: use sounddevice names for audio device list and combined source
- /audio/devices now returns sounddevice device names (not pactl source names)
  so the stored device name works directly with sd.InputStream
- /audio/combined maps sounddevice names back to pactl source names via
  description matching for the loopback commands
- Combined sink description set to 'transkriptor-combined' (no spaces) so
  sounddevice name matches the value stored in config
- Add _pactl_source_for_sd_name() helper for the mapping
2026-04-02 07:51:42 +02:00
thomas.kopp 251f9c238d fix: restore PipeWire combined source automatically on startup
Save mic/monitor device names to pipewire-modules.json alongside module IDs.
On startup, recreate transkriptor-combined if not already loaded.
2026-04-02 01:46:19 +02:00
thomas.kopp 1a61b53027 fix: serve /settings without auth header — JS handles token check 2026-04-02 01:38:17 +02:00
thomas.kopp c7cad4bb2a feat: add whisper.cpp ROCm backend support for AMD GPU acceleration
- transcription.py: new _transcribe_remote_whispercpp() using /inference endpoint
- transcription.py: backend param routes to openai or whispercpp remote path
- config.py: whisper.backend default 'openai', alt 'whispercpp'
- pipeline.py: passes backend from config to transcribe_file
- settings: backend dropdown (OpenAI-compat / whisper.cpp)
- SETUP.md: whisper.cpp ROCm build and systemd setup instructions

whisper-cpp-server running on beastix :8080 (ROCm0, gfx1030, RX 6800 XT)
2026-04-02 01:33:32 +02:00
thomas.kopp 56d41b8620 docs: add HuggingFace diarization setup instructions to SETUP.md 2026-04-02 01:18:55 +02:00
thomas.kopp 5f384af6cf feat: add diarization section to settings page
Adds a "Diarisierung" section with an enabled/disabled toggle,
HuggingFace token input, and a help link to pyannote/speaker-diarization-3.1.
loadConfig() and the save handler now persist diarization settings.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 01:18:26 +02:00
thomas.kopp 0eb85b98f1 feat: add frontend speaker naming card for diarization
Shows a card with excerpt navigation and name inputs when the backend
emits speakers_unknown. Submitting posts the mapping to /speakers or
leaves speakers anonymous; handles awaiting_speakers status label.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 01:17:23 +02:00
thomas.kopp e04816fce6 feat: meeting pipeline — parallel diarization, speaker ID, 3-doc output 2026-04-02 01:13:24 +02:00
thomas.kopp 37e432f7fa feat: POST /speakers — resolves pipeline pause with speaker name mapping 2026-04-02 01:07:41 +02:00
thomas.kopp dbb35ce71d feat: AppState gains speaker pause fields and AWAITING_SPEAKERS status 2026-04-02 01:06:30 +02:00
thomas.kopp 033c1fc486 feat: write_meeting_docs() — creates index, transkript, zusammenfassung 2026-04-02 01:05:07 +02:00
thomas.kopp 9b5b89e159 feat: OllamaClient.identify_speakers() and summarize() for diarization pipeline 2026-04-02 01:03:40 +02:00
thomas.kopp b8cc8a3b33 feat: align_segments() — map Whisper timestamps to pyannote speakers 2026-04-02 01:00:58 +02:00
thomas.kopp 1a9d0eacc2 feat: Diarizer class wrapping pyannote/speaker-diarization-3.1 2026-04-02 00:59:50 +02:00
thomas.kopp 47909637a8 feat: transcribe_file returns timestamped segments when with_segments=True 2026-04-02 00:55:53 +02:00
thomas.kopp 7dfc0e0c5f feat: add diarization config defaults (enabled=false, hf_token) 2026-04-02 00:53:53 +02:00
thomas.kopp 7cd6c2a848 docs: diarization implementation plan (13 tasks) 2026-04-02 00:50:57 +02:00
thomas.kopp 8d1af32ef3 docs: diarization + speaker identification design 2026-04-02 00:46:18 +02:00
thomas.kopp 80ce1aa77c docs: add setup guide for Beastix server and client installation 2026-04-02 00:01:05 +02:00
thomas.kopp 52ba53bec4 fix: validate Ollama URL protocol before fetching api/tags 2026-04-01 20:51:23 +02:00
thomas.kopp 0bdc0a5e42 feat: settings page — PipeWire audio device + remote Whisper/Ollama config
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 20:48:56 +02:00
thomas.kopp 81fbbfb56e feat: status includes is_admin, gear icon in header for admins
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 20:45:33 +02:00
thomas.kopp 2376bf5d71 fix: PUT /config deep-merges nested config instead of shallow update
Replaces cfg.update(body) with _deep_merge so partial updates (e.g.
setting whisper.base_url) no longer wipe sibling keys. Also persists
the merged config back to disk via tomli_w. Adds test_put_config_deep_merges.
2026-04-01 20:40:40 +02:00
thomas.kopp ff68827280 fix: module_ids as integers in response, add 403 test for POST /audio/combined 2026-04-01 20:38:43 +02:00
thomas.kopp 478a1ac9d0 feat: GET /audio/devices, POST /audio/combined — PipeWire source management 2026-04-01 20:36:27 +02:00
thomas.kopp ef4aa2a840 feat: AudioRecorder accepts device param — reads audio.device from config
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-01 20:32:44 +02:00
thomas.kopp 5e7faa8844 fix: use get_running_loop() instead of deprecated get_event_loop() 2026-04-01 20:30:06 +02:00
thomas.kopp 8300851e77 feat: remote Whisper via whisper.base_url — OpenAI-compatible upload 2026-04-01 20:28:31 +02:00
thomas.kopp 912b333124 feat: add audio.device and whisper.base_url to config defaults 2026-04-01 20:25:48 +02:00
thomas.kopp 3f9abc6a89 docs: settings page + remote whisper design 2026-04-01 20:11:38 +02:00
thomas.kopp d8c6fc790b fix: define _guest_user() for tray/hotkey-triggered recording 2026-04-01 16:00:33 +02:00
thomas.kopp ccdc75c74c feat: show date and time in transcript list items 2026-04-01 14:40:01 +02:00
thomas.kopp b74147967b feat: tüit logo in header, clean transcript item layout with grouped action buttons 2026-04-01 14:37:03 +02:00
thomas.kopp 2ab6e7d73b fix: move reprocess button to transcript list item, remove from modal 2026-04-01 14:30:28 +02:00
thomas.kopp 33ae9dc1d8 feat: reprocess existing transcript via Ollama — modal button + POST /transcripts/{filename}/reprocess 2026-04-01 14:27:15 +02:00
thomas.kopp 3673e28e73 fix: improve system prompt — explicit paragraph breaks, lists, section headings 2026-04-01 14:23:26 +02:00
thomas.kopp 1ab023c2ac feat: transcript modal with markdown rendering, delete button, remove preview section 2026-04-01 14:18:04 +02:00
thomas.kopp ae3ae61593 feat: DELETE /transcripts/{filename} — delete transcript with path-confinement check 2026-04-01 14:13:02 +02:00
thomas.kopp aa3eef8fb1 feat: GET /transcripts/{filename} — serve transcript content 2026-04-01 14:12:30 +02:00
thomas.kopp 0bb0975a09 docs: add transcript modal + delete implementation plan 2026-04-01 14:09:23 +02:00
thomas.kopp 629341930e fix: show recording state on first click, fix error state via WS 2026-04-01 13:57:41 +02:00
thomas.kopp d60d6bb1be fix: test_config_loads_defaults — remove reload() that undid CONFIG_PATH patch 2026-04-01 13:49:21 +02:00
thomas.kopp b63f5d59a9 fix: read WebSocket token from query_params directly, not function arg 2026-04-01 13:22:20 +02:00
thomas.kopp 6e317a9c67 fix: error state resettable via mic click, debug logging, pipeline traceback 2026-04-01 12:41:45 +02:00
thomas.kopp 6574481647 fix: web-based first-run setup — removes terminal input(), works under systemd 2026-04-01 12:26:17 +02:00
thomas.kopp d9db41b0b4 fix: use ip route instead of hostname -I for Arch compatibility 2026-04-01 12:23:41 +02:00
thomas.kopp f133b9590c fix: use venv instead of pip --user for Arch PEP 668 compatibility 2026-04-01 12:21:40 +02:00
thomas.kopp 325cb2e6e7 feat: merge implement/transkriptor — full tüit Transkriptor implementation 2026-04-01 11:47:32 +02:00
33 changed files with 5466 additions and 102 deletions
+32
@@ -0,0 +1,32 @@
def align_segments(
    whisper_segs: list[dict],
    speaker_segs: list[tuple[float, float, str]],
) -> list[tuple[str, str]]:
    """Assign each Whisper segment to the speaker with the greatest time overlap.

    Consecutive segments from the same speaker are merged into one block."""
    result: list[tuple[str, str]] = []
    for seg in whisper_segs:
        speaker = _best_speaker(seg["start"], seg["end"], speaker_segs)
        text = seg["text"].strip()
        if not text:
            continue
        if result and result[-1][0] == speaker:
            result[-1] = (speaker, result[-1][1] + " " + text)
        else:
            result.append((speaker, text))
    return result


def _best_speaker(
    start: float,
    end: float,
    speaker_segs: list[tuple[float, float, str]],
) -> str:
    best_label = "SPEAKER_00"
    best_overlap = 0.0
    for s_start, s_end, label in speaker_segs:
        overlap = max(0.0, min(end, s_end) - max(start, s_start))
        if overlap > best_overlap:
            best_overlap = overlap
            best_label = label
    return best_label
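
A quick worked example of the overlap rule (hypothetical timestamps, not from the repo): the first two Whisper segments fall inside SPEAKER_00's pyannote turn and are merged; the third overlaps SPEAKER_01 more and starts a new block.

```python
whisper_segs = [
    {"start": 0.0, "end": 2.0, "text": "Hallo zusammen."},
    {"start": 2.0, "end": 4.0, "text": "Fangen wir an."},
    {"start": 4.5, "end": 6.0, "text": "Gerne."},
]
speaker_segs = [
    (0.0, 4.2, "SPEAKER_00"),  # first speaker's turn
    (4.2, 6.5, "SPEAKER_01"),  # second speaker takes over
]

# -> [("SPEAKER_00", "Hallo zusammen. Fangen wir an."), ("SPEAKER_01", "Gerne.")]
print(align_segments(whisper_segs, speaker_segs))
```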
+142 -31
@@ -1,12 +1,18 @@
import asyncio
import logging
import os
import tempfile
import traceback
from datetime import datetime

from api.state import state, Status
from api.router import broadcast
from config import load as load_config
from transcription import engine as transcription_engine
from llm import OllamaClient
from output import save_transcript
from api.router import broadcast
from output import write_solo_docs, write_meeting_docs

logger = logging.getLogger(__name__)


async def run_pipeline():
@@ -17,6 +23,8 @@ async def run_pipeline():
    output_dir = getattr(state, "_recording_output_dir", cfg["output"]["path"])
    instructions = getattr(state, "_recording_instructions", "")
    diar_cfg = cfg.get("diarization", {})
    use_diarization = diar_cfg.get("enabled") and diar_cfg.get("hf_token")

    recorder.stop()
    await state.set_status(Status.PROCESSING)
@@ -28,37 +36,14 @@ async def run_pipeline():
            wav_path = f.name
        recorder.save_wav(wav_path)
        raw_text = await transcription_engine.transcribe_file(
            wav_path,
            language=cfg["whisper"]["language"],
            model_name=cfg["whisper"]["model"],
            device=cfg["whisper"]["device"],
        )
        await broadcast({"event": "transcribed", "raw": raw_text})

        client = OllamaClient(base_url=cfg["ollama"]["base_url"])
        refined = await client.refine(
            raw_text=raw_text,
            instructions=instructions,
            model=cfg["ollama"]["model"],
        )
        await broadcast({"event": "refined", "markdown": refined})

        title = "Diktat"
        for line in refined.splitlines():
            if line.startswith("# "):
                title = line[2:].strip()
                break

        path = save_transcript(
            title=title,
            content=refined,
            output_dir=output_dir,
        )
        await broadcast({"event": "saved", "path": path, "title": title})
        await state.set_status(Status.IDLE)

        if use_diarization:
            await _run_meeting_pipeline(cfg, wav_path, output_dir, instructions, diar_cfg)
        else:
            await _run_solo_pipeline(cfg, wav_path, output_dir, instructions)
    except Exception as e:
        tb = traceback.format_exc()
        logger.error("Pipeline error:\n%s", tb)
        state.last_error = str(e)
        await state.set_status(Status.ERROR)
        await broadcast({"event": "error", "message": str(e)})
@@ -66,8 +51,134 @@ async def run_pipeline():
        state.recording_user = None
        state._recording_output_dir = None
        state._recording_instructions = ""
        state._speakers_event = None
        state._pending_aligned_segments = None
        state._speaker_names = None
        if wav_path:
            try:
                os.unlink(wav_path)
            except OSError:
                pass


async def _run_solo_pipeline(cfg, wav_path, output_dir, instructions):
    """Original single-document pipeline (no diarization)."""
    raw_text = await transcription_engine.transcribe_file(
        wav_path,
        language=cfg["whisper"]["language"],
        model_name=cfg["whisper"]["model"],
        device=cfg["whisper"]["device"],
        base_url=cfg["whisper"].get("base_url", ""),
        backend=cfg["whisper"].get("backend", "openai"),
    )
    await broadcast({"event": "transcribed", "raw": raw_text})

    client = OllamaClient(base_url=cfg["ollama"]["base_url"])
    punctuated, refined = await asyncio.gather(
        client.punctuate(raw_text, model=cfg["ollama"]["model"]),
        client.refine(raw_text=raw_text, instructions=instructions, model=cfg["ollama"]["model"]),
    )
    title, tldr = await client.generate_title_and_tldr(refined, model=cfg["ollama"]["model"])

    dt = datetime.now()
    paths = write_solo_docs(raw_text=punctuated, refined=refined, output_dir=output_dir, dt=dt, title=title, tldr=tldr)
    await state.set_status(Status.IDLE)
    await broadcast({
        "event": "saved",
        "path": paths["index"],
        "title": title,
        "paths": paths,
    })


async def _run_meeting_pipeline(cfg, wav_path, output_dir, instructions, diar_cfg):
    """Diarization pipeline: 3 documents, speaker identification."""
    from diarization import Diarizer
    from alignment import align_segments

    diarizer = Diarizer(hf_token=diar_cfg["hf_token"])
    whisper_task = asyncio.create_task(
        transcription_engine.transcribe_file(
            wav_path,
            language=cfg["whisper"]["language"],
            model_name=cfg["whisper"]["model"],
            device=cfg["whisper"]["device"],
            base_url=cfg["whisper"].get("base_url", ""),
            backend=cfg["whisper"].get("backend", "openai"),
            with_segments=True,
        )
    )
    diar_task = asyncio.create_task(diarizer.diarize(wav_path))
    whisper_segs, speaker_segs = await asyncio.gather(whisper_task, diar_task)

    aligned = align_segments(whisper_segs, speaker_segs)
    await broadcast({"event": "transcribed", "raw": " ".join(t for _, t in aligned)})

    excerpt = "\n".join(f"{s}: {t}" for s, t in aligned[:20])
    client = OllamaClient(base_url=cfg["ollama"]["base_url"])
    name_map = await client.identify_speakers(excerpt, model=cfg["ollama"]["model"])

    if not name_map:
        excerpts_per_speaker = _build_excerpts(aligned)
        state._speakers_event = asyncio.Event()
        state._pending_aligned_segments = aligned
        await state.set_status(Status.AWAITING_SPEAKERS)
        await broadcast({"event": "speakers_unknown", "speakers": [
            {"id": spk, "excerpts": exs}
            for spk, exs in excerpts_per_speaker.items()
        ]})
        await state._speakers_event.wait()
        name_map = state._speaker_names or {}

    def resolve(label):
        name = name_map.get(label, "")
        if name:
            return name
        num = label.replace("SPEAKER_", "").lstrip("0") or "1"
        return f"Sprecher {num}"

    named_aligned = [(resolve(spk), text) for spk, text in aligned]
    speakers = sorted({spk for spk, _ in named_aligned})
    total_secs = sum(s["end"] - s["start"] for s in whisper_segs) if whisper_segs else 0
    duration_min = max(1, round(total_secs / 60))

    raw_transcript = "\n\n".join(f"**{spk}:** {txt}" for spk, txt in named_aligned)
    summary, punctuated_transcript = await asyncio.gather(
        client.summarize(raw_transcript, model=cfg["ollama"]["model"]),
        client.punctuate(raw_transcript, model=cfg["ollama"]["model"]),
    )
    title, tldr = await client.generate_title_and_tldr(summary, model=cfg["ollama"]["model"])

    dt = datetime.now()
    paths = write_meeting_docs(
        aligned_segments=named_aligned,
        summary=summary,
        speakers=speakers,
        duration_min=duration_min,
        output_dir=output_dir,
        dt=dt,
        title=title,
        tldr=tldr,
        transcript_text=punctuated_transcript,
    )
    await state.set_status(Status.IDLE)
    await broadcast({
        "event": "saved",
        "path": paths["index"],
        "title": title,
        "meeting": True,
        "paths": paths,
    })


def _build_excerpts(aligned: list[tuple[str, str]], max_per_speaker: int = 4) -> dict[str, list[str]]:
    """Build a dict of speaker → list of text excerpts."""
    from collections import defaultdict

    buckets: dict[str, list[str]] = defaultdict(list)
    for spk, text in aligned:
        if len(buckets[spk]) < max_per_speaker:
            buckets[spk].append(text[:200])
    return dict(buckets)
+254 -11
@@ -6,12 +6,22 @@ from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Depends, HTTPExce
from api.state import state, Status
from config import load as load_config
from output import list_transcripts
from output import list_transcripts, read_transcript

router = APIRouter()
_ws_clients: list[WebSocket] = []


def _guest_user() -> dict:
    """Return the first registered user — used for hotkey/tray-triggered recordings."""
    from auth import _load_users
    users = _load_users()
    if not users:
        raise RuntimeError("Kein Nutzer eingerichtet")
    username, data = next(iter(users.items()))
    return {"username": username, "output_dir": data["output_dir"], "is_admin": data.get("is_admin", False)}


# ---------------------------------------------------------------------------
# Auth dependency
# ---------------------------------------------------------------------------
@@ -54,13 +64,41 @@ async def logout(authorization: Optional[str] = Header(None)):
    return {"ok": True}


@router.get("/setup")
async def setup_page():
    from fastapi.responses import FileResponse
    from auth import has_users
    from pathlib import Path
    if has_users():
        from fastapi.responses import RedirectResponse
        return RedirectResponse("/")
    return FileResponse(str(Path(__file__).parent.parent / "frontend" / "setup.html"))


@router.post("/setup")
async def setup_post(body: dict):
    from auth import has_users, create_user
    from config import load as load_config
    if has_users():
        raise HTTPException(status_code=403, detail="Bereits eingerichtet")
    username = body.get("username", "").strip()
    password = body.get("password", "")
    if not username or len(password) < 6:
        raise HTTPException(status_code=400, detail="Ungültige Eingabe")
    cfg = load_config()
    default_dir = cfg["output"]["path"]
    output_dir = body.get("output_dir") or default_dir
    create_user(username, password, output_dir, is_admin=True)
    return {"ok": True}


# ---------------------------------------------------------------------------
# Protected endpoints
# ---------------------------------------------------------------------------
@router.get("/status")
async def get_status(user: dict = Depends(current_user)):
    return {"status": state.status, "username": user["username"]}
    return {"status": state.status, "username": user["username"], "is_admin": user.get("is_admin", False)}


@router.post("/toggle")
@@ -69,9 +107,14 @@ async def toggle_recording(user: dict = Depends(current_user)):
    if state.status == Status.RECORDING:
        asyncio.create_task(run_pipeline())
        return {"action": "stopped"}
    if state.status == Status.ERROR:
        await state.set_status(Status.IDLE)
        return {"action": "reset"}
    if state.status == Status.IDLE:
        from audio import AudioRecorder
        state._recorder = AudioRecorder()
        cfg = load_config()
        audio_device = cfg.get("audio", {}).get("device") or None
        state._recorder = AudioRecorder(device=audio_device)
        state._recorder.start()
        state.recording_user = user["username"]
        state._recording_output_dir = os.path.join(user["output_dir"], user["username"])
@@ -93,6 +136,58 @@ async def get_transcripts(user: dict = Depends(current_user)):
    return list_transcripts(user_dir)


@router.get("/transcripts/{filename:path}")
async def get_transcript(filename: str, user: dict = Depends(current_user)):
    from fastapi.responses import PlainTextResponse
    user_dir = os.path.join(user["output_dir"], user["username"])
    content = read_transcript(user_dir, filename)
    if content is None:
        raise HTTPException(status_code=404, detail="Nicht gefunden")
    return PlainTextResponse(content)


@router.post("/transcripts/{filename:path}/reprocess")
async def reprocess_transcript(filename: str, body: dict, user: dict = Depends(current_user)):
    from output import read_transcript
    from fastapi.responses import PlainTextResponse
    from llm import OllamaClient
    user_dir = os.path.join(user["output_dir"], user["username"])
    content = read_transcript(user_dir, filename)
    if content is None:
        raise HTTPException(status_code=404, detail="Nicht gefunden")
    # Strip YAML frontmatter before sending to LLM
    body_text = content
    if content.startswith("---\n"):
        end = content.find("\n---\n", 4)
        if end != -1:
            body_text = content[end + 5:].lstrip("\n")
    cfg = load_config()
    instructions = body.get("instructions", "")
    client = OllamaClient(base_url=cfg["ollama"]["base_url"])
    refined = await client.refine(body_text, instructions=instructions, model=cfg["ollama"]["model"])
    # Overwrite same file (keep filename stable, update frontmatter date)
    from datetime import datetime
    path = os.path.join(user_dir, filename)
    with open(path, "w", encoding="utf-8") as f:
        now = datetime.now()
        f.write(f"---\ndate: {now.isoformat(timespec='seconds')}\ntags: [transkript]\n---\n\n")
        f.write(refined if refined.endswith("\n") else refined + "\n")
    return PlainTextResponse(refined)


@router.delete("/transcripts/{filename:path}")
async def delete_transcript(filename: str, user: dict = Depends(current_user)):
    user_dir = os.path.join(user["output_dir"], user["username"])
    parts = filename.split("/")
    if len(parts) > 2 or any(p in (".", "..") or not p for p in parts) or not filename.endswith(".md"):
        raise HTTPException(status_code=404, detail="Nicht gefunden")
    path = os.path.join(user_dir, filename)
    if not os.path.exists(path):
        raise HTTPException(status_code=404, detail="Nicht gefunden")
    os.unlink(path)
    return {"ok": True}


@router.get("/config")
async def get_config(user: dict = Depends(current_user)):
    return load_config()
@@ -102,25 +197,173 @@ async def get_config(user: dict = Depends(current_user)):
async def put_config(body: dict, user: dict = Depends(current_user)):
    if not user.get("is_admin"):
        raise HTTPException(status_code=403, detail="Nur Administratoren können die Config ändern")
    from config import _deep_merge, CONFIG_PATH
    import tomli_w
    cfg = load_config()
    cfg.update(body)
    return cfg
    merged = _deep_merge(cfg, body)
    os.makedirs(os.path.dirname(CONFIG_PATH), exist_ok=True)
    with open(CONFIG_PATH, "wb") as f:
        tomli_w.dump(merged, f)
    return merged


@router.post("/open")
async def open_file(body: dict, user: dict = Depends(current_user)):
    import subprocess
    path = body.get("path", "")
    # Only allow opening files within the user's own output directory
    import subprocess, shutil
    user_dir = os.path.join(user["output_dir"], user["username"])
    if path and os.path.exists(path) and os.path.abspath(path).startswith(os.path.abspath(user_dir)):
        subprocess.Popen(["xdg-open", path])
    abs_user_dir = os.path.abspath(user_dir)
    # Accept either a single path or a list of paths (for 3-file recordings)
    raw_paths = body.get("paths") or ([body.get("path")] if body.get("path") else [])
    paths = [p for p in raw_paths if p and os.path.exists(p) and os.path.abspath(p).startswith(abs_user_dir)]
    if not paths:
        return {"ok": False}
    mode = body.get("mode", "editor")  # "editor" | "folder" | "obsidian"
    if mode == "obsidian":
        from urllib.parse import quote
        cfg = load_config()
        vault = cfg.get("obsidian", {}).get("vault", "").strip()
        # If only the index was passed, also include siblings from subdir
        all_paths = list(paths)
        for p in paths:
            if p.endswith("-index.md"):
                base = os.path.basename(p)[: -len("-index.md")]
                subdir = os.path.join(os.path.dirname(p), base)
                for suffix in ("-transkript.md", "-zusammenfassung.md"):
                    sibling = os.path.join(subdir, base + suffix)
                    if os.path.exists(sibling) and sibling not in all_paths:
                        all_paths.append(sibling)
        open_target = all_paths[0]
        if vault and os.path.isdir(vault):
            # Mirror directory structure: index → vault root, others → vault/{base}/
            for p in all_paths:
                rel = os.path.relpath(p, abs_user_dir)
                dest = os.path.join(vault, rel)
                os.makedirs(os.path.dirname(dest), exist_ok=True)
                shutil.copy2(p, dest)
            open_target = os.path.join(vault, os.path.relpath(all_paths[0], abs_user_dir))
        vault_name = os.path.basename(vault.rstrip("/")) if vault else ""
        file_name = os.path.basename(open_target)
        if vault_name:
            uri = f"obsidian://open?vault={quote(vault_name)}&file={quote(file_name)}"
        else:
            uri = f"obsidian://open?path={quote(open_target, safe='/')}"
        obsidian_bin = shutil.which("obsidian") or "/usr/bin/obsidian"
        subprocess.Popen([obsidian_bin, uri])
    elif mode == "folder" and shutil.which("dolphin"):
        subprocess.Popen(["dolphin", "--select", paths[0]])
    elif mode == "folder":
        subprocess.Popen(["xdg-open", os.path.dirname(paths[0])])
    else:
        subprocess.Popen(["xdg-open", paths[0]])
    return {"ok": True}


def _pactl_source_for_sd_name(sd_name: str) -> str:
    """Map a sounddevice device name to its pactl source name via description matching.

    sounddevice strips the 'Monitor of ' prefix from pactl source descriptions.
    Falls back to sd_name if no match found."""
    import subprocess
    try:
        out = subprocess.check_output(
            ["pactl", "list", "sources"], stderr=subprocess.DEVNULL, timeout=5
        ).decode()
        current_name = None
        for line in out.splitlines():
            line = line.strip()
            if line.startswith("Name:"):
                current_name = line.split(":", 1)[1].strip()
            elif line.startswith("Description:") and current_name:
                desc = line.split(":", 1)[1].strip().removeprefix("Monitor of ")
                if desc == sd_name:
                    return current_name
                current_name = None
    except Exception:
        pass
    return sd_name


@router.get("/audio/devices")
async def list_audio_devices(user: dict = Depends(current_user)):
    import sounddevice as sd
    if not user.get("is_admin"):
        raise HTTPException(status_code=403, detail="Nur Administratoren")
    try:
        devices = [
            {"index": i, "name": d["name"]}
            for i, d in enumerate(sd.query_devices())
            if d["max_input_channels"] > 0
        ]
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"sounddevice fehlgeschlagen: {e}")
    return devices


@router.post("/audio/combined")
async def create_combined_source(body: dict, user: dict = Depends(current_user)):
    import subprocess, json, pathlib
    if not user.get("is_admin"):
        raise HTTPException(status_code=403, detail="Nur Administratoren")
    mic_sd = body.get("mic", "")
    monitor_sd = body.get("monitor", "")
    if not mic_sd or not monitor_sd:
        raise HTTPException(status_code=400, detail="mic und monitor erforderlich")
    # Map sounddevice names → pactl source names for loopback commands
    mic = _pactl_source_for_sd_name(mic_sd)
    monitor = _pactl_source_for_sd_name(monitor_sd)
    # Validate pactl names exist
    out = subprocess.check_output(
        ["pactl", "list", "sources", "short"], stderr=subprocess.DEVNULL, timeout=5
    ).decode()
    known = {line.split("\t")[1] for line in out.strip().splitlines() if "\t" in line}
    if mic not in known or monitor not in known:
        raise HTTPException(status_code=400, detail="Unbekanntes Audio-Device")
    # Use description without spaces so sounddevice name == sink_name
    sink_id = subprocess.check_output([
        "pactl", "load-module", "module-null-sink",
        "sink_name=transkriptor-combined",
        "sink_properties=device.description=transkriptor-combined",
    ], timeout=5).decode().strip()
    mic_id = subprocess.check_output([
        "pactl", "load-module", "module-loopback",
        f"source={mic}", "sink=transkriptor-combined",
    ], timeout=5).decode().strip()
    mon_id = subprocess.check_output([
        "pactl", "load-module", "module-loopback",
        f"source={monitor}", "sink=transkriptor-combined",
    ], timeout=5).decode().strip()
    state_path = pathlib.Path(
        os.path.expanduser("~/.config/tueit-transcriber/pipewire-modules.json")
    )
    state_path.parent.mkdir(parents=True, exist_ok=True)
    ids = [int(sink_id), int(mic_id), int(mon_id)]
    # Store pactl names for restore, sounddevice name as device
    state_path.write_text(json.dumps({"ids": ids, "mic": mic, "monitor": monitor}))
    return {"device": "transkriptor-combined", "module_ids": ids}


@router.get("/settings")
async def settings_page_route():
    from fastapi.responses import FileResponse
    from pathlib import Path
    return FileResponse(str(Path(__file__).parent.parent / "frontend" / "settings.html"))


@router.post("/speakers")
async def post_speakers(body: dict, user: dict = Depends(current_user)):
    if state._speakers_event is None:
        raise HTTPException(status_code=409, detail="Keine ausstehende Sprecher-Zuordnung")
    state._speaker_names = {k: v for k, v in body.items() if isinstance(k, str)}
    state._speakers_event.set()
    return {"ok": True}


@router.websocket("/ws")
async def websocket_endpoint(ws: WebSocket, token: str = ""):
async def websocket_endpoint(ws: WebSocket):
    from auth import get_user_for_token
    token = ws.query_params.get("token", "")
    user = get_user_for_token(token)
    if not user:
        await ws.close(code=4001)
+6 -1
@@ -8,15 +8,20 @@ class Status(str, Enum):
IDLE = "idle"
RECORDING = "recording"
PROCESSING = "processing"
AWAITING_SPEAKERS = "awaiting_speakers"
ERROR = "error"
@dataclass
class AppState:
status: Status = Status.IDLE
recording_user: str | None = None # which user triggered the current recording
recording_user: str | None = None
last_error: str | None = None
_listeners: list[Callable] = field(default_factory=list, repr=False)
# Diarization pipeline pause
_speakers_event: asyncio.Event | None = None
_pending_aligned_segments: list[tuple[str, str]] | None = None
_speaker_names: dict[str, str] | None = None
def subscribe(self, callback: Callable):
self._listeners.append(callback)
+3 -1
@@ -4,8 +4,9 @@ import numpy as np
class AudioRecorder:
    def __init__(self, sample_rate: int = 16000):
    def __init__(self, sample_rate: int = 48000, device: str | None = None):
        self.sample_rate = sample_rate
        self.device = device or None
        self._buffer: list[np.ndarray] = []
        self._stream = None
        self.is_recording = False
@@ -25,6 +26,7 @@
            channels=1,
            dtype="int16",
            callback=self._callback,
            device=self.device,
        )
        self._stream.start()
+16 -2
@@ -12,6 +12,11 @@ DEFAULTS = {
"model": "large-v3",
"language": "de",
"device": "auto", # "auto" = use GPU if ROCm available, else CPU
"base_url": "",
"backend": "openai", # "openai" = OpenAI-compatible API, "whispercpp" = whisper.cpp /inference
},
"audio": {
"device": "",
},
"server": {
"port": 8765,
@@ -24,6 +29,13 @@ DEFAULTS = {
"network": {
"host": "127.0.0.1",
},
"diarization": {
"enabled": False,
"hf_token": "",
},
"obsidian": {
"vault": "",
},
"pid_file": os.path.expanduser("~/.local/run/tueit-transcriber.pid"),
}
@@ -56,6 +68,8 @@ def _write_defaults():
with open(CONFIG_PATH, "w") as f:
f.write("# tüit Transkriptor config\n\n")
f.write('[ollama]\nbase_url = "http://localhost:11434"\nmodel = "gemma3:12b"\n\n')
f.write('[whisper]\nmodel = "large-v3"\nlanguage = "de"\ndevice = "auto"\n\n')
f.write('[whisper]\nmodel = "large-v3"\nlanguage = "de"\ndevice = "auto"\nbase_url = ""\n\n')
f.write('[audio]\ndevice = ""\n\n')
f.write('[server]\nport = 8765\n\n')
f.write(f'[output]\npath = "{DEFAULTS["output"]["path"]}"\n')
f.write(f'[output]\npath = "{DEFAULTS["output"]["path"]}"\n\n')
f.write('[diarization]\nenabled = false\nhf_token = ""\n\n')
+29
@@ -0,0 +1,29 @@
import asyncio


class Diarizer:
    def __init__(self, hf_token: str):
        if not hf_token:
            raise ValueError("hf_token is required for diarization")
        self._hf_token = hf_token
        self._pipeline = None

    def _load_pipeline(self):
        if self._pipeline is None:
            from pyannote.audio import Pipeline
            self._pipeline = Pipeline.from_pretrained(
                "pyannote/speaker-diarization-3.1",
                token=self._hf_token,
            )
        return self._pipeline

    async def diarize(self, wav_path: str) -> list[tuple[float, float, str]]:
        loop = asyncio.get_running_loop()
        pipeline = await loop.run_in_executor(None, self._load_pipeline)
        result = await loop.run_in_executor(None, lambda: pipeline(wav_path))
        # pyannote 4.x returns DiarizeOutput; older versions return Annotation directly
        annotation = getattr(result, "speaker_diarization", result)
        return [
            (turn.start, turn.end, speaker)
            for turn, _, speaker in annotation.itertracks(yield_label=True)
        ]
+194
@@ -0,0 +1,194 @@
# tüit Transkriptor — Setup Guide

## Architecture

```
┌─────────────────────────┐          ┌──────────────────────────────┐
│  Client (Linux)         │          │  Beastix (LAN server)        │
│                         │   WAV    │                              │
│  Transkriptor-App       │  ──────► │  faster-whisper-server :8000 │
│  Browser-UI :8765       │   Text   │  Ollama :11434               │
│                         │  ◄────── │                              │
└─────────────────────────┘          └──────────────────────────────┘
```

Every client runs fully independently. The AI processing (Whisper + Ollama) can
run either locally or on Beastix — configurable via the settings page.
---
## Beastix (server setup, one-time)

### 1. Build whisper.cpp with ROCm/GPU support

Prerequisite: ROCm installed (Arch: `sudo pacman -S rocm-hip-sdk`).

```bash
mkdir -p ~/src && cd ~/src
git clone https://github.com/ggml-org/whisper.cpp.git --depth=1
cd whisper.cpp

# For the AMD RX 6800 XT (gfx1030) — adjust the gfx target if needed
cmake -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release -DWHISPER_BUILD_SERVER=ON
cmake --build build -j$(nproc)

# Download the large-v3 model (~2.9 GB)
bash models/download-ggml-model.sh large-v3
```

`gfx1030` = RX 6800 XT. For other AMD GPUs: `rocminfo | grep gfx`
### 2. Set up as a systemd user service

```bash
cat > ~/.config/systemd/user/whisper-cpp-server.service << 'EOF'
[Unit]
Description=whisper.cpp Server (ROCm/GPU)
After=network.target

[Service]
ExecStart=%h/src/whisper.cpp/build/bin/whisper-server \
    --host 0.0.0.0 \
    --port 8080 \
    --model %h/src/whisper.cpp/models/ggml-large-v3.bin \
    --language de \
    --threads 4 \
    --convert
Restart=on-failure
RestartSec=5

[Install]
WantedBy=default.target
EOF

systemctl --user daemon-reload
systemctl --user enable --now whisper-cpp-server.service
```

Check the logs: `journalctl --user -u whisper-cpp-server -f`

GPU use is confirmed when the logs contain `using ROCm0 backend`.
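
To smoke-test the server from any LAN client, a minimal upload sketch for the whisper.cpp `/inference` endpoint (host and file name are examples): `temperature_inc=0` is the parameter the commit log above adds to disable fallback decoding, the cause of the repetition loops.

```python
# Minimal smoke test against the whisper.cpp server (sketch; adjust host and file).
import httpx

with open("sample.wav", "rb") as f:
    r = httpx.post(
        "http://beastix:8080/inference",
        files={"file": ("sample.wav", f, "audio/wav")},
        # temperature_inc=0 disables the temperature-fallback retries (repetition-loop fix)
        data={"temperature": "0", "temperature_inc": "0", "response_format": "json"},
        timeout=300,
    )
r.raise_for_status()
print(r.json()["text"])
```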
### 3. Install Ollama (if not already installed)

```bash
curl -fsSL https://ollama.com/install.sh | sh
ollama pull gemma3:12b
```

### 4. Firewall — open the ports (LAN only)

```bash
# Whisper server
sudo ufw allow from 192.168.0.0/16 to any port 8000
# Ollama
sudo ufw allow from 192.168.0.0/16 to any port 11434
```

Open the ports to the LAN only — never expose them to the internet.
---
## Client setup (every Linux machine)

### 1. Install Transkriptor

```bash
git clone git@git.tueit.de:tueit_GmbH/tueit_Transkriptor.git
cd tueit_Transkriptor
python -m venv .venv
.venv/bin/pip install -r requirements.txt
```

### 2. Start the app

```bash
.venv/bin/python main.py
```

On first start the browser opens automatically (or open http://localhost:8765 manually).
Create the admin account once, then log in.
### 3. Configure Beastix (Einstellungen → ⚙)

Log in as admin → gear icon in the header → **Einstellungen**:

| Field | Value (example) |
|-------|-----------------|
| Whisper Backend | `whisper.cpp Server` |
| Whisper Server URL | `http://beastix:8080` |
| Whisper Modell | `large-v3` |
| Ollama Server URL | `http://beastix:11434` |
| Ollama Modell | `gemma3:12b` (pick from the dropdown) |

Leave the URLs empty for local processing (requires a local Whisper model).
### 4. Set up as a systemd user service (optional)

```bash
cat > ~/.config/systemd/user/tueit-transcriber.service << 'EOF'
[Unit]
Description=tüit Transkriptor
After=network.target

[Service]
WorkingDirectory=%h/work/tueit_Transkriptor
ExecStart=%h/work/tueit_Transkriptor/.venv/bin/python main.py
Restart=on-failure
RestartSec=5

[Install]
WantedBy=default.target
EOF

systemctl --user daemon-reload
systemctl --user enable --now tueit-transcriber.service
```
---
## Audio — PipeWire Combined Source

To record microphone and system audio at the same time (e.g. for meetings):

Einstellungen → Audio → **Combined Source erstellen**

- Select the microphone
- Select the system-audio monitor (e.g. `alsa_output.*.monitor`)
- Create → a new device `transkriptor-combined.monitor` appears in the list
- Select it as the recording source and save

The PipeWire modules are stored in `~/.config/tueit-transcriber/pipewire-modules.json`.
When the app stops, they can be unloaded with `pactl unload-module <id>`, as sketched below.
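
A minimal cleanup sketch, assuming the JSON layout written by `POST /audio/combined` (`{"ids": [...], "mic": ..., "monitor": ...}`):

```python
# Unload the PipeWire modules recorded by the app (sketch).
import json
import pathlib
import subprocess

state_path = pathlib.Path("~/.config/tueit-transcriber/pipewire-modules.json").expanduser()
if state_path.exists():
    for module_id in json.loads(state_path.read_text())["ids"]:
        # check=False: ignore modules that are already gone (e.g. after a PipeWire restart)
        subprocess.run(["pactl", "unload-module", str(module_id)], check=False)
    state_path.unlink()
```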
---
## Diarization (speaker detection) — HuggingFace setup

Diarization uses the `pyannote/speaker-diarization-3.1` model. The model is free of
charge but requires accepting its usage conditions once.

### 1. HuggingFace account

If you don't have an account yet: https://huggingface.co/join

### 2. Request model access

1. Open https://huggingface.co/pyannote/speaker-diarization-3.1
2. Click **"Access repository"** and accept the usage conditions

### 3. Create a read token

1. Open https://huggingface.co/settings/tokens
2. **"New token"** → name e.g. `transkriptor` → type **Read** → create
3. Copy the token (it starts with `hf_`)

### 4. Enter the token in Transkriptor

Log in as admin → gear icon → **Einstellungen** → **Diarisierung**:

- Enable the **"Sprecher-Erkennung aktivieren"** checkbox
- Paste the token into the **HuggingFace Token** field
- **Speichern**

On first use, pyannote downloads the model (~1 GB) and caches it locally.
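
To check the token and pre-download the model before the first real recording, a minimal sketch (it mirrors the app's own `Diarizer._load_pipeline`; the token value is a placeholder):

```python
# Warm the local pyannote cache and confirm the HuggingFace token works (sketch).
from pyannote.audio import Pipeline

pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    token="hf_...",  # placeholder: use your read token
)
print("model loaded:", pipeline is not None)
```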
@@ -0,0 +1,113 @@
# Settings Page & Remote Whisper Design
**Date:** 2026-04-01
## Goal
Give each Linux client a settings page to configure audio device (via PipeWire/pactl) and remote server URLs (Whisper + Ollama). Beastix runs faster-whisper-server and Ollama; clients point their config at it.
## Architecture
```
┌─────────────────┐          ┌─────────────────────────────┐
│ Client (Linux)  │          │ Beastix (LAN)               │
│                 │   WAV    │                             │
│ sounddevice     │  ──────► │ faster-whisper-server       │
│ PipeWire src    │          │ :8000 (OpenAI-compatible)   │
│ localhost:8765  │   Text   │                             │
│ Browser UI      │  ◄────── │ Ollama :11434               │
└─────────────────┘          └─────────────────────────────┘
```

Default: `whisper.base_url` empty → local faster-whisper instance. Set → HTTP upload to Beastix.
## Settings Page (`/settings`)

Visible to admins only (gear icon in the header). Two sections:

### Section 1 — Audio

- Dropdown: all available PipeWire sources (live via `pactl list sources short`)
- "Combined Source erstellen" button → the app runs the pactl commands and the combined source appears in the list
- Selected device → `config.toml [audio] device = "..."`
- sounddevice uses this device name the next time a recording starts

### Section 2 — Processing

- Whisper `base_url`: empty = local, otherwise e.g. `http://beastix:8000`
- Whisper `model`: free-text field (default: `large-v3`)
- Ollama `base_url`: e.g. `http://beastix:11434`
- Ollama `model`: dropdown populated via `GET {ollama_base_url}/api/tags`

Changes are written to `config.toml` immediately (PUT /config).
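
Because PUT /config deep-merges (see the fix in the commit log above), a partial update can set one nested key without resending the whole config. A minimal sketch; the bearer token is a placeholder:

```python
# Point this client's Whisper processing at Beastix without touching other settings.
import httpx

r = httpx.put(
    "http://localhost:8765/config",
    json={"whisper": {"base_url": "http://beastix:8000"}},  # partial update, deep-merged
    headers={"Authorization": "Bearer <token>"},  # placeholder admin token
)
r.raise_for_status()
print(r.json()["whisper"])  # base_url updated, model and language preserved
```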
## Remote Whisper (transcription.py)
```python
if cfg["whisper"].get("base_url"):
    # OpenAI-compatible upload
    POST {base_url}/v1/audio/transcriptions
    multipart: file=<wav>, model=<model>, language=<lang>
    response.text
else:
    # Local, as before
    WhisperModel(model_name, device=device).transcribe(...)
```
## New API endpoints

| Method | Path | Description |
|--------|------|-------------|
| GET | `/settings` | Serves settings.html |
| GET | `/audio/devices` | Parsed pactl sources → JSON list |
| POST | `/audio/combined` | Creates the PipeWire combined source via pactl |

PUT `/config` already exists — it is extended to cover `audio.device` and `whisper.base_url`.
## Config schema extension

```toml
[audio]
device = ""  # empty = system default, otherwise a PipeWire source name

[whisper]
model = "large-v3"
language = "de"
device = "auto"
base_url = ""  # empty = local, otherwise http://beastix:8000
```
## PipeWire Combined Source

On clicking "Combined Source erstellen":

```bash
pactl load-module module-null-sink \
    sink_name=transkriptor-combined \
    sink_properties=device.description="Transkriptor\ Combined"
pactl load-module module-loopback \
    source=<mic-device> sink=transkriptor-combined
pactl load-module module-loopback \
    source=<default-output>.monitor sink=transkriptor-combined
```

Result: `transkriptor-combined.monitor` shows up as a recordable source in the list.

The module IDs are stored in `config.toml` so they can be unloaded cleanly when the app stops (`pactl unload-module <id>`).
## Beastix setup (one-time)

```bash
pip install faster-whisper-server
faster-whisper-server --host 0.0.0.0 --port 8000 --model large-v3
```

Or as a systemd user service. Clients then set `whisper.base_url = "http://beastix:8000"`.

## Out of scope

- User management via settings (separate page)
- Automatic restart when the config changes
- Windows support
@@ -0,0 +1,921 @@
# Settings Page & Remote Whisper Implementation Plan
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
**Goal:** Add a settings page with PipeWire audio device selection and remote Whisper/Ollama URL configuration, enabling clients to offload AI processing to Beastix.
**Architecture:** Config gains `audio.device` and `whisper.base_url`. `transcription.py` branches on `base_url`: local faster-whisper or remote OpenAI-compatible HTTP upload. A new `/settings` page (admin-only) lets users pick PipeWire sources via `pactl` and configure server URLs. PipeWire combined source is created on demand via `pactl load-module`.
**Tech Stack:** FastAPI, httpx (already in deps), pactl (PipeWire), sounddevice, faster-whisper, vanilla JS/CSS (tüit CI dark theme)
---
### Task 1: Extend config defaults
**Files:**
- Modify: `config.py`
- Test: `tests/test_config.py`
**Step 1: Write the failing test**
Add to `tests/test_config.py`:
```python
def test_config_has_audio_and_whisper_base_url():
    import config
    from unittest.mock import patch
    import tempfile, os

    with tempfile.TemporaryDirectory() as tmpdir:
        cfg_path = os.path.join(tmpdir, "config.toml")
        with patch("config.CONFIG_PATH", cfg_path):
            cfg = config.load()
            assert "audio" in cfg
            assert cfg["audio"]["device"] == ""
            assert cfg["whisper"]["base_url"] == ""
```
**Step 2: Run to verify it fails**
```bash
pytest tests/test_config.py::test_config_has_audio_and_whisper_base_url -v
```
Expected: FAIL — KeyError or AssertionError
**Step 3: Update `config.py` DEFAULTS**
```python
DEFAULTS = {
    "ollama": {
        "base_url": "http://localhost:11434",
        "model": "gemma3:12b",
    },
    "whisper": {
        "model": "large-v3",
        "language": "de",
        "device": "auto",
        "base_url": "",  # empty = local, else http://beastix:8000
    },
    "audio": {
        "device": "",  # empty = system default
    },
    "server": {
        "port": 8765,
    },
    "output": {
        "path": os.path.expanduser("~/cloud.shron.de/Hetzner Storagebox/work"),
    },
    "network": {
        "host": "127.0.0.1",
    },
    "pid_file": os.path.expanduser("~/.local/run/tueit-transcriber.pid"),
}
```
Update the fallback string writer in `_write_defaults`:
```python
f.write('[whisper]\nmodel = "large-v3"\nlanguage = "de"\ndevice = "auto"\nbase_url = ""\n\n')
f.write('[audio]\ndevice = ""\n\n')
```
**Step 4: Run tests**
```bash
pytest tests/test_config.py -v
```
Expected: all PASS
**Step 5: Commit**
```bash
git add config.py tests/test_config.py
git commit -m "feat: add audio.device and whisper.base_url to config defaults"
```
---
### Task 2: Remote Whisper in transcription.py
**Files:**
- Modify: `transcription.py`
- Modify: `api/pipeline.py`
- Test: `tests/test_transcription.py`
**Step 1: Write the failing test**
Add to `tests/test_transcription.py`:
```python
import pytest


@pytest.mark.asyncio
async def test_transcribe_uses_remote_when_base_url_set(tmp_path):
    import wave, struct
    wav = tmp_path / "test.wav"
    with wave.open(str(wav), "wb") as wf:
        wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(16000)
        wf.writeframes(struct.pack("<100h", *([0] * 100)))
    import respx, httpx
    from transcription import TranscriptionEngine
    eng = TranscriptionEngine()
    with respx.mock:
        respx.post("http://beastix:8000/v1/audio/transcriptions").mock(
            return_value=httpx.Response(200, json={"text": "Hallo Welt"})
        )
        result = await eng.transcribe_file(
            str(wav), language="de", model_name="large-v3",
            device="auto", base_url="http://beastix:8000",
        )
    assert result == "Hallo Welt"
```
**Step 2: Run to verify it fails**
```bash
pytest tests/test_transcription.py::test_transcribe_uses_remote_when_base_url_set -v
```
Expected: FAIL — `transcribe_file` doesn't accept `base_url`
**Step 3: Rewrite `transcription.py`**
```python
import asyncio
import httpx


class TranscriptionEngine:
    _model = None

    def _get_model(self, model_name: str = "large-v3", device: str = "auto"):
        if self._model is None:
            from faster_whisper import WhisperModel
            if device == "auto":
                try:
                    self._model = WhisperModel(model_name, device="cuda", compute_type="float16")
                except Exception:
                    self._model = WhisperModel(model_name, device="cpu", compute_type="int8")
            else:
                compute = "float16" if device in ("cuda", "rocm") else "int8"
                self._model = WhisperModel(model_name, device=device, compute_type=compute)
        return self._model

    async def transcribe_file(
        self,
        audio_path: str,
        language: str = "de",
        model_name: str = "large-v3",
        device: str = "auto",
        base_url: str = "",
    ) -> str:
        if base_url:
            return await self._transcribe_remote(audio_path, language, model_name, base_url)
        return await self._transcribe_local(audio_path, language, model_name, device)

    async def _transcribe_remote(
        self, audio_path: str, language: str, model_name: str, base_url: str
    ) -> str:
        async with httpx.AsyncClient(timeout=300) as client:
            with open(audio_path, "rb") as f:
                r = await client.post(
                    f"{base_url}/v1/audio/transcriptions",
                    files={"file": ("audio.wav", f, "audio/wav")},
                    data={"model": model_name, "language": language},
                )
        r.raise_for_status()
        return r.json()["text"]

    async def _transcribe_local(
        self, audio_path: str, language: str, model_name: str, device: str
    ) -> str:
        loop = asyncio.get_event_loop()
        model = self._get_model(model_name, device)
        segments, _ = await loop.run_in_executor(
            None,
            lambda: model.transcribe(audio_path, language=language),
        )
        return "".join(seg.text for seg in segments).strip()


engine = TranscriptionEngine()
```
**Step 4: Update `api/pipeline.py` — pass base_url**
In `run_pipeline`, update the `transcribe_file` call:
```python
raw_text = await transcription_engine.transcribe_file(
    wav_path,
    language=cfg["whisper"]["language"],
    model_name=cfg["whisper"]["model"],
    device=cfg["whisper"]["device"],
    base_url=cfg["whisper"].get("base_url", ""),
)
```
**Step 5: Run all transcription tests**
```bash
pytest tests/test_transcription.py -v
```
Expected: all PASS
**Step 6: Commit**
```bash
git add transcription.py api/pipeline.py tests/test_transcription.py
git commit -m "feat: remote Whisper via whisper.base_url — OpenAI-compatible upload"
```
---
### Task 3: Audio device in AudioRecorder
**Files:**
- Modify: `audio.py`
- Modify: `api/router.py` (toggle endpoint)
- Test: `tests/test_audio.py`
**Step 1: Write the failing test**
Add to `tests/test_audio.py`:
```python
def test_recorder_stores_device_param():
    from audio import AudioRecorder
    rec = AudioRecorder(device="my-pipewire-source")
    assert rec.device == "my-pipewire-source"


def test_recorder_device_none_when_empty_string():
    from audio import AudioRecorder
    rec = AudioRecorder(device="")
    assert rec.device is None
```
**Step 2: Run to verify they fail**
```bash
pytest tests/test_audio.py::test_recorder_stores_device_param tests/test_audio.py::test_recorder_device_none_when_empty_string -v
```
Expected: FAIL
**Step 3: Update `audio.py`**
```python
import wave
import threading
import numpy as np


class AudioRecorder:
    def __init__(self, sample_rate: int = 16000, device: str | None = None):
        self.sample_rate = sample_rate
        self.device = device or None  # empty string becomes None = system default
        self._buffer: list[np.ndarray] = []
        self._stream = None
        self.is_recording = False
        self._lock = threading.Lock()

    def _callback(self, indata, frames, time, status):
        if self.is_recording:
            with self._lock:
                self._buffer.append(indata[:, 0].copy().astype(np.int16))

    def start(self):
        import sounddevice as sd
        self._buffer = []
        self.is_recording = True
        self._stream = sd.InputStream(
            samplerate=self.sample_rate,
            channels=1,
            dtype="int16",
            callback=self._callback,
            device=self.device,
        )
        self._stream.start()

    def stop(self):
        self.is_recording = False
        if self._stream:
            self._stream.stop()
            self._stream.close()
            self._stream = None

    def save_wav(self, path: str) -> str:
        with self._lock:
            data = np.concatenate(self._buffer) if self._buffer else np.zeros(0, dtype=np.int16)
        with wave.open(path, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(self.sample_rate)
            wf.writeframes(data.tobytes())
        return path
```
**Step 4: Pass device from config in `api/router.py` toggle endpoint**
In `toggle_recording`, update the `Status.IDLE` branch:
```python
if state.status == Status.IDLE:
    from audio import AudioRecorder
    audio_device = cfg.get("audio", {}).get("device") or None
    state._recorder = AudioRecorder(device=audio_device)
    ...
```
Also load config at the top of toggle_recording (it's already imported):
```python
cfg = load_config()
```
**Step 5: Run tests**
```bash
pytest tests/test_audio.py -v
```
Expected: all PASS
**Step 6: Commit**
```bash
git add audio.py api/router.py tests/test_audio.py
git commit -m "feat: AudioRecorder accepts device param — reads audio.device from config"
```
---
### Task 4: API — GET /audio/devices and POST /audio/combined
**Files:**
- Modify: `api/router.py`
- Test: `tests/test_api.py`
**Step 1: Write the failing tests**
Add to `tests/test_api.py`:
```python
def test_audio_devices_returns_list(monkeypatch):
    import subprocess
    pactl_output = (
        "1\talsa_input.pci.analog-stereo\tPipeWire\ts32le 2ch 48000Hz\tRUNNING\n"
        "2\talsa_output.pci.analog-stereo.monitor\tPipeWire\ts32le 2ch 48000Hz\tIDLE\n"
    )
    monkeypatch.setattr(subprocess, "check_output", lambda *a, **kw: pactl_output.encode())
    from unittest.mock import patch
    with patch("api.router.current_user",
               return_value={"username": "u", "output_dir": "/tmp", "is_admin": True}):
        from fastapi.testclient import TestClient
        from main import app
        client = TestClient(app)
        r = client.get("/audio/devices", headers={"Authorization": "Bearer fake"})
        assert r.status_code == 200
        devices = r.json()
        assert len(devices) == 2
        assert devices[0]["name"] == "alsa_input.pci.analog-stereo"


def test_audio_devices_forbidden_for_non_admin():
    from unittest.mock import patch
    with patch("api.router.current_user",
               return_value={"username": "u", "output_dir": "/tmp", "is_admin": False}):
        from fastapi.testclient import TestClient
        from main import app
        client = TestClient(app)
        r = client.get("/audio/devices", headers={"Authorization": "Bearer fake"})
        assert r.status_code == 403
```
**Step 2: Run to verify they fail**
```bash
pytest tests/test_api.py::test_audio_devices_returns_list tests/test_api.py::test_audio_devices_forbidden_for_non_admin -v
```
Expected: FAIL — routes don't exist
**Step 3: Add endpoints to `api/router.py`**
```python
@router.get("/audio/devices")
async def list_audio_devices(user: dict = Depends(current_user)):
import subprocess
if not user.get("is_admin"):
raise HTTPException(status_code=403, detail="Nur Administratoren")
try:
out = subprocess.check_output(
["pactl", "list", "sources", "short"],
stderr=subprocess.DEVNULL, timeout=5,
).decode()
except Exception as e:
raise HTTPException(status_code=500, detail=f"pactl fehlgeschlagen: {e}")
devices = []
for line in out.strip().splitlines():
parts = line.split("\t")
if len(parts) >= 2:
devices.append({
"index": parts[0],
"name": parts[1],
"state": parts[4] if len(parts) > 4 else "",
})
return devices
@router.post("/audio/combined")
async def create_combined_source(body: dict, user: dict = Depends(current_user)):
import subprocess, json, pathlib
if not user.get("is_admin"):
raise HTTPException(status_code=403, detail="Nur Administratoren")
mic = body.get("mic", "")
monitor = body.get("monitor", "")
if not mic or not monitor:
raise HTTPException(status_code=400, detail="mic und monitor erforderlich")
# Validate: names must come from pactl list — no shell injection via user input
out = subprocess.check_output(
["pactl", "list", "sources", "short"], stderr=subprocess.DEVNULL, timeout=5
).decode()
known = {line.split("\t")[1] for line in out.strip().splitlines() if "\t" in line}
if mic not in known or monitor not in known:
raise HTTPException(status_code=400, detail="Unbekanntes Audio-Device")
sink_id = subprocess.check_output([
"pactl", "load-module", "module-null-sink",
"sink_name=transkriptor-combined",
"sink_properties=device.description=Transkriptor Combined",
], timeout=5).decode().strip()
mic_id = subprocess.check_output([
"pactl", "load-module", "module-loopback",
f"source={mic}", "sink=transkriptor-combined",
], timeout=5).decode().strip()
mon_id = subprocess.check_output([
"pactl", "load-module", "module-loopback",
f"source={monitor}", "sink=transkriptor-combined",
], timeout=5).decode().strip()
state_path = pathlib.Path(
os.path.expanduser("~/.config/tueit-transcriber/pipewire-modules.json")
)
state_path.write_text(json.dumps({"ids": [int(sink_id), int(mic_id), int(mon_id)]}))
return {"device": "transkriptor-combined.monitor", "module_ids": [sink_id, mic_id, mon_id]}
```
**Step 4: Run tests**
```bash
pytest tests/test_api.py::test_audio_devices_returns_list tests/test_api.py::test_audio_devices_forbidden_for_non_admin -v
```
Expected: PASS
**Step 5: Commit**
```bash
git add api/router.py tests/test_api.py
git commit -m "feat: GET /audio/devices, POST /audio/combined — PipeWire source management"
```
---
### Task 5: Fix PUT /config to deep-merge
**Files:**
- Modify: `api/router.py`
- Test: `tests/test_api.py`
Current `put_config` does a shallow `cfg.update(body)`, which overwrites nested dicts. It must deep-merge instead.
**Step 1: Write the failing test**
Add to `tests/test_api.py`:
```python
def test_put_config_deep_merges(tmp_path, monkeypatch):
    import config as cfg_mod
    monkeypatch.setattr(cfg_mod, "CONFIG_PATH",
                        str(tmp_path / "config.toml"))
    from unittest.mock import patch
    with patch("api.router.current_user",
               return_value={"username": "u", "output_dir": "/tmp", "is_admin": True}):
        from fastapi.testclient import TestClient
        from main import app
        client = TestClient(app)
        r = client.put("/config",
                       json={"whisper": {"base_url": "http://beastix:8000"}},
                       headers={"Authorization": "Bearer fake"})
        assert r.status_code == 200
        data = r.json()
        # base_url updated, model preserved
        assert data["whisper"]["base_url"] == "http://beastix:8000"
        assert data["whisper"]["model"] == "large-v3"
```
**Step 2: Run to verify it fails**
```bash
pytest tests/test_api.py::test_put_config_deep_merges -v
```
Expected: FAIL — shallow update loses whisper.model
**Step 3: Fix `put_config` in `api/router.py`**
```python
@router.put("/config")
async def put_config(body: dict, user: dict = Depends(current_user)):
if not user.get("is_admin"):
raise HTTPException(status_code=403, detail="Nur Administratoren können die Config ändern")
import tomli_w
from config import _deep_merge, CONFIG_PATH
cfg = load_config()
merged = _deep_merge(cfg, body)
os.makedirs(os.path.dirname(CONFIG_PATH), exist_ok=True)
with open(CONFIG_PATH, "wb") as f:
tomli_w.dump(merged, f)
return merged
```
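The endpoint imports a `_deep_merge` helper from `config`. If `config.py` doesn't have one yet, a minimal recursive sketch:
```python
def _deep_merge(base: dict, override: dict) -> dict:
    """Recursively merge override into base; nested dicts merge, scalars win."""
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = _deep_merge(merged[key], value)
        else:
            merged[key] = value
    return merged
```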
**Step 4: Run tests**
```bash
pytest tests/test_api.py::test_put_config_deep_merges -v
```
Expected: PASS
**Step 5: Commit**
```bash
git add api/router.py tests/test_api.py
git commit -m "fix: PUT /config deep-merges nested config instead of shallow update"
```
---
### Task 6: GET /status returns is_admin
**Files:**
- Modify: `api/router.py`
- Test: `tests/test_api.py`
**Step 1: Write the failing test**
Add to `tests/test_api.py`:
```python
def test_status_includes_is_admin():
from unittest.mock import patch
with patch("api.router.current_user",
return_value={"username": "u", "output_dir": "/tmp", "is_admin": True}):
from fastapi.testclient import TestClient
from main import app
client = TestClient(app)
r = client.get("/status", headers={"Authorization": "Bearer fake"})
assert r.status_code == 200
assert r.json()["is_admin"] is True
```
**Step 2: Run to verify it fails**
```bash
pytest tests/test_api.py::test_status_includes_is_admin -v
```
Expected: FAIL
**Step 3: Update `get_status` in `api/router.py`**
```python
@router.get("/status")
async def get_status(user: dict = Depends(current_user)):
return {
"status": state.status,
"username": user["username"],
"is_admin": user.get("is_admin", False),
}
```
**Step 4: Add gear icon in `frontend/app.js` init block**
In the `(async () => { ... })()` init, after `userChip.textContent = data.username`:
```javascript
if (data.is_admin) {
const gearLink = document.createElement('a');
gearLink.href = '/settings';
gearLink.className = 'back-btn';
gearLink.title = 'Einstellungen';
gearLink.textContent = '\u2699'; // ⚙ gear symbol
document.querySelector('.header-right').prepend(gearLink);
}
```
**Step 5: Run tests**
```bash
pytest tests/test_api.py::test_status_includes_is_admin -v
```
Expected: PASS
**Step 6: Commit**
```bash
git add api/router.py frontend/app.js tests/test_api.py
git commit -m "feat: status includes is_admin, gear icon in header for admins"
```
---
### Task 7: Settings page HTML + JS + routes
**Files:**
- Create: `frontend/settings.html`
- Create: `frontend/settings.js`
- Modify: `api/router.py`
- Modify: `main.py`
**Step 1: Add GET /settings to `api/router.py`**
```python
@router.get("/settings")
async def settings_page_route(user: dict = Depends(current_user)):
from fastapi.responses import FileResponse, RedirectResponse
from pathlib import Path
if not user.get("is_admin"):
return RedirectResponse("/")
return FileResponse(str(Path(__file__).parent.parent / "frontend" / "settings.html"))
```
**Step 2: Add `/settings.js` route to `main.py`**
```python
@app.get("/settings.js")
async def settingsjs():
return FileResponse(str(FRONTEND_DIR / "settings.js"))
```
**Step 3: Create `frontend/settings.html`**
```html
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>tüit Transkriptor — Einstellungen</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link href="https://fonts.googleapis.com/css2?family=Overpass:wght@300;400;600;700&display=swap" rel="stylesheet">
<style>
:root { --red:#DA251C;--yellow:#FFD802;--bg:#111;--surface:#1a1a1a;--surface2:#232323;--text:#e8e8e8;--muted:#888;--border:#2e2e2e; }
*{box-sizing:border-box;margin:0;padding:0;}
body{font-family:'Overpass',system-ui,sans-serif;background:var(--bg);color:var(--text);min-height:100vh;display:flex;flex-direction:column;}
header{display:flex;align-items:center;gap:12px;padding:16px 24px;border-bottom:1px solid var(--border);}
.header-logo{height:28px;width:auto;display:block;}
.header-divider{width:1px;height:20px;background:var(--border);flex-shrink:0;}
.header-appname{font-size:1rem;font-weight:600;letter-spacing:.04em;color:var(--muted);}
.header-right{margin-left:auto;display:flex;align-items:center;gap:12px;}
.back-btn{font-size:.75rem;padding:4px 10px;border-radius:20px;background:none;border:1px solid var(--border);color:var(--muted);cursor:pointer;font-family:inherit;text-decoration:none;transition:border-color .15s,color .15s;}
.back-btn:hover{border-color:var(--red);color:var(--red);}
main{flex:1;display:flex;flex-direction:column;gap:24px;padding:24px;max-width:700px;width:100%;margin:0 auto;}
h2{font-size:.8rem;color:var(--muted);text-transform:uppercase;letter-spacing:.06em;margin-bottom:12px;padding-bottom:8px;border-bottom:1px solid var(--border);}
.field{display:flex;flex-direction:column;gap:6px;margin-bottom:14px;}
label{font-size:.78rem;color:var(--muted);letter-spacing:.04em;}
select,input[type=text]{background:var(--surface);border:1px solid var(--border);color:var(--text);border-radius:8px;padding:10px 12px;font-family:inherit;font-size:.9rem;outline:none;transition:border-color .15s;width:100%;}
select:focus,input[type=text]:focus{border-color:var(--yellow);}
.btn-row{display:flex;gap:10px;margin-top:4px;}
.btn{font-size:.82rem;padding:8px 16px;border-radius:8px;border:1px solid var(--border);background:var(--surface2);color:var(--text);cursor:pointer;font-family:inherit;transition:border-color .15s,background .15s;}
.btn:hover{border-color:var(--red);}
.btn.primary{background:var(--red);border-color:var(--red);color:#fff;}
.btn.primary:hover{background:#b81e16;border-color:#b81e16;}
.toast{position:fixed;bottom:24px;right:24px;background:var(--surface2);border:1px solid var(--border);border-radius:8px;padding:10px 16px;font-size:.85rem;opacity:0;transition:opacity .2s;pointer-events:none;}
.toast.show{opacity:1;}
.combined-form{display:none;flex-direction:column;gap:10px;margin-top:10px;padding:12px;background:var(--surface2);border-radius:8px;border:1px solid var(--border);}
.combined-form.visible{display:flex;}
</style>
</head>
<body>
<header>
<img src="/logo.svg" class="header-logo" alt="tüit">
<div class="header-divider"></div>
<span class="header-appname">Transkriptor — Einstellungen</span>
<div class="header-right">
<a href="/" class="back-btn">&#8592; Zurück</a>
</div>
</header>
<main>
<section>
<h2>Audio</h2>
<div class="field">
<label>Aufnahmequelle</label>
<select id="audio-device">
<option value="">Systemstandard</option>
</select>
</div>
<div class="btn-row">
<button class="btn" id="refresh-devices-btn">Geräte aktualisieren</button>
<button class="btn" id="create-combined-btn">Combined Source erstellen</button>
</div>
<div class="combined-form" id="combined-form">
<div class="field">
<label>Mikrofon</label>
<select id="combined-mic"></select>
</div>
<div class="field">
<label>System-Audio Monitor</label>
<select id="combined-monitor"></select>
</div>
<div class="btn-row">
<button class="btn primary" id="combined-confirm-btn">Erstellen</button>
<button class="btn" id="combined-cancel-btn">Abbrechen</button>
</div>
</div>
</section>
<section>
<h2>Verarbeitung</h2>
<div class="field">
<label>Whisper Server URL (leer = lokal)</label>
<input type="text" id="whisper-url" placeholder="http://beastix:8000">
</div>
<div class="field">
<label>Whisper Modell</label>
<input type="text" id="whisper-model" placeholder="large-v3">
</div>
<div class="field">
<label>Ollama Server URL</label>
<input type="text" id="ollama-url" placeholder="http://localhost:11434">
</div>
<div class="field">
<label>Ollama Modell</label>
<select id="ollama-model"></select>
</div>
<div class="btn-row">
<button class="btn primary" id="save-btn">Speichern</button>
</div>
</section>
</main>
<div class="toast" id="toast"></div>
<script src="/settings.js"></script>
</body>
</html>
```
**Step 4: Create `frontend/settings.js`**
```javascript
const token = sessionStorage.getItem('token');
function authHeaders() {
return token ? { 'Authorization': 'Bearer ' + token } : {};
}
function apiFetch(url, options) {
options = options || {};
return fetch(url, Object.assign({}, options, {
headers: Object.assign({'Content-Type': 'application/json'}, authHeaders(), options.headers || {}),
}));
}
let _devices = [];
function showToast(msg) {
const t = document.getElementById('toast');
t.textContent = msg;
t.classList.add('show');
setTimeout(function() { t.classList.remove('show'); }, 2500);
}
async function loadDevices() {
const r = await apiFetch('/audio/devices');
if (!r.ok) return;
_devices = await r.json();
const sel = document.getElementById('audio-device');
const current = sel.value;
sel.replaceChildren(new Option('Systemstandard', ''));
_devices.forEach(function(d) { sel.appendChild(new Option(d.name, d.name)); });
if (current) sel.value = current;
['combined-mic', 'combined-monitor'].forEach(function(id) {
const el = document.getElementById(id);
el.replaceChildren();
_devices.forEach(function(d) { el.appendChild(new Option(d.name, d.name)); });
});
}
async function loadOllamaModels(baseUrl, current) {
try {
const r = await fetch(baseUrl + '/api/tags');
if (!r.ok) return;
const data = await r.json();
const sel = document.getElementById('ollama-model');
sel.replaceChildren();
(data.models || []).forEach(function(m) { sel.appendChild(new Option(m.name, m.name)); });
if (current) sel.value = current;
} catch(e) {}
}
async function loadConfig() {
const r = await apiFetch('/config');
if (!r.ok) return;
const cfg = await r.json();
document.getElementById('audio-device').value = (cfg.audio && cfg.audio.device) || '';
document.getElementById('whisper-url').value = (cfg.whisper && cfg.whisper.base_url) || '';
document.getElementById('whisper-model').value = (cfg.whisper && cfg.whisper.model) || 'large-v3';
const ollamaUrl = (cfg.ollama && cfg.ollama.base_url) || 'http://localhost:11434';
document.getElementById('ollama-url').value = ollamaUrl;
await loadOllamaModels(ollamaUrl, cfg.ollama && cfg.ollama.model);
}
document.getElementById('refresh-devices-btn').addEventListener('click', loadDevices);
document.getElementById('create-combined-btn').addEventListener('click', function() {
document.getElementById('combined-form').classList.toggle('visible');
});
document.getElementById('combined-cancel-btn').addEventListener('click', function() {
document.getElementById('combined-form').classList.remove('visible');
});
document.getElementById('combined-confirm-btn').addEventListener('click', async function() {
const mic = document.getElementById('combined-mic').value;
const monitor = document.getElementById('combined-monitor').value;
const r = await apiFetch('/audio/combined', {
method: 'POST',
body: JSON.stringify({ mic: mic, monitor: monitor }),
});
if (!r.ok) { showToast('Fehler beim Erstellen'); return; }
const data = await r.json();
showToast('Erstellt: ' + data.device);
document.getElementById('combined-form').classList.remove('visible');
await loadDevices();
document.getElementById('audio-device').value = data.device;
});
document.getElementById('ollama-url').addEventListener('change', function(e) {
loadOllamaModels(e.target.value, document.getElementById('ollama-model').value);
});
document.getElementById('save-btn').addEventListener('click', async function() {
const body = {
audio: { device: document.getElementById('audio-device').value },
whisper: {
base_url: document.getElementById('whisper-url').value,
model: document.getElementById('whisper-model').value,
},
ollama: {
base_url: document.getElementById('ollama-url').value,
model: document.getElementById('ollama-model').value,
},
};
const r = await apiFetch('/config', { method: 'PUT', body: JSON.stringify(body) });
if (r.ok) { showToast('Gespeichert'); } else { showToast('Fehler beim Speichern'); }
});
(async function() {
if (!token) { location.href = '/login'; return; }
await loadDevices();
await loadConfig();
})();
```
**Step 5: Manual verification checklist**
Restart app, open browser as admin:
- [ ] Gear icon (⚙) visible in the header
- [ ] Clicking it opens `/settings`
- [ ] Audio dropdown lists the PipeWire sources
- [ ] "Geräte aktualisieren" reloads the list
- [ ] "Combined Source erstellen" shows the mic/monitor dropdowns
- [ ] After creating one: the new device is selectable in the list
- [ ] Whisper URL empty → local processing
- [ ] Whisper URL set → transcript is processed remotely
- [ ] Ollama models load from the configured Ollama server
- [ ] "Speichern" → toast shown, config.toml updated
- [ ] Recording uses the configured audio device
- [ ] Non-admin sees no gear icon; `/settings` redirects to `/`
**Step 6: Commit**
```bash
git add api/router.py main.py frontend/settings.html frontend/settings.js
git commit -m "feat: settings page — PipeWire audio device + remote Whisper/Ollama config"
```
---
### Task 8: Run full test suite + push
```bash
pytest -v
```
Expected: all tests pass.
```bash
git push
```
---
## Beastix Setup (one-time, outside the app code)
```bash
pip install faster-whisper-server
uvicorn faster_whisper_server.main:app --host 0.0.0.0 --port 8000
```
Clients add this to their config:
```toml
[whisper]
base_url = "http://beastix:8000"
```
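A quick smoke test from a client machine, assuming the server exposes the usual OpenAI-compatible route (`sample.wav` is any short recording):
```bash
curl -s http://beastix:8000/v1/audio/transcriptions \
  -F file=@sample.wav \
  -F model=large-v3
```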
@@ -0,0 +1,383 @@
# Transcript Modal & Delete Implementation Plan
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
**Goal:** Add transcript delete and a markdown-rendering modal viewer, removing the existing preview section.
**Architecture:** Two new REST endpoints (GET + DELETE `/transcripts/{filename}`) with path-confinement security. Frontend gains a full-screen modal using marked.js + DOMPurify for safe rendering; the static preview div is removed entirely. Each list item gets a trash icon that stops event propagation so it doesn't trigger the modal.
**Tech Stack:** FastAPI (existing), marked.js 14 + DOMPurify 3 via CDN, vanilla JS/CSS (no new build step)
---
### Task 1: Backend — GET /transcripts/{filename}
**Files:**
- Modify: `api/router.py`
- Modify: `output.py`
- Test: `tests/test_api.py`
**Step 1: Write the failing test**
Add to `tests/test_api.py`:
```python
def test_get_transcript_returns_content(tmp_path, monkeypatch):
f = tmp_path / "2026-01-01-0900-test.md"
f.write_text("# Hello\n\ncontent here\n")
from unittest.mock import patch
with patch("api.router.current_user", return_value={"username": "", "output_dir": str(tmp_path), "is_admin": False}):
from fastapi.testclient import TestClient
from main import app
client = TestClient(app)
r = client.get("/transcripts/2026-01-01-0900-test.md",
headers={"Authorization": "Bearer fake"})
assert r.status_code == 200
assert "Hello" in r.text
def test_get_transcript_rejects_path_traversal(tmp_path):
from unittest.mock import patch
with patch("api.router.current_user", return_value={"username": "", "output_dir": str(tmp_path), "is_admin": False}):
from fastapi.testclient import TestClient
from main import app
client = TestClient(app)
r = client.get("/transcripts/..%2Fsecret.md",
headers={"Authorization": "Bearer fake"})
assert r.status_code == 404
```
**Step 2: Run to verify it fails**
```bash
pytest tests/test_api.py::test_get_transcript_returns_content tests/test_api.py::test_get_transcript_rejects_path_traversal -v
```
Expected: `test_get_transcript_returns_content` FAILs (404, route doesn't exist yet); `test_get_transcript_rejects_path_traversal` may already pass trivially, since a missing route also returns 404
**Step 3: Add `read_transcript` to `output.py`**
```python
def read_transcript(output_dir: str, filename: str) -> str | None:
"""Return file content if filename is a plain .md file inside output_dir."""
if os.path.basename(filename) != filename or not filename.endswith(".md"):
return None
path = os.path.join(output_dir, filename)
if not os.path.exists(path):
return None
with open(path, encoding="utf-8") as f:
return f.read()
```
**Step 4: Add GET endpoint to `api/router.py`**
Add after the existing `get_transcripts` endpoint:
```python
@router.get("/transcripts/{filename}")
async def get_transcript(filename: str, user: dict = Depends(current_user)):
from output import read_transcript
from fastapi.responses import PlainTextResponse
user_dir = os.path.join(user["output_dir"], user["username"])
content = read_transcript(user_dir, filename)
if content is None:
raise HTTPException(status_code=404, detail="Nicht gefunden")
return PlainTextResponse(content)
```
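For a quick manual check alongside the tests (port and token are placeholders for your local setup):
```bash
curl -H "Authorization: Bearer $TOKEN" \
  http://localhost:8000/transcripts/2026-01-01-0900-test.md
```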
**Step 5: Run tests**
```bash
pytest tests/test_api.py::test_get_transcript_returns_content tests/test_api.py::test_get_transcript_rejects_path_traversal -v
```
Expected: PASS
**Step 6: Commit**
```bash
git add output.py api/router.py tests/test_api.py
git commit -m "feat: GET /transcripts/{filename} — serve transcript content"
```
---
### Task 2: Backend — DELETE /transcripts/{filename}
**Files:**
- Modify: `api/router.py`
- Test: `tests/test_api.py`
**Step 1: Write the failing tests**
Add to `tests/test_api.py`:
```python
def test_delete_transcript_removes_file(tmp_path):
f = tmp_path / "2026-01-01-0900-test.md"
f.write_text("content")
from unittest.mock import patch
with patch("api.router.current_user", return_value={"username": "", "output_dir": str(tmp_path), "is_admin": False}):
from fastapi.testclient import TestClient
from main import app
client = TestClient(app)
r = client.delete("/transcripts/2026-01-01-0900-test.md",
headers={"Authorization": "Bearer fake"})
assert r.status_code == 200
assert not f.exists()
def test_delete_transcript_rejects_path_traversal(tmp_path):
from unittest.mock import patch
with patch("api.router.current_user", return_value={"username": "", "output_dir": str(tmp_path), "is_admin": False}):
from fastapi.testclient import TestClient
from main import app
client = TestClient(app)
r = client.delete("/transcripts/..%2Fsecret.md",
headers={"Authorization": "Bearer fake"})
assert r.status_code == 404
```
**Step 2: Run to verify they fail**
```bash
pytest tests/test_api.py::test_delete_transcript_removes_file tests/test_api.py::test_delete_transcript_rejects_path_traversal -v
```
Expected: FAIL — route doesn't exist
**Step 3: Add DELETE endpoint to `api/router.py`**
```python
@router.delete("/transcripts/{filename}")
async def delete_transcript(filename: str, user: dict = Depends(current_user)):
user_dir = os.path.join(user["output_dir"], user["username"])
if os.path.basename(filename) != filename or not filename.endswith(".md"):
raise HTTPException(status_code=404, detail="Nicht gefunden")
path = os.path.join(user_dir, filename)
if not os.path.exists(path):
raise HTTPException(status_code=404, detail="Nicht gefunden")
os.unlink(path)
return {"ok": True}
```
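This duplicates the confinement check from `read_transcript`. If the duplication bothers you, a shared helper could live in `output.py`. A sketch, with `safe_transcript_path` as a new name that is not part of this plan:
```python
# os is already imported at the top of output.py
def safe_transcript_path(output_dir: str, filename: str) -> str | None:
    """Return the path if filename is a plain, existing .md file inside output_dir."""
    if os.path.basename(filename) != filename or not filename.endswith(".md"):
        return None
    path = os.path.join(output_dir, filename)
    return path if os.path.exists(path) else None
```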
**Step 4: Run tests**
```bash
pytest tests/test_api.py::test_delete_transcript_removes_file tests/test_api.py::test_delete_transcript_rejects_path_traversal -v
```
Expected: PASS
**Step 5: Commit**
```bash
git add api/router.py tests/test_api.py
git commit -m "feat: DELETE /transcripts/{filename} — delete transcript with path-confinement check"
```
---
### Task 3: Frontend — Remove preview section, add modal + marked.js + DOMPurify
**Files:**
- Modify: `frontend/index.html`
- Modify: `frontend/app.js`
No automated tests; manual verification checklist at end.
**Step 1: Remove preview section from `index.html`**
Delete this block from `<main>`:
```html
<section class="preview-section">
<label>Vorschau</label>
<div id="preview">Noch keine Aufnahme verarbeitet.</div>
</section>
```
Delete these CSS rules (search for them by selector):
- `.preview-section`
- `#preview`
- `#preview.has-content`
**Step 2: Add script tags — replace existing `<script src="/app.js">` line**
```html
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/dompurify/dist/purify.min.js"></script>
<script src="/app.js"></script>
```
**Step 3: Add modal HTML + CSS**
Add this block inside `<main>` before `</main>` closing tag (it's a fixed overlay, position doesn't matter):
```html
<div id="modal" class="modal hidden" role="dialog" aria-modal="true">
<div class="modal-backdrop"></div>
<div class="modal-panel">
<div class="modal-header">
<span id="modal-title" class="modal-title"></span>
<div class="modal-actions">
<button id="modal-open-btn" class="modal-btn" title="Im Editor öffnen">
<svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
<path d="M14 3h7v7h-2V6.41l-9.29 9.3-1.42-1.42L17.59 5H14V3zm-1 2H5a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2v-8h-2v8H5V7h8V5z"/>
</svg>
</button>
<button id="modal-close-btn" class="modal-btn" title="Schließen"></button>
</div>
</div>
<div id="modal-body" class="modal-body"></div>
</div>
</div>
```
Add CSS inside `<style>`:
```css
.modal { position: fixed; inset: 0; z-index: 100; display: flex; align-items: center; justify-content: center; }
.modal.hidden { display: none; }
.modal-backdrop { position: absolute; inset: 0; background: rgba(0,0,0,0.7); }
.modal-panel {
position: relative; z-index: 1;
background: var(--surface); border: 1px solid var(--border); border-radius: 10px;
width: min(800px, 95vw); max-height: 85vh;
display: flex; flex-direction: column;
}
.modal-header {
display: flex; align-items: center; justify-content: space-between;
padding: 14px 18px; border-bottom: 1px solid var(--border);
flex-shrink: 0;
}
.modal-title { font-size: 0.9rem; font-weight: 600; }
.modal-actions { display: flex; gap: 8px; }
.modal-btn {
background: none; border: 1px solid var(--border); color: var(--muted);
border-radius: 6px; padding: 4px 8px; cursor: pointer; font-family: inherit;
font-size: 0.85rem; display: flex; align-items: center;
transition: border-color 0.15s, color 0.15s;
}
.modal-btn:hover { border-color: var(--red); color: var(--red); }
.modal-body {
padding: 20px 24px; overflow-y: auto; flex: 1;
font-size: 0.9rem; line-height: 1.7; color: var(--text);
}
.modal-body h1,.modal-body h2,.modal-body h3 { margin: 1em 0 0.4em; font-weight: 600; }
.modal-body h1 { font-size: 1.3rem; }
.modal-body h2 { font-size: 1.1rem; }
.modal-body p { margin: 0 0 0.8em; }
.modal-body ul,.modal-body ol { padding-left: 1.5em; margin: 0 0 0.8em; }
.modal-body code { background: var(--surface2); padding: 2px 5px; border-radius: 3px; font-size: 0.85em; }
.modal-body pre { background: var(--surface2); padding: 12px; border-radius: 6px; overflow-x: auto; margin: 0 0 0.8em; }
.modal-body pre code { background: none; padding: 0; }
.modal-body hr { border: none; border-top: 1px solid var(--border); margin: 1em 0; }
.del-btn {
background: none; border: none; color: var(--muted); cursor: pointer;
padding: 4px; border-radius: 4px; display: flex; align-items: center;
transition: color 0.15s; flex-shrink: 0;
}
.del-btn:hover { color: var(--red); }
```
**Step 4: Update `app.js`**
Remove these variable declarations at the top:
```javascript
const preview = document.getElementById('preview');
```
Add these variable declarations at the top:
```javascript
const modal = document.getElementById('modal');
const modalTitle = document.getElementById('modal-title');
const modalBody = document.getElementById('modal-body');
const modalOpenBtn = document.getElementById('modal-open-btn');
const modalCloseBtn = document.getElementById('modal-close-btn');
let _modalPath = null;
```
Add these functions and event listeners (after the `logoutBtn` listener):
```javascript
function openModal(filename, path) {
_modalPath = path;
modalTitle.textContent = filename.replace(/\.md$/, '').replace(/^\d{4}-\d{2}-\d{2}-\d{4}-/, '');
modalBody.innerHTML = '';
modal.classList.remove('hidden');
apiFetch(`/transcripts/${encodeURIComponent(filename)}`)
.then(r => r.text())
.then(md => {
modalBody.innerHTML = DOMPurify.sanitize(marked.parse(md));
});
}
function closeModal() {
modal.classList.add('hidden');
_modalPath = null;
}
modalCloseBtn.addEventListener('click', closeModal);
modal.querySelector('.modal-backdrop').addEventListener('click', closeModal);
document.addEventListener('keydown', e => { if (e.key === 'Escape') closeModal(); });
modalOpenBtn.addEventListener('click', () => {
if (_modalPath) apiFetch('/open', { method: 'POST', body: JSON.stringify({ path: _modalPath }) });
});
```
In `loadTranscripts`, replace the existing `div.addEventListener` block and the `div.append(name, meta)` line with:
```javascript
div.addEventListener('click', () => openModal(t.filename, t.path));
const delBtn = document.createElement('button');
delBtn.className = 'del-btn';
delBtn.title = 'Löschen';
delBtn.innerHTML = '<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor"><path d="M9 3h6l1 1h4v2H4V4h4l1-1zm-3 5h12l-1 13H7L6 8zm5 2v9h2v-9h-2zm-3 0v9h2v-9H8zm8 0v9h2v-9h-2z"/></svg>';
delBtn.addEventListener('click', async (e) => {
e.stopPropagation();
await apiFetch(`/transcripts/${encodeURIComponent(t.filename)}`, { method: 'DELETE' });
loadTranscripts();
});
div.append(name, meta, delBtn);
```
Remove the WS handler block that references preview:
```javascript
if (msg.event === 'transcribed' || msg.event === 'refined') {
const text = msg.raw || msg.markdown || '';
preview.textContent = text;
preview.classList.add('has-content');
}
```
Also trim the click handler's `reset` branch: remove its `preview.*` lines and keep only the `setStatus('idle')` call.
**Step 5: Manual verification checklist**
Restart app (`kill $(pgrep -f main.py) && .venv/bin/python main.py &`), open browser:
- [ ] No "Vorschau" section visible
- [ ] Clicking a transcript item opens modal with rendered markdown
- [ ] Title in modal header shows human-readable name (date prefix stripped)
- [ ] Clicking backdrop or ✕ closes modal
- [ ] Pressing Escape closes modal
- [ ] "Im Editor öffnen" triggers xdg-open
- [ ] Trash icon deletes file and refreshes list
- [ ] Trash click does NOT open the modal
**Step 6: Commit**
```bash
git add frontend/index.html frontend/app.js
git commit -m "feat: transcript modal with markdown rendering, delete button, remove preview section"
```
---
### Task 4: Run full test suite
```bash
pytest -v
```
Expected: all tests pass. Fix any regressions before pushing.
```bash
git push
```
@@ -0,0 +1,143 @@
# Speaker Diarization & Name Identification Design
**Date:** 2026-04-02
## Goal
Extend the transcription pipeline with speaker diarization (pyannote.audio) and automatic
speaker name identification (Ollama). Every recording produces three documents: an index,
a raw transcript with speaker labels, and a polished summary.
## Architecture
```
WAV
├─► Whisper → segments [(start, end, text), …]
├─► pyannote → speaker segments [(start, end, "SPEAKER_00"), …]
└─► Alignment → [(speaker_label, text), …]
├─► Ollama (name prompt) → {"SPEAKER_00": "Thomas", "SPEAKER_01": "Möller"}
│ └─ Fallback: WS event `speakers_unknown` → UI card → POST /speakers
├─► transkript.md (speaker: text, new paragraph per speaker change)
├─► zusammenfassung.md (key points, open questions, next steps)
└─► index.md (TL;DR, speakers, duration, links to both)
```
## Config Schema Extension
```toml
[diarization]
enabled = true
hf_token = "hf_..." # HuggingFace read token
```
## New Module: diarization.py
```python
class Diarizer:
def __init__(self, hf_token: str): ...
async def diarize(self, wav_path: str) -> list[tuple[float, float, str]]:
# returns [(start_sec, end_sec, "SPEAKER_00"), …]
```
Uses `pyannote/speaker-diarization-3.1`. Loaded lazily on first call.
Runs in `loop.run_in_executor` to avoid blocking the event loop.
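Fleshed out, the lazy-load-plus-executor pattern might look like this (a sketch; newer pyannote releases wrap the result, so the unpacking may need adjusting):
```python
import asyncio
from pyannote.audio import Pipeline

class Diarizer:
    def __init__(self, hf_token: str):
        self._hf_token = hf_token
        self._pipeline = None  # loaded lazily on first call

    def _run(self, wav_path: str):
        if self._pipeline is None:
            self._pipeline = Pipeline.from_pretrained(
                "pyannote/speaker-diarization-3.1", token=self._hf_token
            )
        return self._pipeline(wav_path)

    async def diarize(self, wav_path: str) -> list[tuple[float, float, str]]:
        loop = asyncio.get_running_loop()
        annotation = await loop.run_in_executor(None, self._run, wav_path)
        return [
            (turn.start, turn.end, label)
            for turn, _, label in annotation.itertracks(yield_label=True)
        ]
```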
## Timestamp Alignment
For each Whisper segment `(start, end, text)`: find the pyannote speaker with the
greatest time overlap → assign that speaker label. Consecutive segments with the same
speaker are merged into one paragraph.
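A sketch of that overlap rule, including the merge step:
```python
def assign_speakers(
    whisper_segments: list[tuple[float, float, str]],
    speaker_turns: list[tuple[float, float, str]],
) -> list[tuple[str, str]]:
    """Assign each Whisper segment the speaker with the largest time overlap."""
    aligned: list[tuple[str, str]] = []
    for w_start, w_end, text in whisper_segments:
        best, best_overlap = "SPEAKER_UNKNOWN", 0.0
        for s_start, s_end, label in speaker_turns:
            overlap = min(w_end, s_end) - max(w_start, s_start)
            if overlap > best_overlap:  # non-overlapping turns stay negative
                best, best_overlap = label, overlap
        if aligned and aligned[-1][0] == best:
            # consecutive segments from the same speaker become one paragraph
            aligned[-1] = (best, aligned[-1][1] + " " + text.strip())
        else:
            aligned.append((best, text.strip()))
    return aligned
```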
**Remote Whisper path:** request `timestamp_granularities=["segment"]` from the
OpenAI-compatible API — the response includes `segments[].start` and `segments[].end`.
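On the wire that could look like this (a sketch; `verbose_json` is what makes the server return per-segment timestamps):
```python
import requests

r = requests.post(
    "http://beastix:8000/v1/audio/transcriptions",
    files={"file": open("recording.wav", "rb")},
    data={
        "model": "large-v3",
        "response_format": "verbose_json",
        "timestamp_granularities[]": "segment",
    },
    timeout=600,
)
segments = [(s["start"], s["end"], s["text"]) for s in r.json()["segments"]]
```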
## Speaker Name Identification
Ollama receives the first ~2000 chars of the aligned transcript and a prompt:
> "Analysiere das folgende Gesprächstranskript. Ermittle welche Namen den Sprechern
> zugeordnet werden können (z.B. durch direkte Anrede). Antworte NUR mit JSON:
> `{\"SPEAKER_00\": \"Name oder null\", …}`"
If all values are `null` or parsing fails → emit `speakers_unknown` WebSocket event.
If at least one name is found → apply known names, leave unknowns as `Sprecher N`.
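Parsing the response defensively (local models often wrap the JSON in markdown fences or return the literal string "null") might look like this sketch:
```python
import json
import re

def parse_speaker_names(raw: str) -> dict[str, str]:
    """Return {label: name}, dropping empty, null and "null" values."""
    cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", raw.strip())
    try:
        data = json.loads(cleaned)
    except json.JSONDecodeError:
        return {}
    return {
        label: name.strip()
        for label, name in data.items()
        if isinstance(name, str) and name.strip() and name.strip().lower() != "null"
    }
```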
## Frontend: Speaker Naming Card
Triggered by `speakers_unknown` WS event. Shown above the record button.
Each speaker has:
- Excerpt navigator: `‹ "first few sentences…" 1/4 ›` — arrows cycle through all
excerpts (3-4 sentences each) for that speaker
- Text input for the name
Buttons:
- **Übernehmen** → `POST /speakers` with `{"SPEAKER_00": "Thomas", …}` → pipeline
writes the three documents and emits `saved`
- **Anonym lassen** → same POST with empty strings → labels stay as `Sprecher 1` etc.
## New API Endpoint
| Method | Path | Description |
|--------|------|-------------|
| POST | `/speakers` | Receives speaker name mapping, triggers document writing |
The pipeline pauses after alignment and waits for `/speakers` before writing output.
State stored in `api/state.py` as `state._pending_speakers`.
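The plan fixes the attribute name but not the waiting mechanism; one option is an `asyncio.Future` (a sketch, the method names are hypothetical):
```python
import asyncio

class State:
    """Sketch: the future lets the pipeline await the POST /speakers body."""
    def __init__(self) -> None:
        self._pending_speakers: asyncio.Future | None = None

    def wait_for_speakers(self) -> "asyncio.Future[dict[str, str]]":
        self._pending_speakers = asyncio.get_running_loop().create_future()
        return self._pending_speakers

    def submit_speakers(self, mapping: dict[str, str]) -> None:
        if self._pending_speakers and not self._pending_speakers.done():
            self._pending_speakers.set_result(mapping)

# pipeline, after alignment:  mapping = await state.wait_for_speakers()
# POST /speakers handler:     state.submit_speakers(body)
```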
## Three Output Documents
All three share the same filename base (e.g. `2026-04-02-1430-Meeting`):
**`...-index.md`**
```markdown
# Meeting — 02.04.2026 14:30
**Sprecher:** Thomas, Möller
**Dauer:** 23 min
> [2-3 sentence TL;DR from Ollama]
- [Transkript](…-transkript.md)
- [Zusammenfassung](…-zusammenfassung.md)
```
**`...-transkript.md`** — Raw annotated transcript, new paragraph per speaker change:
```markdown
**Thomas:** Gut, dann fangen wir an.
**Möller:** Ich hab das Budget schon vorbereitet…
```
**`...-zusammenfassung.md`** — Polished summary document (Ollama):
```markdown
# Meeting-Zusammenfassung — 02.04.2026
## Wichtigste Punkte
## Offene Fragen
## Nächste Schritte / Ideen
```
All three appear in the transcript list. Index entries get a `meeting` badge.
## HuggingFace Setup (one-time, per machine)
1. Create account at huggingface.co
2. Go to https://huggingface.co/pyannote/speaker-diarization-3.1 → click
"Access repository" and accept the terms of service
3. Go to huggingface.co/settings/tokens → create a token with **Read** access
4. Enter the token in Transkriptor settings → Einstellungen → Diarisierung
## Not in Scope
- Speaker voice profiles / pre-registration
- More than one diarization model
- Windows support
@@ -1,16 +1,36 @@
const btn = document.getElementById('record-btn');
const statusText = document.getElementById('status-text');
const headerStatus = document.getElementById('header-status');
-const preview = document.getElementById('preview');
const instructionsEl = document.getElementById('instructions');
const transcriptList = document.getElementById('transcript-list');
const userChip = document.getElementById('user-chip');
const logoutBtn = document.getElementById('logout-btn');
const modal = document.getElementById('modal');
const modalTitle = document.getElementById('modal-title');
const modalBody = document.getElementById('modal-body');
const modalObsidianBtn = document.getElementById('modal-obsidian-btn');
const modalFolderBtn = document.getElementById('modal-folder-btn');
const modalOpenBtn = document.getElementById('modal-open-btn');
const modalCloseBtn = document.getElementById('modal-close-btn');
const modalTabs = document.getElementById('modal-tabs');
let _modalPath = null;
let _modalPaths = null;
let _modalFilename = null;
let _modalRelated = null;
const speakerCard = document.getElementById('speaker-card');
const speakerRows = document.getElementById('speaker-rows');
const speakerConfirmBtn = document.getElementById('speaker-confirm-btn');
const speakerAnonymBtn = document.getElementById('speaker-anonym-btn');
// state for excerpt navigation: { speakerId: { excerpts: [], idx: 0 } }
let _speakerState = {};
const STATUS_LABELS = {
idle: 'Bereit',
recording: 'Aufnahme läuft\u2026',
processing: 'Wird verarbeitet\u2026',
awaiting_speakers: 'Sprecher benennen\u2026',
error: 'Fehler',
};
@@ -36,6 +56,77 @@ logoutBtn.addEventListener('click', () => {
});
});
function _loadModalContent(filename, activeTab) {
modalBody.innerHTML = '';
apiFetch(`/transcripts/${filename.split('/').map(encodeURIComponent).join('/')}`)
.then(r => r.text())
.then(md => { modalBody.innerHTML = DOMPurify.sanitize(marked.parse(md)); });
// update active tab
modalTabs.querySelectorAll('.modal-tab').forEach(t => {
t.classList.toggle('active', t.dataset.file === filename);
});
}
function openModal(filename, path, paths, related) {
_modalPath = path;
_modalPaths = paths || null;
_modalFilename = filename;
_modalRelated = related || null;
modalTitle.textContent = filename.replace(/\.md$/, '').replace(/^\d{4}-\d{2}-\d{2}-\d{4}-/, '').replace(/-index$/, '');
modal.classList.remove('hidden');
// Build tabs if there are related files
modalTabs.innerHTML = '';
if (related && (related.transkript || related.zusammenfassung)) {
modalTabs.style.display = 'flex';
const tabDefs = [
{ label: 'Index', file: filename },
{ label: 'Transkript', file: related.transkript },
{ label: 'Zusammenfassung', file: related.zusammenfassung },
].filter(t => t.file);
tabDefs.forEach(({ label, file }) => {
const btn = document.createElement('button');
btn.className = 'modal-tab';
btn.textContent = label;
btn.dataset.file = file;
btn.addEventListener('click', () => _loadModalContent(file, file));
modalTabs.appendChild(btn);
});
} else {
modalTabs.style.display = 'none';
}
_loadModalContent(filename, filename);
}
function closeModal() {
modal.classList.add('hidden');
_modalPath = null;
_modalPaths = null;
_modalFilename = null;
_modalRelated = null;
}
modalCloseBtn.addEventListener('click', closeModal);
modal.querySelector('.modal-backdrop').addEventListener('click', closeModal);
document.addEventListener('keydown', e => { if (e.key === 'Escape') closeModal(); });
modalObsidianBtn.addEventListener('click', () => {
if (_modalPaths) {
apiFetch('/open', { method: 'POST', body: JSON.stringify({ paths: Object.values(_modalPaths), mode: 'obsidian' }) });
} else if (_modalPath) {
apiFetch('/open', { method: 'POST', body: JSON.stringify({ path: _modalPath, mode: 'obsidian' }) });
}
});
modalFolderBtn.addEventListener('click', () => {
if (_modalPath) apiFetch('/open', { method: 'POST', body: JSON.stringify({ path: _modalPath, mode: 'folder' }) });
});
modalOpenBtn.addEventListener('click', () => {
if (_modalPath) apiFetch('/open', { method: 'POST', body: JSON.stringify({ path: _modalPath }) });
});
speakerConfirmBtn.addEventListener('click', () => submitSpeakers(true));
speakerAnonymBtn.addEventListener('click', () => submitSpeakers(false));
instructionsEl.addEventListener('input', async () => {
await apiFetch('/instructions', {
method: 'POST',
@@ -47,12 +138,20 @@ function setStatus(status) {
btn.className = status;
headerStatus.className = `status-badge ${status}`;
const label = STATUS_LABELS[status] || status;
-  statusText.textContent = label;
statusText.textContent = status === 'error' ? label + ' — klicken zum Zurücksetzen' : label;
headerStatus.textContent = label;
btn.disabled = status === 'processing';
}
-btn.addEventListener('click', () => apiFetch('/toggle', { method: 'POST' }));
btn.addEventListener('click', async () => {
const r = await apiFetch('/toggle', { method: 'POST' });
const data = await r.json();
if (data.action === 'started') {
setStatus('recording');
} else if (data.action === 'reset') {
setStatus('idle');
}
});
function connectWs() {
const proto = location.protocol === 'https:' ? 'wss:' : 'ws:';
@@ -60,23 +159,95 @@ function connectWs() {
ws.onmessage = (e) => {
const msg = JSON.parse(e.data);
if (msg.event === 'processing') setStatus('processing');
-    if (msg.event === 'transcribed' || msg.event === 'refined') {
-      const text = msg.raw || msg.markdown || '';
-      preview.textContent = text;
-      preview.classList.add('has-content');
-    }
if (msg.event === 'saved') {
setStatus('idle');
loadTranscripts();
}
if (msg.event === 'error') {
-      setStatus('idle');
-      preview.textContent = `Fehler: ${msg.message}`;
setStatus('error');
}
if (msg.event === 'speakers_unknown') {
setStatus('awaiting_speakers');
showSpeakerCard(msg.speakers);
}
};
ws.onclose = () => setTimeout(connectWs, 2000);
}
function showSpeakerCard(speakers) {
_speakerState = {};
speakerRows.innerHTML = '';
speakers.forEach(({ id, excerpts }) => {
_speakerState[id] = { excerpts, idx: 0 };
const row = document.createElement('div');
row.className = 'speaker-row';
const nav = document.createElement('div');
nav.className = 'excerpt-nav';
const prevBtn = document.createElement('button');
prevBtn.className = 'excerpt-nav-btn';
    prevBtn.textContent = '‹';
prevBtn.title = 'Vorheriger Ausschnitt';
const nextBtn = document.createElement('button');
nextBtn.className = 'excerpt-nav-btn';
    nextBtn.textContent = '›';
nextBtn.title = 'Nächster Ausschnitt';
const counter = document.createElement('span');
counter.className = 'excerpt-counter';
const excerptEl = document.createElement('div');
excerptEl.className = 'speaker-excerpt';
function updateExcerpt() {
const st = _speakerState[id];
excerptEl.textContent = `"${st.excerpts[st.idx]}"`;
counter.textContent = `${st.idx + 1} / ${st.excerpts.length}`;
prevBtn.disabled = st.idx === 0;
nextBtn.disabled = st.idx === st.excerpts.length - 1;
}
prevBtn.addEventListener('click', () => { _speakerState[id].idx--; updateExcerpt(); });
nextBtn.addEventListener('click', () => { _speakerState[id].idx++; updateExcerpt(); });
nav.append(prevBtn, counter, nextBtn);
const input = document.createElement('input');
input.type = 'text';
input.className = 'speaker-name-input';
input.placeholder = `Name für ${id.replace('SPEAKER_', 'Sprecher ')}`;
input.dataset.speakerId = id;
row.append(nav, excerptEl, input);
speakerRows.appendChild(row);
updateExcerpt();
});
speakerCard.classList.remove('hidden');
}
function hideSpeakerCard() {
speakerCard.classList.add('hidden');
speakerRows.innerHTML = '';
_speakerState = {};
}
async function submitSpeakers(useNames) {
const mapping = {};
if (useNames) {
speakerRows.querySelectorAll('.speaker-name-input').forEach(inp => {
mapping[inp.dataset.speakerId] = inp.value.trim();
});
} else {
Object.keys(_speakerState).forEach(id => { mapping[id] = ''; });
}
hideSpeakerCard();
setStatus('processing');
await apiFetch('/speakers', { method: 'POST', body: JSON.stringify(mapping) });
}
async function loadTranscripts() {
const r = await apiFetch('/transcripts');
if (!r.ok) return;
@@ -87,20 +258,50 @@ async function loadTranscripts() {
const div = document.createElement('div');
div.className = 'transcript-item';
const dateMatch = t.filename.match(/^(\d{4}-\d{2}-\d{2})-(\d{2})(\d{2})-/);
const dateEl = document.createElement('span');
dateEl.className = 'meta item-date';
dateEl.textContent = dateMatch ? `${dateMatch[1]} ${dateMatch[2]}:${dateMatch[3]}` : '';
const name = document.createElement('span');
-      name.textContent = t.filename.replace('.md', '');
name.className = 'name';
name.textContent = t.filename.replace(/\.md$/, '').replace(/^\d{4}-\d{2}-\d{2}-\d{4}-/, '');
const meta = document.createElement('span');
meta.className = 'meta';
meta.textContent = `${Math.round(t.size / 1024 * 10) / 10} KB`;
-      div.append(name, meta);
-      div.addEventListener('click', () => {
-        apiFetch('/open', {
div.addEventListener('click', () => openModal(t.filename, t.path, null, t.related || null));
const reprocessBtn = document.createElement('button');
reprocessBtn.className = 'del-btn';
reprocessBtn.title = 'Neu verarbeiten';
reprocessBtn.innerHTML = '<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor"><path d="M17.65 6.35A7.96 7.96 0 0 0 12 4a8 8 0 1 0 8 8h-2a6 6 0 1 1-1.76-4.24l-2.24 2.24H20V4l-2.35 2.35z"/></svg>';
reprocessBtn.addEventListener('click', async (e) => {
e.stopPropagation();
reprocessBtn.disabled = true;
await apiFetch(`/transcripts/${encodeURIComponent(t.filename)}/reprocess`, {
method: 'POST',
-          body: JSON.stringify({ path: t.path }),
body: JSON.stringify({ instructions: instructionsEl.value }),
});
reprocessBtn.disabled = false;
loadTranscripts();
});
const delBtn = document.createElement('button');
delBtn.className = 'del-btn';
delBtn.title = 'Löschen';
delBtn.innerHTML = '<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor"><path d="M9 3h6l1 1h4v2H4V4h4l1-1zm-3 5h12l-1 13H7L6 8zm5 2v9h2v-9h-2zm-3 0v9h2v-9H8zm8 0v9h2v-9h-2z"/></svg>';
delBtn.addEventListener('click', async (e) => {
e.stopPropagation();
await apiFetch(`/transcripts/${encodeURIComponent(t.filename)}`, { method: 'DELETE' });
loadTranscripts();
});
const actions = document.createElement('div');
actions.className = 'item-actions';
actions.append(reprocessBtn, delBtn);
div.append(dateEl, name, meta, actions);
return div;
})
);
@@ -117,6 +318,14 @@ async function loadTranscripts() {
if (data.username) {
userChip.textContent = data.username;
}
if (data.is_admin) {
const gearLink = document.createElement('a');
gearLink.href = '/settings';
gearLink.className = 'back-btn';
gearLink.title = 'Einstellungen';
gearLink.textContent = '\u2699';
document.querySelector('.header-right').prepend(gearLink);
}
connectWs();
loadTranscripts();
})();
@@ -33,9 +33,9 @@
padding: 16px 24px;
border-bottom: 1px solid var(--border);
}
-.logo-dot { width: 12px; height: 12px; background: var(--red); border-radius: 50%; }
-header h1 { font-size: 1.1rem; font-weight: 600; letter-spacing: 0.04em; }
-header h1 span { color: var(--red); }
.header-logo { height: 28px; width: auto; display: block; }
.header-divider { width: 1px; height: 20px; background: var(--border); flex-shrink: 0; }
.header-appname { font-size: 1rem; font-weight: 600; letter-spacing: 0.04em; color: var(--muted); }
.header-right { margin-left: auto; display: flex; align-items: center; gap: 12px; }
.status-badge {
font-size: 0.75rem;
@@ -68,6 +68,13 @@
transition: border-color 0.15s, color 0.15s;
}
.logout-btn:hover { border-color: var(--red); color: var(--red); }
.back-btn {
font-size: .75rem; padding: 4px 10px; border-radius: 20px;
background: none; border: 1px solid var(--border); color: var(--muted);
cursor: pointer; font-family: inherit; text-decoration: none;
transition: border-color .15s, color .15s;
}
.back-btn:hover { border-color: var(--red); color: var(--red); }
main {
flex: 1;
display: flex;
@@ -107,30 +114,108 @@
}
textarea:focus { border-color: var(--yellow); }
textarea::placeholder { color: var(--muted); }
-.preview-section { display: flex; flex-direction: column; gap: 8px; }
-#preview {
-  background: var(--surface); border: 1px solid var(--border);
-  border-radius: 8px; padding: 16px;
-  font-size: 0.85rem; line-height: 1.6; color: var(--muted);
-  min-height: 60px; white-space: pre-wrap; word-break: break-word;
-}
-#preview.has-content { color: var(--text); }
.transcripts-section { display: flex; flex-direction: column; gap: 8px; }
#transcript-list { display: flex; flex-direction: column; gap: 6px; }
.transcript-item {
background: var(--surface); border: 1px solid var(--border);
border-radius: 6px; padding: 10px 14px;
-  display: flex; align-items: center; justify-content: space-between;
display: flex; align-items: center; gap: 10px;
font-size: 0.82rem; cursor: pointer; transition: border-color 0.1s;
}
.transcript-item:hover { border-color: var(--red); }
-.transcript-item .meta { color: var(--muted); font-size: 0.75rem; }
.transcript-item .name { flex: 1; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
.transcript-item .meta { color: var(--muted); font-size: 0.75rem; flex-shrink: 0; }
.item-date { font-size: 0.75rem; color: var(--muted); flex-shrink: 0; font-variant-numeric: tabular-nums; }
.item-actions { display: flex; gap: 2px; flex-shrink: 0; }
.modal { position: fixed; inset: 0; z-index: 100; display: flex; align-items: center; justify-content: center; }
.modal.hidden { display: none; }
.modal-backdrop { position: absolute; inset: 0; background: rgba(0,0,0,0.7); }
.modal-panel {
position: relative; z-index: 1;
background: var(--surface); border: 1px solid var(--border); border-radius: 10px;
width: min(800px, 95vw); max-height: 85vh;
display: flex; flex-direction: column;
}
.modal-header {
display: flex; align-items: center; justify-content: space-between;
padding: 14px 18px; border-bottom: 1px solid var(--border);
flex-shrink: 0;
}
.modal-title { font-size: 0.9rem; font-weight: 600; }
.modal-actions { display: flex; gap: 8px; }
.modal-btn {
background: none; border: 1px solid var(--border); color: var(--muted);
border-radius: 6px; padding: 4px 8px; cursor: pointer; font-family: inherit;
font-size: 0.85rem; display: flex; align-items: center;
transition: border-color 0.15s, color 0.15s;
}
.modal-btn:hover { border-color: var(--red); color: var(--red); }
.modal-body {
padding: 20px 24px; overflow-y: auto; flex: 1;
font-size: 0.9rem; line-height: 1.7; color: var(--text);
}
.modal-body h1,.modal-body h2,.modal-body h3 { margin: 1em 0 0.4em; font-weight: 600; }
.modal-body h1 { font-size: 1.3rem; }
.modal-body h2 { font-size: 1.1rem; }
.modal-body p { margin: 0 0 0.8em; }
.modal-body ul,.modal-body ol { padding-left: 1.5em; margin: 0 0 0.8em; }
.modal-body code { background: var(--surface2); padding: 2px 5px; border-radius: 3px; font-size: 0.85em; }
.modal-body pre { background: var(--surface2); padding: 12px; border-radius: 6px; overflow-x: auto; margin: 0 0 0.8em; }
.modal-body pre code { background: none; padding: 0; }
.modal-body hr { border: none; border-top: 1px solid var(--border); margin: 1em 0; }
.modal-tabs { display: flex; gap: 4px; padding: 10px 18px 0; border-bottom: 1px solid var(--border); flex-shrink: 0; }
.modal-tab { background: none; border: 1px solid transparent; border-bottom: none; border-radius: 6px 6px 0 0; padding: 5px 12px; font-size: 0.78rem; font-family: inherit; color: var(--muted); cursor: pointer; transition: color 0.15s, border-color 0.15s; margin-bottom: -1px; }
.modal-tab:hover { color: var(--text); }
.modal-tab.active { color: var(--text); border-color: var(--border); background: var(--surface); }
.del-btn {
background: none; border: none; color: var(--muted); cursor: pointer;
padding: 4px; border-radius: 4px; display: flex; align-items: center;
transition: color 0.15s; flex-shrink: 0;
}
.del-btn:hover { color: var(--red); }
.speaker-card {
background: var(--surface); border: 1px solid var(--yellow);
border-radius: 10px; padding: 20px; display: flex; flex-direction: column; gap: 16px;
}
.speaker-card.hidden { display: none; }
.speaker-card-title { font-size: 0.8rem; text-transform: uppercase; letter-spacing: 0.08em; color: var(--yellow); }
.speaker-rows { display: flex; flex-direction: column; gap: 14px; }
.speaker-row { display: flex; flex-direction: column; gap: 6px; }
.excerpt-nav { display: flex; align-items: center; gap: 8px; }
.excerpt-nav-btn {
background: none; border: 1px solid var(--border); color: var(--muted);
border-radius: 4px; padding: 2px 8px; cursor: pointer; font-family: inherit;
font-size: 0.85rem; transition: border-color 0.15s, color 0.15s;
}
.excerpt-nav-btn:hover { border-color: var(--yellow); color: var(--yellow); }
.excerpt-counter { font-size: 0.75rem; color: var(--muted); white-space: nowrap; }
.speaker-excerpt {
font-size: 0.82rem; color: var(--muted); font-style: italic;
background: var(--surface2); border-radius: 6px; padding: 8px 12px;
line-height: 1.5; min-height: 3em;
}
.speaker-name-input {
background: var(--surface2); border: 1px solid var(--border); color: var(--text);
border-radius: 6px; padding: 8px 12px; font-family: inherit; font-size: 0.9rem;
outline: none; transition: border-color 0.15s; width: 100%;
}
.speaker-name-input:focus { border-color: var(--yellow); }
.speaker-card-actions { display: flex; gap: 10px; justify-content: flex-end; }
.card-btn {
padding: 8px 18px; border-radius: 6px; border: 1px solid var(--border);
background: none; color: var(--text); cursor: pointer; font-family: inherit;
font-size: 0.85rem; transition: all 0.15s;
}
.card-btn:hover { border-color: var(--yellow); color: var(--yellow); }
.card-btn.primary { background: var(--yellow); color: #111; border-color: var(--yellow); font-weight: 600; }
.card-btn.primary:hover { background: #e6c200; border-color: #e6c200; }
</style>
</head>
<body>
<header>
<div class="logo-dot"></div>
<h1>tüit <span>Transkriptor</span></h1>
<img src="/logo.svg" class="header-logo" alt="tüit">
<div class="header-divider"></div>
<span class="header-appname">Transkriptor</span>
<div class="header-right">
<span class="status-badge" id="header-status">Bereit</span>
<span class="user-chip" id="user-chip"></span>
@@ -138,6 +223,15 @@
</div>
</header>
<main>
<div id="speaker-card" class="speaker-card hidden">
<span class="speaker-card-title">Sprecher identifizieren</span>
<div id="speaker-rows" class="speaker-rows"></div>
<div class="speaker-card-actions">
<button id="speaker-anonym-btn" class="card-btn">Anonym lassen</button>
<button id="speaker-confirm-btn" class="card-btn primary">Übernehmen</button>
</div>
</div>
<section class="record-section">
<button id="record-btn" title="Aufnahme starten / stoppen">
<svg class="mic-icon" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
@@ -155,16 +249,42 @@
></textarea>
</section>
<section class="preview-section">
<label>Vorschau</label>
<div id="preview">Noch keine Aufnahme verarbeitet.</div>
</section>
<section class="transcripts-section">
<label>Meine Transkripte</label>
<div id="transcript-list"></div>
</section>
<div id="modal" class="modal hidden" role="dialog" aria-modal="true">
<div class="modal-backdrop"></div>
<div class="modal-panel">
<div class="modal-header">
<span id="modal-title" class="modal-title"></span>
<div class="modal-actions">
<button id="modal-obsidian-btn" class="modal-btn" title="In Obsidian öffnen">
<svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
<path d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-1 14H9V8h2v8zm4 0h-2V8h2v8z"/>
</svg>
</button>
<button id="modal-folder-btn" class="modal-btn" title="Verzeichnis öffnen">
<svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
<path d="M10 4H4a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h16a2 2 0 0 0 2-2V8a2 2 0 0 0-2-2h-8l-2-2z"/>
</svg>
</button>
<button id="modal-open-btn" class="modal-btn" title="Im Editor öffnen">
<svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
<path d="M14 3h7v7h-2V6.41l-9.29 9.3-1.42-1.42L17.59 5H14V3zm-1 2H5a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2v-8h-2v8H5V7h8V5z"/>
</svg>
</button>
<button id="modal-close-btn" class="modal-btn" title="Schließen"></button>
</div>
</div>
<div id="modal-tabs" class="modal-tabs" style="display:none"></div>
<div id="modal-body" class="modal-body"></div>
</div>
</div>
</main>
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/dompurify/dist/purify.min.js"></script>
<script src="/app.js"></script>
</body>
</html>
@@ -0,0 +1,112 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="297mm"
height="210mm"
viewBox="0 0 1052.3622 744.09448"
id="svg2"
version="1.1"
inkscape:version="0.91 r13725"
sodipodi:docname="tüit Logo tü original Farben ohne Rand.svg"
enable-background="new">
<defs
id="defs4" />
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="8"
inkscape:cx="441.15795"
inkscape:cy="487.31844"
inkscape:document-units="px"
inkscape:current-layer="layer3"
showgrid="false"
inkscape:window-width="1920"
inkscape:window-height="994"
inkscape:window-x="0"
inkscape:window-y="34"
inkscape:window-maximized="1">
<inkscape:grid
type="xygrid"
id="grid3344" />
</sodipodi:namedview>
<metadata
id="metadata7">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title></dc:title>
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Ebene 1"
inkscape:groupmode="layer"
id="layer1"
transform="translate(0,-308.26772)"
style="display:none">
<g
id="g3388">
<flowRoot
style="font-style:normal;font-weight:normal;font-size:40px;line-height:125%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
id="flowRoot3336"
xml:space="preserve"><flowRegion
id="flowRegion3338"><rect
y="158.07649"
x="74.285713"
height="137.14285"
width="577.14288"
id="rect3340" /></flowRegion><flowPara
id="flowPara3342" /></flowRoot> </g>
</g>
<g
inkscape:groupmode="layer"
id="layer3"
inkscape:label="Ebene 2"
style="display:inline">
<path
style="opacity:1;fill:#da251c;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:0.0452675"
d="m 571.71573,376.46706 0,95 c 0,50 40,50 40,50 l 0,100 -390,0 c -50,0 -150.000001,0 -150.000001,-150 l 0,-150 -45,0 c -3,0 -5,-2 -5,-5 l 0,-90 c 0,-3 2,-5 5,-5 l 45,0 0,-95 c 0,-3 2,-5 5,-5 l 90.000001,0 c 3,0 5,2 5,5 l 0,95 45,0 c 3,0 5,2 5,5 l 0,90 c 0,3 -2,5 -5,5 l -45,0 0,150 c 0,0 0,50 50,50 50,0 50,-50 50,-50 l 0,-95 c 0,-3 2,-5 5,-5 l 90,0 c 3,0 5,2 5,5 l 0,95 c 0,0 0,50 50,50 50,0 50,-50 50,-50 l 0,-95 c 0,-3 2,-5 5,-5 l 90,0 c 3,0 5,2 5,5 z"
id="path9932"
inkscape:connector-curvature="0"
sodipodi:nodetypes="ccccccccccccccccccccccczcccccczccccc" />
<path
style="fill:#da251c;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:0"
d="m 371.71573,226.46706 0,90 c 0,3 -2,5 -5,5 l -90,0 c -3,0 -5,-2 -5,-5 l 0,-90 c 0,-3 2,-5 5,-5 l 90,0 c 3,0 4.875,2 5,5 z"
id="path9934"
inkscape:connector-curvature="0"
sodipodi:nodetypes="ccccccccc" />
<path
style="fill:#da251c;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:0"
d="m 571.71573,226.46706 0,90 c 0,3 -2,5 -5,5 l -90,0 c -3,0 -5,-2 -5,-5 l 0,-90 c 0,-3 2,-5 5,-5 l 90,0 c 3,0 5,2 5,5 z"
id="path9944"
inkscape:connector-curvature="0"
sodipodi:nodetypes="ccccccccc" />
<path
style="fill:#ffd802;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:0"
d="m 766.71573,321.46706 -90,0 c -3,0 -5,-2 -5,-5 l 0,-90 c 0,-3 2,-5 5,-5 l 90,0 c 3,0 5,2 5,5 l 0,90 c 0,3 -2,5 -5,5 z"
id="path9946"
inkscape:connector-curvature="0"
sodipodi:nodetypes="ccccccccc" />
<path
style="fill:#ffd802;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:0"
d="m 1016.7157,321.46706 -44.99997,10e-6 0,150 c 0,150 -100,150 -150,150 l -190,0 0,-100 c 0,0 40,0 40,-50 l 0,-95 c 0,-3 2,-5 5,-5 l 90,0 c 3,0 5,2 5,5 l 0,95 c 0,0 0,50 50,50 50,0 50,-50 50,-50 l 0,-150 -45,0 c -3,0 -5,-2 -5,-5 l 0,-90 c 0,-3.00001 2,-5.00001 5,-5.00001 l 45,1e-5 0,-95.00001 c 0,-3 2,-5 5,-5 l 90,0 c 3,0 5,2 5,5 l 0,95 44.99997,0 c 3,0 5,2 5,5 l 0,90 c 0,3 -2,5 -5,5 z"
id="path9948"
inkscape:connector-curvature="0"
sodipodi:nodetypes="cccccccccccczcccccccccccccccc" />
</g>
</svg>

@@ -0,0 +1,131 @@
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>tüit Transkriptor — Einstellungen</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link href="https://fonts.googleapis.com/css2?family=Overpass:wght@300;400;600;700&display=swap" rel="stylesheet">
<style>
:root { --red:#DA251C;--yellow:#FFD802;--bg:#111;--surface:#1a1a1a;--surface2:#232323;--text:#e8e8e8;--muted:#888;--border:#2e2e2e; }
*{box-sizing:border-box;margin:0;padding:0;}
body{font-family:'Overpass',system-ui,sans-serif;background:var(--bg);color:var(--text);min-height:100vh;display:flex;flex-direction:column;}
header{display:flex;align-items:center;gap:12px;padding:16px 24px;border-bottom:1px solid var(--border);}
.header-logo{height:28px;width:auto;display:block;}
.header-divider{width:1px;height:20px;background:var(--border);flex-shrink:0;}
.header-appname{font-size:1rem;font-weight:600;letter-spacing:.04em;color:var(--muted);}
.header-right{margin-left:auto;display:flex;align-items:center;gap:12px;}
.back-btn{font-size:.75rem;padding:4px 10px;border-radius:20px;background:none;border:1px solid var(--border);color:var(--muted);cursor:pointer;font-family:inherit;text-decoration:none;transition:border-color .15s,color .15s;}
.back-btn:hover{border-color:var(--red);color:var(--red);}
main{flex:1;display:flex;flex-direction:column;gap:24px;padding:24px;max-width:700px;width:100%;margin:0 auto;}
h2{font-size:.8rem;color:var(--muted);text-transform:uppercase;letter-spacing:.06em;margin-bottom:12px;padding-bottom:8px;border-bottom:1px solid var(--border);}
.field{display:flex;flex-direction:column;gap:6px;margin-bottom:14px;}
label{font-size:.78rem;color:var(--muted);letter-spacing:.04em;}
select,input[type=text]{background:var(--surface);border:1px solid var(--border);color:var(--text);border-radius:8px;padding:10px 12px;font-family:inherit;font-size:.9rem;outline:none;transition:border-color .15s;width:100%;}
select:focus,input[type=text]:focus{border-color:var(--yellow);}
.btn-row{display:flex;gap:10px;margin-top:4px;}
.btn{font-size:.82rem;padding:8px 16px;border-radius:8px;border:1px solid var(--border);background:var(--surface2);color:var(--text);cursor:pointer;font-family:inherit;transition:border-color .15s,background .15s;}
.btn:hover{border-color:var(--red);}
.btn.primary{background:var(--red);border-color:var(--red);color:#fff;}
.btn.primary:hover{background:#b81e16;border-color:#b81e16;}
.toast{position:fixed;bottom:24px;right:24px;background:var(--surface2);border:1px solid var(--border);border-radius:8px;padding:10px 16px;font-size:.85rem;opacity:0;transition:opacity .2s;pointer-events:none;}
.toast.show{opacity:1;}
.combined-form{display:none;flex-direction:column;gap:10px;margin-top:10px;padding:12px;background:var(--surface2);border-radius:8px;border:1px solid var(--border);}
.combined-form.visible{display:flex;}
</style>
</head>
<body>
<header>
<img src="/logo.svg" class="header-logo" alt="tüit">
<div class="header-divider"></div>
<span class="header-appname">Transkriptor — Einstellungen</span>
<div class="header-right">
<a href="/" class="back-btn">&#8592; Zurück</a>
</div>
</header>
<main>
<section>
<h2>Audio</h2>
<div class="field">
<label>Aufnahmequelle</label>
<select id="audio-device">
<option value="">Systemstandard</option>
</select>
</div>
<div class="btn-row">
<button class="btn" id="refresh-devices-btn">Geräte aktualisieren</button>
<button class="btn" id="create-combined-btn">Combined Source erstellen</button>
</div>
<div class="combined-form" id="combined-form">
<div class="field">
<label>Mikrofon</label>
<select id="combined-mic"></select>
</div>
<div class="field">
<label>System-Audio Monitor</label>
<select id="combined-monitor"></select>
</div>
<div class="btn-row">
<button class="btn primary" id="combined-confirm-btn">Erstellen</button>
<button class="btn" id="combined-cancel-btn">Abbrechen</button>
</div>
</div>
</section>
<section>
<h2>Verarbeitung</h2>
<div class="field">
<label>Whisper Backend</label>
<select id="whisper-backend">
<option value="openai">OpenAI-kompatibel (faster-whisper-server)</option>
<option value="whispercpp">whisper.cpp Server</option>
</select>
</div>
<div class="field">
<label>Whisper Server URL (leer = lokal)</label>
<input type="text" id="whisper-url" placeholder="http://beastix:8080">
</div>
<div class="field">
<label>Whisper Modell</label>
<input type="text" id="whisper-model" placeholder="large-v3">
</div>
<div class="field">
<label>Ollama Server URL</label>
<input type="text" id="ollama-url" placeholder="http://localhost:11434">
</div>
<div class="field">
<label>Ollama Modell</label>
<select id="ollama-model"></select>
</div>
<div class="field">
<label>Obsidian Vault-Pfad (optional)</label>
<input type="text" id="obsidian-vault" placeholder="/mnt/d/.../obsidian">
</div>
<div class="btn-row">
<button class="btn primary" id="save-btn">Speichern</button>
</div>
</section>
<section>
<h2>Diarisierung</h2>
<div class="field">
<label>
<input type="checkbox" id="diar-enabled" style="margin-right:6px;accent-color:var(--yellow);">
Sprecher-Erkennung aktivieren
</label>
</div>
<div class="field">
<label>HuggingFace Token</label>
<input type="text" id="diar-hf-token" placeholder="hf_...">
</div>
<p style="font-size:.78rem;color:var(--muted);margin-bottom:10px;">
Token benötigt Lesezugriff auf
<a href="https://huggingface.co/pyannote/speaker-diarization-3.1" target="_blank"
style="color:var(--yellow);text-decoration:none;">pyannote/speaker-diarization-3.1</a>.
</p>
</section>
</main>
<div class="toast" id="toast"></div>
<script src="/settings.js"></script>
</body>
</html>
+123
@@ -0,0 +1,123 @@
const token = sessionStorage.getItem('token');
function authHeaders() {
return token ? { 'Authorization': 'Bearer ' + token } : {};
}
function apiFetch(url, options) {
options = options || {};
return fetch(url, Object.assign({}, options, {
headers: Object.assign({'Content-Type': 'application/json'}, authHeaders(), options.headers || {}),
}));
}
let _devices = [];
function showToast(msg) {
const t = document.getElementById('toast');
t.textContent = msg;
t.classList.add('show');
setTimeout(function() { t.classList.remove('show'); }, 2500);
}
async function loadDevices() {
const r = await apiFetch('/audio/devices');
if (!r.ok) return;
_devices = await r.json();
const sel = document.getElementById('audio-device');
const current = sel.value;
sel.replaceChildren(new Option('Systemstandard', ''));
_devices.forEach(function(d) { sel.appendChild(new Option(d.name, d.name)); });
if (current) sel.value = current;
['combined-mic', 'combined-monitor'].forEach(function(id) {
const el = document.getElementById(id);
el.replaceChildren();
_devices.forEach(function(d) { el.appendChild(new Option(d.name, d.name)); });
});
}
async function loadOllamaModels(baseUrl, current) {
try {
const parsed = new URL(baseUrl);
if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') return;
const r = await fetch(parsed.origin + '/api/tags');
if (!r.ok) return;
const data = await r.json();
const sel = document.getElementById('ollama-model');
sel.replaceChildren();
(data.models || []).forEach(function(m) { sel.appendChild(new Option(m.name, m.name)); });
if (current) sel.value = current;
} catch(e) {}
}
async function loadConfig() {
const r = await apiFetch('/config');
if (!r.ok) return;
const cfg = await r.json();
document.getElementById('audio-device').value = (cfg.audio && cfg.audio.device) || '';
document.getElementById('whisper-backend').value = (cfg.whisper && cfg.whisper.backend) || 'openai';
document.getElementById('whisper-url').value = (cfg.whisper && cfg.whisper.base_url) || '';
document.getElementById('whisper-model').value = (cfg.whisper && cfg.whisper.model) || 'large-v3';
const ollamaUrl = (cfg.ollama && cfg.ollama.base_url) || 'http://localhost:11434';
document.getElementById('ollama-url').value = ollamaUrl;
await loadOllamaModels(ollamaUrl, cfg.ollama && cfg.ollama.model);
const diarCfg = cfg.diarization || {};
document.getElementById('diar-enabled').checked = !!diarCfg.enabled;
document.getElementById('diar-hf-token').value = diarCfg.hf_token || '';
document.getElementById('obsidian-vault').value = (cfg.obsidian && cfg.obsidian.vault) || '';
}
document.getElementById('refresh-devices-btn').addEventListener('click', loadDevices);
document.getElementById('create-combined-btn').addEventListener('click', function() {
document.getElementById('combined-form').classList.toggle('visible');
});
document.getElementById('combined-cancel-btn').addEventListener('click', function() {
document.getElementById('combined-form').classList.remove('visible');
});
document.getElementById('combined-confirm-btn').addEventListener('click', async function() {
const mic = document.getElementById('combined-mic').value;
const monitor = document.getElementById('combined-monitor').value;
const r = await apiFetch('/audio/combined', {
method: 'POST',
body: JSON.stringify({ mic: mic, monitor: monitor }),
});
if (!r.ok) { showToast('Fehler beim Erstellen'); return; }
const data = await r.json();
showToast('Erstellt: ' + data.device);
document.getElementById('combined-form').classList.remove('visible');
await loadDevices();
document.getElementById('audio-device').value = data.device;
});
document.getElementById('ollama-url').addEventListener('change', function(e) {
loadOllamaModels(e.target.value, document.getElementById('ollama-model').value);
});
document.getElementById('save-btn').addEventListener('click', async function() {
const body = {
audio: { device: document.getElementById('audio-device').value },
whisper: {
base_url: document.getElementById('whisper-url').value,
model: document.getElementById('whisper-model').value,
backend: document.getElementById('whisper-backend').value,
},
ollama: {
base_url: document.getElementById('ollama-url').value,
model: document.getElementById('ollama-model').value,
},
obsidian: {
vault: document.getElementById('obsidian-vault').value.trim(),
},
diarization: {
enabled: document.getElementById('diar-enabled').checked,
hf_token: document.getElementById('diar-hf-token').value.trim(),
},
};
const r = await apiFetch('/config', { method: 'PUT', body: JSON.stringify(body) });
if (r.ok) { showToast('Gespeichert'); } else { showToast('Fehler beim Speichern'); }
});
(async function() {
if (!token) { location.href = '/login'; return; }
await loadDevices();
await loadConfig();
})();
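
The same save call can be driven outside the browser. A minimal sketch, assuming the default port 8765 and a placeholder bearer token; the body mirrors what settings.js sends to PUT /config:

import httpx

# Placeholder values; obtain a real token via POST /login first.
body = {
    "whisper": {"backend": "whispercpp", "base_url": "http://beastix:8080", "model": "large-v3"},
    "ollama": {"base_url": "http://localhost:11434", "model": "gemma3:12b"},
    "obsidian": {"vault": "/path/to/vault"},
    "diarization": {"enabled": False, "hf_token": ""},
}
r = httpx.put(
    "http://localhost:8765/config",
    json=body,
    headers={"Authorization": "Bearer <token>"},
)
r.raise_for_status()  # server responds with the deep-merged config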
+147
@@ -0,0 +1,147 @@
<!DOCTYPE html>
<html lang="de">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>tüit Transkriptor — Ersteinrichtung</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link href="https://fonts.googleapis.com/css2?family=Overpass:wght@300;400;600;700&display=swap" rel="stylesheet">
<style>
:root {
--red: #DA251C;
--yellow: #FFD802;
--bg: #111;
--surface: #1a1a1a;
--text: #e8e8e8;
--muted: #888;
--border: #2e2e2e;
}
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
font-family: 'Overpass', system-ui, sans-serif;
background: var(--bg);
color: var(--text);
min-height: 100vh;
display: flex;
align-items: center;
justify-content: center;
}
.card {
background: var(--surface);
border: 1px solid var(--border);
border-radius: 12px;
padding: 40px;
width: 100%;
max-width: 420px;
display: flex;
flex-direction: column;
gap: 24px;
}
.logo { display: flex; align-items: center; gap: 10px; }
.logo-dot { width: 12px; height: 12px; background: var(--red); border-radius: 50%; flex-shrink: 0; }
.logo h1 { font-size: 1.1rem; font-weight: 600; }
.logo h1 span { color: var(--red); }
.subtitle { font-size: 0.85rem; color: var(--muted); }
.field { display: flex; flex-direction: column; gap: 6px; }
label { font-size: 0.78rem; color: var(--muted); text-transform: uppercase; letter-spacing: 0.06em; }
input {
background: #111;
border: 1px solid var(--border);
color: var(--text);
border-radius: 6px;
padding: 10px 12px;
font-family: inherit;
font-size: 0.9rem;
outline: none;
transition: border-color 0.15s;
}
input:focus { border-color: var(--yellow); }
button {
background: var(--red);
color: #fff;
border: none;
border-radius: 6px;
padding: 12px;
font-family: inherit;
font-size: 0.9rem;
font-weight: 600;
cursor: pointer;
transition: opacity 0.15s;
}
button:hover { opacity: 0.88; }
.error {
background: rgba(218,37,28,0.12);
border: 1px solid rgba(218,37,28,0.3);
border-radius: 6px;
padding: 10px 12px;
font-size: 0.85rem;
color: #f87171;
display: none;
}
.error.visible { display: block; }
</style>
</head>
<body>
<div class="card">
<div class="logo">
<div class="logo-dot"></div>
<h1>tüit <span>Transkriptor</span></h1>
</div>
<p class="subtitle">Ersteinrichtung — lege den Administrator-Account an.</p>
<div class="error" id="error"></div>
<div class="field">
<label for="username">Benutzername</label>
<input type="text" id="username" autocomplete="username" autofocus>
</div>
<div class="field">
<label for="password">Passwort</label>
<input type="password" id="password" autocomplete="new-password">
</div>
<div class="field">
<label for="confirm">Passwort bestätigen</label>
<input type="password" id="confirm" autocomplete="new-password">
</div>
<div class="field">
<label for="output_dir">Transkripte speichern unter</label>
<input type="text" id="output_dir" placeholder="~/cloud.shron.de/Hetzner Storagebox/work">
</div>
<button id="submit-btn">Einrichtung abschließen</button>
</div>
<script>
document.getElementById('submit-btn').addEventListener('click', async () => {
const username = document.getElementById('username').value.trim();
const password = document.getElementById('password').value;
const confirm = document.getElementById('confirm').value;
const output_dir = document.getElementById('output_dir').value.trim();
const errorEl = document.getElementById('error');
errorEl.classList.remove('visible');
if (!username) return showError('Benutzername darf nicht leer sein.');
if (password.length < 6) return showError('Passwort muss mindestens 6 Zeichen lang sein.');
if (password !== confirm) return showError('Passwörter stimmen nicht überein.');
const r = await fetch('/setup', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ username, password, output_dir: output_dir || null }),
});
const data = await r.json();
if (data.ok) {
window.location.href = '/';
} else {
showError(data.error || 'Fehler bei der Einrichtung.');
}
function showError(msg) {
errorEl.textContent = msg;
errorEl.classList.add('visible');
}
});
</script>
</body>
</html>
+7 -4
@@ -21,8 +21,11 @@ else
echo " Für GPU: sudo pacman -S rocm-hip-sdk"
fi
echo "Python-Abhängigkeiten werden installiert..."
pip install --user -r "$SCRIPT_DIR/requirements.txt"
VENV_DIR="$SCRIPT_DIR/.venv"
echo "Python-Abhängigkeiten werden installiert (venv: $VENV_DIR)..."
python3 -m venv "$VENV_DIR"
"$VENV_DIR/bin/pip" install -q -r "$SCRIPT_DIR/requirements.txt"
PYTHON="$VENV_DIR/bin/python"
# ── Netzwerk-Modus abfragen ────────────────────────────────────────────────────
@@ -57,7 +60,7 @@ Description=tüit Transkriptor
After=graphical-session.target
[Service]
-ExecStart=$(command -v python3) ${SCRIPT_DIR}/main.py
+ExecStart=${SCRIPT_DIR}/.venv/bin/python ${SCRIPT_DIR}/main.py
Restart=on-failure
RestartSec=5
Environment=DISPLAY=:0
@@ -81,7 +84,7 @@ echo " Systemeinstellungen → Kurzbefehle → Eigene Kurzbefehle"
echo " Befehl: pkill -USR1 -f main.py"
echo ""
if [[ "$NET_MODE" == "2" ]]; then
echo "Netzwerk-Zugriff: http://$(hostname -I | awk '{print $1}'):8765"
echo "Netzwerk-Zugriff: http://$(ip route get 1 2>/dev/null | awk '{print $7; exit}'):8765"
echo "Tipp: Seite als Lesezeichen auf Handy/PC speichern."
echo ""
fi
+138 -5
@@ -1,21 +1,67 @@
import httpx
IDENTIFY_SPEAKERS_PROMPT = """Du bekommst den Anfang eines Gesprächstranskripts mit Sprecher-Labels (SPEAKER_00, SPEAKER_01, ...).
Ermittle, welche echten Namen den Sprechern zugeordnet werden können — z.B. durch direkte Anrede ("Herr Möller", "Frank").
Antworte NUR mit einem JSON-Objekt: {"SPEAKER_00": "Name oder null", "SPEAKER_01": "Name oder null"}
Kein weiterer Text, keine Erklärung."""
TITLE_TLDR_PROMPT = """Du bekommst einen aufbereiteten Transkript-Text.
Gib NUR ein JSON-Objekt zurück mit zwei Feldern:
- "title": ein prägnanter, aussagekräftiger Titel (max. 8 Wörter, kein Datum, kein "Diktat")
- "tldr": 2-3 Sätze, die den Inhalt des Transkripts konkret zusammenfassen
Kein weiterer Text, kein Kommentar, kein Markdown-Block."""
SUMMARIZE_PROMPT = """Du bist ein präziser Assistent für Business-Kommunikation.
Du bekommst ein Gesprächstranskript mit Sprecher-Labels.
Erstelle eine strukturierte Zusammenfassung auf Deutsch mit:
1. Einem passenden H1-Titel
2. ## Wichtigste Punkte (Aufzählung)
3. ## Offene Fragen (Aufzählung, falls vorhanden)
4. ## Nächste Schritte / Ideen (Aufzählung, falls vorhanden)
Antworte NUR mit dem fertigen Markdown."""
SYSTEM_PROMPT = """Du bist ein präziser Schreibassistent.
Du bekommst einen rohen Sprachtranskript und optionale Instruktionen des Nutzers.
Deine Aufgabe:
1. Bereinige den Text (Füllwörter, Wiederholungen, Tippfehler)
-2. Strukturiere ihn mit Markdown-Überschriften wenn sinnvoll
-3. Erzeuge einen passenden deutschen Titel als H1
-4. Beachte Instruktionen des Nutzers wenn vorhanden
-5. Antworte NUR mit dem fertigen Markdown — kein Kommentar, keine Erklärung
+2. Gliedere den Text in sinnvolle Absätze — trenne Gedanken durch Leerzeilen
+3. Verwende Markdown-Überschriften (##) wenn der Text mehrere Themen hat
+4. Verwende Aufzählungslisten (- ) für Aufzählungen oder Handlungsschritte
+5. Erzeuge einen passenden deutschen Titel als H1
+6. Beachte Instruktionen des Nutzers wenn vorhanden
+7. Antworte NUR mit dem fertigen Markdown — kein Kommentar, keine Erklärung
Format:
# Titel
-Inhalt...
+Erster Absatz...
+Zweiter Absatz...
+## Abschnitt (nur wenn sinnvoll)
+- Punkt 1
+- Punkt 2
"""
PUNCTUATE_PROMPT = """Du bekommst einen rohen deutschen Sprachtranskript ohne Großschreibung und Satzzeichen.
Füge AUSSCHLIESSLICH Satzzeichen (Punkt, Komma, Fragezeichen, Ausrufezeichen) und Großschreibung am Satzanfang hinzu.
Verändere KEINE Wörter, kürze NICHTS, füge NICHTS hinzu.
Wenn Sprecher-Labels vorhanden sind (z.B. **Thomas:**), behalte sie exakt so bei.
Antworte NUR mit dem korrigierten Text, ohne Kommentar."""
def _strip_code_fences(text: str) -> str:
"""Remove markdown code fences (```json ... ```) from LLM responses."""
import re
m = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text)
if m:
return m.group(1)
return text
class OllamaClient:
def __init__(self, base_url: str = "http://localhost:11434"):
self.base_url = base_url
@@ -42,3 +88,90 @@ class OllamaClient:
)
r.raise_for_status()
return r.json()["response"]
async def generate_title_and_tldr(
self,
text: str,
model: str = "gemma3:12b",
) -> tuple[str, str]:
"""Return (title, tldr) for the given text. Falls back to defaults on error."""
import json
async with httpx.AsyncClient(timeout=60) as client:
r = await client.post(
f"{self.base_url}/api/generate",
json={
"model": model,
"prompt": f"Text:\n{text[:3000]}",
"system": TITLE_TLDR_PROMPT,
"stream": False,
},
)
r.raise_for_status()
raw = _strip_code_fences(r.json()["response"].strip())
try:
data = json.loads(raw)
title = str(data.get("title", "")).strip() or "Diktat"
tldr = str(data.get("tldr", "")).strip() or "Kein TL;DR verfügbar."
return title, tldr
except Exception:
return "Diktat", "Kein TL;DR verfügbar."
async def punctuate(
self,
text: str,
model: str = "gemma3:12b",
) -> str:
"""Add punctuation and capitalisation to raw whisper output without changing words."""
async with httpx.AsyncClient(timeout=120) as client:
r = await client.post(
f"{self.base_url}/api/generate",
json={"model": model, "prompt": text, "system": PUNCTUATE_PROMPT, "stream": False},
)
r.raise_for_status()
result = r.json()["response"].strip()
return result if result else text
async def identify_speakers(
self,
transcript_excerpt: str,
model: str = "gemma3:12b",
) -> dict[str, str]:
"""Try to map SPEAKER_XX labels to real names. Returns {} on failure."""
import json
async with httpx.AsyncClient(timeout=60) as client:
r = await client.post(
f"{self.base_url}/api/generate",
json={
"model": model,
"prompt": f"Transkript-Anfang:\n{transcript_excerpt[:2000]}",
"system": IDENTIFY_SPEAKERS_PROMPT,
"stream": False,
},
)
r.raise_for_status()
raw = r.json()["response"].strip()
try:
data = json.loads(raw)
if not isinstance(data, dict):
return {}
return {k: v for k, v in data.items() if v and str(v).lower() != "null"}
except Exception:
return {}
async def summarize(
self,
annotated_transcript: str,
model: str = "gemma3:12b",
) -> str:
async with httpx.AsyncClient(timeout=180) as client:
r = await client.post(
f"{self.base_url}/api/generate",
json={
"model": model,
"prompt": f"Transkript:\n{annotated_transcript}",
"system": SUMMARIZE_PROMPT,
"stream": False,
},
)
r.raise_for_status()
return r.json()["response"].strip()
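
A minimal usage sketch for the new client helpers, assuming a reachable Ollama instance with gemma3:12b pulled; the sample text is made up:

import asyncio
from llm import OllamaClient

async def demo():
    client = OllamaClient(base_url="http://localhost:11434")
    raw = "so das ist der test wir sprechen hier ohne punkt und komma"
    # punctuate() keeps the words and only adds punctuation/capitalisation
    punctuated = await client.punctuate(raw)
    # generate_title_and_tldr() falls back to ("Diktat", "Kein TL;DR verfügbar.") on parse errors
    title, tldr = await client.generate_title_and_tldr(punctuated)
    print(title, "-", tldr)

asyncio.run(demo())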
+54 -6
@@ -39,6 +39,55 @@ async def appjs():
return FileResponse(str(FRONTEND_DIR / "app.js"))
@app.get("/logo.svg")
async def logo():
return FileResponse(str(FRONTEND_DIR / "logo.svg"), media_type="image/svg+xml")
@app.get("/settings.js")
async def settingsjs():
return FileResponse(str(FRONTEND_DIR / "settings.js"))
# ── PipeWire combined source restore ──────────────────────────────────────────
def _restore_pipewire_combined():
"""Recreate transkriptor-combined.monitor on startup if it was previously configured."""
import json, subprocess, logging
state_path = Path(os.path.expanduser("~/.config/tueit-transcriber/pipewire-modules.json"))
if not state_path.exists():
return
try:
data = json.loads(state_path.read_text())
mic = data.get("mic")
monitor = data.get("monitor")
if not mic or not monitor:
return
sources = subprocess.check_output(
["pactl", "list", "sources", "short"], stderr=subprocess.DEVNULL, timeout=5
).decode()
if "transkriptor-combined.monitor" in sources:
return # already loaded
sink_id = subprocess.check_output([
"pactl", "load-module", "module-null-sink",
"sink_name=transkriptor-combined",
"sink_properties=device.description=transkriptor-combined",
], timeout=5).decode().strip()
mic_id = subprocess.check_output([
"pactl", "load-module", "module-loopback",
f"source={mic}", "sink=transkriptor-combined",
], timeout=5).decode().strip()
mon_id = subprocess.check_output([
"pactl", "load-module", "module-loopback",
f"source={monitor}", "sink=transkriptor-combined",
], timeout=5).decode().strip()
ids = [int(sink_id), int(mic_id), int(mon_id)]
state_path.write_text(json.dumps({"ids": ids, "mic": mic, "monitor": monitor}))
logging.getLogger(__name__).info("Restored PipeWire combined source (ids: %s)", ids)
except Exception as e:
logging.getLogger(__name__).warning("Could not restore PipeWire combined source: %s", e)
# ── PID file ───────────────────────────────────────────────────────────────────
def write_pid(pid_path: str):
@@ -129,19 +178,16 @@ def run_server(config: uvicorn.Config):
# ── Entrypoint ─────────────────────────────────────────────────────────────────
if __name__ == "__main__":
-from auth import setup_wizard, has_users
-if not has_users():
-setup_wizard()
cfg = load_config()
port = cfg["server"]["port"]
host = cfg.get("network", {}).get("host", "127.0.0.1")
pid_path = cfg.get("pid_file", os.path.expanduser("~/.local/run/tueit-transcriber.pid"))
write_pid(pid_path)
_restore_pipewire_combined()
signal.signal(signal.SIGUSR1, _sigusr1_handler)
-uvicorn_cfg = uvicorn.Config(app, host=host, port=port, log_level="warning")
+uvicorn_cfg = uvicorn.Config(app, host=host, port=port, log_level="debug")
server_thread = threading.Thread(target=run_server, args=(uvicorn_cfg,), daemon=True)
server_thread.start()
@@ -152,7 +198,9 @@ if __name__ == "__main__":
break
time.sleep(0.1)
-webbrowser.open(f"http://localhost:{port}")
+from auth import has_users
+start_path = "/setup" if not has_users() else "/"
+webbrowser.open(f"http://localhost:{port}{start_path}")
try:
run_tray(port)
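
The restore logic above keys off a small JSON state file. A sketch of its shape with hypothetical device names (the file is created by the /audio/combined handler and rewritten with fresh module ids after each restore):

import json
from pathlib import Path

state = {
    "ids": [536, 537, 538],  # pactl module ids: null-sink plus two loopbacks
    "mic": "alsa_input.usb-mic.mono-fallback",  # hypothetical source name
    "monitor": "alsa_output.pci.analog-stereo.monitor",  # hypothetical monitor name
}
path = Path("~/.config/tueit-transcriber/pipewire-modules.json").expanduser()
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(json.dumps(state))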
+178 -1
@@ -35,6 +35,22 @@ def save_transcript(
return path
def read_transcript(output_dir: str, filename: str) -> str | None:
"""Return file content for a .md file inside output_dir (flat or one level deep)."""
if not filename.endswith(".md"):
return None
parts = filename.split("/")
if len(parts) > 2 or any(p in (".", "..") or not p for p in parts):
return None
path = os.path.join(output_dir, filename)
if not os.path.abspath(path).startswith(os.path.abspath(output_dir) + os.sep):
return None
if not os.path.exists(path):
return None
with open(path, encoding="utf-8") as f:
return f.read()
def list_transcripts(output_dir: str, limit: int = 20) -> list[dict]:
if not os.path.exists(output_dir):
return []
@@ -46,5 +62,166 @@ def list_transcripts(output_dir: str, limit: int = 20) -> list[dict]:
for f in files:
full = os.path.join(output_dir, f)
stat = os.stat(full)
result.append({"filename": f, "path": full, "size": stat.st_size, "mtime": stat.st_mtime})
item: dict = {"filename": f, "path": full, "size": stat.st_size, "mtime": stat.st_mtime}
if f.endswith("-index.md"):
base = f[: -len("-index.md")]
related: dict[str, str] = {}
for suffix, key in [("-transkript.md", "transkript"), ("-zusammenfassung.md", "zusammenfassung")]:
rel_filename = f"{base}/{base}{suffix}"
if os.path.exists(os.path.join(output_dir, rel_filename)):
related[key] = rel_filename
if related:
item["related"] = related
result.append(item)
return result
def write_solo_docs(
raw_text: str,
refined: str,
output_dir: str,
dt: "datetime | None" = None,
title: str = "",
tldr: str = "",
) -> dict[str, str]:
"""Write index (in output_dir), transkript + zusammenfassung (in subdir)."""
if dt is None:
dt = datetime.now()
os.makedirs(output_dir, exist_ok=True)
if not title:
title = "Diktat"
for line in refined.splitlines():
if line.startswith("# "):
title = line[2:].strip()
break
if not tldr:
tldr = _extract_tldr(refined)
base = dt.strftime("%Y-%m-%d-%H%M") + "-" + slugify(title)[:50]
date_str = dt.strftime("%d.%m.%Y %H:%M")
frontmatter = f"---\ndate: {dt.isoformat(timespec='seconds')}\ntags: [transkript]\n---\n\n"
index_filename = f"{base}-index.md"
subdir = os.path.join(output_dir, base)
os.makedirs(subdir, exist_ok=True)
# --- transkript (raw whisper output, in subdir) ---
transkript_filename = f"{base}-transkript.md"
transkript_path = os.path.join(subdir, transkript_filename)
with open(transkript_path, "w", encoding="utf-8") as f:
f.write(frontmatter)
f.write(f"# {title} — Rohtranskript\n\n")
f.write(f"← [Index](../{index_filename})\n\n")
f.write(raw_text)
if not raw_text.endswith("\n"):
f.write("\n")
# --- zusammenfassung (Ollama-polished, in subdir) ---
zusammenfassung_filename = f"{base}-zusammenfassung.md"
zusammenfassung_path = os.path.join(subdir, zusammenfassung_filename)
with open(zusammenfassung_path, "w", encoding="utf-8") as f:
f.write(frontmatter)
f.write(f"← [Index](../{index_filename})\n\n")
f.write(refined)
if not refined.endswith("\n"):
f.write("\n")
# --- index (in output_dir root) ---
index_content = (
f"# {title}\n\n"
f"**Datum:** {date_str}\n\n"
f"> {tldr}\n\n"
f"- [Transkript]({base}/{transkript_filename})\n"
f"- [Zusammenfassung]({base}/{zusammenfassung_filename})\n"
)
index_path = os.path.join(output_dir, index_filename)
with open(index_path, "w", encoding="utf-8") as f:
f.write(f"---\ndate: {dt.isoformat(timespec='seconds')}\ntags: [transkript, index]\n---\n\n")
f.write(index_content)
return {"index": index_path, "transkript": transkript_path, "zusammenfassung": zusammenfassung_path}
def write_meeting_docs(
aligned_segments: list[tuple[str, str]],
summary: str,
speakers: list[str],
duration_min: int,
output_dir: str,
dt: "datetime | None" = None,
title: str = "",
tldr: str = "",
transcript_text: str = "",
) -> dict[str, str]:
"""Write index (in output_dir), transkript + zusammenfassung (in subdir)."""
if dt is None:
dt = datetime.now()
os.makedirs(output_dir, exist_ok=True)
if not title:
title = f"Meeting {dt.strftime('%d.%m.%Y %H:%M')}"
if not tldr:
tldr = _extract_tldr(summary)
base = dt.strftime("%Y-%m-%d-%H%M") + "-" + slugify(title)[:50]
date_str = dt.strftime("%d.%m.%Y %H:%M")
frontmatter_base = f"---\ndate: {dt.isoformat(timespec='seconds')}\ntags: [transkript, meeting]\n---\n\n"
index_filename = f"{base}-index.md"
subdir = os.path.join(output_dir, base)
os.makedirs(subdir, exist_ok=True)
# --- transkript (in subdir) ---
if transcript_text:
transcript_content = transcript_text
else:
transcript_lines = []
for speaker, text in aligned_segments:
transcript_lines.append(f"**{speaker}:** {text}\n")
transcript_content = "\n".join(transcript_lines)
transkript_filename = f"{base}-transkript.md"
transkript_path = os.path.join(subdir, transkript_filename)
with open(transkript_path, "w", encoding="utf-8") as f:
f.write(frontmatter_base)
f.write(f"← [Index](../{index_filename})\n\n")
f.write(transcript_content)
if not transcript_content.endswith("\n"):
f.write("\n")
# --- zusammenfassung (in subdir) ---
zusammenfassung_filename = f"{base}-zusammenfassung.md"
zusammenfassung_path = os.path.join(subdir, zusammenfassung_filename)
with open(zusammenfassung_path, "w", encoding="utf-8") as f:
f.write(frontmatter_base)
f.write(f"← [Index](../{index_filename})\n\n")
f.write(summary)
if not summary.endswith("\n"):
f.write("\n")
# --- index (in output_dir root) ---
speaker_str = ", ".join(speakers) if speakers else "Unbekannt"
index_content = (
f"# {title}\n\n"
f"**Datum:** {date_str} \n"
f"**Sprecher:** {speaker_str} \n"
f"**Dauer:** {duration_min} min\n\n"
f"> {tldr}\n\n"
f"- [Transkript]({base}/{transkript_filename})\n"
f"- [Zusammenfassung]({base}/{zusammenfassung_filename})\n"
)
index_path = os.path.join(output_dir, index_filename)
with open(index_path, "w", encoding="utf-8") as f:
f.write(f"---\ndate: {dt.isoformat(timespec='seconds')}\ntags: [transkript, meeting, index]\n---\n\n")
f.write(index_content)
return {"index": index_path, "transkript": transkript_path, "zusammenfassung": zusammenfassung_path}
def _extract_tldr(summary: str) -> str:
"""Return the first non-heading, non-empty line from the summary as TL;DR."""
for line in summary.splitlines():
stripped = line.strip()
if stripped and not stripped.startswith("#"):
return stripped[:200]
return "Kein TL;DR verfügbar."
+1
@@ -9,3 +9,4 @@ numpy>=1.26
tomli_w>=1.0
pytest>=8.0
pytest-asyncio>=0.23
pyannote.audio>=3.3
+33
@@ -0,0 +1,33 @@
def test_align_assigns_speaker_by_overlap():
from alignment import align_segments
whisper = [
{"start": 0.0, "end": 2.0, "text": "Hallo"},
{"start": 2.1, "end": 4.0, "text": "Wie geht es"},
]
speakers = [
(0.0, 2.5, "SPEAKER_00"),
(2.5, 5.0, "SPEAKER_01"),
]
result = align_segments(whisper, speakers)
assert result[0] == ("SPEAKER_00", "Hallo")
assert result[1] == ("SPEAKER_01", "Wie geht es")
def test_align_merges_consecutive_same_speaker():
from alignment import align_segments
whisper = [
{"start": 0.0, "end": 1.0, "text": "Hallo"},
{"start": 1.1, "end": 2.0, "text": "Welt"},
]
speakers = [(0.0, 3.0, "SPEAKER_00")]
result = align_segments(whisper, speakers)
assert len(result) == 1
assert result[0] == ("SPEAKER_00", "Hallo Welt")
def test_align_fallback_when_no_speaker_overlap():
from alignment import align_segments
whisper = [{"start": 0.0, "end": 1.0, "text": "Hallo"}]
speakers = []
result = align_segments(whisper, speakers)
assert result[0][0] == "SPEAKER_00"
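
alignment.py itself is not part of this diff; the following is an assumption-labeled sketch of an align_segments that satisfies these three tests, not the shipped implementation:

def align_segments(whisper_segments, speaker_turns):
    """Assign each whisper segment the speaker with maximal time overlap,
    merging consecutive segments of the same speaker."""
    aligned: list[tuple[str, str]] = []
    for seg in whisper_segments:
        best, best_overlap = "SPEAKER_00", 0.0  # fallback when nothing overlaps
        for start, end, name in speaker_turns:
            overlap = min(seg["end"], end) - max(seg["start"], start)
            if overlap > best_overlap:
                best, best_overlap = name, overlap
        text = seg["text"].strip()
        if aligned and aligned[-1][0] == best:
            aligned[-1] = (best, aligned[-1][1] + " " + text)  # merge same speaker
        else:
            aligned.append((best, text))
    return aligned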
+156
@@ -45,6 +45,45 @@ def test_status_requires_auth():
assert r.status_code == 401
def make_app_for_dir(output_dir: str):
from fastapi import FastAPI
from api.router import router, current_user
app = FastAPI()
app.dependency_overrides[current_user] = lambda: {"username": "", "output_dir": output_dir, "is_admin": False}
app.include_router(router)
return app
def test_get_transcript_returns_content(tmp_path):
f = tmp_path / "2026-01-01-0900-test.md"
f.write_text("# Hello\n\ncontent here\n")
client = TestClient(make_app_for_dir(str(tmp_path)))
r = client.get("/transcripts/2026-01-01-0900-test.md")
assert r.status_code == 200
assert "Hello" in r.text
def test_get_transcript_rejects_path_traversal(tmp_path):
client = TestClient(make_app_for_dir(str(tmp_path)))
r = client.get("/transcripts/..%2Fsecret.md")
assert r.status_code == 404
def test_delete_transcript_removes_file(tmp_path):
f = tmp_path / "2026-01-01-0900-test.md"
f.write_text("content")
client = TestClient(make_app_for_dir(str(tmp_path)))
r = client.delete("/transcripts/2026-01-01-0900-test.md")
assert r.status_code == 200
assert not f.exists()
def test_delete_transcript_rejects_path_traversal(tmp_path):
client = TestClient(make_app_for_dir(str(tmp_path)))
r = client.delete("/transcripts/..%2Fsecret.md")
assert r.status_code == 404
def test_login_rejects_wrong_credentials():
import tempfile, os
from unittest.mock import patch
@@ -58,3 +97,120 @@ def test_login_rejects_wrong_credentials():
with patch("auth.USERS_PATH", users_path):
r = client.post("/login", json={"username": "nobody", "password": "wrong"})
assert r.status_code == 401
def test_audio_devices_returns_list(monkeypatch):
import sounddevice as sd
from main import app
from api.router import current_user
fake_devices = [
{"name": "Fake Mic", "max_input_channels": 1, "max_output_channels": 0},
{"name": "Fake Monitor", "max_input_channels": 2, "max_output_channels": 0},
{"name": "Fake Speaker", "max_input_channels": 0, "max_output_channels": 2},
]
monkeypatch.setattr(sd, "query_devices", lambda: fake_devices)
app.dependency_overrides[current_user] = lambda: {"username": "u", "output_dir": "/tmp", "is_admin": True}
try:
client = TestClient(app)
r = client.get("/audio/devices", headers={"Authorization": "Bearer fake"})
assert r.status_code == 200
devices = r.json()
assert len(devices) == 2 # only input devices
assert devices[0]["name"] == "Fake Mic"
finally:
app.dependency_overrides.pop(current_user, None)
def test_audio_devices_forbidden_for_non_admin():
from main import app
from api.router import current_user
app.dependency_overrides[current_user] = lambda: {"username": "u", "output_dir": "/tmp", "is_admin": False}
try:
client = TestClient(app)
r = client.get("/audio/devices", headers={"Authorization": "Bearer fake"})
assert r.status_code == 403
finally:
app.dependency_overrides.pop(current_user, None)
def test_audio_combined_forbidden_for_non_admin():
from main import app
from api.router import current_user
app.dependency_overrides[current_user] = lambda: {"username": "u", "output_dir": "/tmp", "is_admin": False}
try:
from fastapi.testclient import TestClient
client = TestClient(app)
r = client.post("/audio/combined", json={"mic": "x", "monitor": "y"},
headers={"Authorization": "Bearer fake"})
assert r.status_code == 403
finally:
app.dependency_overrides.pop(current_user, None)
def test_status_includes_is_admin():
from main import app
from api.router import current_user
app.dependency_overrides[current_user] = lambda: {"username": "u", "output_dir": "/tmp", "is_admin": True}
try:
from fastapi.testclient import TestClient
client = TestClient(app)
r = client.get("/status", headers={"Authorization": "Bearer fake"})
assert r.status_code == 200
assert r.json()["is_admin"] is True
finally:
app.dependency_overrides.pop(current_user, None)
def test_state_has_speaker_fields():
from api.state import AppState
s = AppState()
assert hasattr(s, "_speakers_event")
assert hasattr(s, "_pending_aligned_segments")
assert hasattr(s, "_speaker_names")
assert s._speakers_event is None
assert s._pending_aligned_segments is None
assert s._speaker_names is None
def test_post_speakers_resolves_pipeline_pause():
import asyncio
from main import app
from api.router import current_user
from api.state import state
state._speakers_event = asyncio.Event()
state._speaker_names = None
app.dependency_overrides[current_user] = lambda: {"username": "u", "output_dir": "/tmp", "is_admin": False}
try:
from fastapi.testclient import TestClient
client = TestClient(app)
r = client.post("/speakers", json={"SPEAKER_00": "Thomas", "SPEAKER_01": "Möller"})
assert r.status_code == 200
assert state._speaker_names == {"SPEAKER_00": "Thomas", "SPEAKER_01": "Möller"}
assert state._speakers_event.is_set()
finally:
app.dependency_overrides.pop(current_user, None)
state._speakers_event = None
state._speaker_names = None
def test_put_config_deep_merges(tmp_path, monkeypatch):
import config as cfg_mod
monkeypatch.setattr(cfg_mod, "CONFIG_PATH", str(tmp_path / "config.toml"))
from main import app
from api.router import current_user
app.dependency_overrides[current_user] = lambda: {"username": "u", "output_dir": "/tmp", "is_admin": True}
try:
from fastapi.testclient import TestClient
client = TestClient(app)
r = client.put("/config",
json={"whisper": {"base_url": "http://beastix:8000"}},
headers={"Authorization": "Bearer fake"})
assert r.status_code == 200
data = r.json()
# base_url updated, model preserved
assert data["whisper"]["base_url"] == "http://beastix:8000"
assert data["whisper"]["model"] == "large-v3"
finally:
app.dependency_overrides.pop(current_user, None)
+11
@@ -27,3 +27,14 @@ def test_recorder_save_wav(tmp_path):
with wave.open(out) as wf:
assert wf.getframerate() == 16000
assert wf.getnchannels() == 1
def test_recorder_stores_device_param():
from audio import AudioRecorder
rec = AudioRecorder(device="my-pipewire-source")
assert rec.device == "my-pipewire-source"
def test_recorder_device_none_when_empty_string():
from audio import AudioRecorder
rec = AudioRecorder(device="")
assert rec.device is None
+27 -2
@@ -6,9 +6,8 @@ from unittest.mock import patch
def test_config_loads_defaults():
with tempfile.TemporaryDirectory() as tmpdir:
cfg_path = os.path.join(tmpdir, "config.toml")
+import config
with patch("config.CONFIG_PATH", cfg_path):
-import importlib, config
-importlib.reload(config)
cfg = config.load()
assert cfg["ollama"]["model"] == "gemma3:12b"
assert cfg["whisper"]["model"] == "large-v3"
@@ -23,3 +22,29 @@ def test_config_creates_file_on_first_run():
with patch("config.CONFIG_PATH", cfg_path):
config.load()
assert os.path.exists(cfg_path)
def test_config_has_audio_and_whisper_base_url():
import config
from unittest.mock import patch
import tempfile, os
with tempfile.TemporaryDirectory() as tmpdir:
cfg_path = os.path.join(tmpdir, "config.toml")
with patch("config.CONFIG_PATH", cfg_path):
cfg = config.load()
assert "audio" in cfg
assert cfg["audio"]["device"] == ""
assert cfg["whisper"]["base_url"] == ""
def test_config_has_diarization_defaults():
from unittest.mock import patch
import tempfile, os
with tempfile.TemporaryDirectory() as tmpdir:
cfg_path = os.path.join(tmpdir, "config.toml")
with patch("config.CONFIG_PATH", cfg_path):
import config
cfg = config.load()
assert "diarization" in cfg
assert cfg["diarization"]["enabled"] is False
assert cfg["diarization"]["hf_token"] == ""
+40
@@ -0,0 +1,40 @@
from unittest.mock import MagicMock, patch
import pytest
def test_diarizer_returns_list_of_tuples(tmp_path):
"""Diarizer.diarize() returns [(start, end, speaker), ...]"""
wav = tmp_path / "test.wav"
wav.write_bytes(b"\x00" * 100)
mock_turn_1 = MagicMock()
mock_turn_1.start = 0.0
mock_turn_1.end = 2.5
mock_turn_2 = MagicMock()
mock_turn_2.start = 2.6
mock_turn_2.end = 5.0
mock_annotation = MagicMock()
mock_annotation.itertracks.return_value = [
(mock_turn_1, "A", "SPEAKER_00"),
(mock_turn_2, "B", "SPEAKER_01"),
]
mock_output = MagicMock()
mock_output.speaker_diarization = mock_annotation
mock_pipeline = MagicMock(return_value=mock_output)
import asyncio
from diarization import Diarizer
d = Diarizer.__new__(Diarizer)
d._pipeline = mock_pipeline
result = asyncio.run(d.diarize(str(wav)))
assert result == [(0.0, 2.5, "SPEAKER_00"), (2.6, 5.0, "SPEAKER_01")]
def test_diarizer_requires_hf_token():
from diarization import Diarizer
with pytest.raises(ValueError, match="hf_token"):
Diarizer(hf_token="")
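
diarization.py is likewise absent from this diff; a sketch of the Diarizer surface these tests exercise, including the pyannote 4.x wrapper handling, under the assumption that the pipeline is loaded lazily elsewhere:

import asyncio

class Diarizer:
    def __init__(self, hf_token: str):
        if not hf_token:
            raise ValueError("hf_token is required for pyannote models")
        self._hf_token = hf_token
        self._pipeline = None  # e.g. pyannote Pipeline.from_pretrained(..., token=hf_token)

    async def diarize(self, wav_path: str) -> list[tuple[float, float, str]]:
        loop = asyncio.get_running_loop()
        output = await loop.run_in_executor(None, lambda: self._pipeline(wav_path))
        # pyannote 4.x returns a wrapper object; older versions return the Annotation directly
        annotation = getattr(output, "speaker_diarization", output)
        return [
            (turn.start, turn.end, speaker)
            for turn, _, speaker in annotation.itertracks(yield_label=True)
        ]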
+44
@@ -35,3 +35,47 @@ async def test_list_models_returns_list():
client = OllamaClient(base_url="http://localhost:11434")
models = await client.list_models()
assert "gemma3:12b" in models
@pytest.mark.asyncio
async def test_identify_speakers_returns_dict():
import respx, httpx, json
from llm import OllamaClient
client = OllamaClient()
mapping = {"SPEAKER_00": "Thomas", "SPEAKER_01": "Möller"}
transcript_excerpt = "SPEAKER_00: Gut, Herr Möller.\nSPEAKER_01: Danke, Thomas."
with respx.mock:
respx.post("http://localhost:11434/api/generate").mock(
return_value=httpx.Response(200, json={"response": json.dumps(mapping)})
)
result = await client.identify_speakers(transcript_excerpt)
assert result == {"SPEAKER_00": "Thomas", "SPEAKER_01": "Möller"}
@pytest.mark.asyncio
async def test_identify_speakers_returns_empty_on_parse_failure():
import respx, httpx
from llm import OllamaClient
client = OllamaClient()
with respx.mock:
respx.post("http://localhost:11434/api/generate").mock(
return_value=httpx.Response(200, json={"response": "kein json hier"})
)
result = await client.identify_speakers("irgendwas")
assert result == {}
@pytest.mark.asyncio
async def test_summarize_returns_string():
import respx, httpx
from llm import OllamaClient
client = OllamaClient()
with respx.mock:
respx.post("http://localhost:11434/api/generate").mock(
return_value=httpx.Response(200, json={"response": "# Zusammenfassung\n\nKurzer Text."})
)
result = await client.summarize("Thomas: Hallo.\nMöller: Hi.", model="gemma3:12b")
assert "Zusammenfassung" in result
+50
@@ -58,3 +58,53 @@ def test_slugify():
from output import slugify
assert slugify("Mein erstes Diktat") == "mein-erstes-diktat"
assert slugify("test -- foo") == "test-foo"
def test_write_solo_docs_creates_three_files(tmp_path):
from output import write_solo_docs
from datetime import datetime
paths = write_solo_docs(
raw_text="Das ist der rohe Text vom Mikrofon.",
refined="# Projektstatus\n\nDas Projekt läuft gut.\n",
output_dir=str(tmp_path),
dt=datetime(2026, 4, 2, 15, 0),
title="Projektstatus Update",
tldr="Das Projekt läuft gut und ist im Zeitplan.",
)
assert set(paths.keys()) == {"index", "transkript", "zusammenfassung"}
assert all(os.path.exists(p) for p in paths.values())
index = open(paths["index"]).read()
assert "Projektstatus Update" in index
assert "transkript" in index
assert "zusammenfassung" in index
# transkript and zusammenfassung are in a subdir
assert os.path.dirname(paths["transkript"]) != str(tmp_path)
assert os.path.dirname(paths["index"]) == str(tmp_path)
# backlinks present
assert "Index" in open(paths["transkript"]).read()
assert "Index" in open(paths["zusammenfassung"]).read()
assert "Das ist der rohe Text" in open(paths["transkript"]).read()
assert "Projekt läuft gut" in open(paths["zusammenfassung"]).read()
def test_write_meeting_docs_creates_three_files(tmp_path):
from output import write_meeting_docs
from datetime import datetime
aligned = [("Thomas", "Gut, dann fangen wir an."), ("Möller", "Ich hab das vorbereitet.")]
paths = write_meeting_docs(
aligned_segments=aligned,
summary="# Meeting\n\n## Wichtigste Punkte\n- Budget besprochen",
speakers=["Thomas", "Möller"],
duration_min=5,
output_dir=str(tmp_path),
dt=datetime(2026, 4, 2, 14, 30),
)
assert len(paths) == 3
index_content = open(paths["index"]).read()
assert "Thomas" in index_content
assert "transkript" in index_content
transcript_content = open(paths["transkript"]).read()
assert "**Thomas:**" in transcript_content
assert "Gut, dann fangen wir an." in transcript_content
summary_content = open(paths["zusammenfassung"]).read()
assert "Budget besprochen" in summary_content
+73
@@ -1,4 +1,5 @@
import asyncio
import pytest
from unittest.mock import MagicMock
@@ -23,3 +24,75 @@ def test_transcribe_file_calls_whisper(tmp_path):
result = asyncio.run(eng.transcribe_file(str(wav), language="de"))
assert result == "Hallo Welt"
mock_model.transcribe.assert_called_once_with(str(wav), language="de")
@pytest.mark.asyncio
async def test_transcribe_uses_remote_when_base_url_set(tmp_path):
import wave, struct
wav = tmp_path / "test.wav"
with wave.open(str(wav), "wb") as wf:
wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(16000)
wf.writeframes(struct.pack("<100h", *([0] * 100)))
import respx, httpx
from transcription import TranscriptionEngine
eng = TranscriptionEngine()
with respx.mock:
respx.post("http://beastix:8000/v1/audio/transcriptions").mock(
return_value=httpx.Response(200, json={"text": "Hallo Welt"})
)
result = await eng.transcribe_file(
str(wav), language="de", model_name="large-v3",
device="auto", base_url="http://beastix:8000",
)
assert result == "Hallo Welt"
def test_transcribe_file_returns_segments_when_requested(tmp_path):
wav = tmp_path / "test.wav"
wav.write_bytes(b"\x00" * 100)
mock_model = MagicMock()
mock_seg = MagicMock()
mock_seg.text = " Hallo Welt"
mock_seg.start = 0.0
mock_seg.end = 1.5
mock_model.transcribe.return_value = ([mock_seg], MagicMock())
from transcription import TranscriptionEngine
eng = TranscriptionEngine()
eng._model = mock_model
result = asyncio.run(eng.transcribe_file(str(wav), language="de", with_segments=True))
assert isinstance(result, list)
assert result[0]["text"] == "Hallo Welt"
assert result[0]["start"] == 0.0
assert result[0]["end"] == 1.5
@pytest.mark.asyncio
async def test_transcribe_remote_returns_segments_when_requested(tmp_path):
import wave, struct
wav = tmp_path / "test.wav"
with wave.open(str(wav), "wb") as wf:
wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(16000)
wf.writeframes(struct.pack("<100h", *([0] * 100)))
import respx, httpx
from transcription import TranscriptionEngine
eng = TranscriptionEngine()
with respx.mock:
respx.post("http://beastix:8000/v1/audio/transcriptions").mock(
return_value=httpx.Response(200, json={
"text": "Hallo Welt",
"segments": [{"start": 0.0, "end": 1.5, "text": " Hallo Welt"}],
})
)
result = await eng.transcribe_file(
str(wav), language="de", model_name="large-v3",
device="auto", base_url="http://beastix:8000", with_segments=True,
)
assert isinstance(result, list)
assert result[0]["text"] == "Hallo Welt"
+98 -3
@@ -1,4 +1,6 @@
import asyncio
import httpx
from typing import Union
class TranscriptionEngine:
@@ -23,14 +25,107 @@ class TranscriptionEngine:
language: str = "de",
model_name: str = "large-v3",
device: str = "auto",
-) -> str:
-loop = asyncio.get_event_loop()
+base_url: str = "",
+with_segments: bool = False,
+backend: str = "openai",
+) -> Union[str, list[dict]]:
if base_url:
if backend == "whispercpp":
return await self._transcribe_remote_whispercpp(
audio_path, language, base_url, with_segments
)
return await self._transcribe_remote(
audio_path, language, model_name, base_url, with_segments
)
return await self._transcribe_local(
audio_path, language, model_name, device, with_segments
)
async def _transcribe_remote(
self,
audio_path: str,
language: str,
model_name: str,
base_url: str,
with_segments: bool,
) -> Union[str, list[dict]]:
async with httpx.AsyncClient(timeout=300) as client:
with open(audio_path, "rb") as f:
data = {"model": model_name, "language": language}
if with_segments:
data["timestamp_granularities[]"] = "segment"
data["response_format"] = "verbose_json"
r = await client.post(
f"{base_url}/v1/audio/transcriptions",
files={"file": ("audio.wav", f, "audio/wav")},
data=data,
)
r.raise_for_status()
body = r.json()
if not with_segments:
return body["text"]
raw_segs = body.get("segments") or []
if raw_segs:
return [
{"start": s["start"], "end": s["end"], "text": s["text"].strip()}
for s in raw_segs
]
return [{"start": 0.0, "end": 9999.0, "text": body["text"].strip()}]
async def _transcribe_remote_whispercpp(
self,
audio_path: str,
language: str,
base_url: str,
with_segments: bool,
) -> Union[str, list[dict]]:
async with httpx.AsyncClient(timeout=300) as client:
with open(audio_path, "rb") as f:
data = {
"language": language,
"temperature_inc": "0", # disable fallback to prevent repetition loops
}
if with_segments:
data["response_format"] = "verbose_json"
r = await client.post(
f"{base_url}/inference",
files={"file": ("audio.wav", f, "audio/wav")},
data=data,
)
r.raise_for_status()
body = r.json()
if not with_segments:
return body.get("text", "").strip()
raw_segs = body.get("segments") or []
if raw_segs:
return [
{"start": s["start"], "end": s["end"], "text": s["text"].strip()}
for s in raw_segs
]
return [{"start": 0.0, "end": 9999.0, "text": body.get("text", "").strip()}]
async def _transcribe_local(
self,
audio_path: str,
language: str,
model_name: str,
device: str,
with_segments: bool,
) -> Union[str, list[dict]]:
+loop = asyncio.get_running_loop()
model = self._get_model(model_name, device)
segments, _ = await loop.run_in_executor(
None,
lambda: model.transcribe(audio_path, language=language),
)
return "".join(seg.text for seg in segments).strip()
segments = list(segments)
if not with_segments:
return "".join(seg.text for seg in segments).strip()
return [
{"start": seg.start, "end": seg.end, "text": seg.text.strip()}
for seg in segments
if seg.text.strip()
]
engine = TranscriptionEngine()
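
Finally, a usage sketch for the backend dispatch, with hypothetical host and file names; the /inference endpoint and the temperature_inc form field are exactly as used in _transcribe_remote_whispercpp above:

import asyncio
from transcription import engine

async def demo():
    # remote whisper.cpp with segment timestamps; fallback decoding stays disabled
    segments = await engine.transcribe_file(
        "aufnahme.wav",
        language="de",
        base_url="http://beastix:8080",
        backend="whispercpp",
        with_segments=True,
    )
    for s in segments:
        print(f"[{s['start']:.1f}-{s['end']:.1f}] {s['text']}")

asyncio.run(demo())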