Compare commits
64 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 8ec9044c75 | |||
| 658f9be47f | |||
| d3582eaeb7 | |||
| 336628341b | |||
| 1cfb9c127b | |||
| fe8b8bb125 | |||
| ca10cbb20b | |||
| 180fe43df7 | |||
| 8ee11a31a1 | |||
| 06f7361004 | |||
| a37e09fb4e | |||
| 6f718f0753 | |||
| 348ce332c7 | |||
| 7e0851fc95 | |||
| 11dee75ab3 | |||
| b4e7e08918 | |||
| 04b655e664 | |||
| 251f9c238d | |||
| 1a61b53027 | |||
| c7cad4bb2a | |||
| 56d41b8620 | |||
| 5f384af6cf | |||
| 0eb85b98f1 | |||
| e04816fce6 | |||
| 37e432f7fa | |||
| dbb35ce71d | |||
| 033c1fc486 | |||
| 9b5b89e159 | |||
| b8cc8a3b33 | |||
| 1a9d0eacc2 | |||
| 47909637a8 | |||
| 7dfc0e0c5f | |||
| 7cd6c2a848 | |||
| 8d1af32ef3 | |||
| 80ce1aa77c | |||
| 52ba53bec4 | |||
| 0bdc0a5e42 | |||
| 81fbbfb56e | |||
| 2376bf5d71 | |||
| ff68827280 | |||
| 478a1ac9d0 | |||
| ef4aa2a840 | |||
| 5e7faa8844 | |||
| 8300851e77 | |||
| 912b333124 | |||
| 3f9abc6a89 | |||
| d8c6fc790b | |||
| ccdc75c74c | |||
| b74147967b | |||
| 2ab6e7d73b | |||
| 33ae9dc1d8 | |||
| 3673e28e73 | |||
| 1ab023c2ac | |||
| ae3ae61593 | |||
| aa3eef8fb1 | |||
| 0bb0975a09 | |||
| 629341930e | |||
| d60d6bb1be | |||
| b63f5d59a9 | |||
| 6e317a9c67 | |||
| 6574481647 | |||
| d9db41b0b4 | |||
| f133b9590c | |||
| 325cb2e6e7 |
@@ -0,0 +1,32 @@
|
||||
def align_segments(
|
||||
whisper_segs: list[dict],
|
||||
speaker_segs: list[tuple[float, float, str]],
|
||||
) -> list[tuple[str, str]]:
|
||||
"""Assign each Whisper segment to the speaker with the greatest time overlap.
|
||||
Consecutive segments from the same speaker are merged into one block."""
|
||||
result: list[tuple[str, str]] = []
|
||||
for seg in whisper_segs:
|
||||
speaker = _best_speaker(seg["start"], seg["end"], speaker_segs)
|
||||
text = seg["text"].strip()
|
||||
if not text:
|
||||
continue
|
||||
if result and result[-1][0] == speaker:
|
||||
result[-1] = (speaker, result[-1][1] + " " + text)
|
||||
else:
|
||||
result.append((speaker, text))
|
||||
return result
|
||||
|
||||
|
||||
def _best_speaker(
|
||||
start: float,
|
||||
end: float,
|
||||
speaker_segs: list[tuple[float, float, str]],
|
||||
) -> str:
|
||||
best_label = "SPEAKER_00"
|
||||
best_overlap = 0.0
|
||||
for s_start, s_end, label in speaker_segs:
|
||||
overlap = max(0.0, min(end, s_end) - max(start, s_start))
|
||||
if overlap > best_overlap:
|
||||
best_overlap = overlap
|
||||
best_label = label
|
||||
return best_label
|
||||
+142
-31
@@ -1,12 +1,18 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
import traceback
|
||||
from datetime import datetime
|
||||
|
||||
from api.state import state, Status
|
||||
from api.router import broadcast
|
||||
from config import load as load_config
|
||||
from transcription import engine as transcription_engine
|
||||
from llm import OllamaClient
|
||||
from output import save_transcript
|
||||
from api.router import broadcast
|
||||
from output import write_solo_docs, write_meeting_docs
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def run_pipeline():
|
||||
@@ -17,6 +23,8 @@ async def run_pipeline():
|
||||
|
||||
output_dir = getattr(state, "_recording_output_dir", cfg["output"]["path"])
|
||||
instructions = getattr(state, "_recording_instructions", "")
|
||||
diar_cfg = cfg.get("diarization", {})
|
||||
use_diarization = diar_cfg.get("enabled") and diar_cfg.get("hf_token")
|
||||
|
||||
recorder.stop()
|
||||
await state.set_status(Status.PROCESSING)
|
||||
@@ -28,37 +36,14 @@ async def run_pipeline():
|
||||
wav_path = f.name
|
||||
recorder.save_wav(wav_path)
|
||||
|
||||
raw_text = await transcription_engine.transcribe_file(
|
||||
wav_path,
|
||||
language=cfg["whisper"]["language"],
|
||||
model_name=cfg["whisper"]["model"],
|
||||
device=cfg["whisper"]["device"],
|
||||
)
|
||||
await broadcast({"event": "transcribed", "raw": raw_text})
|
||||
|
||||
client = OllamaClient(base_url=cfg["ollama"]["base_url"])
|
||||
refined = await client.refine(
|
||||
raw_text=raw_text,
|
||||
instructions=instructions,
|
||||
model=cfg["ollama"]["model"],
|
||||
)
|
||||
await broadcast({"event": "refined", "markdown": refined})
|
||||
|
||||
title = "Diktat"
|
||||
for line in refined.splitlines():
|
||||
if line.startswith("# "):
|
||||
title = line[2:].strip()
|
||||
break
|
||||
|
||||
path = save_transcript(
|
||||
title=title,
|
||||
content=refined,
|
||||
output_dir=output_dir,
|
||||
)
|
||||
await broadcast({"event": "saved", "path": path, "title": title})
|
||||
await state.set_status(Status.IDLE)
|
||||
if use_diarization:
|
||||
await _run_meeting_pipeline(cfg, wav_path, output_dir, instructions, diar_cfg)
|
||||
else:
|
||||
await _run_solo_pipeline(cfg, wav_path, output_dir, instructions)
|
||||
|
||||
except Exception as e:
|
||||
tb = traceback.format_exc()
|
||||
logger.error("Pipeline error:\n%s", tb)
|
||||
state.last_error = str(e)
|
||||
await state.set_status(Status.ERROR)
|
||||
await broadcast({"event": "error", "message": str(e)})
|
||||
@@ -66,8 +51,134 @@ async def run_pipeline():
|
||||
state.recording_user = None
|
||||
state._recording_output_dir = None
|
||||
state._recording_instructions = ""
|
||||
state._speakers_event = None
|
||||
state._pending_aligned_segments = None
|
||||
state._speaker_names = None
|
||||
if wav_path:
|
||||
try:
|
||||
os.unlink(wav_path)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
async def _run_solo_pipeline(cfg, wav_path, output_dir, instructions):
    """Original single-document pipeline (no diarization).

    Transcribes *wav_path*, then asks the LLM for a punctuated version and
    a refined version concurrently, derives a title and TL;DR, writes the
    documents via ``write_solo_docs`` and broadcasts progress events to all
    connected websocket clients.
    """
    # Speech-to-text; backend/base_url select local vs. remote processing.
    raw_text = await transcription_engine.transcribe_file(
        wav_path,
        language=cfg["whisper"]["language"],
        model_name=cfg["whisper"]["model"],
        device=cfg["whisper"]["device"],
        base_url=cfg["whisper"].get("base_url", ""),
        backend=cfg["whisper"].get("backend", "openai"),
    )
    await broadcast({"event": "transcribed", "raw": raw_text})

    # Punctuation cleanup and full refinement are independent LLM calls,
    # so run them in parallel.
    client = OllamaClient(base_url=cfg["ollama"]["base_url"])
    punctuated, refined = await asyncio.gather(
        client.punctuate(raw_text, model=cfg["ollama"]["model"]),
        client.refine(raw_text=raw_text, instructions=instructions, model=cfg["ollama"]["model"]),
    )
    title, tldr = await client.generate_title_and_tldr(refined, model=cfg["ollama"]["model"])

    # Persist both documents and notify the UI that we are done.
    dt = datetime.now()
    paths = write_solo_docs(raw_text=punctuated, refined=refined, output_dir=output_dir, dt=dt, title=title, tldr=tldr)

    await state.set_status(Status.IDLE)
    await broadcast({
        "event": "saved",
        "path": paths["index"],
        "title": title,
        "paths": paths,
    })
|
||||
|
||||
|
||||
async def _run_meeting_pipeline(cfg, wav_path, output_dir, instructions, diar_cfg):
    """Diarization pipeline: 3 documents, speaker identification.

    Runs Whisper (with segment timestamps) and speaker diarization
    concurrently, aligns the two, lets the LLM — or, failing that, the user
    via the UI — name the speakers, then writes the meeting documents.
    """
    from diarization import Diarizer
    from alignment import align_segments

    # Transcription and diarization are independent — run them in parallel.
    diarizer = Diarizer(hf_token=diar_cfg["hf_token"])
    whisper_task = asyncio.create_task(
        transcription_engine.transcribe_file(
            wav_path,
            language=cfg["whisper"]["language"],
            model_name=cfg["whisper"]["model"],
            device=cfg["whisper"]["device"],
            base_url=cfg["whisper"].get("base_url", ""),
            backend=cfg["whisper"].get("backend", "openai"),
            with_segments=True,
        )
    )
    diar_task = asyncio.create_task(diarizer.diarize(wav_path))
    whisper_segs, speaker_segs = await asyncio.gather(whisper_task, diar_task)

    aligned = align_segments(whisper_segs, speaker_segs)
    await broadcast({"event": "transcribed", "raw": " ".join(t for _, t in aligned)})

    # First attempt: let the LLM identify speakers from a short excerpt.
    excerpt = "\n".join(f"{s}: {t}" for s, t in aligned[:20])
    client = OllamaClient(base_url=cfg["ollama"]["base_url"])
    name_map = await client.identify_speakers(excerpt, model=cfg["ollama"]["model"])

    if not name_map:
        # LLM could not name the speakers — pause the pipeline and ask the
        # user via websocket; the /speakers endpoint sets the event to resume.
        excerpts_per_speaker = _build_excerpts(aligned)
        state._speakers_event = asyncio.Event()
        state._pending_aligned_segments = aligned
        await state.set_status(Status.AWAITING_SPEAKERS)
        await broadcast({"event": "speakers_unknown", "speakers": [
            {"id": spk, "excerpts": exs}
            for spk, exs in excerpts_per_speaker.items()
        ]})
        await state._speakers_event.wait()
        name_map = state._speaker_names or {}

    def resolve(label):
        # Map a SPEAKER_xx label to a human name, falling back to a
        # generic numbered name when no mapping was provided.
        name = name_map.get(label, "")
        if name:
            return name
        num = label.replace("SPEAKER_", "").lstrip("0") or "1"
        return f"Sprecher {num}"

    named_aligned = [(resolve(spk), text) for spk, text in aligned]
    speakers = sorted({spk for spk, _ in named_aligned})

    # Spoken duration = sum of segment lengths, rounded to whole minutes,
    # floored at 1 so short recordings don't show "0 min".
    total_secs = sum(s["end"] - s["start"] for s in whisper_segs) if whisper_segs else 0
    duration_min = max(1, round(total_secs / 60))

    raw_transcript = "\n\n".join(f"**{spk}:** {txt}" for spk, txt in named_aligned)
    # Summary and punctuation cleanup are independent LLM calls.
    summary, punctuated_transcript = await asyncio.gather(
        client.summarize(raw_transcript, model=cfg["ollama"]["model"]),
        client.punctuate(raw_transcript, model=cfg["ollama"]["model"]),
    )
    title, tldr = await client.generate_title_and_tldr(summary, model=cfg["ollama"]["model"])

    dt = datetime.now()
    paths = write_meeting_docs(
        aligned_segments=named_aligned,
        summary=summary,
        speakers=speakers,
        duration_min=duration_min,
        output_dir=output_dir,
        dt=dt,
        title=title,
        tldr=tldr,
        transcript_text=punctuated_transcript,
    )

    await state.set_status(Status.IDLE)
    await broadcast({
        "event": "saved",
        "path": paths["index"],
        "title": title,
        "meeting": True,
        "paths": paths,
    })
|
||||
|
||||
|
||||
def _build_excerpts(aligned: list[tuple[str, str]], max_per_speaker: int = 4) -> dict[str, list[str]]:
|
||||
"""Build a dict of speaker → list of text excerpts."""
|
||||
from collections import defaultdict
|
||||
buckets: dict[str, list[str]] = defaultdict(list)
|
||||
for spk, text in aligned:
|
||||
if len(buckets[spk]) < max_per_speaker:
|
||||
buckets[spk].append(text[:200])
|
||||
return dict(buckets)
|
||||
|
||||
+254
-11
@@ -6,12 +6,22 @@ from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Depends, HTTPExce
|
||||
|
||||
from api.state import state, Status
|
||||
from config import load as load_config
|
||||
from output import list_transcripts
|
||||
from output import list_transcripts, read_transcript
|
||||
|
||||
router = APIRouter()
|
||||
_ws_clients: list[WebSocket] = []
|
||||
|
||||
|
||||
def _guest_user() -> dict:
    """Return the first registered user — used for hotkey/tray-triggered recordings.

    Raises:
        RuntimeError: when no user account has been created yet.
    """
    from auth import _load_users
    users = _load_users()
    if not users:
        raise RuntimeError("Kein Nutzer eingerichtet")
    # Takes the first entry of the users mapping — presumably the
    # first-created account; confirm _load_users preserves that order.
    username, data = next(iter(users.items()))
    return {"username": username, "output_dir": data["output_dir"], "is_admin": data.get("is_admin", False)}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Auth dependency
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -54,13 +64,41 @@ async def logout(authorization: Optional[str] = Header(None)):
|
||||
return {"ok": True}
|
||||
|
||||
|
||||
@router.get("/setup")
async def setup_page():
    """Serve the first-run setup page, or redirect home once a user exists."""
    from fastapi.responses import FileResponse
    from auth import has_users
    from pathlib import Path
    if has_users():
        # Setup is one-shot: once the first account exists, go to the app.
        from fastapi.responses import RedirectResponse
        return RedirectResponse("/")
    return FileResponse(str(Path(__file__).parent.parent / "frontend" / "setup.html"))
|
||||
|
||||
|
||||
@router.post("/setup")
async def setup_post(body: dict):
    """Create the initial account (only allowed while no user exists yet).

    Expects ``username``, ``password`` (min. 6 characters) and an optional
    ``output_dir``; falls back to the configured default output path.
    Responds 403 once setup has already been completed.
    """
    from auth import has_users, create_user
    from config import load as load_config
    if has_users():
        raise HTTPException(status_code=403, detail="Bereits eingerichtet")
    username = body.get("username", "").strip()
    password = body.get("password", "")
    if not username or len(password) < 6:
        raise HTTPException(status_code=400, detail="Ungültige Eingabe")
    cfg = load_config()
    default_dir = cfg["output"]["path"]
    output_dir = body.get("output_dir") or default_dir
    # The very first account is always an admin.
    create_user(username, password, output_dir, is_admin=True)
    return {"ok": True}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Protected endpoints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@router.get("/status")
|
||||
async def get_status(user: dict = Depends(current_user)):
|
||||
return {"status": state.status, "username": user["username"]}
|
||||
return {"status": state.status, "username": user["username"], "is_admin": user.get("is_admin", False)}
|
||||
|
||||
|
||||
@router.post("/toggle")
|
||||
@@ -69,9 +107,14 @@ async def toggle_recording(user: dict = Depends(current_user)):
|
||||
if state.status == Status.RECORDING:
|
||||
asyncio.create_task(run_pipeline())
|
||||
return {"action": "stopped"}
|
||||
if state.status == Status.ERROR:
|
||||
await state.set_status(Status.IDLE)
|
||||
return {"action": "reset"}
|
||||
if state.status == Status.IDLE:
|
||||
from audio import AudioRecorder
|
||||
state._recorder = AudioRecorder()
|
||||
cfg = load_config()
|
||||
audio_device = cfg.get("audio", {}).get("device") or None
|
||||
state._recorder = AudioRecorder(device=audio_device)
|
||||
state._recorder.start()
|
||||
state.recording_user = user["username"]
|
||||
state._recording_output_dir = os.path.join(user["output_dir"], user["username"])
|
||||
@@ -93,6 +136,58 @@ async def get_transcripts(user: dict = Depends(current_user)):
|
||||
return list_transcripts(user_dir)
|
||||
|
||||
|
||||
@router.get("/transcripts/{filename:path}")
async def get_transcript(filename: str, user: dict = Depends(current_user)):
    """Return the raw markdown of one transcript as plain text.

    404 when the file cannot be read from the user's own directory.
    NOTE(review): *filename* is forwarded to read_transcript unvalidated —
    confirm that helper rejects path traversal ("..") itself.
    """
    from fastapi.responses import PlainTextResponse
    user_dir = os.path.join(user["output_dir"], user["username"])
    content = read_transcript(user_dir, filename)
    if content is None:
        raise HTTPException(status_code=404, detail="Nicht gefunden")
    return PlainTextResponse(content)
|
||||
|
||||
|
||||
@router.post("/transcripts/{filename:path}/reprocess")
async def reprocess_transcript(filename: str, body: dict, user: dict = Depends(current_user)):
    """Re-run the LLM refinement over an existing transcript file.

    Strips any YAML frontmatter, sends the remaining text to the LLM with
    the caller's ``instructions``, then overwrites the same file (filename
    stays stable, the frontmatter date is updated).  Returns the refined
    markdown as plain text; 404 when the file does not exist or the name
    is not a safe markdown path below the user's directory.
    """
    from output import read_transcript
    from fastapi.responses import PlainTextResponse
    from llm import OllamaClient
    user_dir = os.path.join(user["output_dir"], user["username"])
    # Reject path traversal before writing — same rule as delete_transcript:
    # at most one subdirectory level, no "."/".." parts, markdown files only.
    parts = filename.split("/")
    if len(parts) > 2 or any(p in (".", "..") or not p for p in parts) or not filename.endswith(".md"):
        raise HTTPException(status_code=404, detail="Nicht gefunden")
    content = read_transcript(user_dir, filename)
    if content is None:
        raise HTTPException(status_code=404, detail="Nicht gefunden")
    # Strip YAML frontmatter before sending to LLM
    body_text = content
    if content.startswith("---\n"):
        end = content.find("\n---\n", 4)
        if end != -1:
            body_text = content[end + 5:].lstrip("\n")
    cfg = load_config()
    instructions = body.get("instructions", "")
    client = OllamaClient(base_url=cfg["ollama"]["base_url"])
    refined = await client.refine(body_text, instructions=instructions, model=cfg["ollama"]["model"])
    # Overwrite same file (keep filename stable, update frontmatter date)
    from datetime import datetime
    path = os.path.join(user_dir, filename)
    with open(path, "w", encoding="utf-8") as f:
        now = datetime.now()
        f.write(f"---\ndate: {now.isoformat(timespec='seconds')}\ntags: [transkript]\n---\n\n")
        f.write(refined if refined.endswith("\n") else refined + "\n")
    return PlainTextResponse(refined)
|
||||
|
||||
|
||||
@router.delete("/transcripts/{filename:path}")
async def delete_transcript(filename: str, user: dict = Depends(current_user)):
    """Delete one of the caller's transcript files.

    Only markdown files at most one directory level below the user's own
    output directory may be removed; anything else yields 404.
    """
    user_dir = os.path.join(user["output_dir"], user["username"])
    segments = filename.split("/")
    too_deep = len(segments) > 2
    unsafe = any(seg in (".", "..") or not seg for seg in segments)
    if too_deep or unsafe or not filename.endswith(".md"):
        raise HTTPException(status_code=404, detail="Nicht gefunden")
    target = os.path.join(user_dir, filename)
    if not os.path.exists(target):
        raise HTTPException(status_code=404, detail="Nicht gefunden")
    os.unlink(target)
    return {"ok": True}
|
||||
|
||||
|
||||
@router.get("/config")
async def get_config(user: dict = Depends(current_user)):
    """Return the full current config (readable by any authenticated user)."""
    return load_config()
|
||||
@@ -102,25 +197,173 @@ async def get_config(user: dict = Depends(current_user)):
|
||||
async def put_config(body: dict, user: dict = Depends(current_user)):
|
||||
if not user.get("is_admin"):
|
||||
raise HTTPException(status_code=403, detail="Nur Administratoren können die Config ändern")
|
||||
from config import _deep_merge, CONFIG_PATH
|
||||
import tomli_w
|
||||
cfg = load_config()
|
||||
cfg.update(body)
|
||||
return cfg
|
||||
merged = _deep_merge(cfg, body)
|
||||
os.makedirs(os.path.dirname(CONFIG_PATH), exist_ok=True)
|
||||
with open(CONFIG_PATH, "wb") as f:
|
||||
tomli_w.dump(merged, f)
|
||||
return merged
|
||||
|
||||
|
||||
@router.post("/open")
|
||||
async def open_file(body: dict, user: dict = Depends(current_user)):
|
||||
import subprocess
|
||||
path = body.get("path", "")
|
||||
# Only allow opening files within the user's own output directory
|
||||
import subprocess, shutil
|
||||
user_dir = os.path.join(user["output_dir"], user["username"])
|
||||
if path and os.path.exists(path) and os.path.abspath(path).startswith(os.path.abspath(user_dir)):
|
||||
subprocess.Popen(["xdg-open", path])
|
||||
abs_user_dir = os.path.abspath(user_dir)
|
||||
|
||||
# Accept either a single path or a list of paths (for 3-file recordings)
|
||||
raw_paths = body.get("paths") or ([body.get("path")] if body.get("path") else [])
|
||||
paths = [p for p in raw_paths if p and os.path.exists(p) and os.path.abspath(p).startswith(abs_user_dir)]
|
||||
if not paths:
|
||||
return {"ok": False}
|
||||
|
||||
mode = body.get("mode", "editor") # "editor" | "folder" | "obsidian"
|
||||
if mode == "obsidian":
|
||||
from urllib.parse import quote
|
||||
cfg = load_config()
|
||||
vault = cfg.get("obsidian", {}).get("vault", "").strip()
|
||||
# If only the index was passed, also include siblings from subdir
|
||||
all_paths = list(paths)
|
||||
for p in paths:
|
||||
if p.endswith("-index.md"):
|
||||
base = os.path.basename(p)[: -len("-index.md")]
|
||||
subdir = os.path.join(os.path.dirname(p), base)
|
||||
for suffix in ("-transkript.md", "-zusammenfassung.md"):
|
||||
sibling = os.path.join(subdir, base + suffix)
|
||||
if os.path.exists(sibling) and sibling not in all_paths:
|
||||
all_paths.append(sibling)
|
||||
|
||||
open_target = all_paths[0]
|
||||
if vault and os.path.isdir(vault):
|
||||
# Mirror directory structure: index → vault root, others → vault/{base}/
|
||||
for p in all_paths:
|
||||
rel = os.path.relpath(p, abs_user_dir)
|
||||
dest = os.path.join(vault, rel)
|
||||
os.makedirs(os.path.dirname(dest), exist_ok=True)
|
||||
shutil.copy2(p, dest)
|
||||
open_target = os.path.join(vault, os.path.relpath(all_paths[0], abs_user_dir))
|
||||
vault_name = os.path.basename(vault.rstrip("/")) if vault else ""
|
||||
file_name = os.path.basename(open_target)
|
||||
if vault_name:
|
||||
uri = f"obsidian://open?vault={quote(vault_name)}&file={quote(file_name)}"
|
||||
else:
|
||||
uri = f"obsidian://open?path={quote(open_target, safe='/')}"
|
||||
obsidian_bin = shutil.which("obsidian") or "/usr/bin/obsidian"
|
||||
subprocess.Popen([obsidian_bin, uri])
|
||||
elif mode == "folder" and shutil.which("dolphin"):
|
||||
subprocess.Popen(["dolphin", "--select", paths[0]])
|
||||
elif mode == "folder":
|
||||
subprocess.Popen(["xdg-open", os.path.dirname(paths[0])])
|
||||
else:
|
||||
subprocess.Popen(["xdg-open", paths[0]])
|
||||
return {"ok": True}
|
||||
|
||||
|
||||
def _pactl_source_for_sd_name(sd_name: str) -> str:
|
||||
"""Map a sounddevice device name to its pactl source name via description matching.
|
||||
sounddevice strips the 'Monitor of ' prefix from pactl source descriptions.
|
||||
Falls back to sd_name if no match found."""
|
||||
import subprocess
|
||||
try:
|
||||
out = subprocess.check_output(
|
||||
["pactl", "list", "sources"], stderr=subprocess.DEVNULL, timeout=5
|
||||
).decode()
|
||||
current_name = None
|
||||
for line in out.splitlines():
|
||||
line = line.strip()
|
||||
if line.startswith("Name:"):
|
||||
current_name = line.split(":", 1)[1].strip()
|
||||
elif line.startswith("Description:") and current_name:
|
||||
desc = line.split(":", 1)[1].strip().removeprefix("Monitor of ")
|
||||
if desc == sd_name:
|
||||
return current_name
|
||||
current_name = None
|
||||
except Exception:
|
||||
pass
|
||||
return sd_name
|
||||
|
||||
|
||||
@router.get("/audio/devices")
async def list_audio_devices(user: dict = Depends(current_user)):
    """List input-capable audio devices via sounddevice (admin only).

    Returns ``[{"index": i, "name": ...}, ...]``; 500 when sounddevice
    cannot enumerate devices at all.
    """
    import sounddevice as sd
    if not user.get("is_admin"):
        raise HTTPException(status_code=403, detail="Nur Administratoren")
    try:
        devices = [
            {"index": i, "name": d["name"]}
            for i, d in enumerate(sd.query_devices())
            if d["max_input_channels"] > 0
        ]
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"sounddevice fehlgeschlagen: {e}")
    return devices
|
||||
|
||||
|
||||
@router.post("/audio/combined")
async def create_combined_source(body: dict, user: dict = Depends(current_user)):
    """Create a combined recording source from a mic and a monitor (admin only).

    Loads three PipeWire/Pulse modules via pactl — a null sink plus two
    loopbacks feeding it — so microphone and system audio can be recorded
    together, and persists the module ids to disk so they can be unloaded
    later.  Returns the new device name and the loaded module ids.
    """
    import subprocess, json, pathlib
    if not user.get("is_admin"):
        raise HTTPException(status_code=403, detail="Nur Administratoren")
    mic_sd = body.get("mic", "")
    monitor_sd = body.get("monitor", "")
    if not mic_sd or not monitor_sd:
        raise HTTPException(status_code=400, detail="mic und monitor erforderlich")
    # Map sounddevice names → pactl source names for loopback commands
    mic = _pactl_source_for_sd_name(mic_sd)
    monitor = _pactl_source_for_sd_name(monitor_sd)
    # Validate pactl names exist
    out = subprocess.check_output(
        ["pactl", "list", "sources", "short"], stderr=subprocess.DEVNULL, timeout=5
    ).decode()
    known = {line.split("\t")[1] for line in out.strip().splitlines() if "\t" in line}
    if mic not in known or monitor not in known:
        raise HTTPException(status_code=400, detail="Unbekanntes Audio-Device")
    # Use description without spaces so sounddevice name == sink_name
    sink_id = subprocess.check_output([
        "pactl", "load-module", "module-null-sink",
        "sink_name=transkriptor-combined",
        "sink_properties=device.description=transkriptor-combined",
    ], timeout=5).decode().strip()
    mic_id = subprocess.check_output([
        "pactl", "load-module", "module-loopback",
        f"source={mic}", "sink=transkriptor-combined",
    ], timeout=5).decode().strip()
    mon_id = subprocess.check_output([
        "pactl", "load-module", "module-loopback",
        f"source={monitor}", "sink=transkriptor-combined",
    ], timeout=5).decode().strip()
    state_path = pathlib.Path(
        os.path.expanduser("~/.config/tueit-transcriber/pipewire-modules.json")
    )
    state_path.parent.mkdir(parents=True, exist_ok=True)
    # pactl prints the new module id on stdout.
    ids = [int(sink_id), int(mic_id), int(mon_id)]
    # Store pactl names for restore, sounddevice name as device
    state_path.write_text(json.dumps({"ids": ids, "mic": mic, "monitor": monitor}))
    return {"device": "transkriptor-combined", "module_ids": ids}
|
||||
|
||||
|
||||
@router.get("/settings")
async def settings_page_route():
    """Serve the static settings page (admin gating happens in the APIs it calls)."""
    from fastapi.responses import FileResponse
    from pathlib import Path
    return FileResponse(str(Path(__file__).parent.parent / "frontend" / "settings.html"))
|
||||
|
||||
|
||||
@router.post("/speakers")
async def post_speakers(body: dict, user: dict = Depends(current_user)):
    """Resolve a pending speaker-name assignment from the UI.

    The meeting pipeline blocks on ``state._speakers_event`` while the user
    maps SPEAKER_xx labels to names; this endpoint stores the mapping and
    wakes the pipeline.  409 when no assignment is pending.
    """
    if state._speakers_event is None:
        raise HTTPException(status_code=409, detail="Keine ausstehende Sprecher-Zuordnung")
    state._speaker_names = {k: v for k, v in body.items() if isinstance(k, str)}
    state._speakers_event.set()
    return {"ok": True}
|
||||
|
||||
|
||||
@router.websocket("/ws")
|
||||
async def websocket_endpoint(ws: WebSocket, token: str = ""):
|
||||
async def websocket_endpoint(ws: WebSocket):
|
||||
from auth import get_user_for_token
|
||||
token = ws.query_params.get("token", "")
|
||||
user = get_user_for_token(token)
|
||||
if not user:
|
||||
await ws.close(code=4001)
|
||||
|
||||
+6
-1
@@ -8,15 +8,20 @@ class Status(str, Enum):
|
||||
IDLE = "idle"
|
||||
RECORDING = "recording"
|
||||
PROCESSING = "processing"
|
||||
AWAITING_SPEAKERS = "awaiting_speakers"
|
||||
ERROR = "error"
|
||||
|
||||
|
||||
@dataclass
|
||||
class AppState:
|
||||
status: Status = Status.IDLE
|
||||
recording_user: str | None = None # which user triggered the current recording
|
||||
recording_user: str | None = None
|
||||
last_error: str | None = None
|
||||
_listeners: list[Callable] = field(default_factory=list, repr=False)
|
||||
# Diarization pipeline pause
|
||||
_speakers_event: asyncio.Event | None = None
|
||||
_pending_aligned_segments: list[tuple[str, str]] | None = None
|
||||
_speaker_names: dict[str, str] | None = None
|
||||
|
||||
def subscribe(self, callback: Callable):
|
||||
self._listeners.append(callback)
|
||||
|
||||
@@ -4,8 +4,9 @@ import numpy as np
|
||||
|
||||
|
||||
class AudioRecorder:
|
||||
def __init__(self, sample_rate: int = 16000):
|
||||
def __init__(self, sample_rate: int = 48000, device: str | None = None):
|
||||
self.sample_rate = sample_rate
|
||||
self.device = device or None
|
||||
self._buffer: list[np.ndarray] = []
|
||||
self._stream = None
|
||||
self.is_recording = False
|
||||
@@ -25,6 +26,7 @@ class AudioRecorder:
|
||||
channels=1,
|
||||
dtype="int16",
|
||||
callback=self._callback,
|
||||
device=self.device,
|
||||
)
|
||||
self._stream.start()
|
||||
|
||||
|
||||
@@ -12,6 +12,11 @@ DEFAULTS = {
|
||||
"model": "large-v3",
|
||||
"language": "de",
|
||||
"device": "auto", # "auto" = use GPU if ROCm available, else CPU
|
||||
"base_url": "",
|
||||
"backend": "openai", # "openai" = OpenAI-compatible API, "whispercpp" = whisper.cpp /inference
|
||||
},
|
||||
"audio": {
|
||||
"device": "",
|
||||
},
|
||||
"server": {
|
||||
"port": 8765,
|
||||
@@ -24,6 +29,13 @@ DEFAULTS = {
|
||||
"network": {
|
||||
"host": "127.0.0.1",
|
||||
},
|
||||
"diarization": {
|
||||
"enabled": False,
|
||||
"hf_token": "",
|
||||
},
|
||||
"obsidian": {
|
||||
"vault": "",
|
||||
},
|
||||
"pid_file": os.path.expanduser("~/.local/run/tueit-transcriber.pid"),
|
||||
}
|
||||
|
||||
@@ -56,6 +68,8 @@ def _write_defaults():
|
||||
with open(CONFIG_PATH, "w") as f:
|
||||
f.write("# tüit Transkriptor config\n\n")
|
||||
f.write('[ollama]\nbase_url = "http://localhost:11434"\nmodel = "gemma3:12b"\n\n')
|
||||
f.write('[whisper]\nmodel = "large-v3"\nlanguage = "de"\ndevice = "auto"\n\n')
|
||||
f.write('[whisper]\nmodel = "large-v3"\nlanguage = "de"\ndevice = "auto"\nbase_url = ""\n\n')
|
||||
f.write('[audio]\ndevice = ""\n\n')
|
||||
f.write('[server]\nport = 8765\n\n')
|
||||
f.write(f'[output]\npath = "{DEFAULTS["output"]["path"]}"\n')
|
||||
f.write(f'[output]\npath = "{DEFAULTS["output"]["path"]}"\n\n')
|
||||
f.write('[diarization]\nenabled = false\nhf_token = ""\n\n')
|
||||
|
||||
@@ -0,0 +1,29 @@
|
||||
import asyncio
|
||||
|
||||
|
||||
class Diarizer:
    """Thin async wrapper around the pyannote speaker-diarization pipeline."""

    def __init__(self, hf_token: str):
        """Store the HuggingFace token; the model itself is loaded lazily."""
        if not hf_token:
            raise ValueError("hf_token is required for diarization")
        self._hf_token = hf_token
        self._pipeline = None

    def _load_pipeline(self):
        """Load the pretrained pipeline on first use and cache it."""
        if self._pipeline is not None:
            return self._pipeline
        from pyannote.audio import Pipeline
        self._pipeline = Pipeline.from_pretrained(
            "pyannote/speaker-diarization-3.1",
            token=self._hf_token,
        )
        return self._pipeline

    async def diarize(self, wav_path: str) -> list[tuple[float, float, str]]:
        """Diarize *wav_path* and return (start, end, speaker_label) turns.

        The blocking model load and inference both run in the default
        executor so the event loop stays responsive.
        """
        loop = asyncio.get_running_loop()
        pipeline = await loop.run_in_executor(None, self._load_pipeline)
        result = await loop.run_in_executor(None, lambda: pipeline(wav_path))
        # pyannote 4.x returns DiarizeOutput; older versions return Annotation directly
        annotation = getattr(result, "speaker_diarization", result)
        turns: list[tuple[float, float, str]] = []
        for segment, _, label in annotation.itertracks(yield_label=True):
            turns.append((segment.start, segment.end, label))
        return turns
|
||||
+194
@@ -0,0 +1,194 @@
|
||||
# tüit Transkriptor — Setup-Anleitung
|
||||
|
||||
## Architektur
|
||||
|
||||
```
|
||||
┌─────────────────────────┐ ┌──────────────────────────────┐
|
||||
│ Client (Linux) │ │ Beastix (LAN-Server) │
|
||||
│ │ WAV │ │
|
||||
│ Transkriptor-App │ ──────► │ faster-whisper-server :8000 │
|
||||
│ Browser-UI :8765 │ Text │ Ollama :11434 │
|
||||
│ │ ◄────── │ │
|
||||
└─────────────────────────┘ └──────────────────────────────┘
|
||||
```
|
||||
|
||||
Jeder Client läuft komplett unabhängig. Die KI-Verarbeitung (Whisper + Ollama) kann
|
||||
entweder lokal oder auf Beastix ausgeführt werden — konfigurierbar über die
|
||||
Einstellungsseite.
|
||||
|
||||
---
|
||||
|
||||
## Beastix (Server-Setup, einmalig)
|
||||
|
||||
### 1. whisper.cpp mit ROCm/GPU kompilieren
|
||||
|
||||
Voraussetzung: ROCm installiert (Arch: `sudo pacman -S rocm-hip-sdk`).
|
||||
|
||||
```bash
|
||||
mkdir -p ~/src && cd ~/src
|
||||
git clone https://github.com/ggml-org/whisper.cpp.git --depth=1
|
||||
cd whisper.cpp
|
||||
|
||||
# Für AMD RX 6800 XT (gfx1030) — gfx-Target ggf. anpassen
|
||||
cmake -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release -DWHISPER_BUILD_SERVER=ON
|
||||
cmake --build build -j$(nproc)
|
||||
|
||||
# Modell large-v3 herunterladen (~2.9 GB)
|
||||
bash models/download-ggml-model.sh large-v3
|
||||
```
|
||||
|
||||
`gfx1030` = RX 6800 XT. Andere AMD GPUs: `rocminfo | grep gfx`
|
||||
|
||||
### 2. Als systemd-User-Service einrichten
|
||||
|
||||
```bash
|
||||
cat > ~/.config/systemd/user/whisper-cpp-server.service << 'EOF'
|
||||
[Unit]
|
||||
Description=whisper.cpp Server (ROCm/GPU)
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
ExecStart=%h/src/whisper.cpp/build/bin/whisper-server \
|
||||
--host 0.0.0.0 \
|
||||
--port 8080 \
|
||||
--model %h/src/whisper.cpp/models/ggml-large-v3.bin \
|
||||
--language de \
|
||||
--threads 4 \
|
||||
--convert
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
|
||||
[Install]
|
||||
WantedBy=default.target
|
||||
EOF
|
||||
|
||||
systemctl --user daemon-reload
|
||||
systemctl --user enable --now whisper-cpp-server.service
|
||||
```
|
||||
|
||||
Logs prüfen: `journalctl --user -u whisper-cpp-server -f`
|
||||
GPU-Nutzung bestätigt wenn in den Logs steht: `using ROCm0 backend`
|
||||
|
||||
### 3. Ollama installieren (falls noch nicht vorhanden)
|
||||
|
||||
```bash
|
||||
curl -fsSL https://ollama.com/install.sh | sh
|
||||
ollama pull gemma3:12b
|
||||
```
|
||||
|
||||
### 4. Firewall — Ports freigeben (LAN-intern)
|
||||
|
||||
```bash
|
||||
# Whisper-Server (whisper.cpp, siehe systemd-Unit oben: --port 8080)
sudo ufw allow from 192.168.0.0/16 to any port 8080
|
||||
# Ollama
|
||||
sudo ufw allow from 192.168.0.0/16 to any port 11434
|
||||
```
|
||||
|
||||
Ports nur für LAN freigeben — nicht ins Internet exponieren.
|
||||
|
||||
---
|
||||
|
||||
## Client-Setup (jeder Linux-Rechner)
|
||||
|
||||
### 1. Transkriptor installieren
|
||||
|
||||
```bash
|
||||
git clone git@git.tueit.de:tueit_GmbH/tueit_Transkriptor.git
|
||||
cd tueit_Transkriptor
|
||||
python -m venv .venv
|
||||
.venv/bin/pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### 2. App starten
|
||||
|
||||
```bash
|
||||
.venv/bin/python main.py
|
||||
```
|
||||
|
||||
Beim ersten Start: Browser öffnet sich automatisch (oder manuell: http://localhost:8765).
|
||||
Einmal Admin-Account anlegen, dann einloggen.
|
||||
|
||||
### 3. Beastix konfigurieren (Einstellungen → ⚙)
|
||||
|
||||
Als Admin einloggen → Zahnrad-Icon im Header → Einstellungen:
|
||||
|
||||
| Feld | Wert (Beispiel) |
|
||||
|------|-----------------|
|
||||
| Whisper Backend | `whisper.cpp Server` |
|
||||
| Whisper Server URL | `http://beastix:8080` |
|
||||
| Whisper Modell | `large-v3` |
|
||||
| Ollama Server URL | `http://beastix:11434` |
|
||||
| Ollama Modell | `gemma3:12b` (aus Dropdown wählen) |
|
||||
|
||||
Leer lassen = lokale Verarbeitung (benötigt lokales Whisper-Modell).
|
||||
|
||||
### 4. Als systemd-User-Service einrichten (optional)
|
||||
|
||||
```bash
|
||||
cat > ~/.config/systemd/user/tueit-transcriber.service << 'EOF'
|
||||
[Unit]
|
||||
Description=tüit Transkriptor
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
WorkingDirectory=%h/work/tueit_Transkriptor
|
||||
ExecStart=%h/work/tueit_Transkriptor/.venv/bin/python main.py
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
|
||||
[Install]
|
||||
WantedBy=default.target
|
||||
EOF
|
||||
|
||||
systemctl --user daemon-reload
|
||||
systemctl --user enable --now tueit-transcriber.service
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Audio — PipeWire Combined Source
|
||||
|
||||
Um Mikrofon + System-Audio gleichzeitig aufzunehmen (z.B. für Konferenzen):
|
||||
|
||||
Einstellungen → Audio → **Combined Source erstellen**
|
||||
|
||||
- Mikrofon auswählen
|
||||
- System-Audio Monitor auswählen (z.B. `alsa_output.*.monitor`)
|
||||
- Erstellen → neues Device `transkriptor-combined.monitor` erscheint in der Liste
|
||||
- Als Aufnahmequelle auswählen und speichern
|
||||
|
||||
Die PipeWire-Module werden in `~/.config/tueit-transcriber/pipewire-modules.json`
|
||||
gespeichert. Bei App-Stop können sie mit `pactl unload-module <id>` entladen werden.
|
||||
|
||||
---
|
||||
|
||||
## Diarisierung (Sprecher-Erkennung) — HuggingFace-Setup
|
||||
|
||||
Die Diarisierung verwendet das Modell `pyannote/speaker-diarization-3.1`. Das Modell
|
||||
ist kostenfrei, erfordert aber eine einmalige Zustimmung zu den Nutzungsbedingungen.
|
||||
|
||||
### 1. HuggingFace-Account
|
||||
|
||||
Falls noch kein Account vorhanden: https://huggingface.co/join
|
||||
|
||||
### 2. Modell-Zugriff beantragen
|
||||
|
||||
1. https://huggingface.co/pyannote/speaker-diarization-3.1 aufrufen
|
||||
2. **"Access repository"** klicken und die Nutzungsbedingungen akzeptieren
|
||||
|
||||
### 3. Read-Token erstellen
|
||||
|
||||
1. https://huggingface.co/settings/tokens aufrufen
|
||||
2. **"New token"** → Name z.B. `transkriptor` → Typ **Read** → erstellen
|
||||
3. Token kopieren (beginnt mit `hf_`)
|
||||
|
||||
### 4. Token in Transkriptor eintragen
|
||||
|
||||
Als Admin einloggen → Zahnrad-Icon → Einstellungen → **Diarisierung**:
|
||||
|
||||
- Checkbox **"Sprecher-Erkennung aktivieren"** aktivieren
|
||||
- Token in das Feld **HuggingFace Token** eintragen
|
||||
- **Speichern**
|
||||
|
||||
Beim ersten Einsatz lädt pyannote das Modell herunter (~1 GB) und cached es lokal.
|
||||
@@ -0,0 +1,113 @@
|
||||
# Settings Page & Remote Whisper Design
|
||||
|
||||
**Date:** 2026-04-01
|
||||
|
||||
## Goal
|
||||
|
||||
Give each Linux client a settings page to configure audio device (via PipeWire/pactl) and remote server URLs (Whisper + Ollama). Beastix runs faster-whisper-server and Ollama; clients point their config at it.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────┐ ┌─────────────────────────────┐
|
||||
│ Client (Linux) │ │ Beastix (LAN) │
|
||||
│ │ WAV │ │
|
||||
│ sounddevice │ ──────► │ faster-whisper-server │
|
||||
│ PipeWire src │ │ :8000 (OpenAI-kompatibel) │
|
||||
│ localhost:8765 │ Text │ │
|
||||
│ Browser UI │ ◄────── │ Ollama :11434 │
|
||||
└─────────────────┘ └─────────────────────────────┘
|
||||
```
|
||||
|
||||
Default: `whisper.base_url` leer → lokale faster-whisper-Instanz. Gesetzt → HTTP-Upload an Beastix.
|
||||
|
||||
## Settings Page (`/settings`)
|
||||
|
||||
Nur für Admins sichtbar (Gear-Icon im Header). Zwei Abschnitte:
|
||||
|
||||
### Abschnitt 1 — Audio
|
||||
|
||||
- Dropdown: alle verfügbaren PipeWire-Sources (live via `pactl list sources short`)
|
||||
- Button "Combined Source erstellen" → App führt pactl-Kommandos aus, Combined Source erscheint in der Liste
|
||||
- Gewähltes Device → `config.toml [audio] device = "..."`
|
||||
- sounddevice nutzt diesen Device-Namen beim nächsten Start einer Aufnahme
|
||||
|
||||
### Abschnitt 2 — Verarbeitung
|
||||
|
||||
- Whisper `base_url`: leer = lokal, sonst z.B. `http://beastix:8000`
|
||||
- Whisper `model`: Freitextfeld (default: `large-v3`)
|
||||
- Ollama `base_url`: z.B. `http://beastix:11434`
|
||||
- Ollama `model`: Dropdown gefüllt via `GET {ollama_base_url}/api/tags`
|
||||
|
||||
Änderungen werden sofort in `config.toml` gespeichert (PUT /config).
|
||||
|
||||
## Remote Whisper (transcription.py)
|
||||
|
||||
```python
|
||||
if cfg["whisper"].get("base_url"):
|
||||
# OpenAI-kompatibler Upload
|
||||
POST {base_url}/v1/audio/transcriptions
|
||||
multipart: file=<wav>, model=<model>, language=<lang>
|
||||
→ response.text
|
||||
else:
|
||||
# Lokal wie bisher
|
||||
WhisperModel(model_name, device=device).transcribe(...)
|
||||
```
|
||||
|
||||
## Neue API-Endpoints
|
||||
|
||||
| Method | Path | Beschreibung |
|
||||
|--------|------|--------------|
|
||||
| GET | `/settings` | Liefert settings.html |
|
||||
| GET | `/audio/devices` | pactl sources geparst → JSON-Liste |
|
||||
| POST | `/audio/combined` | Erstellt PipeWire Combined Source via pactl |
|
||||
|
||||
PUT `/config` ist bereits vorhanden — wird um `audio.device` und `whisper.base_url` erweitert.
|
||||
|
||||
## Config-Schema Erweiterung
|
||||
|
||||
```toml
|
||||
[audio]
|
||||
device = "" # leer = Systemstandard, sonst PipeWire-Source-Name
|
||||
|
||||
[whisper]
|
||||
model = "large-v3"
|
||||
language = "de"
|
||||
device = "auto"
|
||||
base_url = "" # leer = lokal, sonst http://beastix:8000
|
||||
```
|
||||
|
||||
## PipeWire Combined Source
|
||||
|
||||
Beim Klick auf "Combined Source erstellen":
|
||||
|
||||
```bash
|
||||
pactl load-module module-null-sink \
|
||||
sink_name=transkriptor-combined \
|
||||
sink_properties=device.description="Transkriptor\ Combined"
|
||||
|
||||
pactl load-module module-loopback \
|
||||
source=<mic-device> sink=transkriptor-combined
|
||||
|
||||
pactl load-module module-loopback \
|
||||
source=<default-output>.monitor sink=transkriptor-combined
|
||||
```
|
||||
|
||||
Ergebnis: `transkriptor-combined.monitor` erscheint als aufnehmbare Source in der Liste.
|
||||
|
||||
Die Module-IDs werden in `config.toml` gespeichert damit sie beim App-Stop sauber entladen werden können (`pactl unload-module <id>`).
|
||||
|
||||
## Beastix Setup (einmalig)
|
||||
|
||||
```bash
|
||||
pip install faster-whisper-server
|
||||
faster-whisper-server --host 0.0.0.0 --port 8000 --model large-v3
|
||||
```
|
||||
|
||||
Oder als systemd user service. Clients tragen dann `whisper.base_url = "http://beastix:8000"` ein.
|
||||
|
||||
## Nicht in diesem Scope
|
||||
|
||||
- Nutzer-Verwaltung über Settings (eigene Seite)
|
||||
- Automatischer Neustart wenn Config sich ändert
|
||||
- Windows-Support
|
||||
@@ -0,0 +1,921 @@
|
||||
# Settings Page & Remote Whisper Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Add a settings page with PipeWire audio device selection and remote Whisper/Ollama URL configuration, enabling clients to offload AI processing to Beastix.
|
||||
|
||||
**Architecture:** Config gains `audio.device` and `whisper.base_url`. `transcription.py` branches on `base_url`: local faster-whisper or remote OpenAI-compatible HTTP upload. A new `/settings` page (admin-only) lets users pick PipeWire sources via `pactl` and configure server URLs. PipeWire combined source is created on demand via `pactl load-module`.
|
||||
|
||||
**Tech Stack:** FastAPI, httpx (already in deps), pactl (PipeWire), sounddevice, faster-whisper, vanilla JS/CSS (tüit CI dark theme)
|
||||
|
||||
---
|
||||
|
||||
### Task 1: Extend config defaults
|
||||
|
||||
**Files:**
|
||||
- Modify: `config.py`
|
||||
- Test: `tests/test_config.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
Add to `tests/test_config.py`:
|
||||
|
||||
```python
|
||||
def test_config_has_audio_and_whisper_base_url():
|
||||
import config
|
||||
from unittest.mock import patch
|
||||
import tempfile, os
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
cfg_path = os.path.join(tmpdir, "config.toml")
|
||||
with patch("config.CONFIG_PATH", cfg_path):
|
||||
cfg = config.load()
|
||||
assert "audio" in cfg
|
||||
assert cfg["audio"]["device"] == ""
|
||||
assert cfg["whisper"]["base_url"] == ""
|
||||
```
|
||||
|
||||
**Step 2: Run to verify it fails**
|
||||
|
||||
```bash
|
||||
pytest tests/test_config.py::test_config_has_audio_and_whisper_base_url -v
|
||||
```
|
||||
Expected: FAIL — KeyError or AssertionError
|
||||
|
||||
**Step 3: Update `config.py` DEFAULTS**
|
||||
|
||||
```python
|
||||
DEFAULTS = {
|
||||
"ollama": {
|
||||
"base_url": "http://localhost:11434",
|
||||
"model": "gemma3:12b",
|
||||
},
|
||||
"whisper": {
|
||||
"model": "large-v3",
|
||||
"language": "de",
|
||||
"device": "auto",
|
||||
"base_url": "", # empty = local, else http://beastix:8000
|
||||
},
|
||||
"audio": {
|
||||
"device": "", # empty = system default
|
||||
},
|
||||
"server": {
|
||||
"port": 8765,
|
||||
},
|
||||
"output": {
|
||||
"path": os.path.expanduser("~/cloud.shron.de/Hetzner Storagebox/work"),
|
||||
},
|
||||
"network": {
|
||||
"host": "127.0.0.1",
|
||||
},
|
||||
"pid_file": os.path.expanduser("~/.local/run/tueit-transcriber.pid"),
|
||||
}
|
||||
```
|
||||
|
||||
Update the fallback string writer in `_write_defaults`:
|
||||
|
||||
```python
|
||||
f.write('[whisper]\nmodel = "large-v3"\nlanguage = "de"\ndevice = "auto"\nbase_url = ""\n\n')
|
||||
f.write('[audio]\ndevice = ""\n\n')
|
||||
```
|
||||
|
||||
**Step 4: Run tests**
|
||||
|
||||
```bash
|
||||
pytest tests/test_config.py -v
|
||||
```
|
||||
Expected: all PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add config.py tests/test_config.py
|
||||
git commit -m "feat: add audio.device and whisper.base_url to config defaults"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 2: Remote Whisper in transcription.py
|
||||
|
||||
**Files:**
|
||||
- Modify: `transcription.py`
|
||||
- Modify: `api/pipeline.py`
|
||||
- Test: `tests/test_transcription.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
Add to `tests/test_transcription.py`:
|
||||
|
||||
```python
|
||||
import pytest
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_transcribe_uses_remote_when_base_url_set(tmp_path):
|
||||
import wave, struct
|
||||
wav = tmp_path / "test.wav"
|
||||
with wave.open(str(wav), "wb") as wf:
|
||||
wf.setnchannels(1); wf.setsampwidth(2); wf.setframerate(16000)
|
||||
wf.writeframes(struct.pack("<100h", *([0] * 100)))
|
||||
|
||||
import respx, httpx
|
||||
from transcription import TranscriptionEngine
|
||||
eng = TranscriptionEngine()
|
||||
|
||||
with respx.mock:
|
||||
respx.post("http://beastix:8000/v1/audio/transcriptions").mock(
|
||||
return_value=httpx.Response(200, json={"text": "Hallo Welt"})
|
||||
)
|
||||
result = await eng.transcribe_file(
|
||||
str(wav), language="de", model_name="large-v3",
|
||||
device="auto", base_url="http://beastix:8000",
|
||||
)
|
||||
assert result == "Hallo Welt"
|
||||
```
|
||||
|
||||
**Step 2: Run to verify it fails**
|
||||
|
||||
```bash
|
||||
pytest tests/test_transcription.py::test_transcribe_uses_remote_when_base_url_set -v
|
||||
```
|
||||
Expected: FAIL — `transcribe_file` doesn't accept `base_url`
|
||||
|
||||
**Step 3: Rewrite `transcription.py`**
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
import httpx
|
||||
|
||||
|
||||
class TranscriptionEngine:
|
||||
_model = None
|
||||
|
||||
def _get_model(self, model_name: str = "large-v3", device: str = "auto"):
|
||||
if self._model is None:
|
||||
from faster_whisper import WhisperModel
|
||||
if device == "auto":
|
||||
try:
|
||||
self._model = WhisperModel(model_name, device="cuda", compute_type="float16")
|
||||
except Exception:
|
||||
self._model = WhisperModel(model_name, device="cpu", compute_type="int8")
|
||||
else:
|
||||
compute = "float16" if device in ("cuda", "rocm") else "int8"
|
||||
self._model = WhisperModel(model_name, device=device, compute_type=compute)
|
||||
return self._model
|
||||
|
||||
async def transcribe_file(
|
||||
self,
|
||||
audio_path: str,
|
||||
language: str = "de",
|
||||
model_name: str = "large-v3",
|
||||
device: str = "auto",
|
||||
base_url: str = "",
|
||||
) -> str:
|
||||
if base_url:
|
||||
return await self._transcribe_remote(audio_path, language, model_name, base_url)
|
||||
return await self._transcribe_local(audio_path, language, model_name, device)
|
||||
|
||||
async def _transcribe_remote(
|
||||
self, audio_path: str, language: str, model_name: str, base_url: str
|
||||
) -> str:
|
||||
async with httpx.AsyncClient(timeout=300) as client:
|
||||
with open(audio_path, "rb") as f:
|
||||
r = await client.post(
|
||||
f"{base_url}/v1/audio/transcriptions",
|
||||
files={"file": ("audio.wav", f, "audio/wav")},
|
||||
data={"model": model_name, "language": language},
|
||||
)
|
||||
r.raise_for_status()
|
||||
return r.json()["text"]
|
||||
|
||||
async def _transcribe_local(
|
||||
self, audio_path: str, language: str, model_name: str, device: str
|
||||
) -> str:
|
||||
        loop = asyncio.get_running_loop()
|
||||
model = self._get_model(model_name, device)
|
||||
segments, _ = await loop.run_in_executor(
|
||||
None,
|
||||
lambda: model.transcribe(audio_path, language=language),
|
||||
)
|
||||
return "".join(seg.text for seg in segments).strip()
|
||||
|
||||
|
||||
engine = TranscriptionEngine()
|
||||
```
|
||||
|
||||
**Step 4: Update `api/pipeline.py` — pass base_url**
|
||||
|
||||
In `run_pipeline`, update the `transcribe_file` call:
|
||||
|
||||
```python
|
||||
raw_text = await transcription_engine.transcribe_file(
|
||||
wav_path,
|
||||
language=cfg["whisper"]["language"],
|
||||
model_name=cfg["whisper"]["model"],
|
||||
device=cfg["whisper"]["device"],
|
||||
base_url=cfg["whisper"].get("base_url", ""),
|
||||
)
|
||||
```
|
||||
|
||||
**Step 5: Run all transcription tests**
|
||||
|
||||
```bash
|
||||
pytest tests/test_transcription.py -v
|
||||
```
|
||||
Expected: all PASS
|
||||
|
||||
**Step 6: Commit**
|
||||
|
||||
```bash
|
||||
git add transcription.py api/pipeline.py tests/test_transcription.py
|
||||
git commit -m "feat: remote Whisper via whisper.base_url — OpenAI-compatible upload"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 3: Audio device in AudioRecorder
|
||||
|
||||
**Files:**
|
||||
- Modify: `audio.py`
|
||||
- Modify: `api/router.py` (toggle endpoint)
|
||||
- Test: `tests/test_audio.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
Add to `tests/test_audio.py`:
|
||||
|
||||
```python
|
||||
def test_recorder_stores_device_param():
|
||||
from audio import AudioRecorder
|
||||
rec = AudioRecorder(device="my-pipewire-source")
|
||||
assert rec.device == "my-pipewire-source"
|
||||
|
||||
def test_recorder_device_none_when_empty_string():
|
||||
from audio import AudioRecorder
|
||||
rec = AudioRecorder(device="")
|
||||
assert rec.device is None
|
||||
```
|
||||
|
||||
**Step 2: Run to verify they fail**
|
||||
|
||||
```bash
|
||||
pytest tests/test_audio.py::test_recorder_stores_device_param tests/test_audio.py::test_recorder_device_none_when_empty_string -v
|
||||
```
|
||||
Expected: FAIL
|
||||
|
||||
**Step 3: Update `audio.py`**
|
||||
|
||||
```python
|
||||
import wave
|
||||
import threading
|
||||
import numpy as np
|
||||
|
||||
|
||||
class AudioRecorder:
|
||||
def __init__(self, sample_rate: int = 16000, device: str | None = None):
|
||||
self.sample_rate = sample_rate
|
||||
self.device = device or None # empty string becomes None = system default
|
||||
self._buffer: list[np.ndarray] = []
|
||||
self._stream = None
|
||||
self.is_recording = False
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def _callback(self, indata, frames, time, status):
|
||||
if self.is_recording:
|
||||
with self._lock:
|
||||
self._buffer.append(indata[:, 0].copy().astype(np.int16))
|
||||
|
||||
def start(self):
|
||||
import sounddevice as sd
|
||||
self._buffer = []
|
||||
self.is_recording = True
|
||||
self._stream = sd.InputStream(
|
||||
samplerate=self.sample_rate,
|
||||
channels=1,
|
||||
dtype="int16",
|
||||
callback=self._callback,
|
||||
device=self.device,
|
||||
)
|
||||
self._stream.start()
|
||||
|
||||
def stop(self):
|
||||
self.is_recording = False
|
||||
if self._stream:
|
||||
self._stream.stop()
|
||||
self._stream.close()
|
||||
self._stream = None
|
||||
|
||||
def save_wav(self, path: str) -> str:
|
||||
with self._lock:
|
||||
data = np.concatenate(self._buffer) if self._buffer else np.zeros(0, dtype=np.int16)
|
||||
with wave.open(path, "wb") as wf:
|
||||
wf.setnchannels(1)
|
||||
wf.setsampwidth(2)
|
||||
wf.setframerate(self.sample_rate)
|
||||
wf.writeframes(data.tobytes())
|
||||
return path
|
||||
```
|
||||
|
||||
**Step 4: Pass device from config in `api/router.py` toggle endpoint**
|
||||
|
||||
In `toggle_recording`, update the `Status.IDLE` branch:
|
||||
|
||||
```python
|
||||
if state.status == Status.IDLE:
|
||||
from audio import AudioRecorder
|
||||
audio_device = cfg.get("audio", {}).get("device") or None
|
||||
state._recorder = AudioRecorder(device=audio_device)
|
||||
...
|
||||
```
|
||||
|
||||
Also load config at the top of toggle_recording (it's already imported):
|
||||
```python
|
||||
cfg = load_config()
|
||||
```
|
||||
|
||||
**Step 5: Run tests**
|
||||
|
||||
```bash
|
||||
pytest tests/test_audio.py -v
|
||||
```
|
||||
Expected: all PASS
|
||||
|
||||
**Step 6: Commit**
|
||||
|
||||
```bash
|
||||
git add audio.py api/router.py tests/test_audio.py
|
||||
git commit -m "feat: AudioRecorder accepts device param — reads audio.device from config"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 4: API — GET /audio/devices and POST /audio/combined
|
||||
|
||||
**Files:**
|
||||
- Modify: `api/router.py`
|
||||
- Test: `tests/test_api.py`
|
||||
|
||||
**Step 1: Write the failing tests**
|
||||
|
||||
Add to `tests/test_api.py`:
|
||||
|
||||
```python
|
||||
def test_audio_devices_returns_list(monkeypatch):
|
||||
import subprocess
|
||||
pactl_output = (
|
||||
"1\talsa_input.pci.analog-stereo\tPipeWire\ts32le 2ch 48000Hz\tRUNNING\n"
|
||||
"2\talsa_output.pci.analog-stereo.monitor\tPipeWire\ts32le 2ch 48000Hz\tIDLE\n"
|
||||
)
|
||||
monkeypatch.setattr(subprocess, "check_output", lambda *a, **kw: pactl_output.encode())
|
||||
from unittest.mock import patch
|
||||
with patch("api.router.current_user",
|
||||
return_value={"username": "u", "output_dir": "/tmp", "is_admin": True}):
|
||||
from fastapi.testclient import TestClient
|
||||
from main import app
|
||||
client = TestClient(app)
|
||||
r = client.get("/audio/devices", headers={"Authorization": "Bearer fake"})
|
||||
assert r.status_code == 200
|
||||
devices = r.json()
|
||||
assert len(devices) == 2
|
||||
assert devices[0]["name"] == "alsa_input.pci.analog-stereo"
|
||||
|
||||
def test_audio_devices_forbidden_for_non_admin():
|
||||
from unittest.mock import patch
|
||||
with patch("api.router.current_user",
|
||||
return_value={"username": "u", "output_dir": "/tmp", "is_admin": False}):
|
||||
from fastapi.testclient import TestClient
|
||||
from main import app
|
||||
client = TestClient(app)
|
||||
r = client.get("/audio/devices", headers={"Authorization": "Bearer fake"})
|
||||
assert r.status_code == 403
|
||||
```
|
||||
|
||||
**Step 2: Run to verify they fail**
|
||||
|
||||
```bash
|
||||
pytest tests/test_api.py::test_audio_devices_returns_list tests/test_api.py::test_audio_devices_forbidden_for_non_admin -v
|
||||
```
|
||||
Expected: FAIL — routes don't exist
|
||||
|
||||
**Step 3: Add endpoints to `api/router.py`**
|
||||
|
||||
```python
|
||||
@router.get("/audio/devices")
|
||||
async def list_audio_devices(user: dict = Depends(current_user)):
|
||||
import subprocess
|
||||
if not user.get("is_admin"):
|
||||
raise HTTPException(status_code=403, detail="Nur Administratoren")
|
||||
try:
|
||||
out = subprocess.check_output(
|
||||
["pactl", "list", "sources", "short"],
|
||||
stderr=subprocess.DEVNULL, timeout=5,
|
||||
).decode()
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"pactl fehlgeschlagen: {e}")
|
||||
devices = []
|
||||
for line in out.strip().splitlines():
|
||||
parts = line.split("\t")
|
||||
if len(parts) >= 2:
|
||||
devices.append({
|
||||
"index": parts[0],
|
||||
"name": parts[1],
|
||||
"state": parts[4] if len(parts) > 4 else "",
|
||||
})
|
||||
return devices
|
||||
|
||||
|
||||
@router.post("/audio/combined")
|
||||
async def create_combined_source(body: dict, user: dict = Depends(current_user)):
|
||||
    import subprocess, json, os, pathlib
|
||||
if not user.get("is_admin"):
|
||||
raise HTTPException(status_code=403, detail="Nur Administratoren")
|
||||
mic = body.get("mic", "")
|
||||
monitor = body.get("monitor", "")
|
||||
if not mic or not monitor:
|
||||
raise HTTPException(status_code=400, detail="mic und monitor erforderlich")
|
||||
# Validate: names must come from pactl list — no shell injection via user input
|
||||
out = subprocess.check_output(
|
||||
["pactl", "list", "sources", "short"], stderr=subprocess.DEVNULL, timeout=5
|
||||
).decode()
|
||||
known = {line.split("\t")[1] for line in out.strip().splitlines() if "\t" in line}
|
||||
if mic not in known or monitor not in known:
|
||||
raise HTTPException(status_code=400, detail="Unbekanntes Audio-Device")
|
||||
sink_id = subprocess.check_output([
|
||||
"pactl", "load-module", "module-null-sink",
|
||||
"sink_name=transkriptor-combined",
|
||||
"sink_properties=device.description=Transkriptor Combined",
|
||||
], timeout=5).decode().strip()
|
||||
mic_id = subprocess.check_output([
|
||||
"pactl", "load-module", "module-loopback",
|
||||
f"source={mic}", "sink=transkriptor-combined",
|
||||
], timeout=5).decode().strip()
|
||||
mon_id = subprocess.check_output([
|
||||
"pactl", "load-module", "module-loopback",
|
||||
f"source={monitor}", "sink=transkriptor-combined",
|
||||
], timeout=5).decode().strip()
|
||||
state_path = pathlib.Path(
|
||||
os.path.expanduser("~/.config/tueit-transcriber/pipewire-modules.json")
|
||||
)
|
||||
state_path.write_text(json.dumps({"ids": [int(sink_id), int(mic_id), int(mon_id)]}))
|
||||
return {"device": "transkriptor-combined.monitor", "module_ids": [sink_id, mic_id, mon_id]}
|
||||
```
|
||||
|
||||
**Step 4: Run tests**
|
||||
|
||||
```bash
|
||||
pytest tests/test_api.py::test_audio_devices_returns_list tests/test_api.py::test_audio_devices_forbidden_for_non_admin -v
|
||||
```
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add api/router.py tests/test_api.py
|
||||
git commit -m "feat: GET /audio/devices, POST /audio/combined — PipeWire source management"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 5: Fix PUT /config to deep-merge
|
||||
|
||||
**Files:**
|
||||
- Modify: `api/router.py`
|
||||
- Test: `tests/test_api.py`
|
||||
|
||||
Current `put_config` does a shallow `cfg.update(body)` — overwrites nested dicts. Must deep-merge.
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
Add to `tests/test_api.py`:
|
||||
|
||||
```python
|
||||
def test_put_config_deep_merges(tmp_path, monkeypatch):
|
||||
import config as cfg_mod
|
||||
monkeypatch.setattr(cfg_mod, "CONFIG_PATH",
|
||||
str(tmp_path / "config.toml"))
|
||||
from unittest.mock import patch
|
||||
with patch("api.router.current_user",
|
||||
return_value={"username": "u", "output_dir": "/tmp", "is_admin": True}):
|
||||
from fastapi.testclient import TestClient
|
||||
from main import app
|
||||
client = TestClient(app)
|
||||
r = client.put("/config",
|
||||
json={"whisper": {"base_url": "http://beastix:8000"}},
|
||||
headers={"Authorization": "Bearer fake"})
|
||||
assert r.status_code == 200
|
||||
data = r.json()
|
||||
# base_url updated, model preserved
|
||||
assert data["whisper"]["base_url"] == "http://beastix:8000"
|
||||
assert data["whisper"]["model"] == "large-v3"
|
||||
```
|
||||
|
||||
**Step 2: Run to verify it fails**
|
||||
|
||||
```bash
|
||||
pytest tests/test_api.py::test_put_config_deep_merges -v
|
||||
```
|
||||
Expected: FAIL — shallow update loses whisper.model
|
||||
|
||||
**Step 3: Fix `put_config` in `api/router.py`**
|
||||
|
||||
```python
|
||||
@router.put("/config")
|
||||
async def put_config(body: dict, user: dict = Depends(current_user)):
|
||||
if not user.get("is_admin"):
|
||||
raise HTTPException(status_code=403, detail="Nur Administratoren können die Config ändern")
|
||||
import tomli_w
|
||||
from config import _deep_merge, CONFIG_PATH
|
||||
cfg = load_config()
|
||||
merged = _deep_merge(cfg, body)
|
||||
os.makedirs(os.path.dirname(CONFIG_PATH), exist_ok=True)
|
||||
with open(CONFIG_PATH, "wb") as f:
|
||||
tomli_w.dump(merged, f)
|
||||
return merged
|
||||
```
|
||||
|
||||
**Step 4: Run tests**
|
||||
|
||||
```bash
|
||||
pytest tests/test_api.py::test_put_config_deep_merges -v
|
||||
```
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add api/router.py tests/test_api.py
|
||||
git commit -m "fix: PUT /config deep-merges nested config instead of shallow update"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 6: GET /status returns is_admin
|
||||
|
||||
**Files:**
|
||||
- Modify: `api/router.py`
|
||||
- Test: `tests/test_api.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
Add to `tests/test_api.py`:
|
||||
|
||||
```python
|
||||
def test_status_includes_is_admin():
|
||||
from unittest.mock import patch
|
||||
with patch("api.router.current_user",
|
||||
return_value={"username": "u", "output_dir": "/tmp", "is_admin": True}):
|
||||
from fastapi.testclient import TestClient
|
||||
from main import app
|
||||
client = TestClient(app)
|
||||
r = client.get("/status", headers={"Authorization": "Bearer fake"})
|
||||
assert r.status_code == 200
|
||||
assert r.json()["is_admin"] is True
|
||||
```
|
||||
|
||||
**Step 2: Run to verify it fails**
|
||||
|
||||
```bash
|
||||
pytest tests/test_api.py::test_status_includes_is_admin -v
|
||||
```
|
||||
Expected: FAIL
|
||||
|
||||
**Step 3: Update `get_status` in `api/router.py`**
|
||||
|
||||
```python
|
||||
@router.get("/status")
|
||||
async def get_status(user: dict = Depends(current_user)):
|
||||
return {
|
||||
"status": state.status,
|
||||
"username": user["username"],
|
||||
"is_admin": user.get("is_admin", False),
|
||||
}
|
||||
```
|
||||
|
||||
**Step 4: Add gear icon in `frontend/app.js` init block**
|
||||
|
||||
In the `(async () => { ... })()` init, after `userChip.textContent = data.username`:
|
||||
|
||||
```javascript
|
||||
if (data.is_admin) {
|
||||
const gearLink = document.createElement('a');
|
||||
gearLink.href = '/settings';
|
||||
gearLink.className = 'back-btn';
|
||||
gearLink.title = 'Einstellungen';
|
||||
gearLink.textContent = '\u2699'; // ⚙ gear symbol
|
||||
document.querySelector('.header-right').prepend(gearLink);
|
||||
}
|
||||
```
|
||||
|
||||
**Step 5: Run tests**
|
||||
|
||||
```bash
|
||||
pytest tests/test_api.py::test_status_includes_is_admin -v
|
||||
```
|
||||
Expected: PASS
|
||||
|
||||
**Step 6: Commit**
|
||||
|
||||
```bash
|
||||
git add api/router.py frontend/app.js tests/test_api.py
|
||||
git commit -m "feat: status includes is_admin, gear icon in header for admins"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 7: Settings page HTML + JS + routes
|
||||
|
||||
**Files:**
|
||||
- Create: `frontend/settings.html`
|
||||
- Create: `frontend/settings.js`
|
||||
- Modify: `api/router.py`
|
||||
- Modify: `main.py`
|
||||
|
||||
**Step 1: Add GET /settings to `api/router.py`**
|
||||
|
||||
```python
|
||||
@router.get("/settings")
|
||||
async def settings_page_route(user: dict = Depends(current_user)):
|
||||
from fastapi.responses import FileResponse, RedirectResponse
|
||||
from pathlib import Path
|
||||
if not user.get("is_admin"):
|
||||
return RedirectResponse("/")
|
||||
return FileResponse(str(Path(__file__).parent.parent / "frontend" / "settings.html"))
|
||||
```
|
||||
|
||||
**Step 2: Add `/settings.js` route to `main.py`**
|
||||
|
||||
```python
|
||||
@app.get("/settings.js")
|
||||
async def settingsjs():
|
||||
return FileResponse(str(FRONTEND_DIR / "settings.js"))
|
||||
```
|
||||
|
||||
**Step 3: Create `frontend/settings.html`**
|
||||
|
||||
```html
|
||||
<!DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>tüit Transkriptor — Einstellungen</title>
|
||||
<link rel="preconnect" href="https://fonts.googleapis.com">
|
||||
<link href="https://fonts.googleapis.com/css2?family=Overpass:wght@300;400;600;700&display=swap" rel="stylesheet">
|
||||
<style>
|
||||
:root { --red:#DA251C;--yellow:#FFD802;--bg:#111;--surface:#1a1a1a;--surface2:#232323;--text:#e8e8e8;--muted:#888;--border:#2e2e2e; }
|
||||
*{box-sizing:border-box;margin:0;padding:0;}
|
||||
body{font-family:'Overpass',system-ui,sans-serif;background:var(--bg);color:var(--text);min-height:100vh;display:flex;flex-direction:column;}
|
||||
header{display:flex;align-items:center;gap:12px;padding:16px 24px;border-bottom:1px solid var(--border);}
|
||||
.header-logo{height:28px;width:auto;display:block;}
|
||||
.header-divider{width:1px;height:20px;background:var(--border);flex-shrink:0;}
|
||||
.header-appname{font-size:1rem;font-weight:600;letter-spacing:.04em;color:var(--muted);}
|
||||
.header-right{margin-left:auto;display:flex;align-items:center;gap:12px;}
|
||||
.back-btn{font-size:.75rem;padding:4px 10px;border-radius:20px;background:none;border:1px solid var(--border);color:var(--muted);cursor:pointer;font-family:inherit;text-decoration:none;transition:border-color .15s,color .15s;}
|
||||
.back-btn:hover{border-color:var(--red);color:var(--red);}
|
||||
main{flex:1;display:flex;flex-direction:column;gap:24px;padding:24px;max-width:700px;width:100%;margin:0 auto;}
|
||||
h2{font-size:.8rem;color:var(--muted);text-transform:uppercase;letter-spacing:.06em;margin-bottom:12px;padding-bottom:8px;border-bottom:1px solid var(--border);}
|
||||
.field{display:flex;flex-direction:column;gap:6px;margin-bottom:14px;}
|
||||
label{font-size:.78rem;color:var(--muted);letter-spacing:.04em;}
|
||||
select,input[type=text]{background:var(--surface);border:1px solid var(--border);color:var(--text);border-radius:8px;padding:10px 12px;font-family:inherit;font-size:.9rem;outline:none;transition:border-color .15s;width:100%;}
|
||||
select:focus,input[type=text]:focus{border-color:var(--yellow);}
|
||||
.btn-row{display:flex;gap:10px;margin-top:4px;}
|
||||
.btn{font-size:.82rem;padding:8px 16px;border-radius:8px;border:1px solid var(--border);background:var(--surface2);color:var(--text);cursor:pointer;font-family:inherit;transition:border-color .15s,background .15s;}
|
||||
.btn:hover{border-color:var(--red);}
|
||||
.btn.primary{background:var(--red);border-color:var(--red);color:#fff;}
|
||||
.btn.primary:hover{background:#b81e16;border-color:#b81e16;}
|
||||
.toast{position:fixed;bottom:24px;right:24px;background:var(--surface2);border:1px solid var(--border);border-radius:8px;padding:10px 16px;font-size:.85rem;opacity:0;transition:opacity .2s;pointer-events:none;}
|
||||
.toast.show{opacity:1;}
|
||||
.combined-form{display:none;flex-direction:column;gap:10px;margin-top:10px;padding:12px;background:var(--surface2);border-radius:8px;border:1px solid var(--border);}
|
||||
.combined-form.visible{display:flex;}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<img src="/logo.svg" class="header-logo" alt="tüit">
|
||||
<div class="header-divider"></div>
|
||||
<span class="header-appname">Transkriptor — Einstellungen</span>
|
||||
<div class="header-right">
|
||||
<a href="/" class="back-btn">← Zurück</a>
|
||||
</div>
|
||||
</header>
|
||||
<main>
|
||||
<section>
|
||||
<h2>Audio</h2>
|
||||
<div class="field">
|
||||
<label>Aufnahmequelle</label>
|
||||
<select id="audio-device">
|
||||
<option value="">Systemstandard</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="btn-row">
|
||||
<button class="btn" id="refresh-devices-btn">Geräte aktualisieren</button>
|
||||
<button class="btn" id="create-combined-btn">Combined Source erstellen</button>
|
||||
</div>
|
||||
<div class="combined-form" id="combined-form">
|
||||
<div class="field">
|
||||
<label>Mikrofon</label>
|
||||
<select id="combined-mic"></select>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label>System-Audio Monitor</label>
|
||||
<select id="combined-monitor"></select>
|
||||
</div>
|
||||
<div class="btn-row">
|
||||
<button class="btn primary" id="combined-confirm-btn">Erstellen</button>
|
||||
<button class="btn" id="combined-cancel-btn">Abbrechen</button>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h2>Verarbeitung</h2>
|
||||
<div class="field">
|
||||
<label>Whisper Server URL (leer = lokal)</label>
|
||||
<input type="text" id="whisper-url" placeholder="http://beastix:8000">
|
||||
</div>
|
||||
<div class="field">
|
||||
<label>Whisper Modell</label>
|
||||
<input type="text" id="whisper-model" placeholder="large-v3">
|
||||
</div>
|
||||
<div class="field">
|
||||
<label>Ollama Server URL</label>
|
||||
<input type="text" id="ollama-url" placeholder="http://localhost:11434">
|
||||
</div>
|
||||
<div class="field">
|
||||
<label>Ollama Modell</label>
|
||||
<select id="ollama-model"></select>
|
||||
</div>
|
||||
<div class="btn-row">
|
||||
<button class="btn primary" id="save-btn">Speichern</button>
|
||||
</div>
|
||||
</section>
|
||||
</main>
|
||||
<div class="toast" id="toast"></div>
|
||||
<script src="/settings.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
```
|
||||
|
||||
**Step 4: Create `frontend/settings.js`**
|
||||
|
||||
```javascript
|
||||
const token = sessionStorage.getItem('token');
|
||||
function authHeaders() {
|
||||
return token ? { 'Authorization': 'Bearer ' + token } : {};
|
||||
}
|
||||
function apiFetch(url, options) {
|
||||
options = options || {};
|
||||
return fetch(url, Object.assign({}, options, {
|
||||
headers: Object.assign({'Content-Type': 'application/json'}, authHeaders(), options.headers || {}),
|
||||
}));
|
||||
}
|
||||
|
||||
let _devices = [];
|
||||
|
||||
// Show a transient toast notification for 2.5 seconds.
function showToast(msg) {
  const toast = document.getElementById('toast');
  toast.textContent = msg;
  toast.classList.add('show');
  setTimeout(() => toast.classList.remove('show'), 2500);
}
|
||||
|
||||
// Fetch the audio source list and repopulate the three device dropdowns.
// The main dropdown keeps its current selection and a "Systemstandard" entry;
// the combined-source dropdowns list devices only.
async function loadDevices() {
  const resp = await apiFetch('/audio/devices');
  if (!resp.ok) return;
  _devices = await resp.json();

  const main = document.getElementById('audio-device');
  const selected = main.value;
  main.replaceChildren(new Option('Systemstandard', ''));
  for (const dev of _devices) {
    main.appendChild(new Option(dev.name, dev.name));
  }
  if (selected) main.value = selected;

  for (const id of ['combined-mic', 'combined-monitor']) {
    const sel = document.getElementById(id);
    sel.replaceChildren();
    for (const dev of _devices) {
      sel.appendChild(new Option(dev.name, dev.name));
    }
  }
}
|
||||
|
||||
// Populate the Ollama model dropdown from <baseUrl>/api/tags.
// Best-effort: network or parse errors leave the dropdown untouched.
async function loadOllamaModels(baseUrl, current) {
  try {
    const resp = await fetch(baseUrl + '/api/tags');
    if (!resp.ok) return;
    const data = await resp.json();
    const sel = document.getElementById('ollama-model');
    sel.replaceChildren();
    for (const m of data.models || []) {
      sel.appendChild(new Option(m.name, m.name));
    }
    if (current) sel.value = current;
  } catch (err) {
    // Deliberately swallowed: an unreachable Ollama server is not fatal here.
  }
}
|
||||
|
||||
// Load the server-side config and mirror it into the form fields,
// then refresh the Ollama model list for the configured server.
async function loadConfig() {
  const resp = await apiFetch('/config');
  if (!resp.ok) return;
  const cfg = await resp.json();

  const audio = cfg.audio || {};
  const whisper = cfg.whisper || {};
  const ollama = cfg.ollama || {};

  document.getElementById('audio-device').value = audio.device || '';
  document.getElementById('whisper-url').value = whisper.base_url || '';
  document.getElementById('whisper-model').value = whisper.model || 'large-v3';

  const ollamaUrl = ollama.base_url || 'http://localhost:11434';
  document.getElementById('ollama-url').value = ollamaUrl;
  await loadOllamaModels(ollamaUrl, ollama.model);
}
|
||||
|
||||
// Re-query the audio device list on demand.
document.getElementById('refresh-devices-btn').addEventListener('click', loadDevices);

// Toggle the combined-source creation form.
document.getElementById('create-combined-btn').addEventListener('click', function() {
  document.getElementById('combined-form').classList.toggle('visible');
});
document.getElementById('combined-cancel-btn').addEventListener('click', function() {
  document.getElementById('combined-form').classList.remove('visible');
});
// Create a combined source from the chosen mic + monitor, then refresh the
// device list and preselect the newly created device.
document.getElementById('combined-confirm-btn').addEventListener('click', async function() {
  const mic = document.getElementById('combined-mic').value;
  const monitor = document.getElementById('combined-monitor').value;
  const r = await apiFetch('/audio/combined', {
    method: 'POST',
    body: JSON.stringify({ mic: mic, monitor: monitor }),
  });
  if (!r.ok) { showToast('Fehler beim Erstellen'); return; }
  const data = await r.json();
  showToast('Erstellt: ' + data.device);
  document.getElementById('combined-form').classList.remove('visible');
  await loadDevices();
  document.getElementById('audio-device').value = data.device;
});
|
||||
|
||||
// Changing the Ollama URL re-fetches the model list, keeping the
// currently selected model when it still exists on the new server.
document.getElementById('ollama-url').addEventListener('change', function(e) {
  loadOllamaModels(e.target.value, document.getElementById('ollama-model').value);
});
|
||||
|
||||
// Collect all form fields and persist them via PUT /config.
document.getElementById('save-btn').addEventListener('click', async function() {
  const val = (id) => document.getElementById(id).value;
  const payload = {
    audio: { device: val('audio-device') },
    whisper: { base_url: val('whisper-url'), model: val('whisper-model') },
    ollama: { base_url: val('ollama-url'), model: val('ollama-model') },
  };
  const resp = await apiFetch('/config', { method: 'PUT', body: JSON.stringify(payload) });
  showToast(resp.ok ? 'Gespeichert' : 'Fehler beim Speichern');
});
|
||||
|
||||
// Entry point: require a session token, then populate devices and config.
(async function() {
  if (!token) { location.href = '/login'; return; }
  await loadDevices();
  await loadConfig();
})();
|
||||
```
|
||||
|
||||
**Step 5: Manual verification checklist**
|
||||
|
||||
Restart app, open browser as admin:
|
||||
|
||||
- [ ] Gear icon (⚙) sichtbar im Header
|
||||
- [ ] Klick öffnet `/settings`
|
||||
- [ ] Audio-Dropdown listet PipeWire-Sources
|
||||
- [ ] "Geräte aktualisieren" lädt Liste neu
|
||||
- [ ] "Combined Source erstellen" zeigt Mic/Monitor-Dropdowns
|
||||
- [ ] Nach Erstellen: neues Device in der Liste wählbar
|
||||
- [ ] Whisper-URL leer → lokale Verarbeitung
|
||||
- [ ] Whisper-URL gesetzt → Transkript wird remote verarbeitet
|
||||
- [ ] Ollama-Modelle laden aus konfiguriertem Ollama-Server
|
||||
- [ ] Speichern → Toast, config.toml aktualisiert
|
||||
- [ ] Aufnahme nutzt konfiguriertes Audio-Device
|
||||
- [ ] Non-Admin sieht kein Gear-Icon, `/settings` leitet zu `/` um
|
||||
|
||||
**Step 6: Commit**
|
||||
|
||||
```bash
|
||||
git add api/router.py main.py frontend/settings.html frontend/settings.js
|
||||
git commit -m "feat: settings page — PipeWire audio device + remote Whisper/Ollama config"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 8: Run full test suite + push
|
||||
|
||||
```bash
|
||||
pytest -v
|
||||
```
|
||||
|
||||
Expected: all tests pass.
|
||||
|
||||
```bash
|
||||
git push
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Beastix Setup (einmalig, außerhalb App-Code)
|
||||
|
||||
```bash
|
||||
pip install faster-whisper-server
|
||||
uvicorn faster_whisper_server.main:app --host 0.0.0.0 --port 8000
|
||||
```
|
||||
|
||||
Clients tragen ein:
|
||||
```toml
|
||||
[whisper]
|
||||
base_url = "http://beastix:8000"
|
||||
```
|
||||
@@ -0,0 +1,383 @@
|
||||
# Transcript Modal & Delete Implementation Plan
|
||||
|
||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
||||
|
||||
**Goal:** Add transcript delete and a markdown-rendering modal viewer, removing the existing preview section.
|
||||
|
||||
**Architecture:** Two new REST endpoints (GET + DELETE `/transcripts/{filename}`) with path-confinement security. Frontend gains a full-screen modal using marked.js + DOMPurify for safe rendering; the static preview div is removed entirely. Each list item gets a trash icon that stops event propagation so it doesn't trigger the modal.
|
||||
|
||||
**Tech Stack:** FastAPI (existing), marked.js 14 + DOMPurify 3 via CDN, vanilla JS/CSS (no new build step)
|
||||
|
||||
---
|
||||
|
||||
### Task 1: Backend — GET /transcripts/{filename}
|
||||
|
||||
**Files:**
|
||||
- Modify: `api/router.py`
|
||||
- Modify: `output.py`
|
||||
- Test: `tests/test_api.py`
|
||||
|
||||
**Step 1: Write the failing test**
|
||||
|
||||
Add to `tests/test_api.py`:
|
||||
|
||||
```python
|
||||
def test_get_transcript_returns_content(tmp_path, monkeypatch):
|
||||
f = tmp_path / "2026-01-01-0900-test.md"
|
||||
f.write_text("# Hello\n\ncontent here\n")
|
||||
from unittest.mock import patch
|
||||
with patch("api.router.current_user", return_value={"username": "", "output_dir": str(tmp_path), "is_admin": False}):
|
||||
from fastapi.testclient import TestClient
|
||||
from main import app
|
||||
client = TestClient(app)
|
||||
r = client.get("/transcripts/2026-01-01-0900-test.md",
|
||||
headers={"Authorization": "Bearer fake"})
|
||||
assert r.status_code == 200
|
||||
assert "Hello" in r.text
|
||||
|
||||
def test_get_transcript_rejects_path_traversal(tmp_path):
|
||||
from unittest.mock import patch
|
||||
with patch("api.router.current_user", return_value={"username": "", "output_dir": str(tmp_path), "is_admin": False}):
|
||||
from fastapi.testclient import TestClient
|
||||
from main import app
|
||||
client = TestClient(app)
|
||||
r = client.get("/transcripts/..%2Fsecret.md",
|
||||
headers={"Authorization": "Bearer fake"})
|
||||
assert r.status_code == 404
|
||||
```
|
||||
|
||||
**Step 2: Run to verify it fails**
|
||||
|
||||
```bash
|
||||
pytest tests/test_api.py::test_get_transcript_returns_content tests/test_api.py::test_get_transcript_rejects_path_traversal -v
|
||||
```
|
||||
Expected: FAIL — 404 or 405 (route doesn't exist yet)
|
||||
|
||||
**Step 3: Add `read_transcript` to `output.py`**
|
||||
|
||||
```python
|
||||
def read_transcript(output_dir: str, filename: str) -> str | None:
|
||||
"""Return file content if filename is a plain .md file inside output_dir."""
|
||||
if os.path.basename(filename) != filename or not filename.endswith(".md"):
|
||||
return None
|
||||
path = os.path.join(output_dir, filename)
|
||||
if not os.path.exists(path):
|
||||
return None
|
||||
with open(path, encoding="utf-8") as f:
|
||||
return f.read()
|
||||
```
|
||||
|
||||
**Step 4: Add GET endpoint to `api/router.py`**
|
||||
|
||||
Add after the existing `get_transcripts` endpoint:
|
||||
|
||||
```python
|
||||
@router.get("/transcripts/{filename}")
async def get_transcript(filename: str, user: dict = Depends(current_user)):
    """Serve the raw markdown content of one transcript belonging to *user*.

    The route declares ``{filename}`` as a FastAPI path parameter bound to
    the ``filename`` argument. Returns 404 both for unknown files and for
    any name rejected by the path-confinement check in ``read_transcript``.
    """
    from output import read_transcript
    from fastapi.responses import PlainTextResponse

    # Transcripts live in a per-user subdirectory of the configured output dir.
    user_dir = os.path.join(user["output_dir"], user["username"])
    content = read_transcript(user_dir, filename)
    if content is None:
        # Same response for "missing" and "rejected name" — don't leak which.
        raise HTTPException(status_code=404, detail="Nicht gefunden")
    return PlainTextResponse(content)
|
||||
```
|
||||
|
||||
**Step 5: Run tests**
|
||||
|
||||
```bash
|
||||
pytest tests/test_api.py::test_get_transcript_returns_content tests/test_api.py::test_get_transcript_rejects_path_traversal -v
|
||||
```
|
||||
Expected: PASS
|
||||
|
||||
**Step 6: Commit**
|
||||
|
||||
```bash
|
||||
git add output.py api/router.py tests/test_api.py
|
||||
git commit -m "feat: GET /transcripts/{filename} — serve transcript content"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 2: Backend — DELETE /transcripts/{filename}
|
||||
|
||||
**Files:**
|
||||
- Modify: `api/router.py`
|
||||
- Test: `tests/test_api.py`
|
||||
|
||||
**Step 1: Write the failing tests**
|
||||
|
||||
Add to `tests/test_api.py`:
|
||||
|
||||
```python
|
||||
def test_delete_transcript_removes_file(tmp_path):
|
||||
f = tmp_path / "2026-01-01-0900-test.md"
|
||||
f.write_text("content")
|
||||
from unittest.mock import patch
|
||||
with patch("api.router.current_user", return_value={"username": "", "output_dir": str(tmp_path), "is_admin": False}):
|
||||
from fastapi.testclient import TestClient
|
||||
from main import app
|
||||
client = TestClient(app)
|
||||
r = client.delete("/transcripts/2026-01-01-0900-test.md",
|
||||
headers={"Authorization": "Bearer fake"})
|
||||
assert r.status_code == 200
|
||||
assert not f.exists()
|
||||
|
||||
def test_delete_transcript_rejects_path_traversal(tmp_path):
|
||||
from unittest.mock import patch
|
||||
with patch("api.router.current_user", return_value={"username": "", "output_dir": str(tmp_path), "is_admin": False}):
|
||||
from fastapi.testclient import TestClient
|
||||
from main import app
|
||||
client = TestClient(app)
|
||||
r = client.delete("/transcripts/..%2Fsecret.md",
|
||||
headers={"Authorization": "Bearer fake"})
|
||||
assert r.status_code == 404
|
||||
```
|
||||
|
||||
**Step 2: Run to verify they fail**
|
||||
|
||||
```bash
|
||||
pytest tests/test_api.py::test_delete_transcript_removes_file tests/test_api.py::test_delete_transcript_rejects_path_traversal -v
|
||||
```
|
||||
Expected: FAIL — route doesn't exist
|
||||
|
||||
**Step 3: Add DELETE endpoint to `api/router.py`**
|
||||
|
||||
```python
|
||||
@router.delete("/transcripts/{filename}")
async def delete_transcript(filename: str, user: dict = Depends(current_user)):
    """Delete one transcript file belonging to *user*.

    The route declares ``{filename}`` as a FastAPI path parameter. The
    filename must be a plain ``*.md`` basename; anything else (path
    traversal, other extensions) gets the same 404 as a missing file.
    """
    user_dir = os.path.join(user["output_dir"], user["username"])
    # Path-confinement guard: reject separators and non-.md names.
    if os.path.basename(filename) != filename or not filename.endswith(".md"):
        raise HTTPException(status_code=404, detail="Nicht gefunden")
    path = os.path.join(user_dir, filename)
    # EAFP: unlink directly so there is no exists()/unlink() race.
    try:
        os.unlink(path)
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="Nicht gefunden") from None
    return {"ok": True}
|
||||
```
|
||||
|
||||
**Step 4: Run tests**
|
||||
|
||||
```bash
|
||||
pytest tests/test_api.py::test_delete_transcript_removes_file tests/test_api.py::test_delete_transcript_rejects_path_traversal -v
|
||||
```
|
||||
Expected: PASS
|
||||
|
||||
**Step 5: Commit**
|
||||
|
||||
```bash
|
||||
git add api/router.py tests/test_api.py
|
||||
git commit -m "feat: DELETE /transcripts/{filename} — delete transcript with path-confinement check"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 3: Frontend — Remove preview section, add modal + marked.js + DOMPurify
|
||||
|
||||
**Files:**
|
||||
- Modify: `frontend/index.html`
|
||||
- Modify: `frontend/app.js`
|
||||
|
||||
No automated tests; manual verification checklist at end.
|
||||
|
||||
**Step 1: Remove preview section from `index.html`**
|
||||
|
||||
Delete this block from `<main>`:
|
||||
|
||||
```html
|
||||
<section class="preview-section">
|
||||
<label>Vorschau</label>
|
||||
<div id="preview">Noch keine Aufnahme verarbeitet.</div>
|
||||
</section>
|
||||
```
|
||||
|
||||
Delete these CSS rules (search for them by selector):
|
||||
- `.preview-section`
|
||||
- `#preview`
|
||||
- `#preview.has-content`
|
||||
|
||||
**Step 2: Add script tags — replace existing `<script src="/app.js">` line**
|
||||
|
||||
```html
|
||||
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/dompurify/dist/purify.min.js"></script>
|
||||
<script src="/app.js"></script>
|
||||
```
|
||||
|
||||
**Step 3: Add modal HTML + CSS**
|
||||
|
||||
Add this block inside `<main>` before `</main>` closing tag (it's a fixed overlay, position doesn't matter):
|
||||
|
||||
```html
|
||||
<div id="modal" class="modal hidden" role="dialog" aria-modal="true">
|
||||
<div class="modal-backdrop"></div>
|
||||
<div class="modal-panel">
|
||||
<div class="modal-header">
|
||||
<span id="modal-title" class="modal-title"></span>
|
||||
<div class="modal-actions">
|
||||
<button id="modal-open-btn" class="modal-btn" title="Im Editor öffnen">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
|
||||
<path d="M14 3h7v7h-2V6.41l-9.29 9.3-1.42-1.42L17.59 5H14V3zm-1 2H5a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2v-8h-2v8H5V7h8V5z"/>
|
||||
</svg>
|
||||
</button>
|
||||
<button id="modal-close-btn" class="modal-btn" title="Schließen">✕</button>
|
||||
</div>
|
||||
</div>
|
||||
<div id="modal-body" class="modal-body"></div>
|
||||
</div>
|
||||
</div>
|
||||
```
|
||||
|
||||
Add CSS inside `<style>`:
|
||||
|
||||
```css
|
||||
.modal { position: fixed; inset: 0; z-index: 100; display: flex; align-items: center; justify-content: center; }
|
||||
.modal.hidden { display: none; }
|
||||
.modal-backdrop { position: absolute; inset: 0; background: rgba(0,0,0,0.7); }
|
||||
.modal-panel {
|
||||
position: relative; z-index: 1;
|
||||
background: var(--surface); border: 1px solid var(--border); border-radius: 10px;
|
||||
width: min(800px, 95vw); max-height: 85vh;
|
||||
display: flex; flex-direction: column;
|
||||
}
|
||||
.modal-header {
|
||||
display: flex; align-items: center; justify-content: space-between;
|
||||
padding: 14px 18px; border-bottom: 1px solid var(--border);
|
||||
flex-shrink: 0;
|
||||
}
|
||||
.modal-title { font-size: 0.9rem; font-weight: 600; }
|
||||
.modal-actions { display: flex; gap: 8px; }
|
||||
.modal-btn {
|
||||
background: none; border: 1px solid var(--border); color: var(--muted);
|
||||
border-radius: 6px; padding: 4px 8px; cursor: pointer; font-family: inherit;
|
||||
font-size: 0.85rem; display: flex; align-items: center;
|
||||
transition: border-color 0.15s, color 0.15s;
|
||||
}
|
||||
.modal-btn:hover { border-color: var(--red); color: var(--red); }
|
||||
.modal-body {
|
||||
padding: 20px 24px; overflow-y: auto; flex: 1;
|
||||
font-size: 0.9rem; line-height: 1.7; color: var(--text);
|
||||
}
|
||||
.modal-body h1,.modal-body h2,.modal-body h3 { margin: 1em 0 0.4em; font-weight: 600; }
|
||||
.modal-body h1 { font-size: 1.3rem; }
|
||||
.modal-body h2 { font-size: 1.1rem; }
|
||||
.modal-body p { margin: 0 0 0.8em; }
|
||||
.modal-body ul,.modal-body ol { padding-left: 1.5em; margin: 0 0 0.8em; }
|
||||
.modal-body code { background: var(--surface2); padding: 2px 5px; border-radius: 3px; font-size: 0.85em; }
|
||||
.modal-body pre { background: var(--surface2); padding: 12px; border-radius: 6px; overflow-x: auto; margin: 0 0 0.8em; }
|
||||
.modal-body pre code { background: none; padding: 0; }
|
||||
.modal-body hr { border: none; border-top: 1px solid var(--border); margin: 1em 0; }
|
||||
.del-btn {
|
||||
background: none; border: none; color: var(--muted); cursor: pointer;
|
||||
padding: 4px; border-radius: 4px; display: flex; align-items: center;
|
||||
transition: color 0.15s; flex-shrink: 0;
|
||||
}
|
||||
.del-btn:hover { color: var(--red); }
|
||||
```
|
||||
|
||||
**Step 4: Update `app.js`**
|
||||
|
||||
Remove these variable declarations at the top:
|
||||
```javascript
|
||||
const preview = document.getElementById('preview');
|
||||
```
|
||||
|
||||
Add these variable declarations at the top:
|
||||
```javascript
|
||||
const modal = document.getElementById('modal');
|
||||
const modalTitle = document.getElementById('modal-title');
|
||||
const modalBody = document.getElementById('modal-body');
|
||||
const modalOpenBtn = document.getElementById('modal-open-btn');
|
||||
const modalCloseBtn = document.getElementById('modal-close-btn');
|
||||
let _modalPath = null;
|
||||
```
|
||||
|
||||
Add these functions and event listeners (after the `logoutBtn` listener):
|
||||
```javascript
|
||||
function openModal(filename, path) {
|
||||
_modalPath = path;
|
||||
modalTitle.textContent = filename.replace(/\.md$/, '').replace(/^\d{4}-\d{2}-\d{2}-\d{4}-/, '');
|
||||
modalBody.innerHTML = '';
|
||||
modal.classList.remove('hidden');
|
||||
apiFetch(`/transcripts/${encodeURIComponent(filename)}`)
|
||||
.then(r => r.text())
|
||||
.then(md => {
|
||||
modalBody.innerHTML = DOMPurify.sanitize(marked.parse(md));
|
||||
});
|
||||
}
|
||||
|
||||
function closeModal() {
|
||||
modal.classList.add('hidden');
|
||||
_modalPath = null;
|
||||
}
|
||||
|
||||
modalCloseBtn.addEventListener('click', closeModal);
|
||||
modal.querySelector('.modal-backdrop').addEventListener('click', closeModal);
|
||||
document.addEventListener('keydown', e => { if (e.key === 'Escape') closeModal(); });
|
||||
modalOpenBtn.addEventListener('click', () => {
|
||||
if (_modalPath) apiFetch('/open', { method: 'POST', body: JSON.stringify({ path: _modalPath }) });
|
||||
});
|
||||
```
|
||||
|
||||
In `loadTranscripts`, replace the existing `div.addEventListener` block and the `div.append(name, meta)` line with:
|
||||
|
||||
```javascript
|
||||
div.addEventListener('click', () => openModal(t.filename, t.path));
|
||||
|
||||
const delBtn = document.createElement('button');
|
||||
delBtn.className = 'del-btn';
|
||||
delBtn.title = 'Löschen';
|
||||
delBtn.innerHTML = '<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor"><path d="M9 3h6l1 1h4v2H4V4h4l1-1zm-3 5h12l-1 13H7L6 8zm5 2v9h2v-9h-2zm-3 0v9h2v-9H8zm8 0v9h2v-9h-2z"/></svg>';
|
||||
delBtn.addEventListener('click', async (e) => {
|
||||
e.stopPropagation();
|
||||
await apiFetch(`/transcripts/${encodeURIComponent(t.filename)}`, { method: 'DELETE' });
|
||||
loadTranscripts();
|
||||
});
|
||||
|
||||
div.append(name, meta, delBtn);
|
||||
```
|
||||
|
||||
Remove the WS handler block that references preview:
|
||||
```javascript
|
||||
if (msg.event === 'transcribed' || msg.event === 'refined') {
|
||||
const text = msg.raw || msg.markdown || '';
|
||||
preview.textContent = text;
|
||||
preview.classList.add('has-content');
|
||||
}
|
||||
```
|
||||
|
||||
Also remove the `setStatus('idle')` + `preview.*` lines from the click handler's `reset` branch — keep only the `setStatus('idle')` call.
|
||||
|
||||
**Step 5: Manual verification checklist**
|
||||
|
||||
Restart app (`kill $(pgrep -f main.py) && .venv/bin/python main.py &`), open browser:
|
||||
|
||||
- [ ] No "Vorschau" section visible
|
||||
- [ ] Clicking a transcript item opens modal with rendered markdown
|
||||
- [ ] Title in modal header shows human-readable name (date prefix stripped)
|
||||
- [ ] Clicking backdrop or ✕ closes modal
|
||||
- [ ] Pressing Escape closes modal
|
||||
- [ ] "Im Editor öffnen" triggers xdg-open
|
||||
- [ ] Trash icon deletes file and refreshes list
|
||||
- [ ] Trash click does NOT open the modal
|
||||
|
||||
**Step 6: Commit**
|
||||
|
||||
```bash
|
||||
git add frontend/index.html frontend/app.js
|
||||
git commit -m "feat: transcript modal with markdown rendering, delete button, remove preview section"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Task 4: Run full test suite
|
||||
|
||||
```bash
|
||||
pytest -v
|
||||
```
|
||||
|
||||
Expected: all tests pass. Fix any regressions before pushing.
|
||||
|
||||
```bash
|
||||
git push
|
||||
```
|
||||
@@ -0,0 +1,143 @@
|
||||
# Speaker Diarization & Name Identification Design
|
||||
|
||||
**Date:** 2026-04-02
|
||||
|
||||
## Goal
|
||||
|
||||
Extend the transcription pipeline with speaker diarization (pyannote.audio) and automatic
|
||||
speaker name identification (Ollama). Every recording produces three documents: an index,
|
||||
a raw transcript with speaker labels, and a polished summary.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
WAV
|
||||
├─► Whisper → segments [(start, end, text), …]
|
||||
├─► pyannote → speaker segments [(start, end, "SPEAKER_00"), …]
|
||||
│
|
||||
└─► Alignment → [(speaker_label, text), …]
|
||||
│
|
||||
├─► Ollama (name prompt) → {"SPEAKER_00": "Thomas", "SPEAKER_01": "Möller"}
|
||||
│ └─ Fallback: WS event `speakers_unknown` → UI card → POST /speakers
|
||||
│
|
||||
├─► transkript.md (speaker: text, new paragraph per speaker change)
|
||||
├─► zusammenfassung.md (key points, open questions, next steps)
|
||||
└─► index.md (TL;DR, speakers, duration, links to both)
|
||||
```
|
||||
|
||||
## Config Schema Extension
|
||||
|
||||
```toml
|
||||
[diarization]
|
||||
enabled = true
|
||||
hf_token = "hf_..." # HuggingFace read token
|
||||
```
|
||||
|
||||
## New Module: diarization.py
|
||||
|
||||
```python
|
||||
class Diarizer:
|
||||
def __init__(self, hf_token: str): ...
|
||||
async def diarize(self, wav_path: str) -> list[tuple[float, float, str]]:
|
||||
# returns [(start_sec, end_sec, "SPEAKER_00"), …]
|
||||
```
|
||||
|
||||
Uses `pyannote/speaker-diarization-3.1`. Loaded lazily on first call.
|
||||
Runs in `loop.run_in_executor` to avoid blocking the event loop.
|
||||
|
||||
## Timestamp Alignment
|
||||
|
||||
For each Whisper segment `(start, end, text)`: find the pyannote speaker with the
|
||||
greatest time overlap → assign that speaker label. Consecutive segments with the same
|
||||
speaker are merged into one paragraph.
|
||||
|
||||
**Remote Whisper path:** request `timestamp_granularities=["segment"]` from the
|
||||
OpenAI-compatible API — the response includes `segments[].start` and `segments[].end`.
|
||||
|
||||
## Speaker Name Identification
|
||||
|
||||
Ollama receives the first ~2000 chars of the aligned transcript and a prompt:
|
||||
|
||||
> "Analysiere das folgende Gesprächstranskript. Ermittle welche Namen den Sprechern
|
||||
> zugeordnet werden können (z.B. durch direkte Anrede). Antworte NUR mit JSON:
|
||||
> `{\"SPEAKER_00\": \"Name oder null\", …}`"
|
||||
|
||||
If all values are `null` or parsing fails → emit `speakers_unknown` WebSocket event.
|
||||
If at least one name is found → apply known names, leave unknowns as `Sprecher N`.
|
||||
|
||||
## Frontend: Speaker Naming Card
|
||||
|
||||
Triggered by `speakers_unknown` WS event. Shown above the record button.
|
||||
|
||||
Each speaker has:
|
||||
- Excerpt navigator: `‹ "first few sentences…" 1/4 ›` — arrows cycle through all
|
||||
excerpts (3-4 sentences each) for that speaker
|
||||
- Text input for the name
|
||||
|
||||
Buttons:
|
||||
- **Übernehmen** → `POST /speakers` with `{"SPEAKER_00": "Thomas", …}` → pipeline
|
||||
writes the three documents and emits `saved`
|
||||
- **Anonym lassen** → same POST with empty strings → labels stay as `Sprecher 1` etc.
|
||||
|
||||
## New API Endpoint
|
||||
|
||||
| Method | Path | Description |
|
||||
|--------|------|-------------|
|
||||
| POST | `/speakers` | Receives speaker name mapping, triggers document writing |
|
||||
|
||||
The pipeline pauses after alignment and waits for `/speakers` before writing output.
|
||||
State stored in `api/state.py` as `state._pending_speakers`.
|
||||
|
||||
## Three Output Documents
|
||||
|
||||
All three share the same filename base (e.g. `2026-04-02-1430-Meeting`):
|
||||
|
||||
**`...-index.md`**
|
||||
```markdown
|
||||
# Meeting — 02.04.2026 14:30
|
||||
|
||||
**Sprecher:** Thomas, Möller
|
||||
**Dauer:** 23 min
|
||||
|
||||
> [2-3 sentence TL;DR from Ollama]
|
||||
|
||||
- [Transkript](…-transkript.md)
|
||||
- [Zusammenfassung](…-zusammenfassung.md)
|
||||
```
|
||||
|
||||
**`...-transkript.md`** — Raw annotated transcript, new paragraph per speaker change:
|
||||
```markdown
|
||||
**Thomas:** Gut, dann fangen wir an.
|
||||
|
||||
**Möller:** Ich hab das Budget schon vorbereitet…
|
||||
```
|
||||
|
||||
**`...-zusammenfassung.md`** — Polished summary document (Ollama):
|
||||
```markdown
|
||||
# Meeting-Zusammenfassung — 02.04.2026
|
||||
|
||||
## Wichtigste Punkte
|
||||
…
|
||||
|
||||
## Offene Fragen
|
||||
…
|
||||
|
||||
## Nächste Schritte / Ideen
|
||||
…
|
||||
```
|
||||
|
||||
All three appear in the transcript list. Index entries get a `meeting` badge.
|
||||
|
||||
## HuggingFace Setup (one-time, per machine)
|
||||
|
||||
1. Create account at huggingface.co
|
||||
2. Go to https://huggingface.co/pyannote/speaker-diarization-3.1 → click
|
||||
"Access repository" and accept the terms of service
|
||||
3. Go to huggingface.co/settings/tokens → create a token with **Read** access
|
||||
4. Enter the token in Transkriptor settings → Einstellungen → Diarisierung
|
||||
|
||||
## Not in Scope
|
||||
|
||||
- Speaker voice profiles / pre-registration
|
||||
- More than one diarization model
|
||||
- Windows support
|
||||
File diff suppressed because it is too large
Load Diff
+224
-15
@@ -1,16 +1,36 @@
|
||||
const btn = document.getElementById('record-btn');
|
||||
const statusText = document.getElementById('status-text');
|
||||
const headerStatus = document.getElementById('header-status');
|
||||
const preview = document.getElementById('preview');
|
||||
const instructionsEl = document.getElementById('instructions');
|
||||
const transcriptList = document.getElementById('transcript-list');
|
||||
const userChip = document.getElementById('user-chip');
|
||||
const logoutBtn = document.getElementById('logout-btn');
|
||||
const modal = document.getElementById('modal');
|
||||
const modalTitle = document.getElementById('modal-title');
|
||||
const modalBody = document.getElementById('modal-body');
|
||||
const modalObsidianBtn = document.getElementById('modal-obsidian-btn');
|
||||
const modalFolderBtn = document.getElementById('modal-folder-btn');
|
||||
const modalOpenBtn = document.getElementById('modal-open-btn');
|
||||
const modalCloseBtn = document.getElementById('modal-close-btn');
|
||||
const modalTabs = document.getElementById('modal-tabs');
|
||||
let _modalPath = null;
|
||||
let _modalPaths = null;
|
||||
let _modalFilename = null;
|
||||
let _modalRelated = null;
|
||||
|
||||
const speakerCard = document.getElementById('speaker-card');
|
||||
const speakerRows = document.getElementById('speaker-rows');
|
||||
const speakerConfirmBtn = document.getElementById('speaker-confirm-btn');
|
||||
const speakerAnonymBtn = document.getElementById('speaker-anonym-btn');
|
||||
|
||||
// state for excerpt navigation: { speakerId: { excerpts: [], idx: 0 } }
|
||||
let _speakerState = {};
|
||||
|
||||
const STATUS_LABELS = {
|
||||
idle: 'Bereit',
|
||||
recording: 'Aufnahme läuft\u2026',
|
||||
processing: 'Wird verarbeitet\u2026',
|
||||
awaiting_speakers: 'Sprecher benennen\u2026',
|
||||
error: 'Fehler',
|
||||
};
|
||||
|
||||
@@ -36,6 +56,77 @@ logoutBtn.addEventListener('click', () => {
|
||||
});
|
||||
});
|
||||
|
||||
function _loadModalContent(filename, activeTab) {
|
||||
modalBody.innerHTML = '';
|
||||
apiFetch(`/transcripts/${filename.split('/').map(encodeURIComponent).join('/')}`)
|
||||
.then(r => r.text())
|
||||
.then(md => { modalBody.innerHTML = DOMPurify.sanitize(marked.parse(md)); });
|
||||
// update active tab
|
||||
modalTabs.querySelectorAll('.modal-tab').forEach(t => {
|
||||
t.classList.toggle('active', t.dataset.file === filename);
|
||||
});
|
||||
}
|
||||
|
||||
function openModal(filename, path, paths, related) {
|
||||
_modalPath = path;
|
||||
_modalPaths = paths || null;
|
||||
_modalFilename = filename;
|
||||
_modalRelated = related || null;
|
||||
modalTitle.textContent = filename.replace(/\.md$/, '').replace(/^\d{4}-\d{2}-\d{2}-\d{4}-/, '').replace(/-index$/, '');
|
||||
modal.classList.remove('hidden');
|
||||
|
||||
// Build tabs if there are related files
|
||||
modalTabs.innerHTML = '';
|
||||
if (related && (related.transkript || related.zusammenfassung)) {
|
||||
modalTabs.style.display = 'flex';
|
||||
const tabDefs = [
|
||||
{ label: 'Index', file: filename },
|
||||
{ label: 'Transkript', file: related.transkript },
|
||||
{ label: 'Zusammenfassung', file: related.zusammenfassung },
|
||||
].filter(t => t.file);
|
||||
tabDefs.forEach(({ label, file }) => {
|
||||
const btn = document.createElement('button');
|
||||
btn.className = 'modal-tab';
|
||||
btn.textContent = label;
|
||||
btn.dataset.file = file;
|
||||
btn.addEventListener('click', () => _loadModalContent(file, file));
|
||||
modalTabs.appendChild(btn);
|
||||
});
|
||||
} else {
|
||||
modalTabs.style.display = 'none';
|
||||
}
|
||||
|
||||
_loadModalContent(filename, filename);
|
||||
}
|
||||
|
||||
function closeModal() {
|
||||
modal.classList.add('hidden');
|
||||
_modalPath = null;
|
||||
_modalPaths = null;
|
||||
_modalFilename = null;
|
||||
_modalRelated = null;
|
||||
}
|
||||
|
||||
modalCloseBtn.addEventListener('click', closeModal);
|
||||
modal.querySelector('.modal-backdrop').addEventListener('click', closeModal);
|
||||
document.addEventListener('keydown', e => { if (e.key === 'Escape') closeModal(); });
|
||||
modalObsidianBtn.addEventListener('click', () => {
|
||||
if (_modalPaths) {
|
||||
apiFetch('/open', { method: 'POST', body: JSON.stringify({ paths: Object.values(_modalPaths), mode: 'obsidian' }) });
|
||||
} else if (_modalPath) {
|
||||
apiFetch('/open', { method: 'POST', body: JSON.stringify({ path: _modalPath, mode: 'obsidian' }) });
|
||||
}
|
||||
});
|
||||
modalFolderBtn.addEventListener('click', () => {
|
||||
if (_modalPath) apiFetch('/open', { method: 'POST', body: JSON.stringify({ path: _modalPath, mode: 'folder' }) });
|
||||
});
|
||||
modalOpenBtn.addEventListener('click', () => {
|
||||
if (_modalPath) apiFetch('/open', { method: 'POST', body: JSON.stringify({ path: _modalPath }) });
|
||||
});
|
||||
|
||||
speakerConfirmBtn.addEventListener('click', () => submitSpeakers(true));
|
||||
speakerAnonymBtn.addEventListener('click', () => submitSpeakers(false));
|
||||
|
||||
instructionsEl.addEventListener('input', async () => {
|
||||
await apiFetch('/instructions', {
|
||||
method: 'POST',
|
||||
@@ -47,12 +138,20 @@ function setStatus(status) {
|
||||
btn.className = status;
|
||||
headerStatus.className = `status-badge ${status}`;
|
||||
const label = STATUS_LABELS[status] || status;
|
||||
statusText.textContent = label;
|
||||
statusText.textContent = status === 'error' ? label + ' — klicken zum Zurücksetzen' : label;
|
||||
headerStatus.textContent = label;
|
||||
btn.disabled = status === 'processing';
|
||||
}
|
||||
|
||||
btn.addEventListener('click', () => apiFetch('/toggle', { method: 'POST' }));
|
||||
btn.addEventListener('click', async () => {
|
||||
const r = await apiFetch('/toggle', { method: 'POST' });
|
||||
const data = await r.json();
|
||||
if (data.action === 'started') {
|
||||
setStatus('recording');
|
||||
} else if (data.action === 'reset') {
|
||||
setStatus('idle');
|
||||
}
|
||||
});
|
||||
|
||||
function connectWs() {
|
||||
const proto = location.protocol === 'https:' ? 'wss:' : 'ws:';
|
||||
@@ -60,23 +159,95 @@ function connectWs() {
|
||||
ws.onmessage = (e) => {
|
||||
const msg = JSON.parse(e.data);
|
||||
if (msg.event === 'processing') setStatus('processing');
|
||||
if (msg.event === 'transcribed' || msg.event === 'refined') {
|
||||
const text = msg.raw || msg.markdown || '';
|
||||
preview.textContent = text;
|
||||
preview.classList.add('has-content');
|
||||
}
|
||||
if (msg.event === 'saved') {
|
||||
setStatus('idle');
|
||||
loadTranscripts();
|
||||
}
|
||||
if (msg.event === 'error') {
|
||||
setStatus('idle');
|
||||
preview.textContent = `Fehler: ${msg.message}`;
|
||||
setStatus('error');
|
||||
}
|
||||
if (msg.event === 'speakers_unknown') {
|
||||
setStatus('awaiting_speakers');
|
||||
showSpeakerCard(msg.speakers);
|
||||
}
|
||||
};
|
||||
ws.onclose = () => setTimeout(connectWs, 2000);
|
||||
}
|
||||
|
||||
function showSpeakerCard(speakers) {
|
||||
_speakerState = {};
|
||||
speakerRows.innerHTML = '';
|
||||
speakers.forEach(({ id, excerpts }) => {
|
||||
_speakerState[id] = { excerpts, idx: 0 };
|
||||
|
||||
const row = document.createElement('div');
|
||||
row.className = 'speaker-row';
|
||||
|
||||
const nav = document.createElement('div');
|
||||
nav.className = 'excerpt-nav';
|
||||
|
||||
const prevBtn = document.createElement('button');
|
||||
prevBtn.className = 'excerpt-nav-btn';
|
||||
prevBtn.textContent = '‹';
|
||||
prevBtn.title = 'Vorheriger Ausschnitt';
|
||||
|
||||
const nextBtn = document.createElement('button');
|
||||
nextBtn.className = 'excerpt-nav-btn';
|
||||
nextBtn.textContent = '›';
|
||||
nextBtn.title = 'Nächster Ausschnitt';
|
||||
|
||||
const counter = document.createElement('span');
|
||||
counter.className = 'excerpt-counter';
|
||||
|
||||
const excerptEl = document.createElement('div');
|
||||
excerptEl.className = 'speaker-excerpt';
|
||||
|
||||
function updateExcerpt() {
|
||||
const st = _speakerState[id];
|
||||
excerptEl.textContent = `"${st.excerpts[st.idx]}"`;
|
||||
counter.textContent = `${st.idx + 1} / ${st.excerpts.length}`;
|
||||
prevBtn.disabled = st.idx === 0;
|
||||
nextBtn.disabled = st.idx === st.excerpts.length - 1;
|
||||
}
|
||||
|
||||
prevBtn.addEventListener('click', () => { _speakerState[id].idx--; updateExcerpt(); });
|
||||
nextBtn.addEventListener('click', () => { _speakerState[id].idx++; updateExcerpt(); });
|
||||
|
||||
nav.append(prevBtn, counter, nextBtn);
|
||||
|
||||
const input = document.createElement('input');
|
||||
input.type = 'text';
|
||||
input.className = 'speaker-name-input';
|
||||
input.placeholder = `Name für ${id.replace('SPEAKER_', 'Sprecher ')}`;
|
||||
input.dataset.speakerId = id;
|
||||
|
||||
row.append(nav, excerptEl, input);
|
||||
speakerRows.appendChild(row);
|
||||
updateExcerpt();
|
||||
});
|
||||
speakerCard.classList.remove('hidden');
|
||||
}
|
||||
|
||||
function hideSpeakerCard() {
|
||||
speakerCard.classList.add('hidden');
|
||||
speakerRows.innerHTML = '';
|
||||
_speakerState = {};
|
||||
}
|
||||
|
||||
async function submitSpeakers(useNames) {
|
||||
const mapping = {};
|
||||
if (useNames) {
|
||||
speakerRows.querySelectorAll('.speaker-name-input').forEach(inp => {
|
||||
mapping[inp.dataset.speakerId] = inp.value.trim();
|
||||
});
|
||||
} else {
|
||||
Object.keys(_speakerState).forEach(id => { mapping[id] = ''; });
|
||||
}
|
||||
hideSpeakerCard();
|
||||
setStatus('processing');
|
||||
await apiFetch('/speakers', { method: 'POST', body: JSON.stringify(mapping) });
|
||||
}
|
||||
|
||||
async function loadTranscripts() {
|
||||
const r = await apiFetch('/transcripts');
|
||||
if (!r.ok) return;
|
||||
@@ -87,20 +258,50 @@ async function loadTranscripts() {
|
||||
const div = document.createElement('div');
|
||||
div.className = 'transcript-item';
|
||||
|
||||
const dateMatch = t.filename.match(/^(\d{4}-\d{2}-\d{2})-(\d{2})(\d{2})-/);
|
||||
const dateEl = document.createElement('span');
|
||||
dateEl.className = 'meta item-date';
|
||||
dateEl.textContent = dateMatch ? `${dateMatch[1]} ${dateMatch[2]}:${dateMatch[3]}` : '';
|
||||
|
||||
const name = document.createElement('span');
|
||||
name.textContent = t.filename.replace('.md', '');
|
||||
name.className = 'name';
|
||||
name.textContent = t.filename.replace(/\.md$/, '').replace(/^\d{4}-\d{2}-\d{2}-\d{4}-/, '');
|
||||
|
||||
const meta = document.createElement('span');
|
||||
meta.className = 'meta';
|
||||
meta.textContent = `${Math.round(t.size / 1024 * 10) / 10} KB`;
|
||||
|
||||
div.append(name, meta);
|
||||
div.addEventListener('click', () => {
|
||||
apiFetch('/open', {
|
||||
div.addEventListener('click', () => openModal(t.filename, t.path, null, t.related || null));
|
||||
|
||||
const reprocessBtn = document.createElement('button');
|
||||
reprocessBtn.className = 'del-btn';
|
||||
reprocessBtn.title = 'Neu verarbeiten';
|
||||
reprocessBtn.innerHTML = '<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor"><path d="M17.65 6.35A7.96 7.96 0 0 0 12 4a8 8 0 1 0 8 8h-2a6 6 0 1 1-1.76-4.24l-2.24 2.24H20V4l-2.35 2.35z"/></svg>';
|
||||
reprocessBtn.addEventListener('click', async (e) => {
|
||||
e.stopPropagation();
|
||||
reprocessBtn.disabled = true;
|
||||
await apiFetch(`/transcripts/${encodeURIComponent(t.filename)}/reprocess`, {
|
||||
method: 'POST',
|
||||
body: JSON.stringify({ path: t.path }),
|
||||
body: JSON.stringify({ instructions: instructionsEl.value }),
|
||||
});
|
||||
reprocessBtn.disabled = false;
|
||||
loadTranscripts();
|
||||
});
|
||||
|
||||
const delBtn = document.createElement('button');
|
||||
delBtn.className = 'del-btn';
|
||||
delBtn.title = 'Löschen';
|
||||
delBtn.innerHTML = '<svg width="14" height="14" viewBox="0 0 24 24" fill="currentColor"><path d="M9 3h6l1 1h4v2H4V4h4l1-1zm-3 5h12l-1 13H7L6 8zm5 2v9h2v-9h-2zm-3 0v9h2v-9H8zm8 0v9h2v-9h-2z"/></svg>';
|
||||
delBtn.addEventListener('click', async (e) => {
|
||||
e.stopPropagation();
|
||||
await apiFetch(`/transcripts/${encodeURIComponent(t.filename)}`, { method: 'DELETE' });
|
||||
loadTranscripts();
|
||||
});
|
||||
|
||||
const actions = document.createElement('div');
|
||||
actions.className = 'item-actions';
|
||||
actions.append(reprocessBtn, delBtn);
|
||||
div.append(dateEl, name, meta, actions);
|
||||
return div;
|
||||
})
|
||||
);
|
||||
@@ -117,6 +318,14 @@ async function loadTranscripts() {
|
||||
if (data.username) {
|
||||
userChip.textContent = data.username;
|
||||
}
|
||||
if (data.is_admin) {
|
||||
const gearLink = document.createElement('a');
|
||||
gearLink.href = '/settings';
|
||||
gearLink.className = 'back-btn';
|
||||
gearLink.title = 'Einstellungen';
|
||||
gearLink.textContent = '\u2699';
|
||||
document.querySelector('.header-right').prepend(gearLink);
|
||||
}
|
||||
connectWs();
|
||||
loadTranscripts();
|
||||
})();
|
||||
|
||||
+140
-20
@@ -33,9 +33,9 @@
|
||||
padding: 16px 24px;
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
.logo-dot { width: 12px; height: 12px; background: var(--red); border-radius: 50%; }
|
||||
header h1 { font-size: 1.1rem; font-weight: 600; letter-spacing: 0.04em; }
|
||||
header h1 span { color: var(--red); }
|
||||
.header-logo { height: 28px; width: auto; display: block; }
|
||||
.header-divider { width: 1px; height: 20px; background: var(--border); flex-shrink: 0; }
|
||||
.header-appname { font-size: 1rem; font-weight: 600; letter-spacing: 0.04em; color: var(--muted); }
|
||||
.header-right { margin-left: auto; display: flex; align-items: center; gap: 12px; }
|
||||
.status-badge {
|
||||
font-size: 0.75rem;
|
||||
@@ -68,6 +68,13 @@
|
||||
transition: border-color 0.15s, color 0.15s;
|
||||
}
|
||||
.logout-btn:hover { border-color: var(--red); color: var(--red); }
|
||||
.back-btn {
|
||||
font-size: .75rem; padding: 4px 10px; border-radius: 20px;
|
||||
background: none; border: 1px solid var(--border); color: var(--muted);
|
||||
cursor: pointer; font-family: inherit; text-decoration: none;
|
||||
transition: border-color .15s, color .15s;
|
||||
}
|
||||
.back-btn:hover { border-color: var(--red); color: var(--red); }
|
||||
main {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
@@ -107,30 +114,108 @@
|
||||
}
|
||||
textarea:focus { border-color: var(--yellow); }
|
||||
textarea::placeholder { color: var(--muted); }
|
||||
.preview-section { display: flex; flex-direction: column; gap: 8px; }
|
||||
#preview {
|
||||
background: var(--surface); border: 1px solid var(--border);
|
||||
border-radius: 8px; padding: 16px;
|
||||
font-size: 0.85rem; line-height: 1.6; color: var(--muted);
|
||||
min-height: 60px; white-space: pre-wrap; word-break: break-word;
|
||||
}
|
||||
#preview.has-content { color: var(--text); }
|
||||
.transcripts-section { display: flex; flex-direction: column; gap: 8px; }
|
||||
#transcript-list { display: flex; flex-direction: column; gap: 6px; }
|
||||
.transcript-item {
|
||||
background: var(--surface); border: 1px solid var(--border);
|
||||
border-radius: 6px; padding: 10px 14px;
|
||||
display: flex; align-items: center; justify-content: space-between;
|
||||
display: flex; align-items: center; gap: 10px;
|
||||
font-size: 0.82rem; cursor: pointer; transition: border-color 0.1s;
|
||||
}
|
||||
.transcript-item:hover { border-color: var(--red); }
|
||||
.transcript-item .meta { color: var(--muted); font-size: 0.75rem; }
|
||||
.transcript-item .name { flex: 1; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
|
||||
.transcript-item .meta { color: var(--muted); font-size: 0.75rem; flex-shrink: 0; }
|
||||
.item-date { font-size: 0.75rem; color: var(--muted); flex-shrink: 0; font-variant-numeric: tabular-nums; }
|
||||
.item-actions { display: flex; gap: 2px; flex-shrink: 0; }
|
||||
.modal { position: fixed; inset: 0; z-index: 100; display: flex; align-items: center; justify-content: center; }
|
||||
.modal.hidden { display: none; }
|
||||
.modal-backdrop { position: absolute; inset: 0; background: rgba(0,0,0,0.7); }
|
||||
.modal-panel {
|
||||
position: relative; z-index: 1;
|
||||
background: var(--surface); border: 1px solid var(--border); border-radius: 10px;
|
||||
width: min(800px, 95vw); max-height: 85vh;
|
||||
display: flex; flex-direction: column;
|
||||
}
|
||||
.modal-header {
|
||||
display: flex; align-items: center; justify-content: space-between;
|
||||
padding: 14px 18px; border-bottom: 1px solid var(--border);
|
||||
flex-shrink: 0;
|
||||
}
|
||||
.modal-title { font-size: 0.9rem; font-weight: 600; }
|
||||
.modal-actions { display: flex; gap: 8px; }
|
||||
.modal-btn {
|
||||
background: none; border: 1px solid var(--border); color: var(--muted);
|
||||
border-radius: 6px; padding: 4px 8px; cursor: pointer; font-family: inherit;
|
||||
font-size: 0.85rem; display: flex; align-items: center;
|
||||
transition: border-color 0.15s, color 0.15s;
|
||||
}
|
||||
.modal-btn:hover { border-color: var(--red); color: var(--red); }
|
||||
.modal-body {
|
||||
padding: 20px 24px; overflow-y: auto; flex: 1;
|
||||
font-size: 0.9rem; line-height: 1.7; color: var(--text);
|
||||
}
|
||||
.modal-body h1,.modal-body h2,.modal-body h3 { margin: 1em 0 0.4em; font-weight: 600; }
|
||||
.modal-body h1 { font-size: 1.3rem; }
|
||||
.modal-body h2 { font-size: 1.1rem; }
|
||||
.modal-body p { margin: 0 0 0.8em; }
|
||||
.modal-body ul,.modal-body ol { padding-left: 1.5em; margin: 0 0 0.8em; }
|
||||
.modal-body code { background: var(--surface2); padding: 2px 5px; border-radius: 3px; font-size: 0.85em; }
|
||||
.modal-body pre { background: var(--surface2); padding: 12px; border-radius: 6px; overflow-x: auto; margin: 0 0 0.8em; }
|
||||
.modal-body pre code { background: none; padding: 0; }
|
||||
.modal-body hr { border: none; border-top: 1px solid var(--border); margin: 1em 0; }
|
||||
.modal-tabs { display: flex; gap: 4px; padding: 10px 18px 0; border-bottom: 1px solid var(--border); flex-shrink: 0; }
|
||||
.modal-tab { background: none; border: 1px solid transparent; border-bottom: none; border-radius: 6px 6px 0 0; padding: 5px 12px; font-size: 0.78rem; font-family: inherit; color: var(--muted); cursor: pointer; transition: color 0.15s, border-color 0.15s; margin-bottom: -1px; }
|
||||
.modal-tab:hover { color: var(--text); }
|
||||
.modal-tab.active { color: var(--text); border-color: var(--border); background: var(--surface); }
|
||||
.del-btn {
|
||||
background: none; border: none; color: var(--muted); cursor: pointer;
|
||||
padding: 4px; border-radius: 4px; display: flex; align-items: center;
|
||||
transition: color 0.15s; flex-shrink: 0;
|
||||
}
|
||||
.del-btn:hover { color: var(--red); }
|
||||
.speaker-card {
|
||||
background: var(--surface); border: 1px solid var(--yellow);
|
||||
border-radius: 10px; padding: 20px; display: flex; flex-direction: column; gap: 16px;
|
||||
}
|
||||
.speaker-card.hidden { display: none; }
|
||||
.speaker-card-title { font-size: 0.8rem; text-transform: uppercase; letter-spacing: 0.08em; color: var(--yellow); }
|
||||
.speaker-rows { display: flex; flex-direction: column; gap: 14px; }
|
||||
.speaker-row { display: flex; flex-direction: column; gap: 6px; }
|
||||
.excerpt-nav { display: flex; align-items: center; gap: 8px; }
|
||||
.excerpt-nav-btn {
|
||||
background: none; border: 1px solid var(--border); color: var(--muted);
|
||||
border-radius: 4px; padding: 2px 8px; cursor: pointer; font-family: inherit;
|
||||
font-size: 0.85rem; transition: border-color 0.15s, color 0.15s;
|
||||
}
|
||||
.excerpt-nav-btn:hover { border-color: var(--yellow); color: var(--yellow); }
|
||||
.excerpt-counter { font-size: 0.75rem; color: var(--muted); white-space: nowrap; }
|
||||
.speaker-excerpt {
|
||||
font-size: 0.82rem; color: var(--muted); font-style: italic;
|
||||
background: var(--surface2); border-radius: 6px; padding: 8px 12px;
|
||||
line-height: 1.5; min-height: 3em;
|
||||
}
|
||||
.speaker-name-input {
|
||||
background: var(--surface2); border: 1px solid var(--border); color: var(--text);
|
||||
border-radius: 6px; padding: 8px 12px; font-family: inherit; font-size: 0.9rem;
|
||||
outline: none; transition: border-color 0.15s; width: 100%;
|
||||
}
|
||||
.speaker-name-input:focus { border-color: var(--yellow); }
|
||||
.speaker-card-actions { display: flex; gap: 10px; justify-content: flex-end; }
|
||||
.card-btn {
|
||||
padding: 8px 18px; border-radius: 6px; border: 1px solid var(--border);
|
||||
background: none; color: var(--text); cursor: pointer; font-family: inherit;
|
||||
font-size: 0.85rem; transition: all 0.15s;
|
||||
}
|
||||
.card-btn:hover { border-color: var(--yellow); color: var(--yellow); }
|
||||
.card-btn.primary { background: var(--yellow); color: #111; border-color: var(--yellow); font-weight: 600; }
|
||||
.card-btn.primary:hover { background: #e6c200; border-color: #e6c200; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<div class="logo-dot"></div>
|
||||
<h1>tüit <span>Transkriptor</span></h1>
|
||||
<img src="/logo.svg" class="header-logo" alt="tüit">
|
||||
<div class="header-divider"></div>
|
||||
<span class="header-appname">Transkriptor</span>
|
||||
<div class="header-right">
|
||||
<span class="status-badge" id="header-status">Bereit</span>
|
||||
<span class="user-chip" id="user-chip"></span>
|
||||
@@ -138,6 +223,15 @@
|
||||
</div>
|
||||
</header>
|
||||
<main>
|
||||
<div id="speaker-card" class="speaker-card hidden">
|
||||
<span class="speaker-card-title">Sprecher identifizieren</span>
|
||||
<div id="speaker-rows" class="speaker-rows"></div>
|
||||
<div class="speaker-card-actions">
|
||||
<button id="speaker-anonym-btn" class="card-btn">Anonym lassen</button>
|
||||
<button id="speaker-confirm-btn" class="card-btn primary">Übernehmen</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<section class="record-section">
|
||||
<button id="record-btn" title="Aufnahme starten / stoppen">
|
||||
<svg class="mic-icon" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
|
||||
@@ -155,16 +249,42 @@
|
||||
></textarea>
|
||||
</section>
|
||||
|
||||
<section class="preview-section">
|
||||
<label>Vorschau</label>
|
||||
<div id="preview">Noch keine Aufnahme verarbeitet.</div>
|
||||
</section>
|
||||
|
||||
<section class="transcripts-section">
|
||||
<label>Meine Transkripte</label>
|
||||
<div id="transcript-list"></div>
|
||||
</section>
|
||||
|
||||
<div id="modal" class="modal hidden" role="dialog" aria-modal="true">
|
||||
<div class="modal-backdrop"></div>
|
||||
<div class="modal-panel">
|
||||
<div class="modal-header">
|
||||
<span id="modal-title" class="modal-title"></span>
|
||||
<div class="modal-actions">
|
||||
<button id="modal-obsidian-btn" class="modal-btn" title="In Obsidian öffnen">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
|
||||
<path d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-1 14H9V8h2v8zm4 0h-2V8h2v8z"/>
|
||||
</svg>
|
||||
</button>
|
||||
<button id="modal-folder-btn" class="modal-btn" title="Verzeichnis öffnen">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
|
||||
<path d="M10 4H4a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h16a2 2 0 0 0 2-2V8a2 2 0 0 0-2-2h-8l-2-2z"/>
|
||||
</svg>
|
||||
</button>
|
||||
<button id="modal-open-btn" class="modal-btn" title="Im Editor öffnen">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
|
||||
<path d="M14 3h7v7h-2V6.41l-9.29 9.3-1.42-1.42L17.59 5H14V3zm-1 2H5a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2v-8h-2v8H5V7h8V5z"/>
|
||||
</svg>
|
||||
</button>
|
||||
<button id="modal-close-btn" class="modal-btn" title="Schließen">✕</button>
|
||||
</div>
|
||||
</div>
|
||||
<div id="modal-tabs" class="modal-tabs" style="display:none"></div>
|
||||
<div id="modal-body" class="modal-body"></div>
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/dompurify/dist/purify.min.js"></script>
|
||||
<script src="/app.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
Executable
+112
@@ -0,0 +1,112 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
width="297mm"
|
||||
height="210mm"
|
||||
viewBox="0 0 1052.3622 744.09448"
|
||||
id="svg2"
|
||||
version="1.1"
|
||||
inkscape:version="0.91 r13725"
|
||||
sodipodi:docname="tüit Logo tü original Farben ohne Rand.svg"
|
||||
enable-background="new">
|
||||
<defs
|
||||
id="defs4" />
|
||||
<sodipodi:namedview
|
||||
id="base"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="8"
|
||||
inkscape:cx="441.15795"
|
||||
inkscape:cy="487.31844"
|
||||
inkscape:document-units="px"
|
||||
inkscape:current-layer="layer3"
|
||||
showgrid="false"
|
||||
inkscape:window-width="1920"
|
||||
inkscape:window-height="994"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="34"
|
||||
inkscape:window-maximized="1">
|
||||
<inkscape:grid
|
||||
type="xygrid"
|
||||
id="grid3344" />
|
||||
</sodipodi:namedview>
|
||||
<metadata
|
||||
id="metadata7">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
inkscape:label="Ebene 1"
|
||||
inkscape:groupmode="layer"
|
||||
id="layer1"
|
||||
transform="translate(0,-308.26772)"
|
||||
style="display:none">
|
||||
<g
|
||||
id="g3388">
|
||||
<flowRoot
|
||||
style="font-style:normal;font-weight:normal;font-size:40px;line-height:125%;font-family:sans-serif;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
id="flowRoot3336"
|
||||
xml:space="preserve"><flowRegion
|
||||
id="flowRegion3338"><rect
|
||||
y="158.07649"
|
||||
x="74.285713"
|
||||
height="137.14285"
|
||||
width="577.14288"
|
||||
id="rect3340" /></flowRegion><flowPara
|
||||
id="flowPara3342" /></flowRoot> </g>
|
||||
</g>
|
||||
<g
|
||||
inkscape:groupmode="layer"
|
||||
id="layer3"
|
||||
inkscape:label="Ebene 2"
|
||||
style="display:inline">
|
||||
<path
|
||||
style="opacity:1;fill:#da251c;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:0.0452675"
|
||||
d="m 571.71573,376.46706 0,95 c 0,50 40,50 40,50 l 0,100 -390,0 c -50,0 -150.000001,0 -150.000001,-150 l 0,-150 -45,0 c -3,0 -5,-2 -5,-5 l 0,-90 c 0,-3 2,-5 5,-5 l 45,0 0,-95 c 0,-3 2,-5 5,-5 l 90.000001,0 c 3,0 5,2 5,5 l 0,95 45,0 c 3,0 5,2 5,5 l 0,90 c 0,3 -2,5 -5,5 l -45,0 0,150 c 0,0 0,50 50,50 50,0 50,-50 50,-50 l 0,-95 c 0,-3 2,-5 5,-5 l 90,0 c 3,0 5,2 5,5 l 0,95 c 0,0 0,50 50,50 50,0 50,-50 50,-50 l 0,-95 c 0,-3 2,-5 5,-5 l 90,0 c 3,0 5,2 5,5 z"
|
||||
id="path9932"
|
||||
inkscape:connector-curvature="0"
|
||||
sodipodi:nodetypes="ccccccccccccccccccccccczcccccczccccc" />
|
||||
<path
|
||||
style="fill:#da251c;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:0"
|
||||
d="m 371.71573,226.46706 0,90 c 0,3 -2,5 -5,5 l -90,0 c -3,0 -5,-2 -5,-5 l 0,-90 c 0,-3 2,-5 5,-5 l 90,0 c 3,0 4.875,2 5,5 z"
|
||||
id="path9934"
|
||||
inkscape:connector-curvature="0"
|
||||
sodipodi:nodetypes="ccccccccc" />
|
||||
<path
|
||||
style="fill:#da251c;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:0"
|
||||
d="m 571.71573,226.46706 0,90 c 0,3 -2,5 -5,5 l -90,0 c -3,0 -5,-2 -5,-5 l 0,-90 c 0,-3 2,-5 5,-5 l 90,0 c 3,0 5,2 5,5 z"
|
||||
id="path9944"
|
||||
inkscape:connector-curvature="0"
|
||||
sodipodi:nodetypes="ccccccccc" />
|
||||
<path
|
||||
style="fill:#ffd802;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:0"
|
||||
d="m 766.71573,321.46706 -90,0 c -3,0 -5,-2 -5,-5 l 0,-90 c 0,-3 2,-5 5,-5 l 90,0 c 3,0 5,2 5,5 l 0,90 c 0,3 -2,5 -5,5 z"
|
||||
id="path9946"
|
||||
inkscape:connector-curvature="0"
|
||||
sodipodi:nodetypes="ccccccccc" />
|
||||
<path
|
||||
style="fill:#ffd802;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:0"
|
||||
d="m 1016.7157,321.46706 -44.99997,10e-6 0,150 c 0,150 -100,150 -150,150 l -190,0 0,-100 c 0,0 40,0 40,-50 l 0,-95 c 0,-3 2,-5 5,-5 l 90,0 c 3,0 5,2 5,5 l 0,95 c 0,0 0,50 50,50 50,0 50,-50 50,-50 l 0,-150 -45,0 c -3,0 -5,-2 -5,-5 l 0,-90 c 0,-3.00001 2,-5.00001 5,-5.00001 l 45,1e-5 0,-95.00001 c 0,-3 2,-5 5,-5 l 90,0 c 3,0 5,2 5,5 l 0,95 44.99997,0 c 3,0 5,2 5,5 l 0,90 c 0,3 -2,5 -5,5 z"
|
||||
id="path9948"
|
||||
inkscape:connector-curvature="0"
|
||||
sodipodi:nodetypes="cccccccccccczcccccccccccccccc" />
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 5.1 KiB |
@@ -0,0 +1,131 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>tüit Transkriptor — Einstellungen</title>
|
||||
<link rel="preconnect" href="https://fonts.googleapis.com">
|
||||
<link href="https://fonts.googleapis.com/css2?family=Overpass:wght@300;400;600;700&display=swap" rel="stylesheet">
|
||||
<style>
|
||||
:root { --red:#DA251C;--yellow:#FFD802;--bg:#111;--surface:#1a1a1a;--surface2:#232323;--text:#e8e8e8;--muted:#888;--border:#2e2e2e; }
|
||||
*{box-sizing:border-box;margin:0;padding:0;}
|
||||
body{font-family:'Overpass',system-ui,sans-serif;background:var(--bg);color:var(--text);min-height:100vh;display:flex;flex-direction:column;}
|
||||
header{display:flex;align-items:center;gap:12px;padding:16px 24px;border-bottom:1px solid var(--border);}
|
||||
.header-logo{height:28px;width:auto;display:block;}
|
||||
.header-divider{width:1px;height:20px;background:var(--border);flex-shrink:0;}
|
||||
.header-appname{font-size:1rem;font-weight:600;letter-spacing:.04em;color:var(--muted);}
|
||||
.header-right{margin-left:auto;display:flex;align-items:center;gap:12px;}
|
||||
.back-btn{font-size:.75rem;padding:4px 10px;border-radius:20px;background:none;border:1px solid var(--border);color:var(--muted);cursor:pointer;font-family:inherit;text-decoration:none;transition:border-color .15s,color .15s;}
|
||||
.back-btn:hover{border-color:var(--red);color:var(--red);}
|
||||
main{flex:1;display:flex;flex-direction:column;gap:24px;padding:24px;max-width:700px;width:100%;margin:0 auto;}
|
||||
h2{font-size:.8rem;color:var(--muted);text-transform:uppercase;letter-spacing:.06em;margin-bottom:12px;padding-bottom:8px;border-bottom:1px solid var(--border);}
|
||||
.field{display:flex;flex-direction:column;gap:6px;margin-bottom:14px;}
|
||||
label{font-size:.78rem;color:var(--muted);letter-spacing:.04em;}
|
||||
select,input[type=text]{background:var(--surface);border:1px solid var(--border);color:var(--text);border-radius:8px;padding:10px 12px;font-family:inherit;font-size:.9rem;outline:none;transition:border-color .15s;width:100%;}
|
||||
select:focus,input[type=text]:focus{border-color:var(--yellow);}
|
||||
.btn-row{display:flex;gap:10px;margin-top:4px;}
|
||||
.btn{font-size:.82rem;padding:8px 16px;border-radius:8px;border:1px solid var(--border);background:var(--surface2);color:var(--text);cursor:pointer;font-family:inherit;transition:border-color .15s,background .15s;}
|
||||
.btn:hover{border-color:var(--red);}
|
||||
.btn.primary{background:var(--red);border-color:var(--red);color:#fff;}
|
||||
.btn.primary:hover{background:#b81e16;border-color:#b81e16;}
|
||||
.toast{position:fixed;bottom:24px;right:24px;background:var(--surface2);border:1px solid var(--border);border-radius:8px;padding:10px 16px;font-size:.85rem;opacity:0;transition:opacity .2s;pointer-events:none;}
|
||||
.toast.show{opacity:1;}
|
||||
.combined-form{display:none;flex-direction:column;gap:10px;margin-top:10px;padding:12px;background:var(--surface2);border-radius:8px;border:1px solid var(--border);}
|
||||
.combined-form.visible{display:flex;}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<img src="/logo.svg" class="header-logo" alt="tüit">
|
||||
<div class="header-divider"></div>
|
||||
<span class="header-appname">Transkriptor — Einstellungen</span>
|
||||
<div class="header-right">
|
||||
<a href="/" class="back-btn">← Zurück</a>
|
||||
</div>
|
||||
</header>
|
||||
<main>
|
||||
<section>
|
||||
<h2>Audio</h2>
|
||||
<div class="field">
|
||||
<label>Aufnahmequelle</label>
|
||||
<select id="audio-device">
|
||||
<option value="">Systemstandard</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="btn-row">
|
||||
<button class="btn" id="refresh-devices-btn">Geräte aktualisieren</button>
|
||||
<button class="btn" id="create-combined-btn">Combined Source erstellen</button>
|
||||
</div>
|
||||
<div class="combined-form" id="combined-form">
|
||||
<div class="field">
|
||||
<label>Mikrofon</label>
|
||||
<select id="combined-mic"></select>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label>System-Audio Monitor</label>
|
||||
<select id="combined-monitor"></select>
|
||||
</div>
|
||||
<div class="btn-row">
|
||||
<button class="btn primary" id="combined-confirm-btn">Erstellen</button>
|
||||
<button class="btn" id="combined-cancel-btn">Abbrechen</button>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h2>Verarbeitung</h2>
|
||||
<div class="field">
|
||||
<label>Whisper Backend</label>
|
||||
<select id="whisper-backend">
|
||||
<option value="openai">OpenAI-kompatibel (faster-whisper-server)</option>
|
||||
<option value="whispercpp">whisper.cpp Server</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label>Whisper Server URL (leer = lokal)</label>
|
||||
<input type="text" id="whisper-url" placeholder="http://beastix:8080">
|
||||
</div>
|
||||
<div class="field">
|
||||
<label>Whisper Modell</label>
|
||||
<input type="text" id="whisper-model" placeholder="large-v3">
|
||||
</div>
|
||||
<div class="field">
|
||||
<label>Ollama Server URL</label>
|
||||
<input type="text" id="ollama-url" placeholder="http://localhost:11434">
|
||||
</div>
|
||||
<div class="field">
|
||||
<label>Ollama Modell</label>
|
||||
<select id="ollama-model"></select>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label>Obsidian Vault-Pfad (optional)</label>
|
||||
<input type="text" id="obsidian-vault" placeholder="/mnt/d/.../obsidian">
|
||||
</div>
|
||||
<div class="btn-row">
|
||||
<button class="btn primary" id="save-btn">Speichern</button>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<h2>Diarisierung</h2>
|
||||
<div class="field">
|
||||
<label>
|
||||
<input type="checkbox" id="diar-enabled" style="margin-right:6px;accent-color:var(--yellow);">
|
||||
Sprecher-Erkennung aktivieren
|
||||
</label>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label>HuggingFace Token</label>
|
||||
<input type="text" id="diar-hf-token" placeholder="hf_...">
|
||||
</div>
|
||||
<p style="font-size:.78rem;color:var(--muted);margin-bottom:10px;">
|
||||
Token benötigt Lesezugriff auf
|
||||
<a href="https://huggingface.co/pyannote/speaker-diarization-3.1" target="_blank"
|
||||
style="color:var(--yellow);text-decoration:none;">pyannote/speaker-diarization-3.1</a>.
|
||||
</p>
|
||||
</section>
|
||||
</main>
|
||||
<div class="toast" id="toast"></div>
|
||||
<script src="/settings.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,123 @@
|
||||
const token = sessionStorage.getItem('token');
|
||||
function authHeaders() {
|
||||
return token ? { 'Authorization': 'Bearer ' + token } : {};
|
||||
}
|
||||
function apiFetch(url, options) {
|
||||
options = options || {};
|
||||
return fetch(url, Object.assign({}, options, {
|
||||
headers: Object.assign({'Content-Type': 'application/json'}, authHeaders(), options.headers || {}),
|
||||
}));
|
||||
}
|
||||
|
||||
let _devices = [];
|
||||
|
||||
function showToast(msg) {
|
||||
const t = document.getElementById('toast');
|
||||
t.textContent = msg;
|
||||
t.classList.add('show');
|
||||
setTimeout(function() { t.classList.remove('show'); }, 2500);
|
||||
}
|
||||
|
||||
// Fetch the audio input devices and rebuild the three device selectors:
// the main capture device (with a "system default" entry, preserving the
// current selection) and the combined-source mic/monitor pickers.
async function loadDevices() {
    const resp = await apiFetch('/audio/devices');
    if (!resp.ok) return;
    _devices = await resp.json();

    const deviceSelect = document.getElementById('audio-device');
    const previous = deviceSelect.value;
    deviceSelect.replaceChildren(new Option('Systemstandard', ''));
    for (const d of _devices) {
        deviceSelect.appendChild(new Option(d.name, d.name));
    }
    if (previous) deviceSelect.value = previous;

    for (const id of ['combined-mic', 'combined-monitor']) {
        const select = document.getElementById(id);
        select.replaceChildren();
        for (const d of _devices) {
            select.appendChild(new Option(d.name, d.name));
        }
    }
}
|
||||
|
||||
// Query the Ollama server's /api/tags endpoint and rebuild the model
// selector, restoring the previously selected model when given.
// Best-effort: invalid URLs, non-HTTP schemes, and network failures
// leave the selector untouched.
async function loadOllamaModels(baseUrl, current) {
    try {
        const parsed = new URL(baseUrl);
        const isHttp = parsed.protocol === 'http:' || parsed.protocol === 'https:';
        if (!isHttp) return;
        const resp = await fetch(parsed.origin + '/api/tags');
        if (!resp.ok) return;
        const data = await resp.json();
        const sel = document.getElementById('ollama-model');
        sel.replaceChildren();
        for (const m of (data.models || [])) {
            sel.appendChild(new Option(m.name, m.name));
        }
        if (current) sel.value = current;
    } catch (e) {
        // Swallow errors deliberately — the settings page must stay usable
        // even when Ollama is down.
    }
}
|
||||
|
||||
// Load the persisted configuration from GET /config and populate every
// field of the settings form, then refresh the Ollama model list for the
// configured base URL.
async function loadConfig() {
    const resp = await apiFetch('/config');
    if (!resp.ok) return;
    const cfg = await resp.json();

    const setValue = function(id, value) {
        document.getElementById(id).value = value;
    };
    const audio = cfg.audio || {};
    const whisper = cfg.whisper || {};
    const ollama = cfg.ollama || {};
    const obsidian = cfg.obsidian || {};
    const diar = cfg.diarization || {};

    setValue('audio-device', audio.device || '');
    setValue('whisper-backend', whisper.backend || 'openai');
    setValue('whisper-url', whisper.base_url || '');
    setValue('whisper-model', whisper.model || 'large-v3');

    const ollamaUrl = ollama.base_url || 'http://localhost:11434';
    setValue('ollama-url', ollamaUrl);
    await loadOllamaModels(ollamaUrl, ollama.model);

    document.getElementById('diar-enabled').checked = !!diar.enabled;
    setValue('diar-hf-token', diar.hf_token || '');
    setValue('obsidian-vault', obsidian.vault || '');
}
|
||||
|
||||
document.getElementById('refresh-devices-btn').addEventListener('click', loadDevices);
|
||||
|
||||
document.getElementById('create-combined-btn').addEventListener('click', function() {
|
||||
document.getElementById('combined-form').classList.toggle('visible');
|
||||
});
|
||||
document.getElementById('combined-cancel-btn').addEventListener('click', function() {
|
||||
document.getElementById('combined-form').classList.remove('visible');
|
||||
});
|
||||
document.getElementById('combined-confirm-btn').addEventListener('click', async function() {
|
||||
const mic = document.getElementById('combined-mic').value;
|
||||
const monitor = document.getElementById('combined-monitor').value;
|
||||
const r = await apiFetch('/audio/combined', {
|
||||
method: 'POST',
|
||||
body: JSON.stringify({ mic: mic, monitor: monitor }),
|
||||
});
|
||||
if (!r.ok) { showToast('Fehler beim Erstellen'); return; }
|
||||
const data = await r.json();
|
||||
showToast('Erstellt: ' + data.device);
|
||||
document.getElementById('combined-form').classList.remove('visible');
|
||||
await loadDevices();
|
||||
document.getElementById('audio-device').value = data.device;
|
||||
});
|
||||
|
||||
document.getElementById('ollama-url').addEventListener('change', function(e) {
|
||||
loadOllamaModels(e.target.value, document.getElementById('ollama-model').value);
|
||||
});
|
||||
|
||||
document.getElementById('save-btn').addEventListener('click', async function() {
|
||||
const body = {
|
||||
audio: { device: document.getElementById('audio-device').value },
|
||||
whisper: {
|
||||
base_url: document.getElementById('whisper-url').value,
|
||||
model: document.getElementById('whisper-model').value,
|
||||
backend: document.getElementById('whisper-backend').value,
|
||||
},
|
||||
ollama: {
|
||||
base_url: document.getElementById('ollama-url').value,
|
||||
model: document.getElementById('ollama-model').value,
|
||||
},
|
||||
obsidian: {
|
||||
vault: document.getElementById('obsidian-vault').value.trim(),
|
||||
},
|
||||
diarization: {
|
||||
enabled: document.getElementById('diar-enabled').checked,
|
||||
hf_token: document.getElementById('diar-hf-token').value.trim(),
|
||||
},
|
||||
};
|
||||
const r = await apiFetch('/config', { method: 'PUT', body: JSON.stringify(body) });
|
||||
if (r.ok) { showToast('Gespeichert'); } else { showToast('Fehler beim Speichern'); }
|
||||
});
|
||||
|
||||
(async function() {
|
||||
if (!token) { location.href = '/login'; return; }
|
||||
await loadDevices();
|
||||
await loadConfig();
|
||||
})();
|
||||
@@ -0,0 +1,147 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>tüit Transkriptor — Ersteinrichtung</title>
|
||||
<link rel="preconnect" href="https://fonts.googleapis.com">
|
||||
<link href="https://fonts.googleapis.com/css2?family=Overpass:wght@300;400;600;700&display=swap" rel="stylesheet">
|
||||
<style>
|
||||
:root {
|
||||
--red: #DA251C;
|
||||
--yellow: #FFD802;
|
||||
--bg: #111;
|
||||
--surface: #1a1a1a;
|
||||
--text: #e8e8e8;
|
||||
--muted: #888;
|
||||
--border: #2e2e2e;
|
||||
}
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body {
|
||||
font-family: 'Overpass', system-ui, sans-serif;
|
||||
background: var(--bg);
|
||||
color: var(--text);
|
||||
min-height: 100vh;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
}
|
||||
.card {
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 12px;
|
||||
padding: 40px;
|
||||
width: 100%;
|
||||
max-width: 420px;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 24px;
|
||||
}
|
||||
.logo { display: flex; align-items: center; gap: 10px; }
|
||||
.logo-dot { width: 12px; height: 12px; background: var(--red); border-radius: 50%; flex-shrink: 0; }
|
||||
.logo h1 { font-size: 1.1rem; font-weight: 600; }
|
||||
.logo h1 span { color: var(--red); }
|
||||
.subtitle { font-size: 0.85rem; color: var(--muted); }
|
||||
.field { display: flex; flex-direction: column; gap: 6px; }
|
||||
label { font-size: 0.78rem; color: var(--muted); text-transform: uppercase; letter-spacing: 0.06em; }
|
||||
input {
|
||||
background: #111;
|
||||
border: 1px solid var(--border);
|
||||
color: var(--text);
|
||||
border-radius: 6px;
|
||||
padding: 10px 12px;
|
||||
font-family: inherit;
|
||||
font-size: 0.9rem;
|
||||
outline: none;
|
||||
transition: border-color 0.15s;
|
||||
}
|
||||
input:focus { border-color: var(--yellow); }
|
||||
button {
|
||||
background: var(--red);
|
||||
color: #fff;
|
||||
border: none;
|
||||
border-radius: 6px;
|
||||
padding: 12px;
|
||||
font-family: inherit;
|
||||
font-size: 0.9rem;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
transition: opacity 0.15s;
|
||||
}
|
||||
button:hover { opacity: 0.88; }
|
||||
.error {
|
||||
background: rgba(218,37,28,0.12);
|
||||
border: 1px solid rgba(218,37,28,0.3);
|
||||
border-radius: 6px;
|
||||
padding: 10px 12px;
|
||||
font-size: 0.85rem;
|
||||
color: #f87171;
|
||||
display: none;
|
||||
}
|
||||
.error.visible { display: block; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="card">
|
||||
<div class="logo">
|
||||
<div class="logo-dot"></div>
|
||||
<h1>tüit <span>Transkriptor</span></h1>
|
||||
</div>
|
||||
<p class="subtitle">Ersteinrichtung — lege den Administrator-Account an.</p>
|
||||
|
||||
<div class="error" id="error"></div>
|
||||
|
||||
<div class="field">
|
||||
<label for="username">Benutzername</label>
|
||||
<input type="text" id="username" autocomplete="username" autofocus>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label for="password">Passwort</label>
|
||||
<input type="password" id="password" autocomplete="new-password">
|
||||
</div>
|
||||
<div class="field">
|
||||
<label for="confirm">Passwort bestätigen</label>
|
||||
<input type="password" id="confirm" autocomplete="new-password">
|
||||
</div>
|
||||
<div class="field">
|
||||
<label for="output_dir">Transkripte speichern unter</label>
|
||||
<input type="text" id="output_dir" placeholder="~/cloud.shron.de/Hetzner Storagebox/work">
|
||||
</div>
|
||||
|
||||
<button id="submit-btn">Einrichtung abschließen</button>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// First-run setup: validate the form, POST the admin account to /setup,
// then redirect to the app on success.
// Fix: the fetch / r.json() chain had no error handling — a network
// failure or a non-JSON error response (e.g. a proxy error page) raised
// an unhandled rejection and the user saw no feedback. Wrapped in
// try/catch so every failure path surfaces as a visible error message.
document.getElementById('submit-btn').addEventListener('click', async () => {
    const username = document.getElementById('username').value.trim();
    const password = document.getElementById('password').value;
    const confirm = document.getElementById('confirm').value;
    const output_dir = document.getElementById('output_dir').value.trim();
    const errorEl = document.getElementById('error');

    function showError(msg) {
        errorEl.textContent = msg;
        errorEl.classList.add('visible');
    }

    errorEl.classList.remove('visible');

    // Client-side validation; the server re-validates on its side.
    if (!username) return showError('Benutzername darf nicht leer sein.');
    if (password.length < 6) return showError('Passwort muss mindestens 6 Zeichen lang sein.');
    if (password !== confirm) return showError('Passwörter stimmen nicht überein.');

    try {
        const r = await fetch('/setup', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ username, password, output_dir: output_dir || null }),
        });
        const data = await r.json();
        if (data.ok) {
            window.location.href = '/';
        } else {
            showError(data.error || 'Fehler bei der Einrichtung.');
        }
    } catch (e) {
        showError('Fehler bei der Einrichtung.');
    }
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
+7
-4
@@ -21,8 +21,11 @@ else
|
||||
echo " Für GPU: sudo pacman -S rocm-hip-sdk"
|
||||
fi
|
||||
|
||||
echo "Python-Abhängigkeiten werden installiert..."
|
||||
pip install --user -r "$SCRIPT_DIR/requirements.txt"
|
||||
VENV_DIR="$SCRIPT_DIR/.venv"
|
||||
echo "Python-Abhängigkeiten werden installiert (venv: $VENV_DIR)..."
|
||||
python3 -m venv "$VENV_DIR"
|
||||
"$VENV_DIR/bin/pip" install -q -r "$SCRIPT_DIR/requirements.txt"
|
||||
PYTHON="$VENV_DIR/bin/python"
|
||||
|
||||
# ── Netzwerk-Modus abfragen ────────────────────────────────────────────────────
|
||||
|
||||
@@ -57,7 +60,7 @@ Description=tüit Transkriptor
|
||||
After=graphical-session.target
|
||||
|
||||
[Service]
|
||||
ExecStart=$(command -v python3) ${SCRIPT_DIR}/main.py
|
||||
ExecStart=${SCRIPT_DIR}/.venv/bin/python ${SCRIPT_DIR}/main.py
|
||||
Restart=on-failure
|
||||
RestartSec=5
|
||||
Environment=DISPLAY=:0
|
||||
@@ -81,7 +84,7 @@ echo " Systemeinstellungen → Kurzbefehle → Eigene Kurzbefehle"
|
||||
echo " Befehl: pkill -USR1 -f main.py"
|
||||
echo ""
|
||||
if [[ "$NET_MODE" == "2" ]]; then
|
||||
echo "Netzwerk-Zugriff: http://$(hostname -I | awk '{print $1}'):8765"
|
||||
echo "Netzwerk-Zugriff: http://$(ip route get 1 2>/dev/null | awk '{print $7; exit}'):8765"
|
||||
echo "Tipp: Seite als Lesezeichen auf Handy/PC speichern."
|
||||
echo ""
|
||||
fi
|
||||
|
||||
@@ -1,21 +1,67 @@
|
||||
import httpx
|
||||
|
||||
IDENTIFY_SPEAKERS_PROMPT = """Du bekommst den Anfang eines Gesprächstranskripts mit Sprecher-Labels (SPEAKER_00, SPEAKER_01, ...).
|
||||
Ermittle, welche echten Namen den Sprechern zugeordnet werden können — z.B. durch direkte Anrede ("Herr Möller", "Frank").
|
||||
Antworte NUR mit einem JSON-Objekt: {"SPEAKER_00": "Name oder null", "SPEAKER_01": "Name oder null"}
|
||||
Kein weiterer Text, keine Erklärung."""
|
||||
|
||||
TITLE_TLDR_PROMPT = """Du bekommst einen aufbereiteten Transkript-Text.
|
||||
Gib NUR ein JSON-Objekt zurück mit zwei Feldern:
|
||||
- "title": ein prägnanter, aussagekräftiger Titel (max. 8 Wörter, kein Datum, kein "Diktat")
|
||||
- "tldr": 2-3 Sätze, die den Inhalt des Transkripts konkret zusammenfassen
|
||||
|
||||
Kein weiterer Text, kein Kommentar, kein Markdown-Block."""
|
||||
|
||||
SUMMARIZE_PROMPT = """Du bist ein präziser Assistent für Business-Kommunikation.
|
||||
Du bekommst ein Gesprächstranskript mit Sprecher-Labels.
|
||||
Erstelle eine strukturierte Zusammenfassung auf Deutsch mit:
|
||||
1. Einem passenden H1-Titel
|
||||
2. ## Wichtigste Punkte (Aufzählung)
|
||||
3. ## Offene Fragen (Aufzählung, falls vorhanden)
|
||||
4. ## Nächste Schritte / Ideen (Aufzählung, falls vorhanden)
|
||||
Antworte NUR mit dem fertigen Markdown."""
|
||||
|
||||
SYSTEM_PROMPT = """Du bist ein präziser Schreibassistent.
|
||||
Du bekommst einen rohen Sprachtranskript und optionale Instruktionen des Nutzers.
|
||||
Deine Aufgabe:
|
||||
1. Bereinige den Text (Füllwörter, Wiederholungen, Tippfehler)
|
||||
2. Strukturiere ihn mit Markdown-Überschriften wenn sinnvoll
|
||||
3. Erzeuge einen passenden deutschen Titel als H1
|
||||
4. Beachte Instruktionen des Nutzers wenn vorhanden
|
||||
5. Antworte NUR mit dem fertigen Markdown — kein Kommentar, keine Erklärung
|
||||
2. Gliedere den Text in sinnvolle Absätze — trenne Gedanken durch Leerzeilen
|
||||
3. Verwende Markdown-Überschriften (##) wenn der Text mehrere Themen hat
|
||||
4. Verwende Aufzählungslisten (- ) für Aufzählungen oder Handlungsschritte
|
||||
5. Erzeuge einen passenden deutschen Titel als H1
|
||||
6. Beachte Instruktionen des Nutzers wenn vorhanden
|
||||
7. Antworte NUR mit dem fertigen Markdown — kein Kommentar, keine Erklärung
|
||||
|
||||
Format:
|
||||
# Titel
|
||||
|
||||
Inhalt...
|
||||
Erster Absatz...
|
||||
|
||||
Zweiter Absatz...
|
||||
|
||||
## Abschnitt (nur wenn sinnvoll)
|
||||
|
||||
- Punkt 1
|
||||
- Punkt 2
|
||||
"""
|
||||
|
||||
|
||||
PUNCTUATE_PROMPT = """Du bekommst einen rohen deutschen Sprachtranskript ohne Großschreibung und Satzzeichen.
|
||||
Füge AUSSCHLIESSLICH Satzzeichen (Punkt, Komma, Fragezeichen, Ausrufezeichen) und Großschreibung am Satzanfang hinzu.
|
||||
Verändere KEINE Wörter, kürze NICHTS, füge NICHTS hinzu.
|
||||
Wenn Sprecher-Labels vorhanden sind (z.B. **Thomas:**), behalte sie exakt so bei.
|
||||
Antworte NUR mit dem korrigierten Text, ohne Kommentar."""
|
||||
|
||||
|
||||
def _strip_code_fences(text: str) -> str:
|
||||
"""Remove markdown code fences (```json ... ```) from LLM responses."""
|
||||
import re
|
||||
m = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text)
|
||||
if m:
|
||||
return m.group(1)
|
||||
return text
|
||||
|
||||
|
||||
class OllamaClient:
|
||||
def __init__(self, base_url: str = "http://localhost:11434"):
|
||||
self.base_url = base_url
|
||||
@@ -42,3 +88,90 @@ class OllamaClient:
|
||||
)
|
||||
r.raise_for_status()
|
||||
return r.json()["response"]
|
||||
|
||||
    async def generate_title_and_tldr(
        self,
        text: str,
        model: str = "gemma3:12b",
    ) -> tuple[str, str]:
        """Return (title, tldr) for the given text. Falls back to defaults on error.

        The model is instructed (via TITLE_TLDR_PROMPT) to answer with a JSON
        object holding "title" and "tldr" keys. HTTP failures raise (via
        raise_for_status); only JSON-parse failures fall back to defaults.
        """
        import json
        async with httpx.AsyncClient(timeout=60) as client:
            r = await client.post(
                f"{self.base_url}/api/generate",
                json={
                    "model": model,
                    # Only the first 3000 chars are sent — enough context for
                    # a title/TL;DR while keeping the prompt small.
                    "prompt": f"Text:\n{text[:3000]}",
                    "system": TITLE_TLDR_PROMPT,
                    "stream": False,
                },
            )
            r.raise_for_status()
            # Models often wrap JSON in ``` fences; strip before parsing.
            raw = _strip_code_fences(r.json()["response"].strip())
            try:
                data = json.loads(raw)
                title = str(data.get("title", "")).strip() or "Diktat"
                tldr = str(data.get("tldr", "")).strip() or "Kein TL;DR verfügbar."
                return title, tldr
            except Exception:
                # Malformed JSON from the model: generic German defaults.
                return "Diktat", "Kein TL;DR verfügbar."
||||
|
||||
async def punctuate(
|
||||
self,
|
||||
text: str,
|
||||
model: str = "gemma3:12b",
|
||||
) -> str:
|
||||
"""Add punctuation and capitalisation to raw whisper output without changing words."""
|
||||
async with httpx.AsyncClient(timeout=120) as client:
|
||||
r = await client.post(
|
||||
f"{self.base_url}/api/generate",
|
||||
json={"model": model, "prompt": text, "system": PUNCTUATE_PROMPT, "stream": False},
|
||||
)
|
||||
r.raise_for_status()
|
||||
result = r.json()["response"].strip()
|
||||
return result if result else text
|
||||
|
||||
async def identify_speakers(
|
||||
self,
|
||||
transcript_excerpt: str,
|
||||
model: str = "gemma3:12b",
|
||||
) -> dict[str, str]:
|
||||
"""Try to map SPEAKER_XX labels to real names. Returns {} on failure."""
|
||||
import json
|
||||
async with httpx.AsyncClient(timeout=60) as client:
|
||||
r = await client.post(
|
||||
f"{self.base_url}/api/generate",
|
||||
json={
|
||||
"model": model,
|
||||
"prompt": f"Transkript-Anfang:\n{transcript_excerpt[:2000]}",
|
||||
"system": IDENTIFY_SPEAKERS_PROMPT,
|
||||
"stream": False,
|
||||
},
|
||||
)
|
||||
r.raise_for_status()
|
||||
raw = r.json()["response"].strip()
|
||||
try:
|
||||
data = json.loads(raw)
|
||||
if not isinstance(data, dict):
|
||||
return {}
|
||||
return {k: v for k, v in data.items() if v and str(v).lower() != "null"}
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
async def summarize(
|
||||
self,
|
||||
annotated_transcript: str,
|
||||
model: str = "gemma3:12b",
|
||||
) -> str:
|
||||
async with httpx.AsyncClient(timeout=180) as client:
|
||||
r = await client.post(
|
||||
f"{self.base_url}/api/generate",
|
||||
json={
|
||||
"model": model,
|
||||
"prompt": f"Transkript:\n{annotated_transcript}",
|
||||
"system": SUMMARIZE_PROMPT,
|
||||
"stream": False,
|
||||
},
|
||||
)
|
||||
r.raise_for_status()
|
||||
return r.json()["response"].strip()
|
||||
|
||||
@@ -39,6 +39,55 @@ async def appjs():
|
||||
return FileResponse(str(FRONTEND_DIR / "app.js"))
|
||||
|
||||
|
||||
@app.get("/logo.svg")
|
||||
async def logo():
|
||||
return FileResponse(str(FRONTEND_DIR / "logo.svg"), media_type="image/svg+xml")
|
||||
|
||||
|
||||
@app.get("/settings.js")
|
||||
async def settingsjs():
|
||||
return FileResponse(str(FRONTEND_DIR / "settings.js"))
|
||||
|
||||
|
||||
# ── PipeWire combined source restore ──────────────────────────────────────────
|
||||
|
||||
def _restore_pipewire_combined():
    """Recreate the transkriptor-combined PipeWire source on startup.

    Reads ~/.config/tueit-transcriber/pipewire-modules.json (written when the
    combined source was first created) and, unless the combined monitor is
    already present, reloads the null sink plus the two loopback modules and
    persists the new module ids. Best-effort: any failure is logged as a
    warning and startup continues.
    """
    import json, subprocess, logging
    log = logging.getLogger(__name__)

    def _load_module(*args: str) -> int:
        # `pactl load-module` prints the new module id on stdout.
        # Extracted helper: the three load-module calls below were duplicated.
        out = subprocess.check_output(["pactl", "load-module", *args], timeout=5)
        return int(out.decode().strip())

    state_path = Path(os.path.expanduser("~/.config/tueit-transcriber/pipewire-modules.json"))
    if not state_path.exists():
        return
    try:
        data = json.loads(state_path.read_text())
        mic = data.get("mic")
        monitor = data.get("monitor")
        if not mic or not monitor:
            return
        sources = subprocess.check_output(
            ["pactl", "list", "sources", "short"], stderr=subprocess.DEVNULL, timeout=5
        ).decode()
        if "transkriptor-combined.monitor" in sources:
            return  # already loaded
        ids = [
            _load_module(
                "module-null-sink",
                "sink_name=transkriptor-combined",
                "sink_properties=device.description=transkriptor-combined",
            ),
            _load_module("module-loopback", f"source={mic}", "sink=transkriptor-combined"),
            _load_module("module-loopback", f"source={monitor}", "sink=transkriptor-combined"),
        ]
        state_path.write_text(json.dumps({"ids": ids, "mic": mic, "monitor": monitor}))
        log.info("Restored PipeWire combined source (ids: %s)", ids)
    except Exception as e:
        log.warning("Could not restore PipeWire combined source: %s", e)
|
||||
|
||||
|
||||
# ── PID file ───────────────────────────────────────────────────────────────────
|
||||
|
||||
def write_pid(pid_path: str):
|
||||
@@ -129,19 +178,16 @@ def run_server(config: uvicorn.Config):
|
||||
# ── Entrypoint ─────────────────────────────────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
|
||||
from auth import setup_wizard, has_users
|
||||
if not has_users():
|
||||
setup_wizard()
|
||||
|
||||
cfg = load_config()
|
||||
port = cfg["server"]["port"]
|
||||
host = cfg.get("network", {}).get("host", "127.0.0.1")
|
||||
pid_path = cfg.get("pid_file", os.path.expanduser("~/.local/run/tueit-transcriber.pid"))
|
||||
|
||||
write_pid(pid_path)
|
||||
_restore_pipewire_combined()
|
||||
signal.signal(signal.SIGUSR1, _sigusr1_handler)
|
||||
|
||||
uvicorn_cfg = uvicorn.Config(app, host=host, port=port, log_level="warning")
|
||||
uvicorn_cfg = uvicorn.Config(app, host=host, port=port, log_level="debug")
|
||||
server_thread = threading.Thread(target=run_server, args=(uvicorn_cfg,), daemon=True)
|
||||
server_thread.start()
|
||||
|
||||
@@ -152,7 +198,9 @@ if __name__ == "__main__":
|
||||
break
|
||||
time.sleep(0.1)
|
||||
|
||||
webbrowser.open(f"http://localhost:{port}")
|
||||
from auth import has_users
|
||||
start_path = "/setup" if not has_users() else "/"
|
||||
webbrowser.open(f"http://localhost:{port}{start_path}")
|
||||
|
||||
try:
|
||||
run_tray(port)
|
||||
|
||||
@@ -35,6 +35,22 @@ def save_transcript(
|
||||
return path
|
||||
|
||||
|
||||
def read_transcript(output_dir: str, filename: str) -> str | None:
|
||||
"""Return file content for a .md file inside output_dir (flat or one level deep)."""
|
||||
if not filename.endswith(".md"):
|
||||
return None
|
||||
parts = filename.split("/")
|
||||
if len(parts) > 2 or any(p in (".", "..") or not p for p in parts):
|
||||
return None
|
||||
path = os.path.join(output_dir, filename)
|
||||
if not os.path.abspath(path).startswith(os.path.abspath(output_dir) + os.sep):
|
||||
return None
|
||||
if not os.path.exists(path):
|
||||
return None
|
||||
with open(path, encoding="utf-8") as f:
|
||||
return f.read()
|
||||
|
||||
|
||||
def list_transcripts(output_dir: str, limit: int = 20) -> list[dict]:
|
||||
if not os.path.exists(output_dir):
|
||||
return []
|
||||
@@ -46,5 +62,166 @@ def list_transcripts(output_dir: str, limit: int = 20) -> list[dict]:
|
||||
for f in files:
|
||||
full = os.path.join(output_dir, f)
|
||||
stat = os.stat(full)
|
||||
result.append({"filename": f, "path": full, "size": stat.st_size, "mtime": stat.st_mtime})
|
||||
item: dict = {"filename": f, "path": full, "size": stat.st_size, "mtime": stat.st_mtime}
|
||||
if f.endswith("-index.md"):
|
||||
base = f[: -len("-index.md")]
|
||||
related: dict[str, str] = {}
|
||||
for suffix, key in [("-transkript.md", "transkript"), ("-zusammenfassung.md", "zusammenfassung")]:
|
||||
rel_filename = f"{base}/{base}{suffix}"
|
||||
if os.path.exists(os.path.join(output_dir, rel_filename)):
|
||||
related[key] = rel_filename
|
||||
if related:
|
||||
item["related"] = related
|
||||
result.append(item)
|
||||
return result
|
||||
|
||||
|
||||
def write_solo_docs(
    raw_text: str,
    refined: str,
    output_dir: str,
    dt: "datetime | None" = None,
    title: str = "",
    tldr: str = "",
) -> dict[str, str]:
    """Write index (in output_dir), transkript + zusammenfassung (in subdir).

    Produces three Markdown files for a solo dictation:
      * ``<base>-index.md`` in output_dir (title, date, TL;DR, links),
      * ``<base>/<base>-transkript.md`` (raw Whisper output),
      * ``<base>/<base>-zusammenfassung.md`` (LLM-refined text),
    where ``base`` is a timestamp plus a slug of the title. Returns the three
    paths keyed as "index", "transkript", "zusammenfassung".
    """
    if dt is None:
        dt = datetime.now()
    os.makedirs(output_dir, exist_ok=True)

    # Fall back to the first H1 of the refined text as title; "Diktat" if none.
    if not title:
        title = "Diktat"
        for line in refined.splitlines():
            if line.startswith("# "):
                title = line[2:].strip()
                break
    if not tldr:
        tldr = _extract_tldr(refined)

    # Slug capped at 50 chars keeps filenames filesystem- and sync-friendly.
    base = dt.strftime("%Y-%m-%d-%H%M") + "-" + slugify(title)[:50]
    date_str = dt.strftime("%d.%m.%Y %H:%M")
    frontmatter = f"---\ndate: {dt.isoformat(timespec='seconds')}\ntags: [transkript]\n---\n\n"

    index_filename = f"{base}-index.md"
    subdir = os.path.join(output_dir, base)
    os.makedirs(subdir, exist_ok=True)

    # --- transkript (raw whisper output, in subdir) ---
    transkript_filename = f"{base}-transkript.md"
    transkript_path = os.path.join(subdir, transkript_filename)
    with open(transkript_path, "w", encoding="utf-8") as f:
        f.write(frontmatter)
        f.write(f"# {title} — Rohtranskript\n\n")
        # Relative backlink to the index one directory up.
        f.write(f"← [Index](../{index_filename})\n\n")
        f.write(raw_text)
        if not raw_text.endswith("\n"):
            f.write("\n")

    # --- zusammenfassung (Ollama-polished, in subdir) ---
    zusammenfassung_filename = f"{base}-zusammenfassung.md"
    zusammenfassung_path = os.path.join(subdir, zusammenfassung_filename)
    with open(zusammenfassung_path, "w", encoding="utf-8") as f:
        f.write(frontmatter)
        f.write(f"← [Index](../{index_filename})\n\n")
        f.write(refined)
        if not refined.endswith("\n"):
            f.write("\n")

    # --- index (in output_dir root) ---
    index_content = (
        f"# {title}\n\n"
        f"**Datum:** {date_str}\n\n"
        f"> {tldr}\n\n"
        f"- [Transkript]({base}/{transkript_filename})\n"
        f"- [Zusammenfassung]({base}/{zusammenfassung_filename})\n"
    )
    index_path = os.path.join(output_dir, index_filename)
    with open(index_path, "w", encoding="utf-8") as f:
        # Index gets an extra "index" tag so it can be filtered in Obsidian.
        f.write(f"---\ndate: {dt.isoformat(timespec='seconds')}\ntags: [transkript, index]\n---\n\n")
        f.write(index_content)

    return {"index": index_path, "transkript": transkript_path, "zusammenfassung": zusammenfassung_path}
|
||||
|
||||
|
||||
def write_meeting_docs(
    aligned_segments: list[tuple[str, str]],
    summary: str,
    speakers: list[str],
    duration_min: int,
    output_dir: str,
    dt: "datetime | None" = None,
    title: str = "",
    tldr: str = "",
    transcript_text: str = "",
) -> dict[str, str]:
    """Write index (in output_dir), transkript + zusammenfassung (in subdir).

    Meeting counterpart of write_solo_docs: writes an index file with date,
    speaker list and duration, plus a speaker-labelled transcript and the
    summary in a ``<base>/`` subdirectory. If *transcript_text* is given it
    is written verbatim; otherwise the transcript is rendered from
    *aligned_segments* as ``**speaker:** text`` lines. Returns the three
    paths keyed as "index", "transkript", "zusammenfassung".
    """
    if dt is None:
        dt = datetime.now()
    os.makedirs(output_dir, exist_ok=True)

    if not title:
        title = f"Meeting {dt.strftime('%d.%m.%Y %H:%M')}"
    if not tldr:
        tldr = _extract_tldr(summary)

    # Slug capped at 50 chars keeps filenames filesystem- and sync-friendly.
    base = dt.strftime("%Y-%m-%d-%H%M") + "-" + slugify(title)[:50]
    date_str = dt.strftime("%d.%m.%Y %H:%M")
    frontmatter_base = f"---\ndate: {dt.isoformat(timespec='seconds')}\ntags: [transkript, meeting]\n---\n\n"

    index_filename = f"{base}-index.md"
    subdir = os.path.join(output_dir, base)
    os.makedirs(subdir, exist_ok=True)

    # --- transkript (in subdir) ---
    if transcript_text:
        transcript_content = transcript_text
    else:
        # Render one bold speaker label per merged segment.
        transcript_lines = []
        for speaker, text in aligned_segments:
            transcript_lines.append(f"**{speaker}:** {text}\n")
        transcript_content = "\n".join(transcript_lines)
    transkript_filename = f"{base}-transkript.md"
    transkript_path = os.path.join(subdir, transkript_filename)
    with open(transkript_path, "w", encoding="utf-8") as f:
        f.write(frontmatter_base)
        # Relative backlink to the index one directory up.
        f.write(f"← [Index](../{index_filename})\n\n")
        f.write(transcript_content)
        if not transcript_content.endswith("\n"):
            f.write("\n")

    # --- zusammenfassung (in subdir) ---
    zusammenfassung_filename = f"{base}-zusammenfassung.md"
    zusammenfassung_path = os.path.join(subdir, zusammenfassung_filename)
    with open(zusammenfassung_path, "w", encoding="utf-8") as f:
        f.write(frontmatter_base)
        f.write(f"← [Index](../{index_filename})\n\n")
        f.write(summary)
        if not summary.endswith("\n"):
            f.write("\n")

    # --- index (in output_dir root) ---
    speaker_str = ", ".join(speakers) if speakers else "Unbekannt"
    index_content = (
        f"# {title}\n\n"
        f"**Datum:** {date_str} \n"
        f"**Sprecher:** {speaker_str} \n"
        f"**Dauer:** {duration_min} min\n\n"
        f"> {tldr}\n\n"
        f"- [Transkript]({base}/{transkript_filename})\n"
        f"- [Zusammenfassung]({base}/{zusammenfassung_filename})\n"
    )
    index_path = os.path.join(output_dir, index_filename)
    with open(index_path, "w", encoding="utf-8") as f:
        # Index gets an extra "index" tag so it can be filtered in Obsidian.
        f.write(f"---\ndate: {dt.isoformat(timespec='seconds')}\ntags: [transkript, meeting, index]\n---\n\n")
        f.write(index_content)

    return {"index": index_path, "transkript": transkript_path, "zusammenfassung": zusammenfassung_path}
|
||||
|
||||
|
||||
def _extract_tldr(summary: str) -> str:
|
||||
"""Return the first non-heading, non-empty line from the summary as TL;DR."""
|
||||
for line in summary.splitlines():
|
||||
stripped = line.strip()
|
||||
if stripped and not stripped.startswith("#"):
|
||||
return stripped[:200]
|
||||
return "Kein TL;DR verfügbar."
|
||||
|
||||
@@ -9,3 +9,4 @@ numpy>=1.26
|
||||
tomli_w>=1.0
|
||||
pytest>=8.0
|
||||
pytest-asyncio>=0.23
|
||||
pyannote.audio>=3.3
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
def test_align_assigns_speaker_by_overlap():
    """Each Whisper segment is attributed to the diarization speaker it overlaps most."""
    from alignment import align_segments

    diarization = [
        (0.0, 2.5, "SPEAKER_00"),
        (2.5, 5.0, "SPEAKER_01"),
    ]
    whisper_segments = [
        {"start": 0.0, "end": 2.0, "text": "Hallo"},
        {"start": 2.1, "end": 4.0, "text": "Wie geht es"},
    ]
    result = align_segments(whisper_segments, diarization)
    assert result[0] == ("SPEAKER_00", "Hallo")
    assert result[1] == ("SPEAKER_01", "Wie geht es")
|
||||
|
||||
|
||||
def test_align_merges_consecutive_same_speaker():
    """Adjacent segments from the same speaker collapse into one joined block."""
    from alignment import align_segments

    segments = [
        {"start": 0.0, "end": 1.0, "text": "Hallo"},
        {"start": 1.1, "end": 2.0, "text": "Welt"},
    ]
    result = align_segments(segments, [(0.0, 3.0, "SPEAKER_00")])
    assert len(result) == 1
    assert result[0] == ("SPEAKER_00", "Hallo Welt")
|
||||
|
||||
|
||||
def test_align_fallback_when_no_speaker_overlap():
    """With no diarization segments at all, alignment falls back to SPEAKER_00."""
    from alignment import align_segments

    result = align_segments([{"start": 0.0, "end": 1.0, "text": "Hallo"}], [])
    assert result[0][0] == "SPEAKER_00"
|
||||
@@ -45,6 +45,45 @@ def test_status_requires_auth():
|
||||
assert r.status_code == 401
|
||||
|
||||
|
||||
def make_app_for_dir(output_dir: str):
    """Build a FastAPI app with auth overridden to a fixed non-admin user.

    The dependency override makes every request act as an anonymous,
    non-admin user whose transcripts live in *output_dir*, so transcript
    endpoints can be exercised without a real login flow.
    """
    from fastapi import FastAPI
    from api.router import router, current_user

    test_app = FastAPI()
    test_app.dependency_overrides[current_user] = lambda: {
        "username": "",
        "output_dir": output_dir,
        "is_admin": False,
    }
    test_app.include_router(router)
    return test_app
||||
|
||||
|
||||
def test_get_transcript_returns_content(tmp_path):
    """GET /transcripts/<file> returns the Markdown body of an existing file."""
    transcript = tmp_path / "2026-01-01-0900-test.md"
    transcript.write_text("# Hello\n\ncontent here\n")

    response = TestClient(make_app_for_dir(str(tmp_path))).get(
        "/transcripts/2026-01-01-0900-test.md"
    )
    assert response.status_code == 200
    assert "Hello" in response.text
|
||||
|
||||
|
||||
def test_get_transcript_rejects_path_traversal(tmp_path):
    """URL-encoded ../ in the filename must 404, not escape the output dir."""
    response = TestClient(make_app_for_dir(str(tmp_path))).get(
        "/transcripts/..%2Fsecret.md"
    )
    assert response.status_code == 404
||||
|
||||
|
||||
def test_delete_transcript_removes_file(tmp_path):
    """DELETE /transcripts/<file> removes the file from disk."""
    transcript = tmp_path / "2026-01-01-0900-test.md"
    transcript.write_text("content")

    response = TestClient(make_app_for_dir(str(tmp_path))).delete(
        "/transcripts/2026-01-01-0900-test.md"
    )
    assert response.status_code == 200
    assert not transcript.exists()
||||
|
||||
|
||||
def test_delete_transcript_rejects_path_traversal(tmp_path):
    """URL-encoded ../ in a DELETE filename must 404, not delete outside files."""
    response = TestClient(make_app_for_dir(str(tmp_path))).delete(
        "/transcripts/..%2Fsecret.md"
    )
    assert response.status_code == 404
||||
|
||||
|
||||
def test_login_rejects_wrong_credentials():
|
||||
import tempfile, os
|
||||
from unittest.mock import patch
|
||||
@@ -58,3 +97,120 @@ def test_login_rejects_wrong_credentials():
|
||||
with patch("auth.USERS_PATH", users_path):
|
||||
r = client.post("/login", json={"username": "nobody", "password": "wrong"})
|
||||
assert r.status_code == 401
|
||||
|
||||
|
||||
def test_audio_devices_returns_list(monkeypatch):
    """/audio/devices lists only devices with input channels, for admins."""
    import sounddevice as sd
    from main import app
    from api.router import current_user

    fake_devices = [
        {"name": "Fake Mic", "max_input_channels": 1, "max_output_channels": 0},
        {"name": "Fake Monitor", "max_input_channels": 2, "max_output_channels": 0},
        {"name": "Fake Speaker", "max_input_channels": 0, "max_output_channels": 2},
    ]
    monkeypatch.setattr(sd, "query_devices", lambda: fake_devices)
    app.dependency_overrides[current_user] = lambda: {
        "username": "u", "output_dir": "/tmp", "is_admin": True
    }
    try:
        response = TestClient(app).get(
            "/audio/devices", headers={"Authorization": "Bearer fake"}
        )
        assert response.status_code == 200
        devices = response.json()
        assert len(devices) == 2  # the output-only device is filtered out
        assert devices[0]["name"] == "Fake Mic"
    finally:
        app.dependency_overrides.pop(current_user, None)
|
||||
|
||||
|
||||
def test_audio_devices_forbidden_for_non_admin():
    """Non-admin users must not be able to enumerate audio devices."""
    from main import app
    from api.router import current_user

    app.dependency_overrides[current_user] = lambda: {
        "username": "u", "output_dir": "/tmp", "is_admin": False
    }
    try:
        response = TestClient(app).get(
            "/audio/devices", headers={"Authorization": "Bearer fake"}
        )
        assert response.status_code == 403
    finally:
        app.dependency_overrides.pop(current_user, None)
|
||||
|
||||
|
||||
def test_audio_combined_forbidden_for_non_admin():
    """POST /audio/combined is rejected with 403 for non-admin users."""
    from main import app
    from api.router import current_user
    from fastapi.testclient import TestClient

    app.dependency_overrides[current_user] = lambda: {
        "username": "u", "output_dir": "/tmp", "is_admin": False
    }
    try:
        response = TestClient(app).post(
            "/audio/combined",
            json={"mic": "x", "monitor": "y"},
            headers={"Authorization": "Bearer fake"},
        )
        assert response.status_code == 403
    finally:
        app.dependency_overrides.pop(current_user, None)
|
||||
|
||||
|
||||
def test_status_includes_is_admin():
    """/status echoes the authenticated user's admin flag."""
    from main import app
    from api.router import current_user
    from fastapi.testclient import TestClient

    app.dependency_overrides[current_user] = lambda: {
        "username": "u", "output_dir": "/tmp", "is_admin": True
    }
    try:
        response = TestClient(app).get(
            "/status", headers={"Authorization": "Bearer fake"}
        )
        assert response.status_code == 200
        assert response.json()["is_admin"] is True
    finally:
        app.dependency_overrides.pop(current_user, None)
|
||||
|
||||
|
||||
def test_state_has_speaker_fields():
    """A fresh AppState exposes all speaker-pipeline fields, each unset."""
    from api.state import AppState

    fresh_state = AppState()
    for attr in ("_speakers_event", "_pending_aligned_segments", "_speaker_names"):
        assert hasattr(fresh_state, attr)
        assert getattr(fresh_state, attr) is None
|
||||
|
||||
|
||||
def test_post_speakers_resolves_pipeline_pause():
    """POST /speakers stores the name mapping and wakes the waiting pipeline."""
    import asyncio
    from main import app
    from api.router import current_user
    from api.state import state
    from fastapi.testclient import TestClient

    # Simulate a pipeline that is paused, waiting for speaker names.
    state._speakers_event = asyncio.Event()
    state._speaker_names = None

    app.dependency_overrides[current_user] = lambda: {
        "username": "u", "output_dir": "/tmp", "is_admin": False
    }
    try:
        response = TestClient(app).post(
            "/speakers", json={"SPEAKER_00": "Thomas", "SPEAKER_01": "Möller"}
        )
        assert response.status_code == 200
        assert state._speaker_names == {"SPEAKER_00": "Thomas", "SPEAKER_01": "Möller"}
        assert state._speakers_event.is_set()
    finally:
        # Restore global state so other tests start clean.
        app.dependency_overrides.pop(current_user, None)
        state._speakers_event = None
        state._speaker_names = None
|
||||
|
||||
|
||||
def test_put_config_deep_merges(tmp_path, monkeypatch):
    """PUT /config merges nested keys instead of replacing whole sections."""
    import config as cfg_mod
    monkeypatch.setattr(cfg_mod, "CONFIG_PATH", str(tmp_path / "config.toml"))
    from main import app
    from api.router import current_user
    from fastapi.testclient import TestClient

    app.dependency_overrides[current_user] = lambda: {
        "username": "u", "output_dir": "/tmp", "is_admin": True
    }
    try:
        response = TestClient(app).put(
            "/config",
            json={"whisper": {"base_url": "http://beastix:8000"}},
            headers={"Authorization": "Bearer fake"},
        )
        assert response.status_code == 200
        merged = response.json()
        # base_url was updated while the sibling key "model" survived the merge
        assert merged["whisper"]["base_url"] == "http://beastix:8000"
        assert merged["whisper"]["model"] == "large-v3"
    finally:
        app.dependency_overrides.pop(current_user, None)
|
||||
|
||||
@@ -27,3 +27,14 @@ def test_recorder_save_wav(tmp_path):
|
||||
with wave.open(out) as wf:
|
||||
assert wf.getframerate() == 16000
|
||||
assert wf.getnchannels() == 1
|
||||
|
||||
|
||||
def test_recorder_stores_device_param():
    """AudioRecorder keeps the device name it was constructed with."""
    from audio import AudioRecorder

    recorder = AudioRecorder(device="my-pipewire-source")
    assert recorder.device == "my-pipewire-source"
|
||||
|
||||
def test_recorder_device_none_when_empty_string():
    """An empty device string is normalized to None."""
    from audio import AudioRecorder

    recorder = AudioRecorder(device="")
    assert recorder.device is None
|
||||
|
||||
+27
-2
@@ -6,9 +6,8 @@ from unittest.mock import patch
|
||||
def test_config_loads_defaults():
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
cfg_path = os.path.join(tmpdir, "config.toml")
|
||||
import config
|
||||
with patch("config.CONFIG_PATH", cfg_path):
|
||||
import importlib, config
|
||||
importlib.reload(config)
|
||||
cfg = config.load()
|
||||
assert cfg["ollama"]["model"] == "gemma3:12b"
|
||||
assert cfg["whisper"]["model"] == "large-v3"
|
||||
@@ -23,3 +22,29 @@ def test_config_creates_file_on_first_run():
|
||||
with patch("config.CONFIG_PATH", cfg_path):
|
||||
config.load()
|
||||
assert os.path.exists(cfg_path)
|
||||
|
||||
|
||||
def test_config_has_audio_and_whisper_base_url():
    """Default config contains an [audio] section and an empty whisper base_url."""
    import config
    from unittest.mock import patch
    import tempfile, os

    with tempfile.TemporaryDirectory() as tmpdir:
        with patch("config.CONFIG_PATH", os.path.join(tmpdir, "config.toml")):
            cfg = config.load()
    assert "audio" in cfg
    assert cfg["audio"]["device"] == ""
    assert cfg["whisper"]["base_url"] == ""
|
||||
|
||||
|
||||
def test_config_has_diarization_defaults():
    """Default config ships a [diarization] section: disabled, no token."""
    from unittest.mock import patch
    import tempfile, os

    with tempfile.TemporaryDirectory() as tmpdir:
        with patch("config.CONFIG_PATH", os.path.join(tmpdir, "config.toml")):
            import config
            cfg = config.load()
    assert "diarization" in cfg
    assert cfg["diarization"]["enabled"] is False
    assert cfg["diarization"]["hf_token"] == ""
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
from unittest.mock import MagicMock, patch
|
||||
import pytest
|
||||
|
||||
|
||||
def test_diarizer_returns_list_of_tuples(tmp_path):
    """Diarizer.diarize() returns [(start, end, speaker), ...]"""
    wav = tmp_path / "test.wav"
    wav.write_bytes(b"\x00" * 100)

    def fake_turn(start, end):
        # Minimal stand-in for a pyannote turn: only .start/.end are read.
        turn = MagicMock()
        turn.start = start
        turn.end = end
        return turn

    fake_annotation = MagicMock()
    fake_annotation.itertracks.return_value = [
        (fake_turn(0.0, 2.5), "A", "SPEAKER_00"),
        (fake_turn(2.6, 5.0), "B", "SPEAKER_01"),
    ]
    fake_output = MagicMock()
    fake_output.speaker_diarization = fake_annotation

    import asyncio
    from diarization import Diarizer

    # Bypass __init__ (which would load the real pipeline) and inject the fake.
    diarizer = Diarizer.__new__(Diarizer)
    diarizer._pipeline = MagicMock(return_value=fake_output)

    assert asyncio.run(diarizer.diarize(str(wav))) == [
        (0.0, 2.5, "SPEAKER_00"),
        (2.6, 5.0, "SPEAKER_01"),
    ]
|
||||
|
||||
|
||||
def test_diarizer_requires_hf_token():
    """Constructing a Diarizer without a Hugging Face token is an error."""
    from diarization import Diarizer

    with pytest.raises(ValueError, match="hf_token"):
        Diarizer(hf_token="")
|
||||
@@ -35,3 +35,47 @@ async def test_list_models_returns_list():
|
||||
client = OllamaClient(base_url="http://localhost:11434")
|
||||
models = await client.list_models()
|
||||
assert "gemma3:12b" in models
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_identify_speakers_returns_dict():
    """identify_speakers() parses the model's JSON reply into a name mapping."""
    import respx, httpx, json
    from llm import OllamaClient

    expected = {"SPEAKER_00": "Thomas", "SPEAKER_01": "Möller"}
    excerpt = "SPEAKER_00: Gut, Herr Möller.\nSPEAKER_01: Danke, Thomas."

    with respx.mock:
        respx.post("http://localhost:11434/api/generate").mock(
            return_value=httpx.Response(200, json={"response": json.dumps(expected)})
        )
        mapping = await OllamaClient().identify_speakers(excerpt)
    assert mapping == {"SPEAKER_00": "Thomas", "SPEAKER_01": "Möller"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_identify_speakers_returns_empty_on_parse_failure():
    """A reply that is not valid JSON yields an empty mapping, not an error."""
    import respx, httpx
    from llm import OllamaClient

    with respx.mock:
        respx.post("http://localhost:11434/api/generate").mock(
            return_value=httpx.Response(200, json={"response": "kein json hier"})
        )
        mapping = await OllamaClient().identify_speakers("irgendwas")
    assert mapping == {}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_summarize_returns_string():
    """summarize() returns the model's markdown response text."""
    import respx, httpx
    from llm import OllamaClient

    with respx.mock:
        respx.post("http://localhost:11434/api/generate").mock(
            return_value=httpx.Response(
                200, json={"response": "# Zusammenfassung\n\nKurzer Text."}
            )
        )
        summary = await OllamaClient().summarize(
            "Thomas: Hallo.\nMöller: Hi.", model="gemma3:12b"
        )
    assert "Zusammenfassung" in summary
|
||||
|
||||
@@ -58,3 +58,53 @@ def test_slugify():
|
||||
from output import slugify
|
||||
assert slugify("Mein erstes Diktat") == "mein-erstes-diktat"
|
||||
assert slugify("test -- foo") == "test-foo"
|
||||
|
||||
|
||||
def test_write_solo_docs_creates_three_files(tmp_path):
    """write_solo_docs() emits index + transkript + zusammenfassung with backlinks.

    Fix: the original read files via bare ``open(p).read()`` without ever
    closing them (leaked handles / ResourceWarning, repeated re-reads of the
    same file); use ``Path.read_text()`` and read each document once.
    """
    from pathlib import Path
    from output import write_solo_docs
    from datetime import datetime

    paths = write_solo_docs(
        raw_text="Das ist der rohe Text vom Mikrofon.",
        refined="# Projektstatus\n\nDas Projekt läuft gut.\n",
        output_dir=str(tmp_path),
        dt=datetime(2026, 4, 2, 15, 0),
        title="Projektstatus Update",
        tldr="Das Projekt läuft gut und ist im Zeitplan.",
    )
    assert set(paths.keys()) == {"index", "transkript", "zusammenfassung"}
    assert all(os.path.exists(p) for p in paths.values())

    index = Path(paths["index"]).read_text()
    assert "Projektstatus Update" in index
    assert "transkript" in index
    assert "zusammenfassung" in index

    # transkript and zusammenfassung are in a subdir; index stays at top level
    assert os.path.dirname(paths["transkript"]) != str(tmp_path)
    assert os.path.dirname(paths["index"]) == str(tmp_path)

    transkript = Path(paths["transkript"]).read_text()
    zusammenfassung = Path(paths["zusammenfassung"]).read_text()
    # backlinks present
    assert "Index" in transkript
    assert "Index" in zusammenfassung
    assert "Das ist der rohe Text" in transkript
    assert "Projekt läuft gut" in zusammenfassung
|
||||
|
||||
|
||||
def test_write_meeting_docs_creates_three_files(tmp_path):
    """write_meeting_docs() emits three documents with speakers and summary.

    Fix: replace leaked ``open(p).read()`` handles with ``Path.read_text()``.
    """
    from pathlib import Path
    from output import write_meeting_docs
    from datetime import datetime

    aligned = [("Thomas", "Gut, dann fangen wir an."), ("Möller", "Ich hab das vorbereitet.")]
    paths = write_meeting_docs(
        aligned_segments=aligned,
        summary="# Meeting\n\n## Wichtigste Punkte\n- Budget besprochen",
        speakers=["Thomas", "Möller"],
        duration_min=5,
        output_dir=str(tmp_path),
        dt=datetime(2026, 4, 2, 14, 30),
    )
    assert len(paths) == 3

    index_content = Path(paths["index"]).read_text()
    assert "Thomas" in index_content
    assert "transkript" in index_content

    transcript_content = Path(paths["transkript"]).read_text()
    assert "**Thomas:**" in transcript_content
    assert "Gut, dann fangen wir an." in transcript_content

    summary_content = Path(paths["zusammenfassung"]).read_text()
    assert "Budget besprochen" in summary_content
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import asyncio
|
||||
import pytest
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
|
||||
@@ -23,3 +24,75 @@ def test_transcribe_file_calls_whisper(tmp_path):
|
||||
result = asyncio.run(eng.transcribe_file(str(wav), language="de"))
|
||||
assert result == "Hallo Welt"
|
||||
mock_model.transcribe.assert_called_once_with(str(wav), language="de")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_transcribe_uses_remote_when_base_url_set(tmp_path):
    """A non-empty base_url routes transcription to the remote HTTP endpoint."""
    import wave, struct
    import respx, httpx
    from transcription import TranscriptionEngine

    wav = tmp_path / "test.wav"
    with wave.open(str(wav), "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(16000)
        wf.writeframes(struct.pack("<100h", *([0] * 100)))

    engine = TranscriptionEngine()
    with respx.mock:
        respx.post("http://beastix:8000/v1/audio/transcriptions").mock(
            return_value=httpx.Response(200, json={"text": "Hallo Welt"})
        )
        text = await engine.transcribe_file(
            str(wav), language="de", model_name="large-v3",
            device="auto", base_url="http://beastix:8000",
        )
    assert text == "Hallo Welt"
|
||||
|
||||
|
||||
def test_transcribe_file_returns_segments_when_requested(tmp_path):
    """with_segments=True yields dicts carrying start/end and stripped text."""
    wav = tmp_path / "test.wav"
    wav.write_bytes(b"\x00" * 100)

    fake_segment = MagicMock()
    fake_segment.text = " Hallo Welt"
    fake_segment.start = 0.0
    fake_segment.end = 1.5
    fake_model = MagicMock()
    fake_model.transcribe.return_value = ([fake_segment], MagicMock())

    from transcription import TranscriptionEngine

    engine = TranscriptionEngine()
    engine._model = fake_model

    segments = asyncio.run(
        engine.transcribe_file(str(wav), language="de", with_segments=True)
    )
    assert isinstance(segments, list)
    assert segments[0]["text"] == "Hallo Welt"
    assert segments[0]["start"] == 0.0
    assert segments[0]["end"] == 1.5
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_transcribe_remote_returns_segments_when_requested(tmp_path):
    """Remote backend also honors with_segments=True via verbose_json."""
    import wave, struct
    import respx, httpx
    from transcription import TranscriptionEngine

    wav = tmp_path / "test.wav"
    with wave.open(str(wav), "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(16000)
        wf.writeframes(struct.pack("<100h", *([0] * 100)))

    engine = TranscriptionEngine()
    with respx.mock:
        respx.post("http://beastix:8000/v1/audio/transcriptions").mock(
            return_value=httpx.Response(200, json={
                "text": "Hallo Welt",
                "segments": [{"start": 0.0, "end": 1.5, "text": " Hallo Welt"}],
            })
        )
        segments = await engine.transcribe_file(
            str(wav), language="de", model_name="large-v3",
            device="auto", base_url="http://beastix:8000", with_segments=True,
        )
    assert isinstance(segments, list)
    assert segments[0]["text"] == "Hallo Welt"
|
||||
|
||||
+98
-3
@@ -1,4 +1,6 @@
|
||||
import asyncio
|
||||
import httpx
|
||||
from typing import Union
|
||||
|
||||
|
||||
class TranscriptionEngine:
|
||||
@@ -23,14 +25,107 @@ class TranscriptionEngine:
|
||||
language: str = "de",
|
||||
model_name: str = "large-v3",
|
||||
device: str = "auto",
|
||||
) -> str:
|
||||
loop = asyncio.get_event_loop()
|
||||
base_url: str = "",
|
||||
with_segments: bool = False,
|
||||
backend: str = "openai",
|
||||
) -> Union[str, list[dict]]:
|
||||
if base_url:
|
||||
if backend == "whispercpp":
|
||||
return await self._transcribe_remote_whispercpp(
|
||||
audio_path, language, base_url, with_segments
|
||||
)
|
||||
return await self._transcribe_remote(
|
||||
audio_path, language, model_name, base_url, with_segments
|
||||
)
|
||||
return await self._transcribe_local(
|
||||
audio_path, language, model_name, device, with_segments
|
||||
)
|
||||
|
||||
async def _transcribe_remote(
|
||||
self,
|
||||
audio_path: str,
|
||||
language: str,
|
||||
model_name: str,
|
||||
base_url: str,
|
||||
with_segments: bool,
|
||||
) -> Union[str, list[dict]]:
|
||||
async with httpx.AsyncClient(timeout=300) as client:
|
||||
with open(audio_path, "rb") as f:
|
||||
data = {"model": model_name, "language": language}
|
||||
if with_segments:
|
||||
data["timestamp_granularities[]"] = "segment"
|
||||
data["response_format"] = "verbose_json"
|
||||
r = await client.post(
|
||||
f"{base_url}/v1/audio/transcriptions",
|
||||
files={"file": ("audio.wav", f, "audio/wav")},
|
||||
data=data,
|
||||
)
|
||||
r.raise_for_status()
|
||||
body = r.json()
|
||||
if not with_segments:
|
||||
return body["text"]
|
||||
raw_segs = body.get("segments") or []
|
||||
if raw_segs:
|
||||
return [
|
||||
{"start": s["start"], "end": s["end"], "text": s["text"].strip()}
|
||||
for s in raw_segs
|
||||
]
|
||||
return [{"start": 0.0, "end": 9999.0, "text": body["text"].strip()}]
|
||||
|
||||
async def _transcribe_remote_whispercpp(
|
||||
self,
|
||||
audio_path: str,
|
||||
language: str,
|
||||
base_url: str,
|
||||
with_segments: bool,
|
||||
) -> Union[str, list[dict]]:
|
||||
async with httpx.AsyncClient(timeout=300) as client:
|
||||
with open(audio_path, "rb") as f:
|
||||
data = {
|
||||
"language": language,
|
||||
"temperature_inc": "0", # disable fallback to prevent repetition loops
|
||||
}
|
||||
if with_segments:
|
||||
data["response_format"] = "verbose_json"
|
||||
r = await client.post(
|
||||
f"{base_url}/inference",
|
||||
files={"file": ("audio.wav", f, "audio/wav")},
|
||||
data=data,
|
||||
)
|
||||
r.raise_for_status()
|
||||
body = r.json()
|
||||
if not with_segments:
|
||||
return body.get("text", "").strip()
|
||||
raw_segs = body.get("segments") or []
|
||||
if raw_segs:
|
||||
return [
|
||||
{"start": s["start"], "end": s["end"], "text": s["text"].strip()}
|
||||
for s in raw_segs
|
||||
]
|
||||
return [{"start": 0.0, "end": 9999.0, "text": body.get("text", "").strip()}]
|
||||
|
||||
async def _transcribe_local(
|
||||
self,
|
||||
audio_path: str,
|
||||
language: str,
|
||||
model_name: str,
|
||||
device: str,
|
||||
with_segments: bool,
|
||||
) -> Union[str, list[dict]]:
|
||||
loop = asyncio.get_running_loop()
|
||||
model = self._get_model(model_name, device)
|
||||
segments, _ = await loop.run_in_executor(
|
||||
None,
|
||||
lambda: model.transcribe(audio_path, language=language),
|
||||
)
|
||||
return "".join(seg.text for seg in segments).strip()
|
||||
segments = list(segments)
|
||||
if not with_segments:
|
||||
return "".join(seg.text for seg in segments).strip()
|
||||
return [
|
||||
{"start": seg.start, "end": seg.end, "text": seg.text.strip()}
|
||||
for seg in segments
|
||||
if seg.text.strip()
|
||||
]
|
||||
|
||||
|
||||
engine = TranscriptionEngine()  # module-level singleton instance of the engine
|
||||
|
||||
Reference in New Issue
Block a user