diff --git a/api/pipeline.py b/api/pipeline.py index 747dcae..981dfe6 100644 --- a/api/pipeline.py +++ b/api/pipeline.py @@ -74,15 +74,14 @@ async def _run_solo_pipeline(cfg, wav_path, output_dir, instructions): await broadcast({"event": "transcribed", "raw": raw_text}) client = OllamaClient(base_url=cfg["ollama"]["base_url"]) - refined = await client.refine( - raw_text=raw_text, - instructions=instructions, - model=cfg["ollama"]["model"], + punctuated, refined = await asyncio.gather( + client.punctuate(raw_text, model=cfg["ollama"]["model"]), + client.refine(raw_text=raw_text, instructions=instructions, model=cfg["ollama"]["model"]), ) + title, tldr = await client.generate_title_and_tldr(refined, model=cfg["ollama"]["model"]) dt = datetime.now() - title, tldr = await client.generate_title_and_tldr(refined, model=cfg["ollama"]["model"]) - paths = write_solo_docs(raw_text=raw_text, refined=refined, output_dir=output_dir, dt=dt, title=title, tldr=tldr) + paths = write_solo_docs(raw_text=punctuated, refined=refined, output_dir=output_dir, dt=dt, title=title, tldr=tldr) await state.set_status(Status.IDLE) await broadcast({ diff --git a/llm.py b/llm.py index 81b560c..3dee612 100644 --- a/llm.py +++ b/llm.py @@ -46,6 +46,21 @@ Zweiter Absatz... """ +PUNCTUATE_PROMPT = """Du bekommst einen rohen deutschen Sprachtranskript ohne Großschreibung und Satzzeichen. +Füge AUSSCHLIESSLICH Satzzeichen (Punkt, Komma, Fragezeichen, Ausrufezeichen) und Großschreibung am Satzanfang hinzu. +Verändere KEINE Wörter, kürze NICHTS, füge NICHTS hinzu. +Antworte NUR mit dem korrigierten Text, ohne Kommentar.""" + + +def _strip_code_fences(text: str) -> str: + """Remove markdown code fences (```json ... ```) from LLM responses.""" + import re + m = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", text) + if m: + return m.group(1) + return text + + class OllamaClient: def __init__(self, base_url: str = "http://localhost:11434"): self.base_url = base_url @@ -91,7 +106,7 @@ class OllamaClient: }, ) r.raise_for_status() - raw = r.json()["response"].strip() + raw = _strip_code_fences(r.json()["response"].strip()) try: data = json.loads(raw) title = str(data.get("title", "")).strip() or "Diktat" @@ -100,6 +115,21 @@ class OllamaClient: except Exception: return "Diktat", "Kein TL;DR verfügbar." + async def punctuate( + self, + text: str, + model: str = "gemma3:12b", + ) -> str: + """Add punctuation and capitalisation to raw whisper output without changing words.""" + async with httpx.AsyncClient(timeout=120) as client: + r = await client.post( + f"{self.base_url}/api/generate", + json={"model": model, "prompt": text, "system": PUNCTUATE_PROMPT, "stream": False}, + ) + r.raise_for_status() + result = r.json()["response"].strip() + return result if result else text + async def identify_speakers( self, transcript_excerpt: str, @@ -123,7 +153,7 @@ class OllamaClient: data = json.loads(raw) if not isinstance(data, dict): return {} - return {k: v for k, v in data.items() if v} + return {k: v for k, v in data.items() if v and str(v).lower() != "null"} except Exception: return {}