From 8ec9044c75eca61b92397410bc844467980efd4d Mon Sep 17 00:00:00 2001 From: "thomas.kopp" Date: Thu, 2 Apr 2026 12:34:11 +0200 Subject: [PATCH] fix: whisper repetition loops, meeting transcript punctuation - transcription: add temperature_inc=0 to whispercpp to disable fallback (prevents loops) - pipeline: punctuate meeting transcript in one pass (parallel with summarize) - output: write_meeting_docs accepts pre-built transcript_text - llm: punctuate prompt preserves speaker labels Co-Authored-By: Claude Sonnet 4.6 --- api/pipeline.py | 8 ++++++-- llm.py | 1 + output.py | 12 ++++++++---- transcription.py | 5 ++++- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/api/pipeline.py b/api/pipeline.py index 981dfe6..f44c5f3 100644 --- a/api/pipeline.py +++ b/api/pipeline.py @@ -144,8 +144,11 @@ async def _run_meeting_pipeline(cfg, wav_path, output_dir, instructions, diar_cf total_secs = sum(s["end"] - s["start"] for s in whisper_segs) if whisper_segs else 0 duration_min = max(1, round(total_secs / 60)) - transcript_text = "\n\n".join(f"**{spk}:** {txt}" for spk, txt in named_aligned) - summary = await client.summarize(transcript_text, model=cfg["ollama"]["model"]) + raw_transcript = "\n\n".join(f"**{spk}:** {txt}" for spk, txt in named_aligned) + summary, punctuated_transcript = await asyncio.gather( + client.summarize(raw_transcript, model=cfg["ollama"]["model"]), + client.punctuate(raw_transcript, model=cfg["ollama"]["model"]), + ) title, tldr = await client.generate_title_and_tldr(summary, model=cfg["ollama"]["model"]) dt = datetime.now() @@ -158,6 +161,7 @@ async def _run_meeting_pipeline(cfg, wav_path, output_dir, instructions, diar_cf dt=dt, title=title, tldr=tldr, + transcript_text=punctuated_transcript, ) await state.set_status(Status.IDLE) diff --git a/llm.py b/llm.py index 3dee612..d162f4e 100644 --- a/llm.py +++ b/llm.py @@ -49,6 +49,7 @@ Zweiter Absatz... PUNCTUATE_PROMPT = """Du bekommst einen rohen deutschen Sprachtranskript ohne Großschreibung und Satzzeichen. Füge AUSSCHLIESSLICH Satzzeichen (Punkt, Komma, Fragezeichen, Ausrufezeichen) und Großschreibung am Satzanfang hinzu. Verändere KEINE Wörter, kürze NICHTS, füge NICHTS hinzu. +Wenn Sprecher-Labels vorhanden sind (z.B. **Thomas:**), behalte sie exakt so bei. Antworte NUR mit dem korrigierten Text, ohne Kommentar.""" diff --git a/output.py b/output.py index 3442167..5804174 100644 --- a/output.py +++ b/output.py @@ -152,6 +152,7 @@ def write_meeting_docs( dt: "datetime | None" = None, title: str = "", tldr: str = "", + transcript_text: str = "", ) -> dict[str, str]: """Write index (in output_dir), transkript + zusammenfassung (in subdir).""" if dt is None: @@ -172,10 +173,13 @@ def write_meeting_docs( os.makedirs(subdir, exist_ok=True) # --- transkript (in subdir) --- - transcript_lines = [] - for speaker, text in aligned_segments: - transcript_lines.append(f"**{speaker}:** {text}\n") - transcript_content = "\n".join(transcript_lines) + if transcript_text: + transcript_content = transcript_text + else: + transcript_lines = [] + for speaker, text in aligned_segments: + transcript_lines.append(f"**{speaker}:** {text}\n") + transcript_content = "\n".join(transcript_lines) transkript_filename = f"{base}-transkript.md" transkript_path = os.path.join(subdir, transkript_filename) with open(transkript_path, "w", encoding="utf-8") as f: diff --git a/transcription.py b/transcription.py index 3247062..08c1b6d 100644 --- a/transcription.py +++ b/transcription.py @@ -81,7 +81,10 @@ class TranscriptionEngine: ) -> Union[str, list[dict]]: async with httpx.AsyncClient(timeout=300) as client: with open(audio_path, "rb") as f: - data = {"language": language} + data = { + "language": language, + "temperature_inc": "0", # disable fallback to prevent repetition loops + } if with_segments: data["response_format"] = "verbose_json" r = await client.post(