fix: whisper repetition loops, meeting transcript punctuation

- transcription: add temperature_inc=0 to whispercpp to disable fallback (prevents loops)
- pipeline: punctuate meeting transcript in one pass (parallel with summarize)
- output: write_meeting_docs accepts pre-built transcript_text
- llm: punctuate prompt preserves speaker labels

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 12:34:11 +02:00
parent 658f9be47f
commit 8ec9044c75
4 changed files with 19 additions and 7 deletions
@@ -144,8 +144,11 @@ async def _run_meeting_pipeline(cfg, wav_path, output_dir, instructions, diar_cf
    total_secs = sum(s["end"] - s["start"] for s in whisper_segs) if whisper_segs else 0
    duration_min = max(1, round(total_secs / 60))
-    transcript_text = "\n\n".join(f"**{spk}:** {txt}" for spk, txt in named_aligned)
+    raw_transcript = "\n\n".join(f"**{spk}:** {txt}" for spk, txt in named_aligned)
-    summary = await client.summarize(transcript_text, model=cfg["ollama"]["model"])
+    summary, punctuated_transcript = await asyncio.gather(
        client.summarize(raw_transcript, model=cfg["ollama"]["model"]),
        client.punctuate(raw_transcript, model=cfg["ollama"]["model"]),
    )
    title, tldr = await client.generate_title_and_tldr(summary, model=cfg["ollama"]["model"])
    dt = datetime.now()
@@ -158,6 +161,7 @@ async def _run_meeting_pipeline(cfg, wav_path, output_dir, instructions, diar_cf
        dt=dt,
        title=title,
        tldr=tldr,
        transcript_text=punctuated_transcript,
    )
    await state.set_status(Status.IDLE)
@@ -49,6 +49,7 @@ Zweiter Absatz...
 PUNCTUATE_PROMPT = """Du bekommst einen rohen deutschen Sprachtranskript ohne Großschreibung und Satzzeichen.
 Füge AUSSCHLIESSLICH Satzzeichen (Punkt, Komma, Fragezeichen, Ausrufezeichen) und Großschreibung am Satzanfang hinzu.
 Verändere KEINE Wörter, kürze NICHTS, füge NICHTS hinzu.
 Wenn Sprecher-Labels vorhanden sind (z.B. **Thomas:**), behalte sie exakt so bei.
 Antworte NUR mit dem korrigierten Text, ohne Kommentar."""
@@ -152,6 +152,7 @@ def write_meeting_docs(
    dt: "datetime | None" = None,
    title: str = "",
    tldr: str = "",
    transcript_text: str = "",
 ) -> dict[str, str]:
    """Write index (in output_dir), transkript + zusammenfassung (in subdir)."""
    if dt is None:
@@ -172,10 +173,13 @@ def write_meeting_docs(
    os.makedirs(subdir, exist_ok=True)
    # --- transkript (in subdir) ---
-    transcript_lines = []
+    if transcript_text:
-    for speaker, text in aligned_segments:
+        transcript_content = transcript_text
-        transcript_lines.append(f"**{speaker}:** {text}\n")
+    else:
-    transcript_content = "\n".join(transcript_lines)
+        transcript_lines = []
        for speaker, text in aligned_segments:
            transcript_lines.append(f"**{speaker}:** {text}\n")
        transcript_content = "\n".join(transcript_lines)
    transkript_filename = f"{base}-transkript.md"
    transkript_path = os.path.join(subdir, transkript_filename)
    with open(transkript_path, "w", encoding="utf-8") as f:
@@ -81,7 +81,10 @@ class TranscriptionEngine:
    ) -> Union[str, list[dict]]:
        async with httpx.AsyncClient(timeout=300) as client:
            with open(audio_path, "rb") as f:
-                data = {"language": language}
+                data = {
                    "language": language,
                    "temperature_inc": "0",   # disable fallback to prevent repetition loops
                }
                if with_segments:
                    data["response_format"] = "verbose_json"
                r = await client.post(