fix: whisper repetition loops, meeting transcript punctuation

- transcription: send temperature_inc=0 in whispercpp requests to disable temperature fallback (prevents repetition loops)
- pipeline: punctuate the whole meeting transcript in a single pass, run concurrently with summarize via asyncio.gather
- output: write_meeting_docs accepts an optional pre-built transcript_text; when it is empty, the transcript is still built from aligned_segments
- llm: punctuate prompt preserves speaker labels

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-02 12:34:11 +02:00
parent 658f9be47f
commit 8ec9044c75
4 changed files with 19 additions and 7 deletions
+6 -2
View File
@@ -144,8 +144,11 @@ async def _run_meeting_pipeline(cfg, wav_path, output_dir, instructions, diar_cf
total_secs = sum(s["end"] - s["start"] for s in whisper_segs) if whisper_segs else 0 total_secs = sum(s["end"] - s["start"] for s in whisper_segs) if whisper_segs else 0
duration_min = max(1, round(total_secs / 60)) duration_min = max(1, round(total_secs / 60))
transcript_text = "\n\n".join(f"**{spk}:** {txt}" for spk, txt in named_aligned) raw_transcript = "\n\n".join(f"**{spk}:** {txt}" for spk, txt in named_aligned)
summary = await client.summarize(transcript_text, model=cfg["ollama"]["model"]) summary, punctuated_transcript = await asyncio.gather(
client.summarize(raw_transcript, model=cfg["ollama"]["model"]),
client.punctuate(raw_transcript, model=cfg["ollama"]["model"]),
)
title, tldr = await client.generate_title_and_tldr(summary, model=cfg["ollama"]["model"]) title, tldr = await client.generate_title_and_tldr(summary, model=cfg["ollama"]["model"])
dt = datetime.now() dt = datetime.now()
@@ -158,6 +161,7 @@ async def _run_meeting_pipeline(cfg, wav_path, output_dir, instructions, diar_cf
dt=dt, dt=dt,
title=title, title=title,
tldr=tldr, tldr=tldr,
transcript_text=punctuated_transcript,
) )
await state.set_status(Status.IDLE) await state.set_status(Status.IDLE)
+1
View File
@@ -49,6 +49,7 @@ Zweiter Absatz...
PUNCTUATE_PROMPT = """Du bekommst einen rohen deutschen Sprachtranskript ohne Großschreibung und Satzzeichen. PUNCTUATE_PROMPT = """Du bekommst einen rohen deutschen Sprachtranskript ohne Großschreibung und Satzzeichen.
Füge AUSSCHLIESSLICH Satzzeichen (Punkt, Komma, Fragezeichen, Ausrufezeichen) und Großschreibung am Satzanfang hinzu. Füge AUSSCHLIESSLICH Satzzeichen (Punkt, Komma, Fragezeichen, Ausrufezeichen) und Großschreibung am Satzanfang hinzu.
Verändere KEINE Wörter, kürze NICHTS, füge NICHTS hinzu. Verändere KEINE Wörter, kürze NICHTS, füge NICHTS hinzu.
Wenn Sprecher-Labels vorhanden sind (z.B. **Thomas:**), behalte sie exakt so bei.
Antworte NUR mit dem korrigierten Text, ohne Kommentar.""" Antworte NUR mit dem korrigierten Text, ohne Kommentar."""
+8 -4
View File
@@ -152,6 +152,7 @@ def write_meeting_docs(
dt: "datetime | None" = None, dt: "datetime | None" = None,
title: str = "", title: str = "",
tldr: str = "", tldr: str = "",
transcript_text: str = "",
) -> dict[str, str]: ) -> dict[str, str]:
"""Write index (in output_dir), transkript + zusammenfassung (in subdir).""" """Write index (in output_dir), transkript + zusammenfassung (in subdir)."""
if dt is None: if dt is None:
@@ -172,10 +173,13 @@ def write_meeting_docs(
os.makedirs(subdir, exist_ok=True) os.makedirs(subdir, exist_ok=True)
# --- transkript (in subdir) --- # --- transkript (in subdir) ---
transcript_lines = [] if transcript_text:
for speaker, text in aligned_segments: transcript_content = transcript_text
transcript_lines.append(f"**{speaker}:** {text}\n") else:
transcript_content = "\n".join(transcript_lines) transcript_lines = []
for speaker, text in aligned_segments:
transcript_lines.append(f"**{speaker}:** {text}\n")
transcript_content = "\n".join(transcript_lines)
transkript_filename = f"{base}-transkript.md" transkript_filename = f"{base}-transkript.md"
transkript_path = os.path.join(subdir, transkript_filename) transkript_path = os.path.join(subdir, transkript_filename)
with open(transkript_path, "w", encoding="utf-8") as f: with open(transkript_path, "w", encoding="utf-8") as f:
+4 -1
View File
@@ -81,7 +81,10 @@ class TranscriptionEngine:
) -> Union[str, list[dict]]: ) -> Union[str, list[dict]]:
async with httpx.AsyncClient(timeout=300) as client: async with httpx.AsyncClient(timeout=300) as client:
with open(audio_path, "rb") as f: with open(audio_path, "rb") as f:
data = {"language": language} data = {
"language": language,
"temperature_inc": "0", # disable fallback to prevent repetition loops
}
if with_segments: if with_segments:
data["response_format"] = "verbose_json" data["response_format"] = "verbose_json"
r = await client.post( r = await client.post(