fix: whisper repetition loops, meeting transcript punctuation

- transcription: send temperature_inc=0 to the whisper.cpp server to disable temperature fallback (prevents repetition loops)
- pipeline: punctuate the whole meeting transcript in one pass, running concurrently with summarization
- output: write_meeting_docs accepts pre-built transcript_text
- llm: punctuate prompt preserves speaker labels

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-02 12:34:11 +02:00
parent 658f9be47f
commit 8ec9044c75
4 changed files with 19 additions and 7 deletions
+6 -2
View File
@@ -144,8 +144,11 @@ async def _run_meeting_pipeline(cfg, wav_path, output_dir, instructions, diar_cf
total_secs = sum(s["end"] - s["start"] for s in whisper_segs) if whisper_segs else 0
duration_min = max(1, round(total_secs / 60))
transcript_text = "\n\n".join(f"**{spk}:** {txt}" for spk, txt in named_aligned)
summary = await client.summarize(transcript_text, model=cfg["ollama"]["model"])
raw_transcript = "\n\n".join(f"**{spk}:** {txt}" for spk, txt in named_aligned)
summary, punctuated_transcript = await asyncio.gather(
client.summarize(raw_transcript, model=cfg["ollama"]["model"]),
client.punctuate(raw_transcript, model=cfg["ollama"]["model"]),
)
title, tldr = await client.generate_title_and_tldr(summary, model=cfg["ollama"]["model"])
dt = datetime.now()
@@ -158,6 +161,7 @@ async def _run_meeting_pipeline(cfg, wav_path, output_dir, instructions, diar_cf
dt=dt,
title=title,
tldr=tldr,
transcript_text=punctuated_transcript,
)
await state.set_status(Status.IDLE)
+1
View File
@@ -49,6 +49,7 @@ Zweiter Absatz...
# German-language prompt for the LLM punctuation step. It tells the
# model that the input is a raw German speech transcript without capitalization
# or punctuation, and instructs it to:
#   - add ONLY punctuation (period, comma, question mark, exclamation mark)
#     and sentence-initial capitalization,
#   - change no words, shorten nothing, add nothing,
#   - keep speaker labels such as "**Thomas:**" exactly as they are,
#   - answer with the corrected text only, without commentary.
# NOTE(review): "einen rohen deutschen Sprachtranskript" is grammatically off
# ("Transkript" is neuter: "ein rohes deutsches Sprachtranskript"). Left
# untouched here because the text is sent to the model verbatim; confirm
# before changing the wording.
PUNCTUATE_PROMPT = """Du bekommst einen rohen deutschen Sprachtranskript ohne Großschreibung und Satzzeichen.
Füge AUSSCHLIESSLICH Satzzeichen (Punkt, Komma, Fragezeichen, Ausrufezeichen) und Großschreibung am Satzanfang hinzu.
Verändere KEINE Wörter, kürze NICHTS, füge NICHTS hinzu.
Wenn Sprecher-Labels vorhanden sind (z.B. **Thomas:**), behalte sie exakt so bei.
Antworte NUR mit dem korrigierten Text, ohne Kommentar."""
+8 -4
View File
@@ -152,6 +152,7 @@ def write_meeting_docs(
dt: "datetime | None" = None,
title: str = "",
tldr: str = "",
transcript_text: str = "",
) -> dict[str, str]:
"""Write index (in output_dir), transkript + zusammenfassung (in subdir)."""
if dt is None:
@@ -172,10 +173,13 @@ def write_meeting_docs(
os.makedirs(subdir, exist_ok=True)
# --- transkript (in subdir) ---
transcript_lines = []
for speaker, text in aligned_segments:
transcript_lines.append(f"**{speaker}:** {text}\n")
transcript_content = "\n".join(transcript_lines)
if transcript_text:
transcript_content = transcript_text
else:
transcript_lines = []
for speaker, text in aligned_segments:
transcript_lines.append(f"**{speaker}:** {text}\n")
transcript_content = "\n".join(transcript_lines)
transkript_filename = f"{base}-transkript.md"
transkript_path = os.path.join(subdir, transkript_filename)
with open(transkript_path, "w", encoding="utf-8") as f:
+4 -1
View File
@@ -81,7 +81,10 @@ class TranscriptionEngine:
) -> Union[str, list[dict]]:
async with httpx.AsyncClient(timeout=300) as client:
with open(audio_path, "rb") as f:
data = {"language": language}
data = {
"language": language,
"temperature_inc": "0", # disable fallback to prevent repetition loops
}
if with_segments:
data["response_format"] = "verbose_json"
r = await client.post(