fix: whisper repetition loops, meeting transcript punctuation
- transcription: add temperature_inc=0 to whispercpp to disable temperature fallback (prevents repetition loops)
- pipeline: punctuate meeting transcript in one pass (parallel with summarize)
- output: write_meeting_docs accepts pre-built transcript_text
- llm: punctuate prompt preserves speaker labels

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+6
-2
@@ -144,8 +144,11 @@ async def _run_meeting_pipeline(cfg, wav_path, output_dir, instructions, diar_cf
|
|||||||
total_secs = sum(s["end"] - s["start"] for s in whisper_segs) if whisper_segs else 0
|
total_secs = sum(s["end"] - s["start"] for s in whisper_segs) if whisper_segs else 0
|
||||||
duration_min = max(1, round(total_secs / 60))
|
duration_min = max(1, round(total_secs / 60))
|
||||||
|
|
||||||
transcript_text = "\n\n".join(f"**{spk}:** {txt}" for spk, txt in named_aligned)
|
raw_transcript = "\n\n".join(f"**{spk}:** {txt}" for spk, txt in named_aligned)
|
||||||
summary = await client.summarize(transcript_text, model=cfg["ollama"]["model"])
|
summary, punctuated_transcript = await asyncio.gather(
|
||||||
|
client.summarize(raw_transcript, model=cfg["ollama"]["model"]),
|
||||||
|
client.punctuate(raw_transcript, model=cfg["ollama"]["model"]),
|
||||||
|
)
|
||||||
title, tldr = await client.generate_title_and_tldr(summary, model=cfg["ollama"]["model"])
|
title, tldr = await client.generate_title_and_tldr(summary, model=cfg["ollama"]["model"])
|
||||||
|
|
||||||
dt = datetime.now()
|
dt = datetime.now()
|
||||||
@@ -158,6 +161,7 @@ async def _run_meeting_pipeline(cfg, wav_path, output_dir, instructions, diar_cf
|
|||||||
dt=dt,
|
dt=dt,
|
||||||
title=title,
|
title=title,
|
||||||
tldr=tldr,
|
tldr=tldr,
|
||||||
|
transcript_text=punctuated_transcript,
|
||||||
)
|
)
|
||||||
|
|
||||||
await state.set_status(Status.IDLE)
|
await state.set_status(Status.IDLE)
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ Zweiter Absatz...
|
|||||||
PUNCTUATE_PROMPT = """Du bekommst einen rohen deutschen Sprachtranskript ohne Großschreibung und Satzzeichen.
|
PUNCTUATE_PROMPT = """Du bekommst einen rohen deutschen Sprachtranskript ohne Großschreibung und Satzzeichen.
|
||||||
Füge AUSSCHLIESSLICH Satzzeichen (Punkt, Komma, Fragezeichen, Ausrufezeichen) und Großschreibung am Satzanfang hinzu.
|
Füge AUSSCHLIESSLICH Satzzeichen (Punkt, Komma, Fragezeichen, Ausrufezeichen) und Großschreibung am Satzanfang hinzu.
|
||||||
Verändere KEINE Wörter, kürze NICHTS, füge NICHTS hinzu.
|
Verändere KEINE Wörter, kürze NICHTS, füge NICHTS hinzu.
|
||||||
|
Wenn Sprecher-Labels vorhanden sind (z.B. **Thomas:**), behalte sie exakt so bei.
|
||||||
Antworte NUR mit dem korrigierten Text, ohne Kommentar."""
|
Antworte NUR mit dem korrigierten Text, ohne Kommentar."""
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -152,6 +152,7 @@ def write_meeting_docs(
|
|||||||
dt: "datetime | None" = None,
|
dt: "datetime | None" = None,
|
||||||
title: str = "",
|
title: str = "",
|
||||||
tldr: str = "",
|
tldr: str = "",
|
||||||
|
transcript_text: str = "",
|
||||||
) -> dict[str, str]:
|
) -> dict[str, str]:
|
||||||
"""Write index (in output_dir), transkript + zusammenfassung (in subdir)."""
|
"""Write index (in output_dir), transkript + zusammenfassung (in subdir)."""
|
||||||
if dt is None:
|
if dt is None:
|
||||||
@@ -172,10 +173,13 @@ def write_meeting_docs(
|
|||||||
os.makedirs(subdir, exist_ok=True)
|
os.makedirs(subdir, exist_ok=True)
|
||||||
|
|
||||||
# --- transkript (in subdir) ---
|
# --- transkript (in subdir) ---
|
||||||
transcript_lines = []
|
if transcript_text:
|
||||||
for speaker, text in aligned_segments:
|
transcript_content = transcript_text
|
||||||
transcript_lines.append(f"**{speaker}:** {text}\n")
|
else:
|
||||||
transcript_content = "\n".join(transcript_lines)
|
transcript_lines = []
|
||||||
|
for speaker, text in aligned_segments:
|
||||||
|
transcript_lines.append(f"**{speaker}:** {text}\n")
|
||||||
|
transcript_content = "\n".join(transcript_lines)
|
||||||
transkript_filename = f"{base}-transkript.md"
|
transkript_filename = f"{base}-transkript.md"
|
||||||
transkript_path = os.path.join(subdir, transkript_filename)
|
transkript_path = os.path.join(subdir, transkript_filename)
|
||||||
with open(transkript_path, "w", encoding="utf-8") as f:
|
with open(transkript_path, "w", encoding="utf-8") as f:
|
||||||
|
|||||||
+4
-1
@@ -81,7 +81,10 @@ class TranscriptionEngine:
|
|||||||
) -> Union[str, list[dict]]:
|
) -> Union[str, list[dict]]:
|
||||||
async with httpx.AsyncClient(timeout=300) as client:
|
async with httpx.AsyncClient(timeout=300) as client:
|
||||||
with open(audio_path, "rb") as f:
|
with open(audio_path, "rb") as f:
|
||||||
data = {"language": language}
|
data = {
|
||||||
|
"language": language,
|
||||||
|
"temperature_inc": "0", # disable fallback to prevent repetition loops
|
||||||
|
}
|
||||||
if with_segments:
|
if with_segments:
|
||||||
data["response_format"] = "verbose_json"
|
data["response_format"] = "verbose_json"
|
||||||
r = await client.post(
|
r = await client.post(
|
||||||
|
|||||||
Reference in New Issue
Block a user