fix: punctuate raw transcript, strip JSON code fences, filter null speaker names

- llm: punctuate() adds punctuation/capitalisation without changing words
- llm: _strip_code_fences() handles markdown-wrapped JSON from gemma3
- llm: drop the literal string 'null' from identify_speakers results
- pipeline: run punctuate() on raw_text concurrently with refine() for solo recordings

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-02 12:23:25 +02:00
parent d3582eaeb7
commit 658f9be47f
2 changed files with 37 additions and 8 deletions
+5 -6
View File
@@ -74,15 +74,14 @@ async def _run_solo_pipeline(cfg, wav_path, output_dir, instructions):
await broadcast({"event": "transcribed", "raw": raw_text})
client = OllamaClient(base_url=cfg["ollama"]["base_url"])
refined = await client.refine(
raw_text=raw_text,
instructions=instructions,
model=cfg["ollama"]["model"],
punctuated, refined = await asyncio.gather(
client.punctuate(raw_text, model=cfg["ollama"]["model"]),
client.refine(raw_text=raw_text, instructions=instructions, model=cfg["ollama"]["model"]),
)
title, tldr = await client.generate_title_and_tldr(refined, model=cfg["ollama"]["model"])
dt = datetime.now()
title, tldr = await client.generate_title_and_tldr(refined, model=cfg["ollama"]["model"])
paths = write_solo_docs(raw_text=raw_text, refined=refined, output_dir=output_dir, dt=dt, title=title, tldr=tldr)
paths = write_solo_docs(raw_text=punctuated, refined=refined, output_dir=output_dir, dt=dt, title=title, tldr=tldr)
await state.set_status(Status.IDLE)
await broadcast({