fix: punctuate raw transcript, strip JSON code fences, filter null speaker names
- llm: punctuate() adds punctuation/capitalisation without changing words
- llm: _strip_code_fences() handles markdown-wrapped JSON from gemma3
- llm: filter string 'null' from identify_speakers result
- pipeline: punctuate raw_text in parallel with refine for solo recordings

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+5
-6
@@ -74,15 +74,14 @@ async def _run_solo_pipeline(cfg, wav_path, output_dir, instructions):
|
||||
await broadcast({"event": "transcribed", "raw": raw_text})
|
||||
|
||||
client = OllamaClient(base_url=cfg["ollama"]["base_url"])
|
||||
refined = await client.refine(
|
||||
raw_text=raw_text,
|
||||
instructions=instructions,
|
||||
model=cfg["ollama"]["model"],
|
||||
punctuated, refined = await asyncio.gather(
|
||||
client.punctuate(raw_text, model=cfg["ollama"]["model"]),
|
||||
client.refine(raw_text=raw_text, instructions=instructions, model=cfg["ollama"]["model"]),
|
||||
)
|
||||
title, tldr = await client.generate_title_and_tldr(refined, model=cfg["ollama"]["model"])
|
||||
|
||||
dt = datetime.now()
|
||||
title, tldr = await client.generate_title_and_tldr(refined, model=cfg["ollama"]["model"])
|
||||
paths = write_solo_docs(raw_text=raw_text, refined=refined, output_dir=output_dir, dt=dt, title=title, tldr=tldr)
|
||||
paths = write_solo_docs(raw_text=punctuated, refined=refined, output_dir=output_dir, dt=dt, title=title, tldr=tldr)
|
||||
|
||||
await state.set_status(Status.IDLE)
|
||||
await broadcast({
|
||||
|
||||
Reference in New Issue
Block a user