feat: OllamaClient.identify_speakers() and summarize() for diarization pipeline

2026-04-02 01:03:40 +02:00
parent b8cc8a3b33
commit 9b5b89e159
2 changed files with 103 additions and 0 deletions
@@ -1,5 +1,19 @@
 import httpx
 # System prompt (German) passed as "system" by OllamaClient.identify_speakers.
 # It asks the model to infer real names for the SPEAKER_XX labels from the
 # opening of a transcript (e.g. from direct address) and to reply with nothing
 # but a JSON object that maps each label to a name or null.
 IDENTIFY_SPEAKERS_PROMPT = """Du bekommst den Anfang eines Gesprächstranskripts mit Sprecher-Labels (SPEAKER_00, SPEAKER_01, ...).
 Ermittle, welche echten Namen den Sprechern zugeordnet werden können — z.B. durch direkte Anrede ("Herr Möller", "Frank").
 Antworte NUR mit einem JSON-Objekt: {"SPEAKER_00": "Name oder null", "SPEAKER_01": "Name oder null"}
 Kein weiterer Text, keine Erklärung."""
 # System prompt (German) passed as "system" by OllamaClient.summarize.
 # It asks for a structured German Markdown summary of a speaker-labelled
 # transcript: an H1 title, the key points and, where present, open questions
 # and next steps / ideas. The reply is supposed to contain only the Markdown.
 SUMMARIZE_PROMPT = """Du bist ein präziser Assistent für Business-Kommunikation.
 Du bekommst ein Gesprächstranskript mit Sprecher-Labels.
 Erstelle eine strukturierte Zusammenfassung auf Deutsch mit:
 1. Einem passenden H1-Titel
 2. ## Wichtigste Punkte (Aufzählung)
 3. ## Offene Fragen (Aufzählung, falls vorhanden)
 4. ## Nächste Schritte / Ideen (Aufzählung, falls vorhanden)
 Antworte NUR mit dem fertigen Markdown."""
 SYSTEM_PROMPT = """Du bist ein präziser Schreibassistent.
 Du bekommst einen rohen Sprachtranskript und optionale Instruktionen des Nutzers.
 Deine Aufgabe:
@@ -51,3 +65,48 @@ class OllamaClient:
            )
            r.raise_for_status()
            return r.json()["response"]
    async def identify_speakers(
        self,
        transcript_excerpt: str,
        model: str = "gemma3:12b",
    ) -> dict[str, str]:
        """Try to map SPEAKER_XX labels to real names.

        Sends at most the first 2000 characters of ``transcript_excerpt`` to
        the ``/api/generate`` endpoint, with ``IDENTIFY_SPEAKERS_PROMPT`` as
        system prompt, and interprets the model's reply as a JSON object.

        Args:
            transcript_excerpt: Beginning of a transcript with speaker labels.
            model: Name of the model to query.

        Returns:
            Mapping from speaker label to name. Entries whose value is not a
            string, is blank, or is the literal text ``"null"`` are left out.
            ``{}`` is returned whenever the reply cannot be interpreted as a
            JSON object.

        Raises:
            httpx.HTTPError: If the request fails or the server answers with
                an error status. Only an unusable *reply* is mapped to ``{}``.
        """
        import json

        async with httpx.AsyncClient(timeout=60) as client:
            r = await client.post(
                f"{self.base_url}/api/generate",
                json={
                    "model": model,
                    "prompt": f"Transkript-Anfang:\n{transcript_excerpt[:2000]}",
                    "system": IDENTIFY_SPEAKERS_PROMPT,
                    "stream": False,
                    # Ask the server for syntactically valid JSON instead of
                    # relying on the prompt alone.
                    "format": "json",
                },
            )
            r.raise_for_status()
            try:
                raw = r.json()["response"]
            except (ValueError, KeyError, TypeError):
                # Body is not JSON or carries no "response" field.
                return {}

        if not isinstance(raw, str):
            return {}

        # Models frequently wrap the object in Markdown fences or add a
        # sentence around it, so parse only the outermost {...} span.
        start = raw.find("{")
        end = raw.rfind("}")
        if start == -1 or end < start:
            return {}
        try:
            data = json.loads(raw[start : end + 1])
        except ValueError:  # json.JSONDecodeError is a ValueError
            return {}
        if not isinstance(data, dict):
            return {}

        names: dict[str, str] = {}
        for label, name in data.items():
            if not isinstance(name, str):
                # JSON null, numbers, nested structures: no usable name.
                continue
            name = name.strip()
            # The prompt's example shows "Name oder null" in quotes, so the
            # model may answer with the *string* "null" for unknown speakers.
            if name and name.casefold() != "null":
                names[label] = name
        return names
    async def summarize(
        self,
        annotated_transcript: str,
        model: str = "gemma3:12b",
    ) -> str:
        """Request a Markdown summary of an annotated transcript.

        Posts the transcript to the ``/api/generate`` endpoint with
        ``SUMMARIZE_PROMPT`` as system prompt (non-streaming, 180 s client
        timeout).

        Args:
            annotated_transcript: Transcript text to summarize.
            model: Name of the model to query.

        Returns:
            The ``response`` field of the reply, stripped of surrounding
            whitespace.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        request_body = {
            "model": model,
            "prompt": f"Transkript:\n{annotated_transcript}",
            "system": SUMMARIZE_PROMPT,
            "stream": False,
        }
        endpoint = f"{self.base_url}/api/generate"

        async with httpx.AsyncClient(timeout=180) as http:
            reply = await http.post(endpoint, json=request_body)
            reply.raise_for_status()
            summary = reply.json()["response"]
        return summary.strip()
@@ -35,3 +35,47 @@ async def test_list_models_returns_list():
        client = OllamaClient(base_url="http://localhost:11434")
        models = await client.list_models()
        assert "gemma3:12b" in models
@pytest.mark.asyncio
async def test_identify_speakers_returns_dict():
    """identify_speakers hands back the label-to-name mapping from the reply."""
    import json

    import httpx
    import respx
    from llm import OllamaClient

    expected = {"SPEAKER_00": "Thomas", "SPEAKER_01": "Möller"}
    excerpt = "SPEAKER_00: Gut, Herr Möller.\nSPEAKER_01: Danke, Thomas."
    # The model's answer is itself a JSON document inside the "response" field.
    fake_reply = httpx.Response(200, json={"response": json.dumps(expected)})

    ollama = OllamaClient()
    with respx.mock:
        respx.post("http://localhost:11434/api/generate").mock(
            return_value=fake_reply
        )
        result = await ollama.identify_speakers(excerpt)

    assert result == expected
@pytest.mark.asyncio
async def test_identify_speakers_returns_empty_on_parse_failure():
    """A reply that is not JSON yields an empty mapping instead of an error."""
    import httpx
    import respx
    from llm import OllamaClient

    not_json_reply = httpx.Response(200, json={"response": "kein json hier"})

    ollama = OllamaClient()
    with respx.mock:
        respx.post("http://localhost:11434/api/generate").mock(
            return_value=not_json_reply
        )
        result = await ollama.identify_speakers("irgendwas")

    assert result == {}
@pytest.mark.asyncio
async def test_summarize_returns_string():
    """summarize returns the Markdown text contained in the reply."""
    import httpx
    import respx
    from llm import OllamaClient

    markdown_reply = httpx.Response(
        200, json={"response": "# Zusammenfassung\n\nKurzer Text."}
    )

    ollama = OllamaClient()
    with respx.mock:
        respx.post("http://localhost:11434/api/generate").mock(
            return_value=markdown_reply
        )
        result = await ollama.summarize(
            "Thomas: Hallo.\nMöller: Hi.", model="gemma3:12b"
        )

    assert "Zusammenfassung" in result