feat: OllamaClient.identify_speakers() and summarize() for diarization pipeline
This commit is contained in:
@@ -1,5 +1,19 @@
|
||||
import httpx
|
||||
|
||||
IDENTIFY_SPEAKERS_PROMPT = """Du bekommst den Anfang eines Gesprächstranskripts mit Sprecher-Labels (SPEAKER_00, SPEAKER_01, ...).
|
||||
Ermittle, welche echten Namen den Sprechern zugeordnet werden können — z.B. durch direkte Anrede ("Herr Möller", "Frank").
|
||||
Antworte NUR mit einem JSON-Objekt: {"SPEAKER_00": "Name oder null", "SPEAKER_01": "Name oder null"}
|
||||
Kein weiterer Text, keine Erklärung."""
|
||||
|
||||
SUMMARIZE_PROMPT = """Du bist ein präziser Assistent für Business-Kommunikation.
|
||||
Du bekommst ein Gesprächstranskript mit Sprecher-Labels.
|
||||
Erstelle eine strukturierte Zusammenfassung auf Deutsch mit:
|
||||
1. Einem passenden H1-Titel
|
||||
2. ## Wichtigste Punkte (Aufzählung)
|
||||
3. ## Offene Fragen (Aufzählung, falls vorhanden)
|
||||
4. ## Nächste Schritte / Ideen (Aufzählung, falls vorhanden)
|
||||
Antworte NUR mit dem fertigen Markdown."""
|
||||
|
||||
SYSTEM_PROMPT = """Du bist ein präziser Schreibassistent.
|
||||
Du bekommst einen rohen Sprachtranskript und optionale Instruktionen des Nutzers.
|
||||
Deine Aufgabe:
|
||||
@@ -51,3 +65,48 @@ class OllamaClient:
|
||||
)
|
||||
r.raise_for_status()
|
||||
return r.json()["response"]
|
||||
|
||||
async def identify_speakers(
    self,
    transcript_excerpt: str,
    model: str = "gemma3:12b",
) -> dict[str, str]:
    """Try to map SPEAKER_XX labels to real names. Returns {} on failure.

    Posts the first 2000 characters of *transcript_excerpt* to the Ollama
    ``/api/generate`` endpoint with :data:`IDENTIFY_SPEAKERS_PROMPT` as the
    system prompt and parses the model's JSON reply.  Best-effort: any
    answer that is not a usable JSON object yields ``{}`` rather than an
    exception (HTTP errors still propagate via ``raise_for_status``).

    Args:
        transcript_excerpt: Start of the diarized transcript; truncated to
            2000 characters to keep the request small.
        model: Ollama model name to query.

    Returns:
        Mapping of speaker label -> identified name, with null/empty
        entries dropped and keys/values coerced to ``str``.
    """
    import json
    import re

    async with httpx.AsyncClient(timeout=60) as client:
        r = await client.post(
            f"{self.base_url}/api/generate",
            json={
                "model": model,
                "prompt": f"Transkript-Anfang:\n{transcript_excerpt[:2000]}",
                "system": IDENTIFY_SPEAKERS_PROMPT,
                "stream": False,
            },
        )
        r.raise_for_status()
        raw = r.json()["response"].strip()

    # Models frequently wrap JSON in markdown fences (```json ... ```)
    # despite the "JSON only" instruction; unwrap before parsing so those
    # otherwise-valid answers are not discarded.
    fenced = re.match(r"^```(?:json)?\s*(.*?)\s*```$", raw, re.DOTALL)
    if fenced:
        raw = fenced.group(1)

    try:
        data = json.loads(raw)
    except Exception:
        return {}
    if not isinstance(data, dict):
        return {}
    # Drop null/empty values; coerce to str so the declared return type
    # dict[str, str] actually holds even for odd model output.
    return {str(k): str(v) for k, v in data.items() if v}
|
||||
|
||||
async def summarize(
    self,
    annotated_transcript: str,
    model: str = "gemma3:12b",
) -> str:
    """Produce a structured German Markdown summary of a transcript.

    Sends the full speaker-annotated transcript to the Ollama
    ``/api/generate`` endpoint with :data:`SUMMARIZE_PROMPT` as the system
    prompt and returns the model's response with surrounding whitespace
    stripped.  HTTP errors propagate via ``raise_for_status``.

    Args:
        annotated_transcript: Transcript text with speaker labels/names.
        model: Ollama model name to query.

    Returns:
        The generated Markdown summary as a string.
    """
    payload = {
        "model": model,
        "prompt": f"Transkript:\n{annotated_transcript}",
        "system": SUMMARIZE_PROMPT,
        "stream": False,
    }
    # Summarization of a full transcript can be slow — allow up to 3 min.
    async with httpx.AsyncClient(timeout=180) as session:
        response = await session.post(f"{self.base_url}/api/generate", json=payload)
        response.raise_for_status()
        body = response.json()
    return body["response"].strip()
|
||||
|
||||
@@ -35,3 +35,47 @@ async def test_list_models_returns_list():
|
||||
client = OllamaClient(base_url="http://localhost:11434")
|
||||
models = await client.list_models()
|
||||
assert "gemma3:12b" in models
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_identify_speakers_returns_dict():
    """A valid JSON reply is parsed into a label -> name mapping."""
    import json

    import httpx
    import respx

    from llm import OllamaClient

    ollama = OllamaClient()
    expected = {"SPEAKER_00": "Thomas", "SPEAKER_01": "Möller"}
    excerpt = "SPEAKER_00: Gut, Herr Möller.\nSPEAKER_01: Danke, Thomas."

    with respx.mock:
        respx.post("http://localhost:11434/api/generate").mock(
            return_value=httpx.Response(200, json={"response": json.dumps(expected)})
        )
        mapping = await ollama.identify_speakers(excerpt)
        assert mapping == {"SPEAKER_00": "Thomas", "SPEAKER_01": "Möller"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_identify_speakers_returns_empty_on_parse_failure():
    """A non-JSON reply degrades gracefully to an empty mapping."""
    import httpx
    import respx

    from llm import OllamaClient

    ollama = OllamaClient()

    with respx.mock:
        respx.post("http://localhost:11434/api/generate").mock(
            return_value=httpx.Response(200, json={"response": "kein json hier"})
        )
        mapping = await ollama.identify_speakers("irgendwas")
        assert mapping == {}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_summarize_returns_string():
    """summarize() returns the model's Markdown response as a string."""
    import httpx
    import respx

    from llm import OllamaClient

    ollama = OllamaClient()

    with respx.mock:
        respx.post("http://localhost:11434/api/generate").mock(
            return_value=httpx.Response(200, json={"response": "# Zusammenfassung\n\nKurzer Text."})
        )
        summary = await ollama.summarize("Thomas: Hallo.\nMöller: Hi.", model="gemma3:12b")
        assert "Zusammenfassung" in summary
|
||||
|
||||
Reference in New Issue
Block a user