feat: AI-generated title+tldr, subfolder structure, backlinks in transkript/zusammenfassung

- llm: generate_title_and_tldr() returns concise title and 2-3 sentence summary
- output: index in root, transkript+zusammenfassung in {base}/ subdir with backlinks
- pipeline: call generate_title_and_tldr for both solo and meeting recordings
- router: mirror subdir structure when copying to Obsidian vault

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-02 12:07:54 +02:00
parent 1cfb9c127b
commit 336628341b
6 changed files with 1072 additions and 27 deletions
@@ -5,6 +5,13 @@ Ermittle, welche echten Namen den Sprechern zugeordnet werden können — z.B. d
 Antworte NUR mit einem JSON-Objekt: {"SPEAKER_00": "Name oder null", "SPEAKER_01": "Name oder null"}
 Kein weiterer Text, keine Erklärung."""

+# System prompt used by OllamaClient.generate_title_and_tldr: asks the model
+# for a bare JSON object with the keys "title" and "tldr" and nothing else.
+TITLE_TLDR_PROMPT = """Du bekommst einen aufbereiteten Transkript-Text.
+Gib NUR ein JSON-Objekt zurück mit zwei Feldern:
+- "title": ein prägnanter, aussagekräftiger Titel (max. 8 Wörter, kein Datum, kein "Diktat")
+- "tldr": 2-3 Sätze, die den Inhalt des Transkripts konkret zusammenfassen
+
+Kein weiterer Text, kein Kommentar, kein Markdown-Block."""
+
 SUMMARIZE_PROMPT = """Du bist ein präziser Assistent für Business-Kommunikation.
 Du bekommst ein Gesprächstranskript mit Sprecher-Labels.
 Erstelle eine strukturierte Zusammenfassung auf Deutsch mit:
@@ -66,6 +73,33 @@ class OllamaClient:
            r.raise_for_status()
            return r.json()["response"]

+    async def generate_title_and_tldr(
+        self,
+        text: str,
+        model: str = "gemma3:12b",
+    ) -> tuple[str, str]:
+        """Return ``(title, tldr)`` for the given text.
+
+        Posts the first 3000 characters of *text* to ``/api/generate`` with
+        ``TITLE_TLDR_PROMPT`` as the system prompt and parses the JSON object
+        contained in the reply.
+
+        Args:
+            text: Transcript text; only the first 3000 characters are sent.
+            model: Model name passed to the endpoint.
+
+        Returns:
+            The title and the TL;DR. Each value falls back to its default
+            (``"Diktat"`` / ``"Kein TL;DR verfügbar."``) when the reply holds
+            no parseable JSON object or the field is missing, ``null`` or
+            blank.
+
+        Raises:
+            httpx.HTTPStatusError: If the endpoint answers with an error
+                status. Request and transport errors are not caught here
+                either; only an unusable reply body triggers the fallback.
+        """
+        import json
+
+        default_title = "Diktat"
+        default_tldr = "Kein TL;DR verfügbar."
+
+        async with httpx.AsyncClient(timeout=60) as client:
+            r = await client.post(
+                f"{self.base_url}/api/generate",
+                json={
+                    "model": model,
+                    "prompt": f"Text:\n{text[:3000]}",
+                    "system": TITLE_TLDR_PROMPT,
+                    "stream": False,
+                },
+            )
+            r.raise_for_status()
+            raw = r.json()["response"].strip()
+
+        # Models often wrap the object in a Markdown fence or add a sentence
+        # around it despite the prompt, so parse only the outermost {...} span.
+        start, end = raw.find("{"), raw.rfind("}")
+        if start != -1 and end > start:
+            raw = raw[start : end + 1]
+
+        try:
+            data = json.loads(raw)
+        except ValueError:  # json.JSONDecodeError is a ValueError subclass
+            return default_title, default_tldr
+        if not isinstance(data, dict):
+            return default_title, default_tldr
+
+        def _clean(value: object, default: str) -> str:
+            # A JSON null counts as missing; str(None) would otherwise leak
+            # the literal text "None" into the output.
+            if value is None:
+                return default
+            return str(value).strip() or default
+
+        return (
+            _clean(data.get("title"), default_title),
+            _clean(data.get("tldr"), default_tldr),
+        )
+
    async def identify_speakers(
        self,
        transcript_excerpt: str,