import os import re import unicodedata from datetime import datetime def slugify(text: str) -> str: for src, dst in [("ä","a"),("ö","o"),("ü","u"),("Ä","a"),("Ö","o"),("Ü","u"),("ß","ss")]: text = text.replace(src, dst) text = unicodedata.normalize("NFKD", text) text = "".join(c for c in text if unicodedata.category(c) != "Mn") text = text.lower() text = re.sub(r"[^a-z0-9]+", "-", text) return text.strip("-") def save_transcript( title: str, content: str, output_dir: str, dt: datetime | None = None, ) -> str: if dt is None: dt = datetime.now() slug = slugify(title)[:60] filename = f"{dt.strftime('%Y-%m-%d-%H%M')}-{slug}.md" os.makedirs(output_dir, exist_ok=True) path = os.path.join(output_dir, filename) with open(path, "w", encoding="utf-8") as f: f.write(f"---\ndate: {dt.isoformat(timespec='seconds')}\ntags: [transkript]\n---\n\n") f.write(f"# {title}\n\n") f.write(content) if not content.endswith("\n"): f.write("\n") return path def read_transcript(output_dir: str, filename: str) -> str | None: """Return file content if filename is a plain .md file inside output_dir.""" if os.path.basename(filename) != filename or not filename.endswith(".md"): return None path = os.path.join(output_dir, filename) if not os.path.exists(path): return None with open(path, encoding="utf-8") as f: return f.read() def list_transcripts(output_dir: str, limit: int = 20) -> list[dict]: if not os.path.exists(output_dir): return [] files = sorted( [f for f in os.listdir(output_dir) if f.endswith(".md")], reverse=True, )[:limit] result = [] for f in files: full = os.path.join(output_dir, f) stat = os.stat(full) result.append({"filename": f, "path": full, "size": stat.st_size, "mtime": stat.st_mtime}) return result def write_solo_docs( raw_text: str, refined: str, output_dir: str, dt: "datetime | None" = None, title: str = "", tldr: str = "", ) -> dict[str, str]: """Write index (in output_dir), transkript + zusammenfassung (in subdir).""" if dt is None: dt = datetime.now() os.makedirs(output_dir, exist_ok=True) if not title: title = "Diktat" for line in refined.splitlines(): if line.startswith("# "): title = line[2:].strip() break if not tldr: tldr = _extract_tldr(refined) base = dt.strftime("%Y-%m-%d-%H%M") + "-" + slugify(title)[:50] date_str = dt.strftime("%d.%m.%Y %H:%M") frontmatter = f"---\ndate: {dt.isoformat(timespec='seconds')}\ntags: [transkript]\n---\n\n" index_filename = f"{base}-index.md" subdir = os.path.join(output_dir, base) os.makedirs(subdir, exist_ok=True) # --- transkript (raw whisper output, in subdir) --- transkript_filename = f"{base}-transkript.md" transkript_path = os.path.join(subdir, transkript_filename) with open(transkript_path, "w", encoding="utf-8") as f: f.write(frontmatter) f.write(f"# {title} — Rohtranskript\n\n") f.write(f"← [Index](../{index_filename})\n\n") f.write(raw_text) if not raw_text.endswith("\n"): f.write("\n") # --- zusammenfassung (Ollama-polished, in subdir) --- zusammenfassung_filename = f"{base}-zusammenfassung.md" zusammenfassung_path = os.path.join(subdir, zusammenfassung_filename) with open(zusammenfassung_path, "w", encoding="utf-8") as f: f.write(frontmatter) f.write(f"← [Index](../{index_filename})\n\n") f.write(refined) if not refined.endswith("\n"): f.write("\n") # --- index (in output_dir root) --- index_content = ( f"# {title}\n\n" f"**Datum:** {date_str}\n\n" f"> {tldr}\n\n" f"- [Transkript]({base}/{transkript_filename})\n" f"- [Zusammenfassung]({base}/{zusammenfassung_filename})\n" ) index_path = os.path.join(output_dir, index_filename) with open(index_path, "w", encoding="utf-8") as f: f.write(f"---\ndate: {dt.isoformat(timespec='seconds')}\ntags: [transkript, index]\n---\n\n") f.write(index_content) return {"index": index_path, "transkript": transkript_path, "zusammenfassung": zusammenfassung_path} def write_meeting_docs( aligned_segments: list[tuple[str, str]], summary: str, speakers: list[str], duration_min: int, output_dir: str, dt: "datetime | None" = None, title: str = "", tldr: str = "", ) -> dict[str, str]: """Write index (in output_dir), transkript + zusammenfassung (in subdir).""" if dt is None: dt = datetime.now() os.makedirs(output_dir, exist_ok=True) if not title: title = f"Meeting {dt.strftime('%d.%m.%Y %H:%M')}" if not tldr: tldr = _extract_tldr(summary) base = dt.strftime("%Y-%m-%d-%H%M") + "-" + slugify(title)[:50] date_str = dt.strftime("%d.%m.%Y %H:%M") frontmatter_base = f"---\ndate: {dt.isoformat(timespec='seconds')}\ntags: [transkript, meeting]\n---\n\n" index_filename = f"{base}-index.md" subdir = os.path.join(output_dir, base) os.makedirs(subdir, exist_ok=True) # --- transkript (in subdir) --- transcript_lines = [] for speaker, text in aligned_segments: transcript_lines.append(f"**{speaker}:** {text}\n") transcript_content = "\n".join(transcript_lines) transkript_filename = f"{base}-transkript.md" transkript_path = os.path.join(subdir, transkript_filename) with open(transkript_path, "w", encoding="utf-8") as f: f.write(frontmatter_base) f.write(f"← [Index](../{index_filename})\n\n") f.write(transcript_content) if not transcript_content.endswith("\n"): f.write("\n") # --- zusammenfassung (in subdir) --- zusammenfassung_filename = f"{base}-zusammenfassung.md" zusammenfassung_path = os.path.join(subdir, zusammenfassung_filename) with open(zusammenfassung_path, "w", encoding="utf-8") as f: f.write(frontmatter_base) f.write(f"← [Index](../{index_filename})\n\n") f.write(summary) if not summary.endswith("\n"): f.write("\n") # --- index (in output_dir root) --- speaker_str = ", ".join(speakers) if speakers else "Unbekannt" index_content = ( f"# {title}\n\n" f"**Datum:** {date_str} \n" f"**Sprecher:** {speaker_str} \n" f"**Dauer:** {duration_min} min\n\n" f"> {tldr}\n\n" f"- [Transkript]({base}/{transkript_filename})\n" f"- [Zusammenfassung]({base}/{zusammenfassung_filename})\n" ) index_path = os.path.join(output_dir, index_filename) with open(index_path, "w", encoding="utf-8") as f: f.write(f"---\ndate: {dt.isoformat(timespec='seconds')}\ntags: [transkript, meeting, index]\n---\n\n") f.write(index_content) return {"index": index_path, "transkript": transkript_path, "zusammenfassung": zusammenfassung_path} def _extract_tldr(summary: str) -> str: """Return the first non-heading, non-empty line from the summary as TL;DR.""" for line in summary.splitlines(): stripped = line.strip() if stripped and not stripped.startswith("#"): return stripped[:200] return "Kein TL;DR verfügbar."