129 lines
4.5 KiB
Python
129 lines
4.5 KiB
Python
import os
|
|
import re
|
|
import unicodedata
|
|
from datetime import datetime
|
|
|
|
|
|
def slugify(text: str) -> str:
    """Turn arbitrary text into a lowercase ASCII slug.

    German umlauts are folded to their base vowel and "ß" to "ss"; every
    other accented character loses its combining marks via NFKD
    decomposition. Each run of characters outside ``a-z0-9`` collapses to a
    single hyphen, and hyphens at either end are removed. The result may be
    an empty string when the input contains no ASCII letters or digits.
    """
    german_fold = str.maketrans(
        {
            "ä": "a",
            "ö": "o",
            "ü": "u",
            "Ä": "a",
            "Ö": "o",
            "Ü": "u",
            "ß": "ss",
        }
    )
    folded = text.translate(german_fold)
    decomposed = unicodedata.normalize("NFKD", folded)
    # Category "Mn" marks the combining accents left behind by the decomposition.
    without_marks = "".join(
        ch for ch in decomposed if unicodedata.category(ch) != "Mn"
    )
    hyphenated = re.sub(r"[^a-z0-9]+", "-", without_marks.lower())
    return hyphenated.strip("-")
|
|
|
|
|
|
def save_transcript(
    title: str,
    content: str,
    output_dir: str,
    dt: datetime | None = None,
) -> str:
    """Write a transcript as a Markdown file and return its path.

    The file is named ``<YYYY-MM-DD-HHMM>-<slug>.md``, where the slug is
    derived from ``title`` and capped at 60 characters. It starts with a
    front-matter block (ISO date, ``transkript`` tag), followed by the title
    as a level-1 heading and the content, always terminated by a newline.

    Args:
        title: Heading of the document; also the source of the file name.
        content: Transcript body, written unchanged.
        output_dir: Target directory; created if it does not exist.
        dt: Timestamp for the file name and front matter. Defaults to
            ``datetime.now()``.

    Returns:
        Path of the written file. An existing file with the same name is
        overwritten.
    """
    if dt is None:
        dt = datetime.now()

    # Cutting at 60 characters can end right after a separator, and a title
    # without any ASCII letters or digits produces an empty slug. Either case
    # would give a name ending in "-.md", so trim and fall back to a fixed
    # name.
    slug = slugify(title)[:60].rstrip("-") or "transkript"
    filename = f"{dt.strftime('%Y-%m-%d-%H%M')}-{slug}.md"

    os.makedirs(output_dir, exist_ok=True)
    path = os.path.join(output_dir, filename)
    with open(path, "w", encoding="utf-8") as f:
        f.write(f"---\ndate: {dt.isoformat(timespec='seconds')}\ntags: [transkript]\n---\n\n")
        f.write(f"# {title}\n\n")
        f.write(content)
        if not content.endswith("\n"):
            f.write("\n")
    return path
|
|
|
|
|
|
def read_transcript(output_dir: str, filename: str) -> str | None:
|
|
"""Return file content if filename is a plain .md file inside output_dir."""
|
|
if os.path.basename(filename) != filename or not filename.endswith(".md"):
|
|
return None
|
|
path = os.path.join(output_dir, filename)
|
|
if not os.path.exists(path):
|
|
return None
|
|
with open(path, encoding="utf-8") as f:
|
|
return f.read()
|
|
|
|
|
|
def list_transcripts(output_dir: str, limit: int = 20) -> list[dict]:
    """List the ``.md`` files in output_dir, sorted by name in descending order.

    Files written by this module start with a ``YYYY-MM-DD-HHMM`` timestamp,
    so descending name order puts the newest of them first.

    Args:
        output_dir: Directory to scan. A missing path, or a path that is not
            a directory, gives an empty list.
        limit: Maximum number of entries, applied as a slice to the sorted
            names.

    Returns:
        One dict per file with the keys ``filename``, ``path`` (output_dir
        joined with the name), ``size`` (``st_size``) and ``mtime``
        (``st_mtime``).
    """
    if not os.path.isdir(output_dir):
        return []

    # Only regular files count, and they are filtered before the limit is
    # applied, so a directory or dangling symlink ending in ".md" neither
    # shows up nor takes a slot.
    with os.scandir(output_dir) as entries:
        names = [
            entry.name
            for entry in entries
            if entry.name.endswith(".md") and entry.is_file()
        ]
    names.sort(reverse=True)

    result = []
    for name in names[:limit]:
        full = os.path.join(output_dir, name)
        try:
            info = os.stat(full)
        except OSError:
            # The file vanished after the scan; skip it rather than fail.
            continue
        result.append(
            {
                "filename": name,
                "path": full,
                "size": info.st_size,
                "mtime": info.st_mtime,
            }
        )
    return result
|
|
|
|
|
|
def write_meeting_docs(
    aligned_segments: list[tuple[str, str]],
    summary: str,
    speakers: list[str],
    duration_min: int,
    output_dir: str,
    dt: datetime | None = None,
) -> dict[str, str]:
    """Write the index, transcript and summary documents of a meeting.

    All three Markdown files share the base name ``<YYYY-MM-DD-HHMM>-meeting``
    and begin with a front-matter block. The index links to the other two by
    file name and quotes a one-line TL;DR taken from the summary.

    Args:
        aligned_segments: ``(speaker, text)`` pairs in spoken order.
        summary: Markdown summary, written unchanged to its own file.
        speakers: Names shown in the index; "Unbekannt" is used when empty.
        duration_min: Meeting length in minutes, shown in the index.
        output_dir: Target directory; created if it does not exist.
        dt: Timestamp for file names and front matter. Defaults to
            ``datetime.now()``.

    Returns:
        Mapping with the keys ``index``, ``transkript`` and
        ``zusammenfassung`` to the written paths. Existing files with the
        same names are overwritten.
    """
    if dt is None:
        dt = datetime.now()
    os.makedirs(output_dir, exist_ok=True)

    base = dt.strftime("%Y-%m-%d-%H%M") + "-meeting"
    date_str = dt.strftime("%d.%m.%Y %H:%M")
    iso_date = dt.isoformat(timespec="seconds")
    frontmatter_base = f"---\ndate: {iso_date}\ntags: [transkript, meeting]\n---\n\n"

    # --- transcript ---
    # Each entry already ends in "\n"; joining with "\n" leaves a blank line
    # between speaker turns.
    transcript_content = "\n".join(
        f"**{speaker}:** {text}\n" for speaker, text in aligned_segments
    )
    transkript_filename = f"{base}-transkript.md"
    transkript_path = os.path.join(output_dir, transkript_filename)
    with open(transkript_path, "w", encoding="utf-8") as f:
        f.write(frontmatter_base)
        f.write(transcript_content)
        if not transcript_content.endswith("\n"):
            f.write("\n")

    # --- summary ---
    zusammenfassung_filename = f"{base}-zusammenfassung.md"
    zusammenfassung_path = os.path.join(output_dir, zusammenfassung_filename)
    with open(zusammenfassung_path, "w", encoding="utf-8") as f:
        f.write(frontmatter_base)
        f.write(summary)
        if not summary.endswith("\n"):
            f.write("\n")

    # --- index ---
    speaker_str = ", ".join(speakers) if speakers else "Unbekannt"
    tl_dr = _extract_tldr(summary)
    index_content = (
        f"# Meeting — {date_str}\n\n"
        # Two trailing spaces form a Markdown hard line break; with a single
        # space the speaker and duration lines render as one paragraph line.
        f"**Sprecher:** {speaker_str}  \n"
        f"**Dauer:** {duration_min} min\n\n"
        f"> {tl_dr}\n\n"
        f"- [Transkript]({transkript_filename})\n"
        f"- [Zusammenfassung]({zusammenfassung_filename})\n"
    )
    index_filename = f"{base}-index.md"
    index_path = os.path.join(output_dir, index_filename)
    with open(index_path, "w", encoding="utf-8") as f:
        f.write(f"---\ndate: {iso_date}\ntags: [transkript, meeting, index]\n---\n\n")
        f.write(index_content)

    return {
        "index": index_path,
        "transkript": transkript_path,
        "zusammenfassung": zusammenfassung_path,
    }
|
|
|
|
|
|
def _extract_tldr(summary: str) -> str:
    """Pick a one-line TL;DR out of a Markdown summary.

    The first line that is neither blank nor a heading (starting with "#"
    once surrounding whitespace is removed) is returned, stripped and cut to
    200 characters. When no such line exists, a fixed German placeholder is
    returned instead.
    """
    candidates = (raw.strip() for raw in summary.splitlines())
    first_body_line = next(
        (text for text in candidates if text and not text.startswith("#")),
        None,
    )
    if first_body_line is None:
        return "Kein TL;DR verfügbar."
    return first_body_line[:200]
|