diff --git a/llm.py b/llm.py
new file mode 100644
index 0000000..b6fb5dc
--- /dev/null
+++ b/llm.py
@@ -0,0 +1,83 @@
+import httpx
+
+# German system prompt sent with every refine request: it asks the model to
+# clean up a raw speech transcript, structure it as Markdown under an H1
+# title, honour optional user instructions and reply with the Markdown only.
+SYSTEM_PROMPT = """Du bist ein präziser Schreibassistent.
+Du bekommst einen rohen Sprachtranskript und optionale Instruktionen des Nutzers.
+Deine Aufgabe:
+1. Bereinige den Text (Füllwörter, Wiederholungen, Tippfehler)
+2. Strukturiere ihn mit Markdown-Überschriften wenn sinnvoll
+3. Erzeuge einen passenden deutschen Titel als H1
+4. Beachte Instruktionen des Nutzers wenn vorhanden
+5. Antworte NUR mit dem fertigen Markdown — kein Kommentar, keine Erklärung
+
+Format:
+# Titel
+
+Inhalt...
+"""
+
+
+class OllamaClient:
+    """Minimal async client for an Ollama HTTP server.
+
+    Every call opens its own short-lived ``httpx.AsyncClient``. Non-success
+    HTTP statuses surface as ``httpx.HTTPStatusError`` via ``raise_for_status``.
+    """
+
+    def __init__(self, base_url: str = "http://localhost:11434"):
+        """Remember the server root URL used to build endpoint URLs."""
+        self.base_url = base_url
+
+    def _endpoint(self, path: str) -> str:
+        """Join ``base_url`` and ``path`` without producing a double slash."""
+        # A base URL configured with a trailing "/" would otherwise produce
+        # "//api/...", which is not the path the requests below intend.
+        return self.base_url.rstrip("/") + path
+
+    async def list_models(self) -> list[str]:
+        """Return the model names reported by ``GET /api/tags``.
+
+        A reply without a ``models`` key yields an empty list.
+        """
+        async with httpx.AsyncClient() as client:
+            r = await client.get(self._endpoint("/api/tags"))
+            r.raise_for_status()
+            return [m["name"] for m in r.json().get("models", [])]
+
+    async def refine(
+        self,
+        raw_text: str,
+        instructions: str = "",
+        model: str = "gemma3:12b",
+    ) -> str:
+        """Send a transcript to ``POST /api/generate`` and return the reply text.
+
+        Args:
+            raw_text: Transcript, placed under a ``Transkript:`` heading.
+            instructions: Optional user instructions, appended under an
+                ``Instruktionen:`` heading unless empty or whitespace-only.
+            model: Name of the model the server should run.
+
+        Returns:
+            The ``response`` field of the JSON reply.
+
+        Raises:
+            KeyError: If the JSON reply has no ``response`` field.
+        """
+        prompt = f"Transkript:\n{raw_text}"
+        extra = instructions.strip()
+        if extra:
+            prompt += f"\n\nInstruktionen:\n{extra}"
+        payload = {
+            "model": model,
+            "prompt": prompt,
+            "system": SYSTEM_PROMPT,
+            "stream": False,
+        }
+        # Explicit 120 s timeout; list_models relies on the httpx default.
+        async with httpx.AsyncClient(timeout=120) as client:
+            r = await client.post(self._endpoint("/api/generate"), json=payload)
+            r.raise_for_status()
+            return r.json()["response"]
diff --git a/tests/test_llm.py b/tests/test_llm.py
new file mode 100644
index 0000000..ea543cf
--- /dev/null
+++ b/tests/test_llm.py
@@ -0,0 +1,51 @@
+import pytest
+from unittest.mock import AsyncMock, patch, MagicMock
+
+
+@pytest.mark.asyncio
+async def test_refine_calls_ollama():
+    """refine() posts one generate request and returns its ``response`` text."""
+    from llm import OllamaClient
+    mock_response = MagicMock()
+    mock_response.json.return_value = {"response": "# Titel\n\nInhalt."}
+    mock_response.raise_for_status = MagicMock()
+
+    # Patch the class on the httpx module so no real HTTP request is made.
+    with patch("httpx.AsyncClient") as MockClient:
+        instance = MockClient.return_value.__aenter__.return_value
+        instance.post = AsyncMock(return_value=mock_response)
+        client = OllamaClient(base_url="http://localhost:11434")
+        result = await client.refine(
+            raw_text="Das ist ein test.",
+            instructions="Mach eine Zusammenfassung.",
+            model="gemma3:12b",
+        )
+        assert result == "# Titel\n\nInhalt."
+        instance.post.assert_called_once()
+        mock_response.raise_for_status.assert_called_once()
+        # Check what was actually sent, not merely that a request happened.
+        sent = instance.post.call_args
+        assert sent.args == ("http://localhost:11434/api/generate",)
+        payload = sent.kwargs["json"]
+        assert payload["model"] == "gemma3:12b"
+        assert payload["stream"] is False
+        assert "Das ist ein test." in payload["prompt"]
+        assert "Mach eine Zusammenfassung." in payload["prompt"]
+
+
+@pytest.mark.asyncio
+async def test_list_models_returns_list():
+    """list_models() returns every model name from the tags reply, in order."""
+    from llm import OllamaClient
+    mock_response = MagicMock()
+    mock_response.json.return_value = {"models": [{"name": "gemma3:12b"}, {"name": "mistral:7b"}]}
+    mock_response.raise_for_status = MagicMock()
+
+    with patch("httpx.AsyncClient") as MockClient:
+        instance = MockClient.return_value.__aenter__.return_value
+        instance.get = AsyncMock(return_value=mock_response)
+        client = OllamaClient(base_url="http://localhost:11434")
+        models = await client.list_models()
+        assert models == ["gemma3:12b", "mistral:7b"]
+        instance.get.assert_called_once_with("http://localhost:11434/api/tags")
+        mock_response.raise_for_status.assert_called_once()