From cef8e38b22b46d3ec6c9d8220c65db0ee8c08321 Mon Sep 17 00:00:00 2001 From: "thomas.kopp" Date: Sat, 4 Apr 2026 13:59:53 +0200 Subject: [PATCH] Add autoresearch implementation plan 11 tasks covering config, budget check, log utilities, stuck detection, metrics display, skills, tmux orchestrator, and PostSessionStart hook. Co-Authored-By: Claude Sonnet 4.6 --- .../2026-04-04-autoresearch-implementation.md | 1151 +++++++++++++++++ 1 file changed, 1151 insertions(+) create mode 100644 docs/plans/2026-04-04-autoresearch-implementation.md diff --git a/docs/plans/2026-04-04-autoresearch-implementation.md b/docs/plans/2026-04-04-autoresearch-implementation.md new file mode 100644 index 0000000..72d2020 --- /dev/null +++ b/docs/plans/2026-04-04-autoresearch-implementation.md @@ -0,0 +1,1151 @@ +# autoresearch Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Autonomous research loop that iteratively improves Claude Code config and project code, running inside tmux with one claude session (ralph-loop) per project. + +**Architecture:** A bash orchestrator (`bin/start.sh`) creates a tmux session with one window per project. Each window starts an interactive `claude` session and invokes `/autoresearch-loop` which uses ralph-loop to repeat experiments indefinitely. Each iteration: create worktree → make changes → run benchmarks → compare metric → merge or discard → log. A PostSessionStart hook auto-starts the tmux session when token/budget thresholds are met. 
+ +**Tech Stack:** Bash (orchestration, worktree management), Python 3 + rich (log viewer, metrics display), ralph-loop plugin (iteration driver), claude CLI (interactive sessions), tmux 3.x, jq (JSON processing) + +--- + +## Task 1: Project scaffolding + +**Files:** +- Create: `~/work/autoresearch/bin/` (directory) +- Create: `~/work/autoresearch/tests/` (directory) +- Create: `~/work/autoresearch/CLAUDE.md` +- Create: `~/work/autoresearch/.gitignore` + +**Step 1: Create directories** + +```bash +mkdir -p ~/work/autoresearch/bin +mkdir -p ~/work/autoresearch/tests +``` + +**Step 2: Write CLAUDE.md** + +```markdown +# autoresearch + +Autonomous research loop for Claude Code. Runs inside tmux, one window per project. + +## Key files +- `bin/start.sh` — tmux session orchestrator (start/stop/status) +- `bin/check-budget.py` — Anthropic API budget checker +- `bin/log-view.py` — formatted log viewer (overview pane, left) +- `bin/metrics.py` — metrics summary (overview pane, right) +- `~/.claude/autoresearch.yaml` — main config +- `~/.claude/skills/autoresearch/skill.md` — /autoresearch skill +- `~/.claude/skills/autoresearch-loop/skill.md` — /autoresearch-loop (ralph-loop payload) + +## Run +```bash +bin/start.sh # start tmux session +bin/start.sh stop # stop all windows +bin/start.sh status # show running experiments +``` + +## Logs +- `~/.claude/autoresearch/log.jsonl` — append-only experiment log +- Each line: `{ "ts": "ISO8601", "project": "...", "exp": "NNNN", "metric_before": 0.0, "metric_after": 0.0, "kept": true, "files_changed": ["..."] }` +``` + +**Step 3: Write .gitignore** + +``` +__pycache__/ +*.pyc +.worktrees/ +``` + +**Step 4: Commit** + +```bash +cd ~/work/autoresearch +git add CLAUDE.md .gitignore bin/ tests/ +git commit -m "feat: project scaffolding" +``` + +--- + +## Task 2: Configuration file + schema + +**Files:** +- Create: `~/.claude/autoresearch.yaml` +- Create: `~/work/autoresearch/bin/config.py` +- Create: 
`~/work/autoresearch/tests/test_config.py` + +**Step 1: Write failing test** + +```python +# tests/test_config.py +import sys +sys.path.insert(0, 'bin') +from config import load_config, ConfigError +import pytest, tempfile, os, yaml + +def test_load_valid_config(): + data = { + 'projects': [{'path': '/tmp', 'benchmarks': ['echo ok'], 'time_limit_minutes': 5}], + 'token_threshold': {'context_remaining_pct': 60, 'api_budget_usd': 5.0} + } + with tempfile.NamedTemporaryFile('w', suffix='.yaml', delete=False) as f: + yaml.dump(data, f) + name = f.name + cfg = load_config(name) + assert cfg['projects'][0]['time_limit_minutes'] == 5 + os.unlink(name) + +def test_missing_projects_raises(): + with tempfile.NamedTemporaryFile('w', suffix='.yaml', delete=False) as f: + yaml.dump({}, f) + name = f.name + with pytest.raises(ConfigError): + load_config(name) + os.unlink(name) + +def test_expands_tilde_in_path(): + data = { + 'projects': [{'path': '~/.claude', 'benchmarks': [], 'time_limit_minutes': 5}], + 'token_threshold': {'context_remaining_pct': 60, 'api_budget_usd': 5.0} + } + with tempfile.NamedTemporaryFile('w', suffix='.yaml', delete=False) as f: + yaml.dump(data, f) + name = f.name + cfg = load_config(name) + assert '~' not in cfg['projects'][0]['path'] + os.unlink(name) +``` + +**Step 2: Run test to verify it fails** + +```bash +cd ~/work/autoresearch && python3 -m pytest tests/test_config.py -v +``` +Expected: FAIL — `ModuleNotFoundError: No module named 'config'` + +**Step 3: Write implementation** + +```python +# bin/config.py +import os +import yaml + +CONFIG_PATH = os.path.expanduser('~/.claude/autoresearch.yaml') +DEFAULTS = {'time_limit_minutes': 10, 'benchmarks': []} + +class ConfigError(Exception): + pass + +def load_config(path=CONFIG_PATH): + with open(path) as f: + cfg = yaml.safe_load(f) + if not cfg or 'projects' not in cfg or not cfg['projects']: + raise ConfigError(f"'projects' list is required in {path}") + for p in cfg['projects']: + 
p.setdefault('benchmarks', DEFAULTS['benchmarks']) + p.setdefault('time_limit_minutes', DEFAULTS['time_limit_minutes']) + p['path'] = os.path.expanduser(p['path']) + cfg.setdefault('token_threshold', {'context_remaining_pct': 60, 'api_budget_usd': 5.0}) + return cfg +``` + +**Step 4: Write default config** + +```yaml +# ~/.claude/autoresearch.yaml +projects: + - path: ~/.claude + benchmarks: [] + time_limit_minutes: 5 + + - path: ~/work/kundendoku + benchmarks: + - "go test ./..." + - "go build ./..." + time_limit_minutes: 10 + +token_threshold: + context_remaining_pct: 60 + api_budget_usd: 5.00 +``` + +**Step 5: Run tests to verify they pass** + +```bash +python3 -m pytest tests/test_config.py -v +``` +Expected: 3 PASSED + +**Step 6: Commit** + +```bash +git add bin/config.py tests/test_config.py +git commit -m "feat: config loader with validation" +``` + +--- + +## Task 3: API budget checker + +**Files:** +- Create: `~/work/autoresearch/bin/check-budget.py` +- Create: `~/work/autoresearch/tests/test_check_budget.py` + +**Step 1: Write failing tests** + +```python +# tests/test_check_budget.py +import sys +sys.path.insert(0, 'bin') +from unittest.mock import patch, MagicMock +import pytest + +# Import after path setup +import importlib +check_budget = importlib.import_module('check-budget') + +def test_budget_ok_when_used_less_than_limit(): + with patch('check-budget.get_used_usd', return_value=2.0): + assert check_budget.is_budget_ok(limit_usd=5.0) is True + +def test_budget_not_ok_when_used_exceeds_limit(): + with patch('check-budget.get_used_usd', return_value=5.5): + assert check_budget.is_budget_ok(limit_usd=5.0) is False + +def test_budget_ok_on_api_error(): + # Fail open: if we can't check, assume OK + with patch('check-budget.get_used_usd', side_effect=Exception("network error")): + assert check_budget.is_budget_ok(limit_usd=5.0) is True +``` + +Note: Python files with hyphens need special import handling. Rename to `check_budget.py` for easier imports. 
+ +**Step 2: Run tests to verify they fail** + +```bash +python3 -m pytest tests/test_check_budget.py -v +``` +Expected: FAIL — import error + +**Step 3: Write implementation** + +```python +# bin/check_budget.py +"""Check Anthropic API budget usage. Fails open (returns True) on errors.""" +import os +import sys +import urllib.request +import json + +USAGE_API = "https://api.anthropic.com/v1/usage" + +def get_used_usd() -> float: + api_key = os.environ.get('ANTHROPIC_API_KEY', '') + if not api_key: + raise EnvironmentError("ANTHROPIC_API_KEY not set") + req = urllib.request.Request( + USAGE_API, + headers={"x-api-key": api_key, "anthropic-version": "2023-06-01"} + ) + with urllib.request.urlopen(req, timeout=5) as resp: + data = json.loads(resp.read()) + # Sum all usage costs + return sum(item.get('cost_usd', 0.0) for item in data.get('data', [])) + +def is_budget_ok(limit_usd: float) -> bool: + try: + used = get_used_usd() + return used < limit_usd + except Exception: + return True # fail open + +if __name__ == '__main__': + limit = float(sys.argv[1]) if len(sys.argv) > 1 else 5.0 + ok = is_budget_ok(limit) + print("ok" if ok else "low") + sys.exit(0 if ok else 1) +``` + +**Step 4: Fix test imports (use `check_budget` not `check-budget`)** + +Update `tests/test_check_budget.py`: + +```python +import sys +sys.path.insert(0, 'bin') +from unittest.mock import patch +from check_budget import is_budget_ok, get_used_usd + +def test_budget_ok_when_used_less_than_limit(): + with patch('check_budget.get_used_usd', return_value=2.0): + assert is_budget_ok(limit_usd=5.0) is True + +def test_budget_not_ok_when_used_exceeds_limit(): + with patch('check_budget.get_used_usd', return_value=5.5): + assert is_budget_ok(limit_usd=5.0) is False + +def test_budget_ok_on_api_error(): + with patch('check_budget.get_used_usd', side_effect=Exception("network")): + assert is_budget_ok(limit_usd=5.0) is True +``` + +**Step 5: Run tests** + +```bash +python3 -m pytest 
tests/test_check_budget.py -v +``` +Expected: 3 PASSED + +**Step 6: Commit** + +```bash +git add bin/check_budget.py tests/test_check_budget.py +git commit -m "feat: Anthropic API budget checker, fails open on error" +``` + +--- + +## Task 4: Experiment log utilities + +**Files:** +- Create: `~/work/autoresearch/bin/log_append.py` +- Create: `~/work/autoresearch/bin/log_view.py` +- Create: `~/work/autoresearch/tests/test_log.py` +- Create dir: `~/.claude/autoresearch/` + +**Log entry schema (one JSON per line in `~/.claude/autoresearch/log.jsonl`):** + +```json +{ + "ts": "2026-04-04T22:00:00Z", + "project": "kundendoku", + "exp": "0001", + "description": "Optimize SQL query in handler.go", + "metric_before": 1.234, + "metric_after": 1.187, + "kept": true, + "files_changed": ["internal/handler.go"], + "stuck": false, + "error": null +} +``` + +**Step 1: Write failing tests** + +```python +# tests/test_log.py +import sys, os, json, tempfile, pytest +sys.path.insert(0, 'bin') +from log_append import append_entry, LOG_PATH + +def test_append_creates_file(tmp_path): + log = str(tmp_path / "log.jsonl") + append_entry({"ts": "2026-01-01T00:00:00Z", "project": "test"}, path=log) + assert os.path.exists(log) + +def test_append_writes_valid_json(tmp_path): + log = str(tmp_path / "log.jsonl") + append_entry({"project": "x", "kept": True}, path=log) + with open(log) as f: + data = json.loads(f.read().strip()) + assert data["project"] == "x" + +def test_append_multiple_entries(tmp_path): + log = str(tmp_path / "log.jsonl") + for i in range(3): + append_entry({"n": i}, path=log) + with open(log) as f: + lines = [l for l in f if l.strip()] + assert len(lines) == 3 +``` + +**Step 2: Run tests to verify they fail** + +```bash +python3 -m pytest tests/test_log.py -v +``` + +**Step 3: Write log_append.py** + +```python +# bin/log_append.py +import json, os +from datetime import datetime, timezone + +LOG_PATH = os.path.expanduser('~/.claude/autoresearch/log.jsonl') + +def 
append_entry(entry: dict, path: str = LOG_PATH) -> None:
+    entry.setdefault('ts', datetime.now(timezone.utc).isoformat())
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    with open(path, 'a') as f:
+        f.write(json.dumps(entry) + '\n')
+```
+
+**Step 4: Write log_view.py**
+
+```python
+# bin/log_view.py
+"""Live log viewer for overview pane. Run: python3 bin/log_view.py --follow"""
+import json, sys, time, os
+from rich.console import Console
+from rich.table import Table
+from rich import box
+
+LOG_PATH = os.path.expanduser('~/.claude/autoresearch/log.jsonl')
+console = Console()
+
+def read_entries(path=LOG_PATH):
+    if not os.path.exists(path):
+        return []
+    with open(path) as f:
+        entries = []
+        for line in f:
+            line = line.strip()
+            if line:
+                try:
+                    entries.append(json.loads(line))
+                except json.JSONDecodeError:
+                    pass
+    return entries
+
+def render_table(entries):
+    table = Table(box=box.SIMPLE, show_header=True, header_style="bold #DA251C")
+    table.add_column("Time", style="dim", width=8)
+    table.add_column("Project", width=14)
+    table.add_column("Exp", width=6)
+    table.add_column("Description", width=40)
+    table.add_column("Δ Metric", width=10, justify="right")
+    table.add_column("", width=3)
+    for e in entries[-30:]:
+        # ISO-8601 has HH:MM at fixed positions 11-16 ("2026-04-04T22:00:00...")
+        ts = e.get('ts', '')[11:16] if e.get('ts') else '?'
+        before = e.get('metric_before')
+        after = e.get('metric_after')
+        # explicit None checks: a metric value of 0.0 is valid, not "missing"
+        if before is not None and after is not None:
+            delta = f"{after - before:+.3f}"
+            delta_style = "green" if after < before else "red"
+        else:
+            delta = "n/a"
+            delta_style = "dim"
+        kept = "✓" if e.get('kept') else "✗"
+        kept_style = "green" if e.get('kept') else "red"
+        table.add_row(
+            ts,
+            e.get('project', '?')[:14],
+            str(e.get('exp', '?')),
+            (e.get('description') or '')[:40],
+            f"[{delta_style}]{delta}[/{delta_style}]",
+            f"[{kept_style}]{kept}[/{kept_style}]",
+        )
+    return table
+
+if __name__ == '__main__':
+    follow = '--follow' in sys.argv
+    while True:
+        entries = read_entries()
+        console.clear()
+        console.print(f"[bold #DA251C]autoresearch[/bold #DA251C] — {len(entries)} experiments", justify="center")
+        console.print(render_table(entries))
+        if not follow:
+            break
+        time.sleep(5)
+```
+
+**Step 5: Run tests**
+
+```bash
+python3 -m pytest tests/test_log.py -v
+```
+Expected: 3 PASSED
+
+**Step 6: Commit**
+
+```bash
+git add bin/log_append.py bin/log_view.py tests/test_log.py
+git commit -m "feat: experiment log append and rich viewer"
+```
+
+---
+
+## Task 5: Stuck detection
+
+**Files:**
+- Create: `~/work/autoresearch/bin/stuck_check.py`
+- Create: `~/work/autoresearch/tests/test_stuck.py`
+
+**Step 1: Write failing tests**
+
+```python
+# tests/test_stuck.py
+import sys
+sys.path.insert(0, 'bin')
+from stuck_check import is_stuck
+
+def test_not_stuck_with_different_files():
+    entries = [
+        {'project': 'x', 'files_changed': ['a.go']},
+        {'project': 'x', 'files_changed': ['b.go']},
+        {'project': 'x', 'files_changed': ['c.go']},
+    ]
+    assert is_stuck('x', entries) is False
+
+def test_stuck_when_same_files_3_times():
+    entries = [
+        {'project': 'x', 'files_changed': ['a.go', 'b.go']},
+        {'project': 'x', 'files_changed': ['a.go', 'b.go']},
+        {'project': 'x', 'files_changed': ['a.go', 'b.go']},
+    ]
+    assert is_stuck('x', entries) is True
+
+def test_not_stuck_with_fewer_than_3_entries():
+    entries = [
{'project': 'x', 'files_changed': ['a.go']}, + {'project': 'x', 'files_changed': ['a.go']}, + ] + assert is_stuck('x', entries) is False + +def test_only_checks_matching_project(): + entries = [ + {'project': 'y', 'files_changed': ['a.go']}, + {'project': 'y', 'files_changed': ['a.go']}, + {'project': 'y', 'files_changed': ['a.go']}, + {'project': 'x', 'files_changed': ['a.go']}, + {'project': 'x', 'files_changed': ['a.go']}, + ] + assert is_stuck('x', entries) is False # only 2 entries for x +``` + +**Step 2: Run tests to verify they fail** + +```bash +python3 -m pytest tests/test_stuck.py -v +``` + +**Step 3: Write implementation** + +```python +# bin/stuck_check.py +"""Detect if experiments are cycling over the same files.""" +import json, os, sys +from log_append import LOG_PATH, read_entries # reuse log reader + +def is_stuck(project: str, entries: list, window: int = 3) -> bool: + project_entries = [e for e in entries if e.get('project') == project] + if len(project_entries) < window: + return False + recent = project_entries[-window:] + file_sets = [frozenset(e.get('files_changed', [])) for e in recent] + return len(set(file_sets)) == 1 and file_sets[0] != frozenset() + +if __name__ == '__main__': + project = sys.argv[1] if len(sys.argv) > 1 else '' + from log_append import read_entries as _read + entries = _read() + if is_stuck(project, entries): + print("stuck") + sys.exit(1) + print("ok") + sys.exit(0) +``` + +Note: `stuck_check.py` imports `read_entries` from `log_append`. Update `log_append.py` to export `read_entries` (move the function there from `log_view.py`). 
+ +**Step 4: Move read_entries to log_append.py** + +Add to `bin/log_append.py`: + +```python +def read_entries(path: str = LOG_PATH) -> list: + if not os.path.exists(path): + return [] + entries = [] + with open(path) as f: + for line in f: + line = line.strip() + if line: + try: + entries.append(json.loads(line)) + except json.JSONDecodeError: + pass + return entries +``` + +Update `bin/log_view.py` to import `read_entries` from `log_append` instead of defining it locally. + +**Step 5: Run all tests** + +```bash +python3 -m pytest tests/ -v +``` +Expected: all PASSED + +**Step 6: Commit** + +```bash +git add bin/stuck_check.py bin/log_append.py bin/log_view.py tests/test_stuck.py +git commit -m "feat: stuck detection — alerts when same files repeat 3 times" +``` + +--- + +## Task 6: Metrics summary script (overview pane, right) + +**Files:** +- Create: `~/work/autoresearch/bin/metrics.py` + +No tests needed — this is pure display logic. + +**Step 1: Write metrics.py** + +```python +# bin/metrics.py +"""Compact metrics panel for overview pane right side. 
Run: watch -n5 python3 bin/metrics.py""" +import json, os, sys +from datetime import datetime, timezone +from rich.console import Console +from rich.panel import Panel +from rich.text import Text +sys.path.insert(0, os.path.dirname(__file__)) +from log_append import read_entries + +console = Console(width=40) + +def summarize(entries): + today = datetime.now(timezone.utc).date().isoformat() + today_entries = [e for e in entries if e.get('ts', '').startswith(today)] + total = len(today_entries) + kept = sum(1 for e in today_entries if e.get('kept')) + projects = {} + for e in today_entries: + p = e.get('project', 'unknown') + projects.setdefault(p, {'total': 0, 'kept': 0}) + projects[p]['total'] += 1 + if e.get('kept'): + projects[p]['kept'] += 1 + return total, kept, projects + +def render(): + entries = read_entries() + total, kept, projects = summarize(entries) + rate = f"{kept/total*100:.0f}%" if total else "—" + text = Text() + text.append(f"Today: {total} experiments\n", style="bold") + text.append(f"Kept: {kept} ({rate})\n\n", style="green") + for p, s in projects.items(): + r = f"{s['kept']}/{s['total']}" + text.append(f" {p[:16]:<16} {r}\n") + console.print(Panel(text, title="[bold #DA251C]Metrics[/bold #DA251C]", border_style="#FFD802")) + +if __name__ == '__main__': + render() +``` + +**Step 2: Test manually** + +```bash +python3 bin/metrics.py +``` +Expected: renders a panel (empty if no log yet) + +**Step 3: Commit** + +```bash +git add bin/metrics.py +git commit -m "feat: metrics summary panel for overview pane" +``` + +--- + +## Task 7: /autoresearch-loop skill (ralph-loop payload) + +**Files:** +- Create: `~/.claude/skills/autoresearch-loop/skill.md` + +This is the prompt that ralph-loop repeats on every iteration. It runs in the project directory. 
+
+**Step 1: Create skill**
+
+```bash
+mkdir -p ~/.claude/skills/autoresearch-loop
+```
+
+Write `~/.claude/skills/autoresearch-loop/skill.md`:
+
+````markdown
+---
+description: "One autoresearch experiment iteration (called by ralph-loop)"
+hide-from-slash-command-tool: "true"
+---
+
+# autoresearch — one experiment iteration
+
+You are the autoresearch agent for this project. Execute exactly one experiment:
+
+## 1. Pre-flight checks
+
+Run these checks first. If either fails, output `STOP` immediately.
+
+```bash
+# Budget check
+python3 ~/work/autoresearch/bin/check_budget.py $(python3 -c "
+import os, yaml; c=yaml.safe_load(open(os.path.expanduser('~/.claude/autoresearch.yaml'))); print(c['token_threshold']['api_budget_usd'])
+" 2>/dev/null || echo 5.0)
+```
+
+```bash
+# Stuck check
+python3 ~/work/autoresearch/bin/stuck_check.py "$(basename $PWD)"
+```
+
+If stuck: print a clear message explaining which files keep appearing, then output `STOP`.
+
+## 2. Plan your experiment
+
+- Read recent log entries for this project to understand what has been tried
+- Identify ONE concrete improvement to make (code quality, config, performance, prompt clarity)
+- Be specific: name the file and the change
+
+## 3. Create worktree
+
+```bash
+EXP=$(printf "%04d" $(ls .worktrees/ 2>/dev/null | wc -l))
+BRANCH="exp-${EXP}-$(echo '<short description>' | tr ' ' '-' | tr '[:upper:]' '[:lower:]')"
+git worktree add .worktrees/${BRANCH} -b ${BRANCH}
+cd .worktrees/${BRANCH}
+```
+
+## 4. Make changes
+
+Work in the worktree. Make exactly the planned change. Keep it focused.
+
+## 5. Run benchmarks (with timeout)
+
+```bash
+# Read benchmarks from config
+python3 -c "
+import yaml, os
+cfg = yaml.safe_load(open(os.path.expanduser('~/.claude/autoresearch.yaml')))
+project_path = os.getcwd()
+for p in cfg['projects']:
+    if os.path.expanduser(p['path']) in project_path:
+        for b in p['benchmarks']:
+            print(b)
+        break
+"
+```
+
+Run each benchmark with `timeout <time_limit_minutes>m`. Capture output and exit codes.
+
+## 6.
Evaluate
+
+- If benchmarks fail: discard (worktree is broken)
+- If benchmarks pass: compare to baseline on main branch
+- For projects with no benchmarks: use your judgment (is this objectively better?)
+
+## 7. Merge or discard
+
+```bash
+# If keeping: merge the experiment *branch* (git merge takes a branch name,
+# not a worktree path), then remove the worktree.
+cd ~/work/<project>
+git merge ${BRANCH} --no-ff -m "exp: ${BRANCH}"
+git worktree remove .worktrees/${BRANCH}
+
+# If discarding:
+git worktree remove .worktrees/${BRANCH} --force
+git branch -D ${BRANCH}
+```
+
+## 8. Log the result
+
+```bash
+python3 ~/work/autoresearch/bin/log_append.py
+```
+
+Or call directly:
+
+```python
+import sys; sys.path.insert(0, '/home/templis/work/autoresearch/bin')
+from log_append import append_entry
+append_entry({
+    "project": "<project name>",
+    "exp": "<NNNN>",
+    "description": "<one-line description of the change>",
+    "metric_before": <float or None>,
+    "metric_after": <float or None>,
+    "kept": <True or False>,
+    "files_changed": ["<file1>", "<file2>"],
+    "stuck": False,
+    "error": None
+})
+```
+
+## 9. Continue
+
+Do NOT output `STOP` unless a pre-flight check failed.
+The loop will automatically feed this prompt back for the next iteration.
+````
+
+**Step 2: Verify skill file exists**
+
+```bash
+ls ~/.claude/skills/autoresearch-loop/skill.md
+```
+
+**Step 3: Commit**
+
+```bash
+cd ~/work/autoresearch
+git add -A  # skill is in ~/.claude, handled separately
+git commit -m "feat: autoresearch-loop skill (ralph-loop payload)"
+```
+
+(Note: ~/.claude skills are managed by the claude-config repo separately)
+
+---
+
+## Task 8: /autoresearch skill (start/stop/status interface)
+
+**Files:**
+- Create: `~/.claude/skills/autoresearch/skill.md`
+
+**Step 1: Write skill**
+
+```bash
+mkdir -p ~/.claude/skills/autoresearch
+```
+
+Write `~/.claude/skills/autoresearch/skill.md`:
+
+````markdown
+---
+description: "Start, stop, or show status of the autoresearch tmux session"
+argument-hint: "[start|stop|status]"
+---
+
+# autoresearch
+
+Manage the autoresearch tmux session.
+ +## start (default) + +Run: +```bash +~/work/autoresearch/bin/start.sh start +``` + +Report: which tmux windows were created, which projects are running. + +## stop + +Run: +```bash +~/work/autoresearch/bin/start.sh stop +``` + +## status + +Run: +```bash +~/work/autoresearch/bin/start.sh status +``` + +Show: which windows are running, last log entry per project. +```` + +**Step 2: Verify** + +```bash +ls ~/.claude/skills/autoresearch/skill.md +``` + +--- + +## Task 9: tmux orchestrator (bin/start.sh) + +**Files:** +- Create: `~/work/autoresearch/bin/start.sh` + +**Step 1: Write start.sh** + +```bash +#!/usr/bin/env bash +# autoresearch tmux orchestrator +# Usage: start.sh [start|stop|status] +set -euo pipefail + +SESSION="autoresearch" +CONFIG="${HOME}/.claude/autoresearch.yaml" +LOGDIR="${HOME}/.claude/autoresearch" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Read projects from config +get_projects() { + python3 -c " +import yaml, os, sys +cfg = yaml.safe_load(open('${CONFIG}')) +for p in cfg['projects']: + print(os.path.expanduser(p['path'])) +" +} + +cmd_start() { + if tmux has-session -t "$SESSION" 2>/dev/null; then + echo "autoresearch: session '$SESSION' already running" + echo " attach: tmux attach -t $SESSION" + exit 0 + fi + + mkdir -p "$LOGDIR" + + # Window 0: overview (log viewer left + metrics right) + tmux new-session -d -s "$SESSION" -n "overview" -x 220 -y 50 + tmux split-window -h -t "${SESSION}:overview" -p 30 + tmux send-keys -t "${SESSION}:overview.0" \ + "watch -n5 python3 ${SCRIPT_DIR}/log_view.py" Enter + tmux send-keys -t "${SESSION}:overview.1" \ + "watch -n10 python3 ${SCRIPT_DIR}/metrics.py" Enter + + # One window per project + WIN=1 + while IFS= read -r project_path; do + project_name=$(basename "$project_path") + tmux new-window -t "${SESSION}:${WIN}" -n "$project_name" + tmux send-keys -t "${SESSION}:${WIN}" \ + "cd '${project_path}' && claude --name 'autoresearch-${project_name}'" Enter + # Wait briefly then 
send the ralph-loop invocation + sleep 2 + tmux send-keys -t "${SESSION}:${WIN}" \ + "/ralph-loop Run one autoresearch experiment iteration. --completion-promise STOP" Enter + WIN=$((WIN + 1)) + done < <(get_projects) + + tmux select-window -t "${SESSION}:0" + echo "autoresearch: started session '$SESSION' with $((WIN-1)) project windows" + echo " attach: tmux attach -t $SESSION" +} + +cmd_stop() { + if ! tmux has-session -t "$SESSION" 2>/dev/null; then + echo "autoresearch: session '$SESSION' not running" + exit 0 + fi + tmux kill-session -t "$SESSION" + echo "autoresearch: session stopped" +} + +cmd_status() { + if ! tmux has-session -t "$SESSION" 2>/dev/null; then + echo "autoresearch: not running" + exit 1 + fi + echo "autoresearch: running" + tmux list-windows -t "$SESSION" + echo "" + echo "Last entries per project:" + python3 -c " +import json, os +from collections import defaultdict +log = os.path.expanduser('~/.claude/autoresearch/log.jsonl') +if not os.path.exists(log): + print(' (no experiments yet)') + exit() +latest = {} +with open(log) as f: + for line in f: + try: + e = json.loads(line) + latest[e.get('project')] = e + except: pass +for proj, e in latest.items(): + kept = '✓' if e.get('kept') else '✗' + print(f' {proj}: exp {e.get(\"exp\",\"?\")} {kept} — {e.get(\"description\",\"\")[:50]}') +" +} + +case "${1:-start}" in + start) cmd_start ;; + stop) cmd_stop ;; + status) cmd_status ;; + *) echo "Usage: $0 [start|stop|status]"; exit 1 ;; +esac +``` + +**Step 2: Make executable** + +```bash +chmod +x ~/work/autoresearch/bin/start.sh +``` + +**Step 3: Dry-run test (no actual tmux)** + +```bash +# Verify syntax +bash -n ~/work/autoresearch/bin/start.sh && echo "syntax OK" +# Verify status when not running +~/work/autoresearch/bin/start.sh status || echo "expected: not running" +``` + +**Step 4: Commit** + +```bash +cd ~/work/autoresearch +git add bin/start.sh +git commit -m "feat: tmux orchestrator — start/stop/status autoresearch session" +``` + +--- 
+ +## Task 10: PostSessionStart hook + +**Files:** +- Modify: `~/.claude/settings.json` +- Create: `~/work/autoresearch/bin/session-start-hook.sh` + +**Step 1: Write hook script** + +```bash +#!/usr/bin/env bash +# PostSessionStart hook: auto-start autoresearch when tokens + budget available +set -euo pipefail + +CONFIG="${HOME}/.claude/autoresearch.yaml" +SESSION="autoresearch" + +# Only run if config exists +[[ -f "$CONFIG" ]] || exit 0 + +# Skip if autoresearch session already running +tmux has-session -t "$SESSION" 2>/dev/null && exit 0 + +# Check API budget +BUDGET_LIMIT=$(python3 -c " +import yaml; c=yaml.safe_load(open('${CONFIG}')) +print(c.get('token_threshold', {}).get('api_budget_usd', 5.0)) +" 2>/dev/null || echo "5.0") + +python3 "${HOME}/work/autoresearch/bin/check_budget.py" "$BUDGET_LIMIT" || exit 0 + +# Budget OK → start autoresearch in background +nohup "${HOME}/work/autoresearch/bin/start.sh" start \ + >> "${HOME}/.claude/autoresearch/hook.log" 2>&1 & + +exit 0 +``` + +```bash +chmod +x ~/work/autoresearch/bin/session-start-hook.sh +``` + +**Step 2: Add hook to settings.json** + +Read `~/.claude/settings.json` first, then add the PostSessionStart hook. 
+ +New hooks section (merge with existing): + +```json +"PostSessionStart": [ + { + "hooks": [ + { + "type": "command", + "command": "~/work/autoresearch/bin/session-start-hook.sh", + "async": true + } + ] + } +] +``` + +**Step 3: Verify settings.json is valid JSON** + +```bash +python3 -m json.tool ~/.claude/settings.json > /dev/null && echo "valid JSON" +``` + +**Step 4: Test hook manually** + +```bash +~/work/autoresearch/bin/session-start-hook.sh && echo "hook OK" +``` + +**Step 5: Commit** + +```bash +cd ~/work/autoresearch +git add bin/session-start-hook.sh +git commit -m "feat: PostSessionStart hook — auto-start when budget available" +``` + +--- + +## Task 11: Integration dry-run + +**Goal:** Start the tmux session, verify all windows open correctly, verify log viewer and metrics panel render. + +**Step 1: Start session** + +```bash +~/work/autoresearch/bin/start.sh start +``` +Expected output: `autoresearch: started session 'autoresearch' with N project windows` + +**Step 2: Verify tmux windows** + +```bash +tmux list-windows -t autoresearch +``` +Expected: `0: overview`, `1: claude-config` (or first project name), etc. + +**Step 3: Check overview pane renders** + +```bash +tmux attach -t autoresearch +# Navigate to window 0, verify log viewer and metrics panel are visible +# Ctrl+b d to detach +``` + +**Step 4: Verify claude session started in project window** + +Attach and switch to window 1, verify `claude` prompt is visible. + +**Step 5: Stop session** + +```bash +~/work/autoresearch/bin/start.sh stop +``` + +**Step 6: Add project to CLAUDE.md and workspace CLAUDE.md** + +Update `/home/templis/work/CLAUDE.md` projects table to include `autoresearch/`. 
+ +**Step 7: Final commit + push to Gitea** + +```bash +cd ~/work/autoresearch +git add -A +git commit -m "feat: integration verified — autoresearch MVP complete" +git remote add origin git@git.tueit.de:tueit_GmbH/autoresearch.git +git push -u origin main +``` + +--- + +## Summary of created files + +| File | Purpose | +|------|---------| +| `~/work/autoresearch/bin/config.py` | Load + validate autoresearch.yaml | +| `~/work/autoresearch/bin/check_budget.py` | Anthropic API budget check | +| `~/work/autoresearch/bin/log_append.py` | Append experiment log entries | +| `~/work/autoresearch/bin/log_view.py` | Rich log viewer (overview pane left) | +| `~/work/autoresearch/bin/metrics.py` | Metrics summary (overview pane right) | +| `~/work/autoresearch/bin/stuck_check.py` | Stuck file detection | +| `~/work/autoresearch/bin/start.sh` | tmux session orchestrator | +| `~/work/autoresearch/bin/session-start-hook.sh` | PostSessionStart hook | +| `~/.claude/autoresearch.yaml` | Main config (projects + thresholds) | +| `~/.claude/skills/autoresearch/skill.md` | `/autoresearch` user skill | +| `~/.claude/skills/autoresearch-loop/skill.md` | `/autoresearch-loop` ralph-loop payload | +| `~/.claude/settings.json` | +PostSessionStart hook |