| <directory> | <YYYY-MM-DD>] - User/assistant messages extracted from message.data + part.data - Tool calls → Claude Code `tool_use` blocks - Tool outputs → `tool_result` blocks (folded into the assistant turn by the mempalace normalizer) - `step-start` / `step-finish` parts are dropped (noise) - `reasoning` parts prefixed with `[reasoning]` and kept as text Dedup: - source_file = absolute staging path (deterministic per session ID) - Re-runs skip unchanged sessions. To force re-mining, delete the staging dir: rm -rf ~/.cache/mempalace-session/<wing>/ Rationale: Opencode lacks a session-stopping hook (upstream PRs #16598, #16769 still open). Until that lands + mempalace hooks_cli.py gains an opencode harness, this wrapper is how we get automatic session capture. EOF } # ── Parse args ─────────────────────────────────────────────────────── while [[ $# -gt 0 ]]; do case "$1" in -h|--help) usage; exit 0 ;; --wing) WING="${2:-}"; shift 2 ;; --session) SESSION_ID="${2:-}"; shift 2 ;; --since) SINCE="${2:-}"; shift 2 ;; --min-messages) MIN_MESSAGES="${2:-}"; shift 2 ;; --agent) AGENT="${2:-}"; shift 2 ;; --db) OPENCODE_DB="${2:-}"; shift 2 ;; --dry-run) DRY_RUN=1; shift ;; --no-repair) NO_REPAIR=1; shift ;; --) shift; break ;; -*) echo "error: unknown option: $1" >&2; usage >&2; exit 1 ;; *) echo "error: unexpected arg: $1" >&2; exit 1 ;; esac done # ── Preflight ──────────────────────────────────────────────────────── if [[ ! -f "$OPENCODE_DB" ]]; then echo "error: opencode.db not found at $OPENCODE_DB" >&2 echo " override with --db <path> or OPENCODE_DB env var" >&2 exit 2 fi if ! command -v mempalace >/dev/null 2>&1; then echo "error: mempalace CLI not found in PATH" >&2 exit 3 fi if ! [[ "$MIN_MESSAGES" =~ ^[0-9]+$ ]]; then echo "error: --min-messages must be an integer" >&2 exit 1 fi # ── Staging dir ────────────────────────────────────────────────────── # Deterministic per-wing path so source_file dedup works across re-runs. 
# An empty wing would make STAGE the cache root itself, so the mine step would
# sweep up the exports of every wing. Treat it as a usage error.
if [[ -z "$WING" ]]; then
  echo "error: --wing must not be empty" >&2
  exit 1
fi

CACHE_ROOT="${XDG_CACHE_HOME:-$HOME/.cache}/mempalace-session"
STAGE="$CACHE_ROOT/$WING"
mkdir -p "$STAGE"

# ── Export sessions (Python heredoc) ────────────────────────────────
# Writes one JSONL file per qualifying session into $STAGE.
# Prints on stdout:  EXPORTED <count> and ALREADY_FILED <count>
# Prints on stderr:  one line per exported session, plus SKIPPED_SHORT <count>
#
# If the palace is reachable, also classifies each export as NEW or ALREADY
# FILED (matching by source_file path) so --dry-run can report the true
# mine-set size, not just the export-set size. Classification is advisory
# only — the real mine step delegates dedup to `mempalace mine --mode convos`,
# which is the authoritative source of truth.
#
# The exporter exits 1 on a bad --since value and 2 when opencode.db cannot be
# read; `|| exit $?` forwards that status as this script's exit code.
export_count=$(python3 - "$OPENCODE_DB" "$STAGE" "$SESSION_ID" "$SINCE" "$MIN_MESSAGES" <<'PY'
import sqlite3, json, sys, os
from contextlib import closing
from datetime import datetime, timezone
from pathlib import Path


def ro_uri(path):
    """Return a read-only SQLite URI for *path*.

    Path.as_uri() percent-encodes characters such as "?", "#" and "%" that
    would otherwise be parsed as URI syntax and open the wrong file.
    """
    return Path(path).resolve().as_uri() + "?mode=ro"


def load_already_filed(stage):
    """Return the set of source_file paths under *stage* already in the palace.

    Best-effort and read-only: a missing or unreadable palace yields an empty
    (or partial) set, and the mine step performs the real dedup anyway.
    """
    filed = set()
    palace_path = os.environ.get("MEMPALACE_PATH", os.path.expanduser("~/.mempalace/palace"))
    chroma_db = Path(palace_path) / "chroma.sqlite3"
    if not chroma_db.is_file():
        return filed
    try:
        # closing() releases the connection even when the query raises.
        with closing(sqlite3.connect(ro_uri(chroma_db), uri=True)) as pcon:
            # LIKE is only a coarse prefilter ("_" and "%" in the path act as
            # wildcards); callers compare exact paths against this set.
            rows = pcon.execute(
                "SELECT DISTINCT string_value FROM embedding_metadata "
                "WHERE key='source_file' AND string_value LIKE ?",
                (f"{stage}%",),
            )
            filed.update(sf for (sf,) in rows if sf)
    except sqlite3.Error:
        pass  # palace unreachable → treat all exports as new (miner will dedup)
    return filed


def message_turns(msg, parts):
    """Convert one opencode message row and its part rows into JSONL turns.

    Returns a list with zero, one or two dicts: the message itself and, for
    assistant messages with tool output, a synthetic "human" tool_result turn.
    Rows whose JSON payload is missing, malformed or not an object are skipped
    instead of aborting the whole export.
    """
    try:
        mdata = json.loads(msg["data"])
    except (json.JSONDecodeError, TypeError):
        return []
    if not isinstance(mdata, dict):
        return []
    role = mdata.get("role")
    if role not in ("user", "assistant"):
        return []

    blocks = []
    tool_results = []
    for p in parts:
        try:
            pd = json.loads(p["data"])
        except (json.JSONDecodeError, TypeError):
            continue
        if not isinstance(pd, dict):
            continue
        t = pd.get("type")
        if t == "text":
            txt = (pd.get("text") or "").strip()
            if txt:
                blocks.append({"type": "text", "text": txt})
        elif t == "tool":
            # opencode tool part → tool_use block + deferred tool_result
            state = pd.get("state") or {}
            tool_name = pd.get("tool") or "Unknown"
            call_id = pd.get("callID") or p["id"]
            tool_input = state.get("input") or {}
            tool_output = state.get("output")
            blocks.append({
                "type": "tool_use",
                "id": call_id,
                "name": tool_name,
                "input": tool_input,
            })
            if tool_output:
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": call_id,
                    "content": str(tool_output),
                })
        elif t in ("step-start", "step-finish"):
            continue  # noise
        elif t == "reasoning":
            rtext = (pd.get("text") or "").strip()
            if rtext:
                blocks.append({"type": "text", "text": f"[reasoning] {rtext}"})

    if not blocks:
        return []

    # Simplify single-text-block messages to a bare string (more tolerant
    # of normalizer edge cases; mempalace accepts either shape).
    if len(blocks) == 1 and blocks[0]["type"] == "text":
        content = blocks[0]["text"]
    else:
        content = blocks
    turns = [{"type": role, "message": {"content": content}}]

    # For assistants, follow up with a synthetic human tool_result message.
    # The mempalace normalizer is_tool_only branch folds these back into the
    # assistant turn (see normalize.py:211-214).
    if role == "assistant" and tool_results:
        turns.append({"type": "human", "message": {"content": tool_results}})
    return turns


def export_sessions(cur, stage, session_filter, since_ms, min_messages, already_filed):
    """Write one JSONL file per qualifying session and print the summary counts."""
    q = "SELECT * FROM session WHERE 1=1"
    params = []
    if session_filter:
        q += " AND id = ?"
        params.append(session_filter)
    if since_ms is not None:
        q += " AND time_updated >= ?"
        params.append(since_ms)
    q += " ORDER BY time_updated"
    cur.execute(q, params)
    sessions = [dict(r) for r in cur.fetchall()]
    if not sessions:
        print("EXPORTED 0")
        return

    exported = 0
    skipped_short = 0
    skipped_already_filed = 0

    for sess in sessions:
        sid = sess["id"]
        cur.execute("SELECT COUNT(*) FROM message WHERE session_id=?", (sid,))
        msg_count = cur.fetchone()[0]
        if msg_count < min_messages:
            skipped_short += 1
            continue

        cur.execute(
            "SELECT * FROM message WHERE session_id=? ORDER BY time_created", (sid,)
        )
        messages = [dict(r) for r in cur.fetchall()]
        cur.execute(
            "SELECT * FROM part WHERE session_id=? ORDER BY time_created", (sid,)
        )
        parts_by_msg = {}
        for r in cur.fetchall():
            d = dict(r)
            parts_by_msg.setdefault(d["message_id"], []).append(d)

        # Synthetic header as first user turn — injects title/directory/date
        # into the transcript so semantic search can find sessions by topic,
        # not just by session-id filename.
        title = sess.get("title") or "(untitled)"
        directory = sess.get("directory") or "?"
        date_str = datetime.fromtimestamp(
            sess["time_created"] / 1000, tz=timezone.utc
        ).strftime("%Y-%m-%d")
        header = f"[session: {title} | {directory} | {date_str}]"
        out_lines = [{"type": "user", "message": {"content": header}}]

        for msg in messages:
            out_lines.extend(message_turns(msg, parts_by_msg.get(msg["id"], [])))

        # Must have at least 2 turns for the normalizer to accept the file
        if len(out_lines) < 2:
            skipped_short += 1
            continue

        slug = sess.get("slug") or "session"
        out_path = stage / f"{slug}_{sid}.jsonl"
        with open(out_path, "w", encoding="utf-8") as f:
            for obj in out_lines:
                f.write(json.dumps(obj, ensure_ascii=False) + "\n")
        # Set mtime to session time_updated so dedup sees a stable value.
        # Deliberately best-effort: a failure here must not drop the export.
        try:
            ts = sess["time_updated"] / 1000
            os.utime(out_path, (ts, ts))
        except Exception:
            pass
        exported += 1

        is_filed = str(out_path) in already_filed
        if is_filed:
            skipped_already_filed += 1
        status = "SKIP" if is_filed else "NEW "
        print(f" [{status}] {out_path.name} ({msg_count} msgs, {len(out_lines)} turns)", file=sys.stderr)

    print(f"EXPORTED {exported}")
    print(f"ALREADY_FILED {skipped_already_filed}")
    if skipped_short:
        print(f"SKIPPED_SHORT {skipped_short}", file=sys.stderr)


def main():
    db_path, stage, session_filter, since, min_messages = sys.argv[1:6]
    min_messages = int(min_messages)
    stage = Path(stage)

    # Convert --since YYYY-MM-DD to epoch ms (opencode uses ms timestamps)
    since_ms = None
    if since:
        try:
            since_ms = int(datetime.strptime(since, "%Y-%m-%d").replace(tzinfo=timezone.utc).timestamp() * 1000)
        except ValueError:
            print(f"error: --since must be YYYY-MM-DD, got {since!r}", file=sys.stderr)
            sys.exit(1)

    already_filed = load_already_filed(stage)

    with closing(sqlite3.connect(ro_uri(db_path), uri=True)) as conn:
        conn.row_factory = sqlite3.Row
        export_sessions(conn.cursor(), stage, session_filter, since_ms, min_messages, already_filed)


try:
    main()
except sqlite3.Error as exc:
    # Palace errors are handled inside load_already_filed, so anything that
    # reaches here came from opencode.db: report it with exit code 2.
    print(f"error: cannot read opencode.db: {exc}", file=sys.stderr)
    sys.exit(2)
PY
) || exit $?

# Parse counts from stdout
count="$(printf '%s\n' "$export_count" | awk '/^EXPORTED / { print $2 }')"
count="${count:-0}"
already_filed="$(printf '%s\n' "$export_count" | awk '/^ALREADY_FILED / { print $2 }')"
already_filed="${already_filed:-0}"
to_file=$(( count - already_filed ))

if [[ "$count" -eq 0 ]]; then
  echo "no sessions qualified for export"
  exit 0
fi

echo ""
echo "Exported $count session(s) to $STAGE"
echo " $to_file new → will be filed on mine"
echo " $already_filed already filed → will be skipped (dedup by source_file)"

if [[ $DRY_RUN -eq 1 ]]; then
  if [[ "$to_file" -eq 0 ]]; then
    echo ""
    echo "--dry-run: no new sessions to mine. A real run would skip all $count."
  else
    echo ""
    echo "--dry-run: skipping mine step. A real run would file $to_file new session(s)."
  fi
  exit 0
fi

# ── Run the mine ─────────────────────────────────────────────────────
echo ""
echo "Mining into wing '$WING'..."
if ! mempalace mine "$STAGE" --mode convos --wing "$WING" --agent "$AGENT"; then
  echo "error: mempalace mine failed" >&2
  exit 4
fi

# ── Repair index ─────────────────────────────────────────────────────
if [[ $NO_REPAIR -eq 0 ]]; then
  echo ""
  echo "Rebuilding HNSW index..."
  mempalace repair --yes
fi

echo ""
echo "Done. Wing '$WING' updated. Remember to reconnect any live MCP sessions."

#!/usr/bin/env bash
# mempalace-session — mine opencode session history into MemPalace
#
# Opencode persists every session (verbatim user/assistant turns + tool calls)
# in a local SQLite DB at ~/.local/share/opencode/opencode.db. There is
# currently no opencode session-stopping hook upstream, so the diary-based
# auto-save is best-effort; this wrapper closes the gap by mining the SQLite
# directly.
#
# Strategy:
#   1. Read opencode.db and export each qualifying session to a Claude Code
#      JSONL file (format the mempalace normalizer already understands).
#   2. Stage exports under ~/.cache/mempalace-session/<wing>/.
#   3. Run `mempalace mine --mode convos` against the staging dir.
#
# Dedup: mempalace convos mode keys on source_file (absolute staging path).
# The staging path is deterministic (per-wing under XDG_CACHE_HOME) so re-runs
# are idempotent as long as session content hasn't changed.
#
# Session filter: sessions with fewer than --min-messages messages (default 3)
# are skipped to avoid filing throwaway /exit'd sessions.
#
# Usage:
#   mempalace-session
#   mempalace-session --wing <name>
#   mempalace-session --session <id>
#   mempalace-session --since 2026-04-01
#   mempalace-session --min-messages 6
#   mempalace-session --dry-run
#   mempalace-session --help
#
# Exit codes:
#   0  success
#   1  usage / argument error
#   2  opencode.db missing or unreadable
#   3  mempalace CLI not installed
#   4  mine failed
#
# Dependencies: bash, python3 (stdlib sqlite3), mempalace (v3.3.3+)

set -euo pipefail

# ── Defaults ─────────────────────────────────────────────────────────
AGENT="${USER:-mempalace}"
WING="wing_conversations"
SESSION_ID=""
SINCE=""
MIN_MESSAGES=3
DRY_RUN=0
NO_REPAIR=0
OPENCODE_DB="${OPENCODE_DB:-$HOME/.local/share/opencode/opencode.db}"

# ── Usage ────────────────────────────────────────────────────────────
usage() {
  cat <<'EOF'
mempalace-session — mine opencode session history into MemPalace

Usage:
  mempalace-session [options]

Options:
  --wing <name>         Target wing (default: wing_conversations)
  --session <id>        Export one session only (default: all qualifying)
  --since <YYYY-MM-DD>  Only sessions with time_updated on/after this date
  --min-messages <N>    Skip sessions with fewer than N messages (default: 3)
  --agent <name>        Agent name recorded on drawers (default: $USER)
  --db <path>           Path to opencode.db (default: $OPENCODE_DB or
                        ~/.local/share/opencode/opencode.db)
  --dry-run             Export + list; do not mine into palace. Each session
                        is tagged [NEW] or [SKIP] based on whether its
                        source_file is already present in the palace.
  --no-repair           Skip `mempalace repair` after mining
  -h, --help            Show this help

Idempotency:
  Re-running on the same corpus is safe. The export step always writes
  every qualifying session to the cache, but the mine step dedups on
  source_file path — already-filed sessions are skipped without
  re-embedding. A --dry-run summary shows exactly how many of the exported
  files are new vs already filed, so you can see in advance what a real
  run would do.

What gets mined:
  - Each qualifying session → one Claude Code JSONL file
  - Staged under ~/.cache/mempalace-session/<wing>/
  - Filed via `mempalace mine --mode convos`

Transcript shape per session:
  - Synthetic header as first user turn:
      [session: <title> | <directory> | <YYYY-MM-DD>]
  - User/assistant messages extracted from message.data + part.data
  - Tool calls → Claude Code `tool_use` blocks
  - Tool outputs → `tool_result` blocks (folded into the assistant turn
    by the mempalace normalizer)
  - `step-start` / `step-finish` parts are dropped (noise)
  - `reasoning` parts prefixed with `[reasoning]` and kept as text

Dedup:
  - source_file = absolute staging path (deterministic per session ID)
  - Re-runs skip unchanged sessions. To force re-mining, delete the
    staging dir: rm -rf ~/.cache/mempalace-session/<wing>/

Rationale:
  Opencode lacks a session-stopping hook (upstream PRs #16598, #16769
  still open). Until that lands + mempalace hooks_cli.py gains an
  opencode harness, this wrapper is how we get automatic session capture.
EOF
}

# ── Parse args ───────────────────────────────────────────────────────
# _require_value <option> <remaining-arg-count>
# Fails with a usage error when a value-taking option is the last argument.
# Without this check `shift 2` fails under `set -e` and the script exits
# silently with no explanation.
_require_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "error: $1 requires a value" >&2
    exit 1
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    -h|--help)      usage; exit 0 ;;
    --wing)         _require_value "$1" "$#"; WING="$2"; shift 2 ;;
    --session)      _require_value "$1" "$#"; SESSION_ID="$2"; shift 2 ;;
    --since)        _require_value "$1" "$#"; SINCE="$2"; shift 2 ;;
    --min-messages) _require_value "$1" "$#"; MIN_MESSAGES="$2"; shift 2 ;;
    --agent)        _require_value "$1" "$#"; AGENT="$2"; shift 2 ;;
    --db)           _require_value "$1" "$#"; OPENCODE_DB="$2"; shift 2 ;;
    --dry-run)      DRY_RUN=1; shift ;;
    --no-repair)    NO_REPAIR=1; shift ;;
    --)             shift; break ;;
    -*)             echo "error: unknown option: $1" >&2; usage >&2; exit 1 ;;
    *)              echo "error: unexpected arg: $1" >&2; exit 1 ;;
  esac
done

# ── Preflight ────────────────────────────────────────────────────────
if [[ ! -f "$OPENCODE_DB" ]]; then
  echo "error: opencode.db not found at $OPENCODE_DB" >&2
  echo " override with --db <path> or OPENCODE_DB env var" >&2
  exit 2
fi

if ! command -v mempalace >/dev/null 2>&1; then
  echo "error: mempalace CLI not found in PATH" >&2
  exit 3
fi

if ! [[ "$MIN_MESSAGES" =~ ^[0-9]+$ ]]; then
  echo "error: --min-messages must be an integer" >&2
  exit 1
fi

# ── Staging dir ──────────────────────────────────────────────────────
# Deterministic per-wing path so source_file dedup works across re-runs.
# An empty wing would make STAGE the cache root itself, so the mine step would
# sweep up the exports of every wing. Treat it as a usage error.
if [[ -z "$WING" ]]; then
  echo "error: --wing must not be empty" >&2
  exit 1
fi

CACHE_ROOT="${XDG_CACHE_HOME:-$HOME/.cache}/mempalace-session"
STAGE="$CACHE_ROOT/$WING"
mkdir -p "$STAGE"

# ── Export sessions (Python heredoc) ────────────────────────────────
# Writes one JSONL file per qualifying session into $STAGE.
# Prints on stdout:  EXPORTED <count> and ALREADY_FILED <count>
# Prints on stderr:  one line per exported session, plus SKIPPED_SHORT <count>
#
# If the palace is reachable, also classifies each export as NEW or ALREADY
# FILED (matching by source_file path) so --dry-run can report the true
# mine-set size, not just the export-set size. Classification is advisory
# only — the real mine step delegates dedup to `mempalace mine --mode convos`,
# which is the authoritative source of truth.
#
# The exporter exits 1 on a bad --since value and 2 when opencode.db cannot be
# read; `|| exit $?` forwards that status as this script's exit code.
export_count=$(python3 - "$OPENCODE_DB" "$STAGE" "$SESSION_ID" "$SINCE" "$MIN_MESSAGES" <<'PY'
import sqlite3, json, sys, os
from contextlib import closing
from datetime import datetime, timezone
from pathlib import Path


def ro_uri(path):
    """Return a read-only SQLite URI for *path*.

    Path.as_uri() percent-encodes characters such as "?", "#" and "%" that
    would otherwise be parsed as URI syntax and open the wrong file.
    """
    return Path(path).resolve().as_uri() + "?mode=ro"


def load_already_filed(stage):
    """Return the set of source_file paths under *stage* already in the palace.

    Best-effort and read-only: a missing or unreadable palace yields an empty
    (or partial) set, and the mine step performs the real dedup anyway.
    """
    filed = set()
    palace_path = os.environ.get("MEMPALACE_PATH", os.path.expanduser("~/.mempalace/palace"))
    chroma_db = Path(palace_path) / "chroma.sqlite3"
    if not chroma_db.is_file():
        return filed
    try:
        # closing() releases the connection even when the query raises.
        with closing(sqlite3.connect(ro_uri(chroma_db), uri=True)) as pcon:
            # LIKE is only a coarse prefilter ("_" and "%" in the path act as
            # wildcards); callers compare exact paths against this set.
            rows = pcon.execute(
                "SELECT DISTINCT string_value FROM embedding_metadata "
                "WHERE key='source_file' AND string_value LIKE ?",
                (f"{stage}%",),
            )
            filed.update(sf for (sf,) in rows if sf)
    except sqlite3.Error:
        pass  # palace unreachable → treat all exports as new (miner will dedup)
    return filed


def message_turns(msg, parts):
    """Convert one opencode message row and its part rows into JSONL turns.

    Returns a list with zero, one or two dicts: the message itself and, for
    assistant messages with tool output, a synthetic "human" tool_result turn.
    Rows whose JSON payload is missing, malformed or not an object are skipped
    instead of aborting the whole export.
    """
    try:
        mdata = json.loads(msg["data"])
    except (json.JSONDecodeError, TypeError):
        return []
    if not isinstance(mdata, dict):
        return []
    role = mdata.get("role")
    if role not in ("user", "assistant"):
        return []

    blocks = []
    tool_results = []
    for p in parts:
        try:
            pd = json.loads(p["data"])
        except (json.JSONDecodeError, TypeError):
            continue
        if not isinstance(pd, dict):
            continue
        t = pd.get("type")
        if t == "text":
            txt = (pd.get("text") or "").strip()
            if txt:
                blocks.append({"type": "text", "text": txt})
        elif t == "tool":
            # opencode tool part → tool_use block + deferred tool_result
            state = pd.get("state") or {}
            tool_name = pd.get("tool") or "Unknown"
            call_id = pd.get("callID") or p["id"]
            tool_input = state.get("input") or {}
            tool_output = state.get("output")
            blocks.append({
                "type": "tool_use",
                "id": call_id,
                "name": tool_name,
                "input": tool_input,
            })
            if tool_output:
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": call_id,
                    "content": str(tool_output),
                })
        elif t in ("step-start", "step-finish"):
            continue  # noise
        elif t == "reasoning":
            rtext = (pd.get("text") or "").strip()
            if rtext:
                blocks.append({"type": "text", "text": f"[reasoning] {rtext}"})

    if not blocks:
        return []

    # Simplify single-text-block messages to a bare string (more tolerant
    # of normalizer edge cases; mempalace accepts either shape).
    if len(blocks) == 1 and blocks[0]["type"] == "text":
        content = blocks[0]["text"]
    else:
        content = blocks
    turns = [{"type": role, "message": {"content": content}}]

    # For assistants, follow up with a synthetic human tool_result message.
    # The mempalace normalizer is_tool_only branch folds these back into the
    # assistant turn (see normalize.py:211-214).
    if role == "assistant" and tool_results:
        turns.append({"type": "human", "message": {"content": tool_results}})
    return turns


def export_sessions(cur, stage, session_filter, since_ms, min_messages, already_filed):
    """Write one JSONL file per qualifying session and print the summary counts."""
    q = "SELECT * FROM session WHERE 1=1"
    params = []
    if session_filter:
        q += " AND id = ?"
        params.append(session_filter)
    if since_ms is not None:
        q += " AND time_updated >= ?"
        params.append(since_ms)
    q += " ORDER BY time_updated"
    cur.execute(q, params)
    sessions = [dict(r) for r in cur.fetchall()]
    if not sessions:
        print("EXPORTED 0")
        return

    exported = 0
    skipped_short = 0
    skipped_already_filed = 0

    for sess in sessions:
        sid = sess["id"]
        cur.execute("SELECT COUNT(*) FROM message WHERE session_id=?", (sid,))
        msg_count = cur.fetchone()[0]
        if msg_count < min_messages:
            skipped_short += 1
            continue

        cur.execute(
            "SELECT * FROM message WHERE session_id=? ORDER BY time_created", (sid,)
        )
        messages = [dict(r) for r in cur.fetchall()]
        cur.execute(
            "SELECT * FROM part WHERE session_id=? ORDER BY time_created", (sid,)
        )
        parts_by_msg = {}
        for r in cur.fetchall():
            d = dict(r)
            parts_by_msg.setdefault(d["message_id"], []).append(d)

        # Synthetic header as first user turn — injects title/directory/date
        # into the transcript so semantic search can find sessions by topic,
        # not just by session-id filename.
        title = sess.get("title") or "(untitled)"
        directory = sess.get("directory") or "?"
        date_str = datetime.fromtimestamp(
            sess["time_created"] / 1000, tz=timezone.utc
        ).strftime("%Y-%m-%d")
        header = f"[session: {title} | {directory} | {date_str}]"
        out_lines = [{"type": "user", "message": {"content": header}}]

        for msg in messages:
            out_lines.extend(message_turns(msg, parts_by_msg.get(msg["id"], [])))

        # Must have at least 2 turns for the normalizer to accept the file
        if len(out_lines) < 2:
            skipped_short += 1
            continue

        slug = sess.get("slug") or "session"
        out_path = stage / f"{slug}_{sid}.jsonl"
        with open(out_path, "w", encoding="utf-8") as f:
            for obj in out_lines:
                f.write(json.dumps(obj, ensure_ascii=False) + "\n")
        # Set mtime to session time_updated so dedup sees a stable value.
        # Deliberately best-effort: a failure here must not drop the export.
        try:
            ts = sess["time_updated"] / 1000
            os.utime(out_path, (ts, ts))
        except Exception:
            pass
        exported += 1

        is_filed = str(out_path) in already_filed
        if is_filed:
            skipped_already_filed += 1
        status = "SKIP" if is_filed else "NEW "
        print(f" [{status}] {out_path.name} ({msg_count} msgs, {len(out_lines)} turns)", file=sys.stderr)

    print(f"EXPORTED {exported}")
    print(f"ALREADY_FILED {skipped_already_filed}")
    if skipped_short:
        print(f"SKIPPED_SHORT {skipped_short}", file=sys.stderr)


def main():
    db_path, stage, session_filter, since, min_messages = sys.argv[1:6]
    min_messages = int(min_messages)
    stage = Path(stage)

    # Convert --since YYYY-MM-DD to epoch ms (opencode uses ms timestamps)
    since_ms = None
    if since:
        try:
            since_ms = int(datetime.strptime(since, "%Y-%m-%d").replace(tzinfo=timezone.utc).timestamp() * 1000)
        except ValueError:
            print(f"error: --since must be YYYY-MM-DD, got {since!r}", file=sys.stderr)
            sys.exit(1)

    already_filed = load_already_filed(stage)

    with closing(sqlite3.connect(ro_uri(db_path), uri=True)) as conn:
        conn.row_factory = sqlite3.Row
        export_sessions(conn.cursor(), stage, session_filter, since_ms, min_messages, already_filed)


try:
    main()
except sqlite3.Error as exc:
    # Palace errors are handled inside load_already_filed, so anything that
    # reaches here came from opencode.db: report it with exit code 2.
    print(f"error: cannot read opencode.db: {exc}", file=sys.stderr)
    sys.exit(2)
PY
) || exit $?

# Parse counts from stdout
count="$(printf '%s\n' "$export_count" | awk '/^EXPORTED / { print $2 }')"
count="${count:-0}"
already_filed="$(printf '%s\n' "$export_count" | awk '/^ALREADY_FILED / { print $2 }')"
already_filed="${already_filed:-0}"
to_file=$(( count - already_filed ))

if [[ "$count" -eq 0 ]]; then
  echo "no sessions qualified for export"
  exit 0
fi

echo ""
echo "Exported $count session(s) to $STAGE"
echo " $to_file new → will be filed on mine"
echo " $already_filed already filed → will be skipped (dedup by source_file)"

if [[ $DRY_RUN -eq 1 ]]; then
  if [[ "$to_file" -eq 0 ]]; then
    echo ""
    echo "--dry-run: no new sessions to mine. A real run would skip all $count."
  else
    echo ""
    echo "--dry-run: skipping mine step. A real run would file $to_file new session(s)."
  fi
  exit 0
fi

# ── Run the mine ─────────────────────────────────────────────────────
echo ""
echo "Mining into wing '$WING'..."
if ! mempalace mine "$STAGE" --mode convos --wing "$WING" --agent "$AGENT"; then
  echo "error: mempalace mine failed" >&2
  exit 4
fi

# ── Repair index ─────────────────────────────────────────────────────
if [[ $NO_REPAIR -eq 0 ]]; then
  echo ""
  echo "Rebuilding HNSW index..."
  mempalace repair --yes
fi

echo ""
echo "Done. Wing '$WING' updated. Remember to reconnect any live MCP sessions."