Initial commit — split out from cli_utils
Producer-side MemPalace tooling: two bash wrappers that bridge opencode session history and project documentation into the palace. Originally developed in cli_utils (2026-04-28); split into its own repo on 2026-04-30 because the conceptual fit was weak — cli_utils is interactive shell tooling, while this is agent memory infrastructure with its own architecture, dependency surface, and growth trajectory. Contents: - bin/mempalace-docs — docs-only mining wrapper (originally a2ddcc9 in cli_utils), bridges the gap until MemPalace PR #1213 (exclude_patterns) merges upstream. - bin/mempalace-session — opencode → palace session bridge (originally dacca0e in cli_utils). Reads ~/.local/share/opencode/opencode.db, exports each session to Claude Code JSONL, mines via 'mempalace mine --mode convos'. Bridges the gap until opencode session-stopping hooks + an opencode harness in hooks_cli.py land upstream. - ARCHITECTURE.md — canonical spec: architecture diagram, component details, setup recipe, operational notes, upstream-retirement roadmap. Originally a4cf314 in cli_utils. - SKILL.md — companion agent skill (producer side). Pairs with the consumer-side mempalace skill. Symlinked into ~/.agents/skills/opencode-mempalace-bridge/ by install.sh. - install.sh — idempotent installer, also handles --uninstall. - AGENTS.md — repo conventions. History of the individual files is not preserved in this split; see cli_utils (gitea.jordbo.se/joakimp/cli_utils) commits a2ddcc9, dacca0e, and a4cf314 for the original authorship context.
This commit is contained in:
Executable
+268
@@ -0,0 +1,268 @@
|
||||
#!/usr/bin/env bash
|
||||
# mempalace-docs — mine a project into MemPalace with docs-only filtering
|
||||
#
|
||||
# Works around the fact that upstream `mempalace mine` has a hardcoded
|
||||
# READABLE_EXTENSIONS list that includes .py / .ts / .js / .go / .rs etc,
|
||||
# which pollutes the palace with low-signal code-fragment drawers.
|
||||
#
|
||||
# Strategy: stage a copy of only docs/config/script files into a cache dir, then
|
||||
# run `mempalace mine` against that staging dir. Wing is derived from the
|
||||
# source directory name (override with --wing).
|
||||
#
|
||||
# Once MemPalace PR #1213 (exclude_patterns in mempalace.yaml) lands, this
|
||||
# wrapper becomes a thin shim over `mempalace mine` with a default
|
||||
# exclude_patterns injected.
|
||||
#
|
||||
# Usage:
|
||||
# mempalace-docs <directory>
|
||||
# mempalace-docs <directory> --wing <name>
|
||||
# mempalace-docs <directory> --agent <name>
|
||||
# mempalace-docs <directory> --dry-run
|
||||
# mempalace-docs --help
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 success
|
||||
# 1 usage / argument error
|
||||
# 2 source directory missing
|
||||
# 3 mempalace CLI not installed
|
||||
# 4 mine failed
|
||||
#
|
||||
# Dependencies: bash, find, cp, awk, python3 (stdlib sqlite3), mempalace (v3.3.3+)
|
||||
|
||||
# Strict mode: stop on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ── Defaults ─────────────────────────────────────────────────────────
# Option state. The argument parser further down overwrites these.
AGENT=${USER:-mempalace}
WING=''
SRC=''
DRY_RUN=0
NO_REPAIR=0

# Basename globs that qualify a file for mining: docs, config, and
# intent-bearing scripts. Anything not listed here (i.e. code) is left out
# simply because nothing selects it.
INCLUDE_GLOBS=(
  # docs
  '*.md'
  '*.mdx'
  '*.rst'
  '*.txt'
  # config
  '*.yml'
  '*.yaml'
  '*.toml'
  '*.json'   # also picks up package.json and friends; lockfiles are dropped via SKIP_FILES
  # scripts and build recipes
  '*.sh'
  '*.bash'
  '*.zsh'
  '*.fish'
  'Dockerfile*'
  'Makefile*'
  'Containerfile*'
  '*.conf'
  '*.cfg'
  '*.ini'
  # legal / attribution
  'LICENSE*'
  'COPYING*'
  'NOTICE*'
  'AUTHORS*'
  'CONTRIBUTORS*'
)

# Path segments that are never mined.
SKIP_DIRS=(
  '.git'
  '.venv'
  'venv'
  '__pycache__'
  'node_modules'
  '.mypy_cache'
  '.pytest_cache'
  '.ruff_cache'
  '.tox'
  '.nox'
  'dist'
  'build'
  '.next'
  '.nuxt'
  'target'
  'coverage'
  '.DS_Store'
)

# Exact basenames to drop even when one of INCLUDE_GLOBS matches them.
SKIP_FILES=(
  'package-lock.json'
  'yarn.lock'
  'pnpm-lock.yaml'
  'poetry.lock'
  'Cargo.lock'
  'Gemfile.lock'
  'composer.lock'
  '.gitignore'
  '.dockerignore'
)
|
||||
|
||||
# ── Usage ────────────────────────────────────────────────────────────
|
||||
#######################################
# Print the help text for mempalace-docs.
# The file-type lists below mirror INCLUDE_GLOBS, SKIP_DIRS and SKIP_FILES;
# keep them in sync when those arrays change.
# Arguments: none
# Outputs:   help text on stdout (callers redirect to stderr on errors)
#######################################
usage() {
  cat <<'EOF'
mempalace-docs — mine a project into MemPalace, docs/config/scripts only

Usage:
  mempalace-docs <directory> [options]

Options:
  --wing <name>    Override wing name (default: `wing:` from the project's
                   mempalace.yaml, else the source directory name)
  --agent <name>   Agent name recorded on drawers (default: $USER)
  --dry-run        List files that would be mined; do not file
  --no-repair      Skip `mempalace repair` after mining
  -h, --help       Show this help

What gets mined:
  Docs:     *.md *.mdx *.rst *.txt
  Config:   *.yml *.yaml *.toml *.json *.conf *.cfg *.ini
  Scripts:  *.sh *.bash *.zsh *.fish Dockerfile* Makefile* Containerfile*
  Legal:    LICENSE* COPYING* NOTICE* AUTHORS* CONTRIBUTORS*

What gets skipped (by design):
  Source code:    .py .ts .tsx .js .jsx .go .rs .java .cpp .c .rb .kt .swift
  Caches / deps:  .git .venv venv node_modules __pycache__ .mypy_cache
                  .pytest_cache .ruff_cache .tox .nox dist build .next .nuxt
                  target coverage
  Lockfiles:      package-lock.json yarn.lock pnpm-lock.yaml poetry.lock
                  Cargo.lock Gemfile.lock composer.lock
  Ignore files:   .gitignore .dockerignore

Rationale:
  The palace is for context and intent. Agents read code directly via
  grep/glob/Read — mining it creates a parallel, lossier, drift-prone
  copy that pollutes semantic search.

  This wrapper is a bridge until MemPalace PR #1213 (exclude_patterns)
  lands upstream.
EOF
}
|
||||
|
||||
# ── Parse args ───────────────────────────────────────────────────────
|
||||
# Options may appear before or after the <directory> positional. Exactly one
# positional is accepted; a second one is a usage error (exit 1).
while [[ $# -gt 0 ]]; do
  case "$1" in
    -h|--help)
      usage; exit 0 ;;
    --wing|--agent)
      # A value-taking option given as the last argument used to make
      # `shift 2` fail, which under `set -e` killed the script without any
      # diagnostic. Report it as a usage error instead.
      if [[ $# -lt 2 ]]; then
        echo "error: option $1 requires a value" >&2; usage >&2; exit 1
      fi
      if [[ "$1" == "--wing" ]]; then WING="$2"; else AGENT="$2"; fi
      shift 2 ;;
    --dry-run)
      DRY_RUN=1; shift ;;
    --no-repair)
      NO_REPAIR=1; shift ;;
    --)
      shift; break ;;
    -*)
      echo "error: unknown option: $1" >&2; usage >&2; exit 1 ;;
    *)
      if [[ -z "$SRC" ]]; then
        SRC="$1"; shift
      else
        echo "error: unexpected arg: $1" >&2; exit 1
      fi ;;
  esac
done

# Whatever is left follows `--` and is positional by definition, so a
# directory whose name starts with `-` can be passed as `-- -dir`.
while [[ $# -gt 0 ]]; do
  if [[ -n "$SRC" ]]; then
    echo "error: unexpected arg: $1" >&2; exit 1
  fi
  SRC="$1"; shift
done

# Preflight, in exit-code order: 1 = no directory given, 2 = directory
# missing, 3 = mempalace CLI not on PATH.
if [[ -z "$SRC" ]]; then usage >&2; exit 1; fi
if [[ ! -d "$SRC" ]]; then
  echo "error: not a directory: $SRC" >&2; exit 2
fi
if ! command -v mempalace >/dev/null 2>&1; then
  echo "error: mempalace CLI not found in PATH" >&2; exit 3
fi
|
||||
|
||||
# Canonicalise the source directory to an absolute path.
SRC="$(cd -- "$SRC" && pwd)"

# Determine wing name with the following precedence:
#   1. explicit --wing flag (user override)
#   2. `wing:` value in $SRC/mempalace.yaml (respect existing project config)
#   3. source directory basename with hyphens turned into underscores
if [[ -z "$WING" && -f "$SRC/mempalace.yaml" ]]; then
  # Best-effort read: awk errors are silenced and an empty result falls
  # through to the basename default below.
  WING="$(awk -F': *' '/^wing:/ { gsub(/["\x27 ]/,"",$2); print $2; exit }' "$SRC/mempalace.yaml" 2>/dev/null || true)"
fi
if [[ -z "$WING" ]]; then
  WING="$(basename "$SRC" | tr '-' '_')"
fi

# The wing name becomes a path component that is passed to `rm -rf` below,
# so refuse anything that could point outside $CACHE_ROOT (e.g. `--wing ../..`
# or the basename of `/`).
case "$WING" in
  ''|.|..|*/*)
    echo "error: invalid wing name: '$WING'" >&2
    exit 1 ;;
esac

# ── Build staging directory ──────────────────────────────────────────
# The staging path is deterministic per wing (not a mktemp path) so that
# re-runs present the same file paths to the miner.
CACHE_ROOT="${XDG_CACHE_HOME:-$HOME/.cache}/mempalace-docs"
STAGE="$CACHE_ROOT/$WING"
mkdir -p -- "$CACHE_ROOT"
rm -rf -- "${STAGE:?}"
mkdir -p -- "$STAGE"

# Remove only this wing's stage on exit; $CACHE_ROOT is left alone because
# it may hold staging dirs of other wings.
trap 'rm -rf -- "${STAGE:?}"' EXIT
# On INT/TERM, exit explicitly (which fires the EXIT trap). A handler that
# only cleaned up would let the script carry on after the stage was removed.
trap 'exit 130' INT
trap 'exit 143' TERM
|
||||
|
||||
# Build find expression
|
||||
# Alternation of SKIP_DIRS names: `-name a -o -name b ...`.
prune_expr=()
for d in "${SKIP_DIRS[@]}"; do
  if [[ ${#prune_expr[@]} -gt 0 ]]; then prune_expr+=(-o); fi
  prune_expr+=(-name "$d")
done

# Alternation of INCLUDE_GLOBS patterns.
include_expr=()
for g in "${INCLUDE_GLOBS[@]}"; do
  if [[ ${#include_expr[@]} -gt 0 ]]; then include_expr+=(-o); fi
  include_expr+=(-name "$g")
done

# Prune skipped names *inside* the tree only. Matching `-path "*/$d/*"`
# against the absolute path also excluded everything when an ancestor of
# $SRC happened to be called e.g. `build` or `dist`. `-mindepth 1` keeps the
# root itself from being tested. Output is NUL-delimited so file names
# containing newlines survive.
find_cmd=(
  find "$SRC" -mindepth 1
  '(' "${prune_expr[@]}" ')' -prune
  -o -type f '(' "${include_expr[@]}" ')' -print0
)

# Collect matches, dropping exact basenames listed in SKIP_FILES.
filtered=()
while IFS= read -r -d '' f; do
  base="${f##*/}"
  skip=0
  for sf in "${SKIP_FILES[@]}"; do
    if [[ "$base" == "$sf" ]]; then skip=1; break; fi
  done
  if [[ $skip -eq 0 ]]; then filtered+=("$f"); fi
done < <("${find_cmd[@]}")

count="${#filtered[@]}"

# Nothing to mine is not an error.
if [[ $count -eq 0 ]]; then
  echo "no matching files found in $SRC" >&2
  exit 0
fi

if [[ $DRY_RUN -eq 1 ]]; then
  echo "Would mine $count files into wing '$WING':"
  for f in "${filtered[@]}"; do
    # Strip the source prefix literally; feeding $SRC to sed as a pattern
    # broke on paths containing `#` or regex metacharacters.
    printf '  %s\n' "${f#"$SRC"/}"
  done
  exit 0
fi
|
||||
|
||||
# Copy into staging, preserving mtime (critical for mempalace dedup —
|
||||
# the miner compares stored mtime against the staged copy's mtime).
|
||||
# Mirror each selected file into $STAGE under its path relative to $SRC.
# `cp -p` keeps the source mtime on the staged copy.
for f in "${filtered[@]}"; do
  # Quote $SRC inside the expansion so it is removed as a literal prefix;
  # unquoted, characters such as `[` or `*` in the path act as a glob.
  rel="${f#"$SRC"/}"
  dest="$STAGE/$rel"
  mkdir -p -- "${dest%/*}"
  cp -p -- "$f" "$dest"
done
|
||||
|
||||
# Purge any drawers in this wing that came from the original source
|
||||
# directory. The miner records source_file = absolute path from the
|
||||
# staging dir; this differs from a prior `mempalace mine <source>` run,
|
||||
# so without this purge the wing would accumulate duplicates every time
|
||||
# we switch between upstream `mempalace mine` and this wrapper.
|
||||
# We only purge source_file paths matching $SRC/*, leaving other wings
|
||||
# and other sources alone.
|
||||
# Delete rows in wing $WING whose `source_file` metadata is $SRC itself or
# lies under $SRC/. Other wings and other source paths are not touched.
# No-op when the palace database file does not exist.
python3 - "$WING" "$SRC" <<'PY'
import os
import sqlite3
import sys

wing, src = sys.argv[1], sys.argv[2].rstrip("/")
db_path = os.path.expanduser("~/.mempalace/palace/chroma.sqlite3")
if not os.path.exists(db_path):
    sys.exit(0)

# Keep each DELETE well below SQLite's bound-parameter limit (999 on older
# builds); one huge IN (...) list would raise OperationalError.
CHUNK = 500
prefix = src + "/"

db = sqlite3.connect(db_path)
try:
    cur = db.cursor()
    # Exact prefix comparison instead of LIKE: LIKE treats `_` and `%` in
    # the path as wildcards and is case-insensitive for ASCII, so
    # `/x/my_proj` would also match `/x/myXproj/...`.
    q = """
        SELECT DISTINCT w.id
        FROM embedding_metadata w
        JOIN embedding_metadata s ON w.id = s.id AND s.key = 'source_file'
        WHERE w.key = 'wing'
          AND w.string_value = ?
          AND (s.string_value = ? OR substr(s.string_value, 1, ?) = ?)
    """
    ids = [r[0] for r in cur.execute(q, (wing, src, len(prefix), prefix))]
    if ids:
        for tbl in ("embedding_metadata", "embeddings"):
            try:
                for start in range(0, len(ids), CHUNK):
                    chunk = ids[start:start + CHUNK]
                    ph = ",".join("?" * len(chunk))
                    cur.execute(f"DELETE FROM {tbl} WHERE id IN ({ph})", chunk)
            except sqlite3.OperationalError as exc:
                # Still best-effort per table, but no longer silent.
                print(f"  warning: could not purge from {tbl}: {exc}", file=sys.stderr)
        db.commit()
        print(f"  purged {len(ids)} pre-existing drawers for {src} from wing '{wing}'")
finally:
    db.close()
PY
|
||||
|
||||
# Write mempalace.yaml into staging dir so the miner uses the right wing
|
||||
# Emit a minimal mempalace.yaml for the staged tree: the wing name plus a
# single catch-all "general" room.
{
  printf 'wing: %s\n' "$WING"
  printf 'rooms:\n'
  printf '  - name: general\n'
  printf '    description: Docs, config, and scripts from %s\n' "$WING"
  printf '    keywords: [general]\n'
} > "$STAGE/mempalace.yaml"

printf '%s\n' "Staging $count files into wing '$WING'..."

# ── Run the mine ─────────────────────────────────────────────────────
# A failing mine maps to exit code 4.
mempalace mine "$STAGE" --agent "$AGENT" --wing "$WING" || {
  printf '%s\n' "error: mempalace mine failed" >&2
  exit 4
}

# ── Repair index ─────────────────────────────────────────────────────
# Runs unless --no-repair was given.
case "$NO_REPAIR" in
  0)
    printf '\n'
    printf '%s\n' "Rebuilding HNSW index..."
    mempalace repair --yes
    ;;
esac

printf '\n'
printf '%s\n' "Done. Wing '$WING' is ready. Remember to reconnect any live MCP sessions."
|
||||
Executable
+349
@@ -0,0 +1,349 @@
|
||||
#!/usr/bin/env bash
|
||||
# mempalace-session — mine opencode session history into MemPalace
|
||||
#
|
||||
# Opencode persists every session (verbatim user/assistant turns + tool calls)
|
||||
# in a local SQLite DB at ~/.local/share/opencode/opencode.db. There is
|
||||
# currently no opencode session-stopping hook upstream, so the diary-based
|
||||
# auto-save is best-effort; this wrapper closes the gap by mining the SQLite
|
||||
# directly.
|
||||
#
|
||||
# Strategy:
|
||||
# 1. Read opencode.db and export each qualifying session to a Claude Code
|
||||
# JSONL file (format the mempalace normalizer already understands).
|
||||
# 2. Stage exports under ~/.cache/mempalace-session/<wing>/.
|
||||
# 3. Run `mempalace mine --mode convos` against the staging dir.
|
||||
#
|
||||
# Dedup: mempalace convos mode keys on source_file (absolute staging path).
|
||||
# The staging path is deterministic (per-wing under XDG_CACHE_HOME) so re-runs
|
||||
# are idempotent as long as session content hasn't changed.
|
||||
#
|
||||
# Session filter: sessions with fewer than --min-messages messages (default 3)
|
||||
# are skipped to avoid filing throwaway /exit'd sessions.
|
||||
#
|
||||
# Usage:
|
||||
# mempalace-session
|
||||
# mempalace-session --wing <name>
|
||||
# mempalace-session --session <id>
|
||||
# mempalace-session --since 2026-04-01
|
||||
# mempalace-session --min-messages 6
|
||||
# mempalace-session --dry-run
|
||||
# mempalace-session --help
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 success
|
||||
# 1 usage / argument error
|
||||
# 2 opencode.db missing or unreadable
|
||||
# 3 mempalace CLI not installed
|
||||
# 4 mine failed
|
||||
#
|
||||
# Dependencies: bash, python3 (stdlib sqlite3), mempalace (v3.3.3+)
|
||||
|
||||
# Strict mode: stop on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ── Defaults ─────────────────────────────────────────────────────────
# Option state. The argument parser further down overwrites these.
AGENT=${USER:-mempalace}
WING='wing_conversations'
SESSION_ID=''
SINCE=''
MIN_MESSAGES=3
DRY_RUN=0
NO_REPAIR=0
# Keep a non-empty OPENCODE_DB from the environment; otherwise fall back to
# the path under ~/.local/share.
: "${OPENCODE_DB:=$HOME/.local/share/opencode/opencode.db}"
|
||||
|
||||
# ── Usage ────────────────────────────────────────────────────────────
|
||||
#######################################
# Print the help text for mempalace-session.
# Arguments: none
# Outputs:   help text on stdout (callers redirect to stderr on errors)
#######################################
usage() {
  local help_text
  # Slurp the literal heredoc into a variable. `read -d ''` reports failure
  # when it hits end-of-input without finding a NUL, hence the `|| true`.
  IFS= read -r -d '' help_text <<'EOF' || true
mempalace-session — mine opencode session history into MemPalace

Usage:
  mempalace-session [options]

Options:
  --wing <name>          Target wing (default: wing_conversations)
  --session <id>         Export one session only (default: all qualifying)
  --since <YYYY-MM-DD>   Only sessions with time_updated on/after this date
  --min-messages <N>     Skip sessions with fewer than N messages (default: 3)
  --agent <name>         Agent name recorded on drawers (default: $USER)
  --db <path>            Path to opencode.db (default: $OPENCODE_DB or
                         ~/.local/share/opencode/opencode.db)
  --dry-run              Export + list; do not mine into palace
  --no-repair            Skip `mempalace repair` after mining
  -h, --help             Show this help

What gets mined:
  - Each qualifying session → one Claude Code JSONL file
  - Staged under ~/.cache/mempalace-session/<wing>/
  - Filed via `mempalace mine --mode convos`

Transcript shape per session:
  - Synthetic header as first user turn:
      [session: <title> | <directory> | <YYYY-MM-DD>]
  - User/assistant messages extracted from message.data + part.data
  - Tool calls → Claude Code `tool_use` blocks
  - Tool outputs → `tool_result` blocks (folded into the assistant turn by the
    mempalace normalizer)
  - `step-start` / `step-finish` parts are dropped (noise)
  - `reasoning` parts prefixed with `[reasoning]` and kept as text

Dedup:
  - source_file = absolute staging path (deterministic per session ID)
  - Re-runs skip unchanged sessions. To force re-mining, delete the staging
    dir: rm -rf ~/.cache/mempalace-session/<wing>/

Rationale:
  Opencode lacks a session-stopping hook (upstream PRs #16598, #16769 still
  open). Until that lands + mempalace hooks_cli.py gains an opencode harness,
  this wrapper is how we get automatic session capture.
EOF
  printf '%s' "$help_text"
}
|
||||
|
||||
# ── Parse args ───────────────────────────────────────────────────────
|
||||
# This script takes options only; any positional argument is a usage error.
while [[ $# -gt 0 ]]; do
  case "$1" in
    -h|--help)
      usage; exit 0 ;;
    --wing|--session|--since|--min-messages|--agent|--db)
      # A value-taking option given as the last argument used to make
      # `shift 2` fail, which under `set -e` killed the script without any
      # diagnostic. Report it as a usage error instead.
      if [[ $# -lt 2 ]]; then
        echo "error: option $1 requires a value" >&2; usage >&2; exit 1
      fi
      case "$1" in
        --wing)         WING="$2" ;;
        --session)      SESSION_ID="$2" ;;
        --since)        SINCE="$2" ;;
        --min-messages) MIN_MESSAGES="$2" ;;
        --agent)        AGENT="$2" ;;
        --db)           OPENCODE_DB="$2" ;;
      esac
      shift 2 ;;
    --dry-run)
      DRY_RUN=1; shift ;;
    --no-repair)
      NO_REPAIR=1; shift ;;
    --)
      shift; break ;;
    -*)
      echo "error: unknown option: $1" >&2; usage >&2; exit 1 ;;
    *)
      echo "error: unexpected arg: $1" >&2; exit 1 ;;
  esac
done

# Arguments after `--` are positional, and none are accepted.
if [[ $# -gt 0 ]]; then
  echo "error: unexpected arg: $1" >&2; exit 1
fi

# ── Preflight ────────────────────────────────────────────────────────
# Exit codes: 2 = opencode.db missing, 3 = mempalace CLI missing,
# 1 = bad option value.
if [[ ! -f "$OPENCODE_DB" ]]; then
  echo "error: opencode.db not found at $OPENCODE_DB" >&2
  echo "       override with --db <path> or OPENCODE_DB env var" >&2
  exit 2
fi
if ! command -v mempalace >/dev/null 2>&1; then
  echo "error: mempalace CLI not found in PATH" >&2
  exit 3
fi
if ! [[ "$MIN_MESSAGES" =~ ^[0-9]+$ ]]; then
  echo "error: --min-messages must be an integer" >&2
  exit 1
fi
# The wing name becomes a directory name under the staging cache; reject
# values that would resolve outside it.
case "$WING" in
  ''|.|..|*/*)
    echo "error: invalid wing name: '$WING'" >&2
    exit 1 ;;
esac
|
||||
|
||||
# ── Staging dir ──────────────────────────────────────────────────────
|
||||
# Deterministic per-wing path so source_file dedup works across re-runs.
|
||||
CACHE_ROOT="${XDG_CACHE_HOME:-$HOME/.cache}/mempalace-session"
STAGE="$CACHE_ROOT/$WING"
mkdir -p -- "$STAGE"

# ── Export sessions (Python heredoc) ────────────────────────────────
# Writes one JSONL file per qualifying session into $STAGE.
# stdout: a single `EXPORTED <count>` line (captured below).
# stderr: one line per exported session, plus `SKIPPED_SHORT <n>` if any.
# Exit status: 1 for a malformed --since, 2 when the database cannot be
# read; under `set -e` either one terminates this script with that status.
export_count=$(python3 - "$OPENCODE_DB" "$STAGE" "$SESSION_ID" "$SINCE" "$MIN_MESSAGES" <<'PY'
import json
import os
import sqlite3
import sys
from datetime import datetime, timezone
from pathlib import Path

db_path, stage, session_filter, since, min_messages = sys.argv[1:6]
min_messages = int(min_messages)
stage = Path(stage)

# Convert --since YYYY-MM-DD (taken as UTC midnight) to epoch milliseconds,
# the unit this script assumes for opencode's time_* columns.
since_ms = None
if since:
    try:
        since_ms = int(
            datetime.strptime(since, "%Y-%m-%d")
            .replace(tzinfo=timezone.utc)
            .timestamp()
            * 1000
        )
    except ValueError:
        print(f"error: --since must be YYYY-MM-DD, got {since!r}", file=sys.stderr)
        sys.exit(1)


def load_json_object(raw):
    """Decode raw as JSON; return the dict, or None if it is not a JSON object."""
    try:
        value = json.loads(raw)
    except (TypeError, ValueError):
        return None
    return value if isinstance(value, dict) else None


def build_turn(role, parts):
    """Turn one message's parts into (content blocks, deferred tool_result blocks)."""
    blocks = []
    tool_results = []
    for p in parts:
        pd = load_json_object(p["data"])
        if pd is None:
            continue
        t = pd.get("type")
        if t == "text":
            txt = (pd.get("text") or "").strip()
            if txt:
                blocks.append({"type": "text", "text": txt})
        elif t == "tool":
            # opencode tool part → tool_use block + deferred tool_result
            state = pd.get("state") or {}
            call_id = pd.get("callID") or p["id"]
            blocks.append({
                "type": "tool_use",
                "id": call_id,
                "name": pd.get("tool") or "Unknown",
                "input": state.get("input") or {},
            })
            tool_output = state.get("output")
            if tool_output:
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": call_id,
                    "content": str(tool_output),
                })
        elif t == "reasoning":
            rtext = (pd.get("text") or "").strip()
            if rtext:
                blocks.append({"type": "text", "text": f"[reasoning] {rtext}"})
        # step-start / step-finish and unknown part types are dropped.
    return blocks, tool_results


def export(conn):
    """Write one JSONL file per qualifying session; return (exported, skipped_short)."""
    cur = conn.cursor()

    q = "SELECT * FROM session WHERE 1=1"
    params = []
    if session_filter:
        q += " AND id = ?"
        params.append(session_filter)
    if since_ms is not None:
        q += " AND time_updated >= ?"
        params.append(since_ms)
    q += " ORDER BY time_updated"
    cur.execute(q, params)
    sessions = [dict(r) for r in cur.fetchall()]

    exported = 0
    skipped_short = 0
    for sess in sessions:
        sid = sess["id"]
        cur.execute("SELECT COUNT(*) FROM message WHERE session_id=?", (sid,))
        msg_count = cur.fetchone()[0]
        if msg_count < min_messages:
            skipped_short += 1
            continue

        cur.execute(
            "SELECT * FROM message WHERE session_id=? ORDER BY time_created", (sid,)
        )
        messages = [dict(r) for r in cur.fetchall()]
        cur.execute(
            "SELECT * FROM part WHERE session_id=? ORDER BY time_created", (sid,)
        )
        parts_by_msg: dict[str, list] = {}
        for r in cur.fetchall():
            d = dict(r)
            parts_by_msg.setdefault(d["message_id"], []).append(d)

        out_lines: list[dict] = []

        # Synthetic header as first user turn: puts title/directory/date
        # into the transcript text itself.
        title = sess.get("title") or "(untitled)"
        directory = sess.get("directory") or "?"
        created = sess.get("time_created")
        if created is None:
            # A NULL timestamp used to abort the whole export with TypeError.
            date_str = "unknown"
        else:
            date_str = datetime.fromtimestamp(
                created / 1000, tz=timezone.utc
            ).strftime("%Y-%m-%d")
        header = f"[session: {title} | {directory} | {date_str}]"
        out_lines.append({"type": "user", "message": {"content": header}})

        for msg in messages:
            # Guarded like part rows: one undecodable message row is skipped
            # instead of aborting every remaining session.
            mdata = load_json_object(msg["data"])
            if mdata is None:
                continue
            role = mdata.get("role")
            if role not in ("user", "assistant"):
                continue

            blocks, tool_results = build_turn(role, parts_by_msg.get(msg["id"], []))
            if not blocks:
                continue

            # A lone text block is written as a bare string rather than a
            # one-element list.
            if len(blocks) == 1 and blocks[0]["type"] == "text":
                content = blocks[0]["text"]
            else:
                content = blocks
            out_lines.append({"type": role, "message": {"content": content}})

            # Assistant tool outputs follow as a synthetic "human" turn made
            # of tool_result blocks.
            if role == "assistant" and tool_results:
                out_lines.append({
                    "type": "human",
                    "message": {"content": tool_results},
                })

        # Header only (no real turns) → nothing worth filing.
        if len(out_lines) < 2:
            skipped_short += 1
            continue

        # Keep path separators out of the file name so the file always lands
        # directly in the staging dir.
        slug = str(sess.get("slug") or "session").replace(os.sep, "_")
        safe_sid = str(sid).replace(os.sep, "_")
        out_path = stage / f"{slug}_{safe_sid}.jsonl"
        with open(out_path, "w", encoding="utf-8") as f:
            for obj in out_lines:
                f.write(json.dumps(obj, ensure_ascii=False) + "\n")

        # Pin mtime to the session's time_updated so an unchanged session
        # yields the same mtime on every run. Best-effort.
        try:
            ts = sess["time_updated"] / 1000
            os.utime(out_path, (ts, ts))
        except (OSError, TypeError, KeyError):
            pass

        exported += 1
        print(f"  {out_path.name} ({msg_count} msgs, {len(out_lines)} turns)",
              file=sys.stderr)

    return exported, skipped_short


try:
    # Build the URI with as_uri() so `?`, `#`, `%` or spaces in the path are
    # percent-encoded instead of being parsed as URI syntax.
    conn = sqlite3.connect(Path(db_path).resolve().as_uri() + "?mode=ro", uri=True)
    conn.row_factory = sqlite3.Row
    try:
        exported, skipped_short = export(conn)
    finally:
        conn.close()
except sqlite3.Error as exc:
    # Use the exit code the script header assigns to an unreadable DB.
    print(f"error: cannot read opencode.db at {db_path}: {exc}", file=sys.stderr)
    sys.exit(2)

print(f"EXPORTED {exported}")
if skipped_short:
    print(f"SKIPPED_SHORT {skipped_short}", file=sys.stderr)
PY
)

# Extract the integer that follows "EXPORTED " in the captured stdout;
# anything unparsable counts as 0.
count="${export_count##*EXPORTED }"
count="${count%%[!0-9]*}"
count="${count:-0}"

if [[ "$count" -eq 0 ]]; then
  echo "no sessions qualified for export"
  exit 0
fi
|
||||
|
||||
printf '\n'
printf '%s\n' "Exported $count session(s) to $STAGE"

# With --dry-run the exported files stay in $STAGE but nothing is mined.
case "$DRY_RUN" in
  1)
    printf '%s\n' "--dry-run: skipping mine step"
    exit 0
    ;;
esac

# ── Run the mine ─────────────────────────────────────────────────────
# A failing mine maps to exit code 4.
printf '\n'
printf '%s\n' "Mining into wing '$WING'..."
mempalace mine "$STAGE" --mode convos --wing "$WING" --agent "$AGENT" || {
  printf '%s\n' "error: mempalace mine failed" >&2
  exit 4
}

# ── Repair index ─────────────────────────────────────────────────────
# Runs unless --no-repair was given.
case "$NO_REPAIR" in
  0)
    printf '\n'
    printf '%s\n' "Rebuilding HNSW index..."
    mempalace repair --yes
    ;;
esac

printf '\n'
printf '%s\n' "Done. Wing '$WING' updated. Remember to reconnect any live MCP sessions."
|
||||
Reference in New Issue
Block a user