954c3f2ebb
Producer-side MemPalace tooling: two bash wrappers that bridge opencode session history and project documentation into the palace. Originally developed in cli_utils (2026-04-28); split into its own repo on 2026-04-30 because the conceptual fit was weak — cli_utils is interactive shell tooling, while this is agent memory infrastructure with its own architecture, dependency surface, and growth trajectory. Contents: - bin/mempalace-docs — docs-only mining wrapper (originally a2ddcc9 in cli_utils), bridges the gap until MemPalace PR #1213 (exclude_patterns) merges upstream. - bin/mempalace-session — opencode → palace session bridge (originally dacca0e in cli_utils). Reads ~/.local/share/opencode/opencode.db, exports each session to Claude Code JSONL, mines via 'mempalace mine --mode convos'. Bridges the gap until opencode session-stopping hooks + an opencode harness in hooks_cli.py land upstream. - ARCHITECTURE.md — canonical spec: architecture diagram, component details, setup recipe, operational notes, upstream-retirement roadmap. Originally a4cf314 in cli_utils. - SKILL.md — companion agent skill (producer side). Pairs with the consumer-side mempalace skill. Symlinked into ~/.agents/skills/opencode-mempalace-bridge/ by install.sh. - install.sh — idempotent installer, also handles --uninstall. - AGENTS.md — repo conventions. History of the individual files is not preserved in this split; see cli_utils (gitea.jordbo.se/joakimp/cli_utils) commits a2ddcc9, dacca0e, and a4cf314 for the original authorship context.
269 lines
9.3 KiB
Bash
Executable File
269 lines
9.3 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# mempalace-docs — mine a project into MemPalace with docs-only filtering
|
|
#
|
|
# Works around the fact that upstream `mempalace mine` has a hardcoded
|
|
# READABLE_EXTENSIONS list that includes .py / .ts / .js / .go / .rs etc,
|
|
# which pollutes the palace with low-signal code-fragment drawers.
|
|
#
|
|
# Strategy: stage a copy of only docs/config/script files into a per-wing cache directory (${XDG_CACHE_HOME:-~/.cache}/mempalace-docs/<wing>), then
|
|
# run `mempalace mine` against that staging dir. Wing is derived from the
|
|
# source directory name (override with --wing).
|
|
#
|
|
# Once MemPalace PR #1213 (exclude_patterns in mempalace.yaml) lands, this
|
|
# wrapper becomes a thin shim over `mempalace mine` with a default
|
|
# exclude_patterns injected.
|
|
#
|
|
# Usage:
|
|
# mempalace-docs <directory>
|
|
# mempalace-docs <directory> --wing <name>
|
|
# mempalace-docs <directory> --agent <name>
|
|
# mempalace-docs <directory> --dry-run
|
|
# mempalace-docs --help
|
|
#
|
|
# Exit codes:
|
|
# 0 success
|
|
# 1 usage / argument error
|
|
# 2 source directory missing
|
|
# 3 mempalace CLI not installed
|
|
# 4 mine failed
|
|
#
|
|
# Dependencies: bash 4+ (mapfile), find, cp, awk, sed, python3 (with the sqlite3 module), mempalace (v3.3.3+)
|
|
|
|
# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ── Defaults ─────────────────────────────────────────────────────────
# Option state. Empty SRC/WING mean "not supplied yet".
SRC=""                      # source directory (positional argument)
WING=""                     # target wing name
AGENT="${USER:-mempalace}"  # agent name recorded on drawers
DRY_RUN=0                   # 1 = only list the files that would be mined
NO_REPAIR=0                 # 1 = skip `mempalace repair` after mining
|
|
|
|
# File patterns to include. Docs + config + intent-bearing scripts.
|
|
# Everything else (code) is excluded by omission.
|
|
# Basename globs (as understood by `find -name`) selecting what gets mined.
INCLUDE_GLOBS=()
INCLUDE_GLOBS+=('*.md' '*.mdx' '*.rst' '*.txt')                 # prose docs
INCLUDE_GLOBS+=('*.yml' '*.yaml' '*.toml')                      # structured config
INCLUDE_GLOBS+=('*.json')   # includes package.json, pyproject companions; lockfiles filtered below
INCLUDE_GLOBS+=('*.sh' '*.bash' '*.zsh' '*.fish')               # shell scripts
INCLUDE_GLOBS+=('Dockerfile*' 'Makefile*' 'Containerfile*')     # build recipes
INCLUDE_GLOBS+=('*.conf' '*.cfg' '*.ini')                       # plain config
INCLUDE_GLOBS+=('LICENSE*' 'COPYING*' 'NOTICE*' 'AUTHORS*' 'CONTRIBUTORS*')  # legal / credits
|
|
|
|
# Path segments to always skip (in addition to .gitignore).
|
|
# Path segment names that are never descended into / never mined.
SKIP_DIRS=()
SKIP_DIRS+=('.git' '.venv' 'venv' '__pycache__' 'node_modules')          # VCS + dependency trees
SKIP_DIRS+=('.mypy_cache' '.pytest_cache' '.ruff_cache' '.tox' '.nox')   # tool caches
SKIP_DIRS+=('dist' 'build' '.next' '.nuxt' 'target' 'coverage')          # build / report output
SKIP_DIRS+=('.DS_Store')                                                 # macOS Finder metadata
|
|
|
|
# Filename patterns to skip even if caught by an include glob.
|
|
# Exact basenames that are dropped even though an include glob matches them.
SKIP_FILES=()
SKIP_FILES+=('package-lock.json' 'yarn.lock' 'pnpm-lock.yaml' 'poetry.lock')  # JS / Python lockfiles
SKIP_FILES+=('Cargo.lock' 'Gemfile.lock' 'composer.lock')                     # Rust / Ruby / PHP lockfiles
SKIP_FILES+=('.gitignore' '.dockerignore')                                    # ignore files
|
|
|
|
# ── Usage ────────────────────────────────────────────────────────────
|
|
#######################################
# Print the help text.
# The file lists below mirror INCLUDE_GLOBS, SKIP_DIRS and SKIP_FILES; keep
# them in sync when those arrays change.
# Arguments: none
# Outputs:   help text on stdout (callers redirect to stderr for errors)
#######################################
usage() {
  # Quoted delimiter: `$USER` and the backticks below are printed literally.
  cat <<'EOF'
mempalace-docs — mine a project into MemPalace, docs/config/scripts only

Usage:
  mempalace-docs <directory> [options]

Options:
  --wing <name>     Override wing name (default: `wing:` from the project's
                    mempalace.yaml, else the source directory name)
  --agent <name>    Agent name recorded on drawers (default: $USER, or
                    "mempalace" if $USER is unset)
  --dry-run         List files that would be mined; do not file
  --no-repair       Skip `mempalace repair` after mining
  -h, --help        Show this help

What gets mined:
  Docs:     *.md *.mdx *.rst *.txt
  Config:   *.yml *.yaml *.toml *.json *.conf *.cfg *.ini
  Scripts:  *.sh *.bash *.zsh *.fish Dockerfile* Makefile* Containerfile*
  Legal:    LICENSE* COPYING* NOTICE* AUTHORS* CONTRIBUTORS*

What gets skipped (by design):
  Source code:    .py .ts .tsx .js .jsx .go .rs .java .cpp .c .rb .kt .swift
  Caches / deps:  .git .venv venv node_modules __pycache__ .mypy_cache
                  .pytest_cache .ruff_cache .tox .nox dist build .next
                  .nuxt target coverage
  Lockfiles:      package-lock.json yarn.lock pnpm-lock.yaml poetry.lock
                  Cargo.lock Gemfile.lock composer.lock
  Ignore files:   .gitignore .dockerignore

Rationale:
  The palace is for context and intent. Agents read code directly via
  grep/glob/Read — mining it creates a parallel, lossier, drift-prone
  copy that pollutes semantic search.

  This wrapper is a bridge until MemPalace PR #1213 (exclude_patterns)
  lands upstream.
EOF
}
|
|
|
|
# ── Parse args ───────────────────────────────────────────────────────
|
|
#######################################
# Parse command-line arguments into the option globals.
# Globals:   SRC, WING, AGENT, DRY_RUN, NO_REPAIR (written)
# Arguments: the script's "$@"
# Outputs:   error messages (and usage, for unknown options) on stderr
# Exits:     0 after --help; 1 on an unknown option, a value-taking option
#            with no value, or more than one positional argument
#######################################
parse_args() {
  local arg
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        usage
        exit 0
        ;;
      --wing|--agent)
        # Without this check a trailing `--wing` makes `shift 2` fail and,
        # under `set -e`, the script dies with no message at all.
        if [[ $# -lt 2 ]]; then
          echo "error: option $1 requires a value" >&2
          exit 1
        fi
        if [[ "$1" == "--wing" ]]; then
          WING="$2"
        else
          AGENT="$2"
        fi
        shift 2
        ;;
      --dry-run)
        DRY_RUN=1
        shift
        ;;
      --no-repair)
        NO_REPAIR=1
        shift
        ;;
      --)
        shift
        break
        ;;
      -*)
        echo "error: unknown option: $1" >&2
        usage >&2
        exit 1
        ;;
      *)
        if [[ -z "$SRC" ]]; then
          SRC="$1"
          shift
        else
          echo "error: unexpected arg: $1" >&2
          exit 1
        fi
        ;;
    esac
  done

  # Whatever follows `--` is positional, even if it starts with a dash.
  for arg in "$@"; do
    if [[ -z "$SRC" ]]; then
      SRC="$arg"
    else
      echo "error: unexpected arg: $arg" >&2
      exit 1
    fi
  done
}

parse_args "$@"
|
|
|
|
# ── Validate inputs ──────────────────────────────────────────────────
# A source directory is mandatory (exit 1).
if [[ -z "$SRC" ]]; then
  usage >&2
  exit 1
fi

# It must exist and be a directory (exit 2).
if [[ ! -d "$SRC" ]]; then
  echo "error: not a directory: $SRC" >&2
  exit 2
fi

# The mempalace CLI must be on PATH (exit 3).
if ! command -v mempalace >/dev/null 2>&1; then
  echo "error: mempalace CLI not found in PATH" >&2
  exit 3
fi

# Canonicalize SRC to an absolute path.
# CDPATH is cleared for this one `cd`: with CDPATH set, a relative SRC could
# resolve to a different directory than the one checked with -d above, and
# `cd` would also echo the target into the command substitution.
# `--` keeps a directory name starting with `-` from being read as an option.
SRC="$(CDPATH='' cd -- "$SRC" && pwd)"
|
|
|
|
# Determine wing name with the following precedence:
|
|
# 1. explicit --wing flag (user override)
|
|
# 2. `wing:` value in $SRC/mempalace.yaml (respect existing project config)
|
|
# 3. sanitized source directory basename (hyphens → underscores, matching
|
|
# mempalace's convention for implicit wing names)
|
|
# No --wing given: look for a top-level `wing:` key in the project's own
# mempalace.yaml. Quotes and spaces are stripped from the value; a failing
# awk is tolerated and simply leaves WING empty.
if [[ -z "$WING" ]]; then
  wing_cfg="$SRC/mempalace.yaml"
  if [[ -f "$wing_cfg" ]]; then
    WING="$(awk -F': *' '/^wing:/ { gsub(/["\x27 ]/,"",$2); print $2; exit }' "$wing_cfg" 2>/dev/null || true)"
  fi
fi

# Still nothing: fall back to the source directory's basename with every
# hyphen turned into an underscore.
if [[ -z "$WING" ]]; then
  src_base="$(basename "$SRC")"
  WING="${src_base//-/_}"
fi
|
|
|
|
# ── Build staging directory ──────────────────────────────────────────
|
|
# Use a deterministic, per-wing cache path so re-runs produce the same
|
|
# source_file paths the miner saw last time. This is critical: mempalace
|
|
# dedup keys on source_file + source_mtime, so a mktemp path would cause
|
|
# every run to re-file the entire wing.
|
|
CACHE_ROOT="${XDG_CACHE_HOME:-$HOME/.cache}/mempalace-docs"

# WING becomes a path component that is passed to `rm -rf` below, so it
# must be a single, real directory name. Without this check `--wing ../../x`
# (or a source directory of `/`, whose basename is `/`) would delete
# something outside the cache.
case "$WING" in
  '' | . | .. | */*)
    echo "error: invalid wing name: '$WING'" >&2
    exit 1
    ;;
esac

STAGE="$CACHE_ROOT/$WING"
mkdir -p -- "$CACHE_ROOT"
# ${STAGE:?} aborts instead of expanding to nothing if STAGE is ever empty.
rm -rf -- "${STAGE:?}"
mkdir -p -- "$STAGE"

# Remove only this wing's stage on exit; $CACHE_ROOT itself is left alone
# in case other wings are staging concurrently.
cleanup_stage() {
  rm -rf -- "${STAGE:?}"
}
trap cleanup_stage EXIT
# A signal trap that only cleans up lets the script carry on after Ctrl-C.
# Exit explicitly (128 + signal number); the EXIT trap then does the cleanup.
trap 'exit 130' INT
trap 'exit 143' TERM
|
|
|
|
# Build find expression
|
|
#######################################
# Find every file under $SRC whose basename matches an include glob,
# without descending into any skipped directory.
# Globals:   SRC, SKIP_DIRS, INCLUDE_GLOBS (read); find_cmd, matches (written)
# Arguments: none
# Outputs:   none; results are left in the `matches` array
#######################################
collect_matches() {
  local -a prune_expr=() name_expr=()
  local d g f

  # Skip names are tested per path component with -name and pruned.
  # Matching them against the whole path with -path would also match the
  # directories ABOVE $SRC, so a project living under e.g. ~/build/ or
  # ~/dist/ would yield no files at all.
  for d in "${SKIP_DIRS[@]}"; do
    prune_expr+=(-o -name "$d")
  done
  for g in "${INCLUDE_GLOBS[@]}"; do
    name_expr+=(-o -name "$g")
  done

  # -mindepth 1 exempts $SRC itself from the tests, so a source directory
  # that happens to be called "build" is still walked. The `:1` slices
  # drop the leading -o of each OR-chain.
  find_cmd=(
    find "$SRC" -mindepth 1
    '(' "${prune_expr[@]:1}" ')' -prune
    -o -type f '(' "${name_expr[@]:1}" ')' -print0
  )

  # NUL-delimited so file names containing newlines stay intact.
  matches=()
  while IFS= read -r -d '' f; do
    matches+=("$f")
  done < <("${find_cmd[@]}")
}

collect_matches
|
|
|
|
# Keep each match unless its basename is exactly one of SKIP_FILES.
filtered=()
for f in "${matches[@]}"; do
  name="$(basename "$f")"
  keep=1
  for unwanted in "${SKIP_FILES[@]}"; do
    if [[ "$name" == "$unwanted" ]]; then
      keep=0
      break
    fi
  done
  if [[ $keep -eq 1 ]]; then
    filtered+=("$f")
  fi
done

# Number of files that survived the filter.
count="${#filtered[@]}"
|
|
|
|
# Nothing to mine is not an error: report it on stderr and stop.
if [[ $count -eq 0 ]]; then
  echo "no matching files found in $SRC" >&2
  exit 0
fi

# Dry run: list the selected files relative to $SRC, then stop.
if [[ $DRY_RUN -eq 1 ]]; then
  echo "Would mine $count files into wing '$WING':"
  for f in "${filtered[@]}"; do
    # Strip the "$SRC/" prefix with a quoted (literal) parameter expansion.
    # Splicing $SRC into a sed regex breaks on paths containing `#`, `.`,
    # `[`, `*` and similar characters.
    printf ' %s\n' "${f#"$SRC"/}"
  done
  exit 0
fi
|
|
|
|
# Copy into staging, preserving mtime (critical for mempalace dedup —
|
|
# the miner compares stored mtime against the staged copy's mtime).
|
|
# Mirror each selected file into the stage at the same relative path.
for f in "${filtered[@]}"; do
  # Quote $SRC inside the expansion so it is stripped as a literal prefix;
  # unquoted, glob characters in the path (`*`, `?`, `[`) act as a pattern.
  rel="${f#"$SRC"/}"
  dest="$STAGE/$rel"
  mkdir -p -- "$(dirname -- "$dest")"
  # -p preserves the modification time on the staged copy.
  cp -p -- "$f" "$dest"
done
|
|
|
|
# Purge any drawers in this wing that came from the original source
|
|
# directory. The miner records source_file = absolute path from the
|
|
# staging dir; this differs from a prior `mempalace mine <source>` run,
|
|
# so without this purge the wing would accumulate duplicates every time
|
|
# we switch between upstream `mempalace mine` and this wrapper.
|
|
# We only purge source_file paths matching $SRC/*, leaving other wings
|
|
# and other sources alone.
|
|
# Delete drawers in wing $1 whose source_file is $2 or lies beneath it.
# Exits 0 without doing anything when the palace database does not exist.
python3 - "$WING" "$SRC" <<'PY'
import os
import sqlite3
import sys

wing, src = sys.argv[1], sys.argv[2].rstrip("/")
db_path = os.path.expanduser("~/.mempalace/palace/chroma.sqlite3")
if not os.path.exists(db_path):
    sys.exit(0)


def like_escape(text):
    """Escape LIKE wildcards so the path is matched literally.

    `_` is a single-character wildcard in LIKE and is common in directory
    names: unescaped, /home/u/my_proj also matches /home/u/my-proj.
    """
    return text.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


db = sqlite3.connect(db_path)
try:
    cur = db.cursor()
    # Ids in the target wing whose source_file is src itself or under src/.
    q = """
        SELECT DISTINCT w.id
          FROM embedding_metadata w
          JOIN embedding_metadata s ON w.id = s.id AND s.key = 'source_file'
         WHERE w.key = 'wing'
           AND w.string_value = ?
           AND (s.string_value LIKE ? ESCAPE '\\'
                OR s.string_value LIKE ? ESCAPE '\\')
    """
    literal = like_escape(src)
    ids = [row[0] for row in cur.execute(q, (wing, literal + "/%", literal))]
    if ids:
        # One bound parameter per statement: a single `IN (?, ?, ...)` can
        # exceed SQLite's bound-variable limit on a large wing.
        rows = [(i,) for i in ids]
        failed = False
        for tbl in ("embedding_metadata", "embeddings"):
            try:
                cur.executemany(f"DELETE FROM {tbl} WHERE id = ?", rows)
            except sqlite3.OperationalError as exc:
                # Still best-effort, but no longer silent.
                failed = True
                print(f"warning: could not purge from {tbl}: {exc}", file=sys.stderr)
        db.commit()
        if not failed:
            print(f" purged {len(ids)} pre-existing drawers for {src} from wing '{wing}'")
finally:
    db.close()
PY
|
|
|
|
# Write mempalace.yaml into staging dir so the miner uses the right wing
|
|
# Emit the staged mempalace.yaml: the wing name plus a single catch-all
# "general" room. This replaces any mempalace.yaml copied from the source.
{
  printf 'wing: %s\n' "$WING"
  printf 'rooms:\n'
  printf '  - name: general\n'
  printf '    description: Docs, config, and scripts from %s\n' "$WING"
  printf '    keywords: [general]\n'
} > "$STAGE/mempalace.yaml"
|
|
|
|
echo "Staging $count files into wing '$WING'..."

# ── Run the mine ─────────────────────────────────────────────────────
# Mine the staged copy; any failure of the CLI maps to exit code 4.
mempalace mine "$STAGE" --agent "$AGENT" --wing "$WING" || {
  echo "error: mempalace mine failed" >&2
  exit 4
}
|
|
|
|
# ── Repair index ─────────────────────────────────────────────────────
|
|
# Run `mempalace repair` unless --no-repair was given.
if (( NO_REPAIR == 0 )); then
  printf '\n%s\n' "Rebuilding HNSW index..."
  mempalace repair --yes
fi

printf '\n%s\n' "Done. Wing '$WING' is ready. Remember to reconnect any live MCP sessions."
|