7551947466
Baking the mempalace fallback skill fixed *availability*, but mempalace had no proactive-load directive anywhere (pi-toolkit's global AGENTS.md only points to pi-extensions), so a new container would still surface it only via description-matching — the same under-utilisation the pi-extensions directive was created to fix.

Add a session-start pointer to the pi-devbox managed AGENTS.md block (pi-global-AGENTS.append.md): gated to pi-devbox containers and conditional on the MemPalace MCP tools being present. Memory continuity matters most in a frequently-recreated container — the palace is its only cross-recreate memory.

- pi-global-AGENTS.append.md: '## Session start: load the mempalace skill'.
- smoke-test: assert the pointer merges into the global AGENTS.md at build.
- docs: VENDORED.md, README, CHANGELOG [Unreleased].

Now both skills are complete in pi-devbox: directive + skill file. pi-extensions = directive (pi-toolkit) + baked skill; mempalace = directive (this block) + baked skill.
270 lines
13 KiB
Bash
Executable File
270 lines
13 KiB
Bash
Executable File
#!/usr/bin/env bash
# smoke-test.sh — sanity checks for the pi-devbox image
#
# Usage: ./scripts/smoke-test.sh <image>
#
# Verifies:
#   - pi binary present and (if EXPECTED_PI_VERSION set) matches CI's resolved version
#   - new v1.0.0 base additions (pandoc, graphviz, imagemagick, yq, tealdeer)
#   - tmux 0-indexing baked in /etc/tmux.conf (required for pi-studio variants)
#   - pi-toolkit cloned at /opt/pi-toolkit
#   - pi-extensions cloned at /opt/pi-extensions
#   - pi-fork + pi-observational-memory cloned with node_modules baked
#   - entrypoint deploys pi-toolkit keybindings symlink
#   - entrypoint deploys ≥4 extensions
#   - mempalace bridge symlink present
#   - settings.json bootstrapped
#   - pi-fork + pi-observational-memory registered via `pi install`
#   - (studio variant only, auto-detected) pi-studio cloned + prebuilt
#     client bundle present + registered via `pi install`
#   - image size within threshold

# Strict mode: abort on unhandled errors, unset variables and failed pipeline
# stages.
set -euo pipefail

# Image reference under test (required first positional argument).
IMAGE="${1:?usage: $0 <image>}"
# Running tallies, incremented by run / run_expect / exec_test and the inline
# checks below; the final exit status is derived from FAIL.
PASS=0; FAIL=0
# pi-devbox v1.0.0 (decoupled from opencode-devbox) added pandoc, graphviz,
# imagemagick, yq, tealdeer, and a baked /etc/tmux.conf. Local arm64 build
# observed 3.20 GB. CI amd64 builds may differ slightly; threshold below
# carries +300 MB margin to absorb arch differences without false reds.
# Tighten in a follow-up release once amd64 actuals are observed in CI logs.
SIZE_THRESHOLD_MB=3500

# Run a shell command in a throwaway container (entrypoint disabled) and
# record whether it exited 0.
# Globals:   IMAGE (read), PASS / FAIL (incremented)
# Arguments: $1 - human-readable label for the result line
#            $2 - command string handed to `sh -c` inside the container
# Outputs:   one ✅/❌ line on stdout; the command's own output is discarded
run() {
  local label="$1"
  local cmd="$2"
  if docker run --rm --entrypoint="" "$IMAGE" sh -c "$cmd" >/dev/null 2>&1; then
    printf " ✅ %s\n" "$label"; PASS=$((PASS+1))
  else
    printf " ❌ %s\n" "$label"; FAIL=$((FAIL+1))
  fi
}

# Stricter version of `run` that asserts an expected substring in stdout.
# Catches the "image bytes silently identical to previous release" class of
# regression — Docker layer cache hit on `npm install -g <pkg>` because the
# bare command string is identical across builds, even when `latest` would
# resolve differently. Discovered 2026-05-23 — every pi-devbox release
# v0.74.0..v0.75.5 had been shipping the same image bytes.
#
# Globals:   IMAGE (read), PASS / FAIL (incremented)
# Arguments: $1 - human-readable label for the result line
#            $2 - command string handed to `sh -c` inside the container
#            $3 - literal substring that must occur in the combined
#                 stdout+stderr of the command
# Outputs:   one ✅/❌ line on stdout (the ❌ line includes the captured output)
run_expect() {
  local label="$1"
  local cmd="$2"
  local expect="$3"
  local out
  # The command's exit status is deliberately ignored: only the output counts.
  out=$(docker run --rm --entrypoint="" "$IMAGE" sh -c "$cmd" 2>&1) || true
  # Match in-shell rather than via `echo | grep -Fq`: under `set -o pipefail`
  # the pipe form can report failure when grep exits early (SIGPIPE on the
  # writer), and an expectation starting with '-' would be read as grep options.
  if [[ "$out" == *"$expect"* ]]; then
    printf " ✅ %s (got %s)\n" "$label" "$expect"; PASS=$((PASS+1))
  else
    printf " ❌ %s — expected substring %q, got: %s\n" "$label" "$expect" "$out"; FAIL=$((FAIL+1))
  fi
}

echo "=== pi-devbox smoke test: $IMAGE ==="
echo ""

# ── Binaries ─────────────────────────────────────────────────────────
echo "── Binaries ──"
# When CI exports EXPECTED_PI_VERSION, require it to appear in `pi --version`;
# otherwise just require that the binary runs.
if [ -n "${EXPECTED_PI_VERSION:-}" ]; then
  run_expect "pi version matches build arg" "pi --version" "$EXPECTED_PI_VERSION"
else
  run "pi" "pi --version"
fi
run "node" "node --version"
run "git" "git --version"
run "aws" "aws --version"
run "uv" "uv --version"
run "nvim" "nvim --version"
run "mempalace-mcp" "mempalace-mcp --help"
# v1.0.0 base additions — verify presence and basic functionality.
run "pandoc" "pandoc --version"
run "graphviz (dot)" "dot -V"
run "imagemagick" "magick --version"
run "yq" "yq --version"
run "tldr (tealdeer)" "tldr --version"
run "socat" "socat -V"
run "studio-expose helper" "test -x /usr/local/bin/studio-expose"
run "image-baked pi-devbox-environment skill" \
  "test -f /usr/local/share/pi-devbox/skills/pi-devbox-environment/SKILL.md"
run "global-AGENTS append snippet present" \
  "test -f /usr/local/share/pi-devbox/pi-global-AGENTS.append.md"
run "pi-devbox block merged into pi-global-AGENTS.md" \
  "grep -q 'pi-devbox:managed-block' /opt/pi-toolkit/pi-global-AGENTS.md"
run "mempalace session-start pointer merged into global AGENTS.md" \
  "grep -q 'load the mempalace skill' /opt/pi-toolkit/pi-global-AGENTS.md"
# Vendored fallback skills (so a no-skillset container still resolves the
# AGENTS.md 'read the pi-extensions skill' pointer).
run "image-baked pi-extensions fallback skill" \
  "test -f /usr/local/share/pi-devbox/skills/pi-extensions/SKILL.md"
run "pi-extensions skill ships its helper" \
  "test -f /usr/local/share/pi-devbox/skills/pi-extensions/evaluate-extension-usage.py"
run "image-baked mempalace fallback skill" \
  "test -f /usr/local/share/pi-devbox/skills/mempalace/SKILL.md"
# Layered freshness: when the pinned pi-extensions clone carries the skill, the
# baked copy must be the fresh package copy (Option 1), not the stale snapshot.
run "pi-extensions skill refreshed from package when present" \
  "if [ -f /opt/pi-extensions/skill/SKILL.md ]; then cmp -s /opt/pi-extensions/skill/SKILL.md /usr/local/share/pi-devbox/skills/pi-extensions/SKILL.md; else true; fi"

# ── tmux 0-indexing (required for pi-studio variants) ─────────────────
echo ""
echo "── tmux config ──"
# Both settings are asserted as literal substrings of the baked config file.
run_expect "/etc/tmux.conf has base-index 0" \
  "cat /etc/tmux.conf" "set -g base-index 0"
run_expect "/etc/tmux.conf has pane-base-index 0" \
  "cat /etc/tmux.conf" "set -g pane-base-index 0"

# ── Repo clones ───────────────────────────────────────────────────────
echo ""
echo "── Repo clones ──"
run "pi-toolkit clone" "test -d /opt/pi-toolkit && git -C /opt/pi-toolkit rev-parse --short HEAD"
run "pi-extensions clone" "test -d /opt/pi-extensions && git -C /opt/pi-extensions rev-parse --short HEAD"
run "pi-fork clone + node_modules" \
  "test -f /opt/pi-fork/package.json && test -d /opt/pi-fork/node_modules"
run "pi-observational-memory clone + node_modules" \
  "test -f /opt/pi-observational-memory/package.json && test -d /opt/pi-observational-memory/node_modules"

# pi-studio is present only in the :latest-studio variant. Auto-detect by
# probing /opt/pi-studio so this one script covers both variants.
# STUDIO_VARIANT (1/0) is read again by the runtime registration checks.
if docker run --rm --entrypoint="" "$IMAGE" sh -c 'test -d /opt/pi-studio' >/dev/null 2>&1; then
  STUDIO_VARIANT=1
  echo " ℹ️ pi-studio detected — running studio assertions"
  run "pi-studio clone + node_modules" \
    "test -f /opt/pi-studio/package.json && test -d /opt/pi-studio/node_modules"
  run "pi-studio prebuilt client bundle" \
    "test -f /opt/pi-studio/client/studio-client.js"
else
  STUDIO_VARIANT=0
  echo " ℹ️ pi-studio not present (non-studio variant) — skipping studio clone checks"
fi

# ── Build provenance (manifest + OCI labels) ─────────────────────────
echo ""
echo "── Build provenance ──"
run "/etc/pi-devbox/build-manifest.json present" \
  "test -f /etc/pi-devbox/build-manifest.json"
run_expect "manifest records pi-extensions component" \
  "cat /etc/pi-devbox/build-manifest.json" '"pi-extensions"'
run_expect "manifest records pi_version" \
  "cat /etc/pi-devbox/build-manifest.json" '"pi_version"'
# Every component must be a resolved commit (or null for pi-studio in the
# non-studio variant) — 'unknown' means a clone silently failed to resolve.
run "manifest has no unresolved ('unknown') components" \
  "! grep -q '\"unknown\"' /etc/pi-devbox/build-manifest.json"
# OCI labels live in the image config, not the container fs — inspect them
# from the host docker rather than via `docker run`.
# A failed inspect is tolerated here (|| true) and reported as a missing label.
LBL=$(docker inspect --format '{{ index .Config.Labels "se.jordbo.pi-devbox.pi-extensions-ref" }}' "$IMAGE" 2>/dev/null || true)
if [ -n "$LBL" ] && [ "$LBL" != "<no value>" ]; then
  printf " ✅ OCI label se.jordbo.pi-devbox.pi-extensions-ref=%s\n" "$LBL"; PASS=$((PASS+1))
else
  printf " ❌ OCI label se.jordbo.pi-devbox.pi-extensions-ref missing or empty\n"; FAIL=$((FAIL+1))
fi

# ── Runtime deployment (needs entrypoint to run) ──────────────────────
echo ""
echo "── Runtime deployment ──"
# Spin up a long-running container WITHOUT overriding the entrypoint, so
# the baked entrypoint chain (entrypoint.sh → entrypoint-user.sh) runs and
# deploys pi-toolkit + pi-extensions to ~/.pi/agent/. Override CMD to
# tail -f /dev/null so the container stays alive while we docker-exec.
CID=$(docker run -d --rm "$IMAGE" tail -f /dev/null)
# Always remove the container on exit, whatever path the script leaves by.
cleanup() { docker rm -f "$CID" >/dev/null 2>&1 || true; }
trap cleanup EXIT

# Wait for entrypoint-user.sh to finish deploying pi-toolkit + extensions.
# Gate on the keybindings symlink (deployed by pi-toolkit), the mempalace.ts
# bridge (deployed last by entrypoint-user.sh), the three skill symlinks
# under ~/.agents/skills, AND ≥4 *.ts extensions present. Parallel build
# load can otherwise sample the *.ts count mid-deploy and produce a flake.
# See opencode-devbox c6f9d11 (2026-06-08) — same fix transplanted.
deploy_ready=0
for ((attempt = 1; attempt <= 45; attempt++)); do
  if docker exec "$CID" sh -c '
    test -L /home/developer/.pi/agent/keybindings.json && \
    test -L /home/developer/.pi/agent/extensions/mempalace.ts && \
    test -L /home/developer/.agents/skills/pi-devbox-environment && \
    test -L /home/developer/.agents/skills/pi-extensions && \
    test -L /home/developer/.agents/skills/mempalace && \
    count=$(ls -1 /home/developer/.pi/agent/extensions/*.ts 2>/dev/null | wc -l) && \
    [ "$count" -ge 4 ]
  ' >/dev/null 2>&1; then
    deploy_ready=1
    break
  fi
  sleep 1
done
# Not fatal: the individual runtime checks decide pass/fail. The note only
# makes a timeout visible in the log instead of passing silently.
if [ "$deploy_ready" -ne 1 ]; then
  printf ' ⚠️ entrypoint deployment not complete after 45 attempts — runtime checks may fail\n' >&2
fi

# Run a shell command inside the already-running container as the
# `developer` user and record whether it exited 0.
# Globals:   CID (read), PASS / FAIL (incremented)
# Arguments: $1 - human-readable label for the result line
#            $2 - command string handed to `sh -c` inside the container
# Outputs:   one ✅/❌ line on stdout; the command's own output is discarded
exec_test() {
  local label="$1"
  local cmd="$2"
  if docker exec -u developer "$CID" sh -c "$cmd" >/dev/null 2>&1; then
    printf " ✅ %s\n" "$label"; PASS=$((PASS+1))
  else
    printf " ❌ %s\n" "$label"; FAIL=$((FAIL+1))
  fi
}

# Command strings are single-quoted on purpose: $HOME and $(…) must expand
# inside the container, not on the host.
exec_test "keybindings.json (pi-toolkit)" 'test -L $HOME/.pi/agent/keybindings.json && echo ok'
exec_test "extensions ≥ 4 (pi-extensions)" 'count=$(ls -1 $HOME/.pi/agent/extensions/*.ts 2>/dev/null | wc -l); [ $count -ge 4 ] && echo "$count extensions"'
exec_test "mempalace.ts bridge" 'test -L $HOME/.pi/agent/extensions/mempalace.ts && echo ok'
exec_test "settings.json bootstrapped" 'test -f $HOME/.pi/agent/settings.json && echo ok'
exec_test "pi-devbox-environment skill linked" 'test -L $HOME/.agents/skills/pi-devbox-environment && test -f $HOME/.agents/skills/pi-devbox-environment/SKILL.md && echo ok'
exec_test "pi-extensions skill linked (fallback)" 'test -L $HOME/.agents/skills/pi-extensions && test -f $HOME/.agents/skills/pi-extensions/SKILL.md && echo ok'
exec_test "mempalace skill linked (fallback)" 'test -L $HOME/.agents/skills/mempalace && test -f $HOME/.agents/skills/mempalace/SKILL.md && echo ok'

# pi-fork + pi-observational-memory are registered by entrypoint-user.sh via
# `pi install /opt/<pkg>`, which runs slightly after the keybindings marker.
# Poll up to 15 times, one second apart, before asserting.
for ((attempt = 1; attempt <= 15; attempt++)); do
  if docker exec "$CID" grep -q pi-observational-memory \
    /home/developer/.pi/agent/settings.json 2>/dev/null; then
    break
  fi
  sleep 1
done
exec_test "pi-fork registered (fork tool)" 'grep -q pi-fork $HOME/.pi/agent/settings.json && echo ok'
exec_test "pi-observational-memory registered (recall tool)" 'grep -q pi-observational-memory $HOME/.pi/agent/settings.json && echo ok'

# pi-studio registration (studio variant only) — registered by the same
# entrypoint-user.sh local-path install loop as fork/obsmem.
if [ "${STUDIO_VARIANT:-0}" = "1" ]; then
  for ((attempt = 1; attempt <= 15; attempt++)); do
    if docker exec "$CID" grep -q pi-studio \
      /home/developer/.pi/agent/settings.json 2>/dev/null; then
      break
    fi
    sleep 1
  done
  exec_test "pi-studio registered (/studio command + studio_* tools)" \
    'grep -q pi-studio $HOME/.pi/agent/settings.json && echo ok'
fi

# ── /tmp/sshcm directory created by entrypoint ────────────────────────
exec_test "/tmp/sshcm dir mode 700 (ssh ControlMaster)" \
  'test -d /tmp/sshcm && [ "$(stat -c %a /tmp/sshcm)" = "700" ] && echo ok'

# ── Image size ────────────────────────────────────────────────────────
echo ""
echo "── Image size ──"

# Read human-readable layer sizes (one per line, e.g. "12.3MB") on stdin and
# print their sum as a whole number of MB. Blank lines, "0B" and lines that
# do not match <number><B|kB|MB|GB> are skipped.
# NOTE(review): the multipliers are 1024-based; confirm this matches the
# units `docker history` actually prints (it may use 1000-based units).
_sum_layer_sizes_mb() {
  python3 -c '
import sys, re
total=0.0
for line in sys.stdin:
    s=line.strip()
    if s in ("0B", ""): continue
    m=re.match(r"^([0-9.]+)(B|kB|MB|GB)$", s)
    if not m: continue
    v=float(m.group(1)); u=m.group(2)
    mult={"B":1/1048576,"kB":1/1024,"MB":1,"GB":1024}[u]
    total+=v*mult
print(int(total))
'
}

# Sum all layers via `docker history`. Docker's `image inspect --format='{{.Size}}'`
# returns ONLY the variant-unique layer when the base is content-addressed and
# shared (the case in this repo's two-phase build), which understates the
# user-facing image size by 2+ GB. Summing layer sizes from history is the
# metric Hub displays to users and the one we actually want to gate on.
# A failing pipeline would abort the script under `set -e` + pipefail before
# the summary; fall back to an empty value so the warning branch is taken.
SIZE_MB=$(docker history --format '{{.Size}}' "$IMAGE" | _sum_layer_sizes_mb) || SIZE_MB=""
if [ -z "$SIZE_MB" ] || [ "$SIZE_MB" = "0" ]; then
  printf " ⚠️ image size: could not parse — skipping check\n"
elif [ "$SIZE_MB" -le "$SIZE_THRESHOLD_MB" ]; then
  printf " ✅ size: %d MB (threshold %d MB)\n" "$SIZE_MB" "$SIZE_THRESHOLD_MB"; PASS=$((PASS+1))
else
  printf " ❌ size: %d MB exceeds threshold %d MB\n" "$SIZE_MB" "$SIZE_THRESHOLD_MB"; FAIL=$((FAIL+1))
fi

# ── Summary ───────────────────────────────────────────────────────────
echo ""
echo "=== Results: ${PASS} passed, ${FAIL} failed ==="
# Last command sets the script's exit status: 0 only when nothing failed.
[ "$FAIL" -eq 0 ]