Files
pi-devbox/scripts/smoke-test.sh
T
Joakim Persson 7551947466 feat(skills): add mempalace proactive-load directive for containers
Baking the mempalace fallback skill fixed *availability*, but mempalace had
no proactive-load directive anywhere (pi-toolkit's global AGENTS.md only
points to pi-extensions), so a new container would still surface it only via
description-matching — the same under-utilisation the pi-extensions directive
was created to fix.

Add a session-start pointer to the pi-devbox managed AGENTS.md block
(pi-global-AGENTS.append.md): gated to pi-devbox containers and conditional on
the MemPalace MCP tools being present. Memory continuity matters most in a
frequently-recreated container — the palace is its only cross-recreate memory.

- pi-global-AGENTS.append.md: '## Session start: load the mempalace skill'.
- smoke-test: assert the pointer merges into the global AGENTS.md at build.
- docs: VENDORED.md, README, CHANGELOG [Unreleased].

Now both skills are complete in pi-devbox: directive + skill file.
pi-extensions = directive (pi-toolkit) + baked skill; mempalace = directive
(this block) + baked skill.
2026-06-23 15:54:13 +02:00

270 lines
13 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# smoke-test.sh — sanity checks for the pi-devbox image
#
# Usage: ./scripts/smoke-test.sh <image>
#
# Verifies:
# - pi binary present and (if EXPECTED_PI_VERSION set) matches CI's resolved version
# - new v1.0.0 base additions (pandoc, graphviz, imagemagick, yq, tealdeer)
# - tmux 0-indexing baked in /etc/tmux.conf (required for pi-studio variants)
# - pi-toolkit cloned at /opt/pi-toolkit
# - pi-extensions cloned at /opt/pi-extensions
# - pi-fork + pi-observational-memory cloned with node_modules baked
# - entrypoint deploys pi-toolkit keybindings symlink
# - entrypoint deploys ≥4 extensions
# - mempalace bridge symlink present
# - settings.json bootstrapped
# - pi-fork + pi-observational-memory registered via `pi install`
# - (studio variant only, auto-detected) pi-studio cloned + prebuilt
# client bundle present + registered via `pi install`
# - image size within threshold
# Strict mode: abort on an unhandled command failure, on use of an unset
# variable, and when any stage of a pipeline fails.
set -euo pipefail
# Image reference under test (first positional argument). The `:?` expansion
# makes the script exit with the usage text when it is missing or empty.
IMAGE="${1:?usage: $0 <image>}"
# Running tallies; the check helpers increment them and the summary at the
# end of the script turns FAIL into the exit status.
PASS=0; FAIL=0
# pi-devbox v1.0.0 (decoupled from opencode-devbox) added pandoc, graphviz,
# imagemagick, yq, tealdeer, and a baked /etc/tmux.conf. Local arm64 build
# observed 3.20 GB. CI amd64 builds may differ slightly; threshold below
# carries +300 MB margin to absorb arch differences without false reds.
# Tighten in a follow-up release once amd64 actuals are observed in CI logs.
SIZE_THRESHOLD_MB=3500
# Run one probe in a throwaway container and record whether it succeeded.
# The image entrypoint is blanked so only the probe itself executes, and the
# probe's stdout/stderr are discarded — only its exit status matters.
# Globals:   IMAGE (read), PASS / FAIL (incremented)
# Arguments: $1 - label printed in the result line
#            $2 - command string handed to `sh -c` inside the container
# Outputs:   one ✅ / ❌ line on stdout
# Returns:   0 in both outcomes
run() {
  local description=$1
  local probe=$2

  if ! docker run --rm --entrypoint="" "$IMAGE" sh -c "$probe" >/dev/null 2>&1; then
    printf " ❌ %s\n" "$description"
    FAIL=$((FAIL + 1))
    return 0
  fi

  printf " ✅ %s\n" "$description"
  PASS=$((PASS + 1))
}
# Stricter version of `run` that asserts an expected substring in the probe's
# combined stdout/stderr.
# Catches the "image bytes silently identical to previous release" class of
# regression — Docker layer cache hit on `npm install -g <pkg>` because the
# bare command string is identical across builds, even when `latest` would
# resolve differently. Discovered 2026-05-23 — every pi-devbox release
# v0.74.0..v0.75.5 had been shipping the same image bytes.
#
# Globals:   IMAGE (read), PASS / FAIL (incremented)
# Arguments: $1 - label printed in the result line
#            $2 - command string handed to `sh -c` inside the container
#            $3 - literal (fixed-string) text that must occur in the output
# Outputs:   one ✅ / ❌ line on stdout; the ❌ line includes the captured output
# Returns:   0 in both outcomes
run_expect() {
  local label="$1" cmd="$2" expect="$3"
  local out
  # The probe's exit status is deliberately ignored: only its text is judged.
  out=$(docker run --rm --entrypoint="" "$IMAGE" sh -c "$cmd" 2>&1) || true
  # Feed grep from a here-string instead of `echo | grep -q`: with
  # `set -o pipefail`, grep -q exiting at the first match can SIGPIPE the
  # writer on large output and turn a real match into a failed pipeline.
  # `--` keeps an expectation that starts with "-" from being read as an option.
  if grep -Fq -- "$expect" <<<"$out"; then
    printf " ✅ %s (got %s)\n" "$label" "$expect"
    PASS=$((PASS + 1))
  else
    printf " ❌ %s — expected substring %q, got: %s\n" "$label" "$expect" "$out"
    FAIL=$((FAIL + 1))
  fi
}
# Banner naming the image under test.
echo "=== pi-devbox smoke test: $IMAGE ==="
echo ""
# ── Binaries ─────────────────────────────────────────────────────────
echo "── Binaries ──"
# With EXPECTED_PI_VERSION set, `pi --version` must print that exact text;
# without it, the command merely has to exit successfully.
if [ -n "${EXPECTED_PI_VERSION:-}" ]; then
run_expect "pi version matches build arg" "pi --version" "$EXPECTED_PI_VERSION"
else
run "pi" "pi --version"
fi
# Presence checks: each tool only has to exit 0 for the given invocation.
run "node" "node --version"
run "git" "git --version"
run "aws" "aws --version"
run "uv" "uv --version"
run "nvim" "nvim --version"
run "mempalace-mcp" "mempalace-mcp --help"
# v1.0.0 base additions — verify presence and basic functionality.
run "pandoc" "pandoc --version"
run "graphviz (dot)" "dot -V"
run "imagemagick" "magick --version"
run "yq" "yq --version"
run "tldr (tealdeer)" "tldr --version"
run "socat" "socat -V"
run "studio-expose helper" "test -x /usr/local/bin/studio-expose"
# Files baked under /usr/local/share/pi-devbox, and the markers they are
# expected to have left in pi-toolkit's global AGENTS.md.
run "image-baked pi-devbox-environment skill" \
"test -f /usr/local/share/pi-devbox/skills/pi-devbox-environment/SKILL.md"
run "global-AGENTS append snippet present" \
"test -f /usr/local/share/pi-devbox/pi-global-AGENTS.append.md"
run "pi-devbox block merged into pi-global-AGENTS.md" \
"grep -q 'pi-devbox:managed-block' /opt/pi-toolkit/pi-global-AGENTS.md"
run "mempalace session-start pointer merged into global AGENTS.md" \
"grep -q 'load the mempalace skill' /opt/pi-toolkit/pi-global-AGENTS.md"
# Vendored fallback skills (so a no-skillset container still resolves the
# AGENTS.md 'read the pi-extensions skill' pointer).
run "image-baked pi-extensions fallback skill" \
"test -f /usr/local/share/pi-devbox/skills/pi-extensions/SKILL.md"
run "pi-extensions skill ships its helper" \
"test -f /usr/local/share/pi-devbox/skills/pi-extensions/evaluate-extension-usage.py"
run "image-baked mempalace fallback skill" \
"test -f /usr/local/share/pi-devbox/skills/mempalace/SKILL.md"
# Layered freshness: when the pinned pi-extensions clone carries the skill, the
# baked copy must be the fresh package copy (Option 1), not the stale snapshot.
# The probe passes vacuously when the clone has no skill/SKILL.md; otherwise
# `cmp -s` requires the two files to be byte-identical.
run "pi-extensions skill refreshed from package when present" \
"if [ -f /opt/pi-extensions/skill/SKILL.md ]; then cmp -s /opt/pi-extensions/skill/SKILL.md /usr/local/share/pi-devbox/skills/pi-extensions/SKILL.md; else true; fi"
# ── tmux 0-indexing (required for pi-studio variants) ─────────────────
echo ""
echo "── tmux config ──"
# Both settings are matched as fixed substrings of the file's contents.
run_expect "/etc/tmux.conf has base-index 0" \
"cat /etc/tmux.conf" "set -g base-index 0"
run_expect "/etc/tmux.conf has pane-base-index 0" \
"cat /etc/tmux.conf" "set -g pane-base-index 0"
# ── Repo clones ───────────────────────────────────────────────────────
echo ""
echo "── Repo clones ──"
# For the two git clones, the directory must exist and `git rev-parse` must be
# able to resolve HEAD in it.
run "pi-toolkit clone" "test -d /opt/pi-toolkit && git -C /opt/pi-toolkit rev-parse --short HEAD"
run "pi-extensions clone" "test -d /opt/pi-extensions && git -C /opt/pi-extensions rev-parse --short HEAD"
# For the node packages, package.json plus a node_modules directory stand in
# for "cloned with dependencies baked".
run "pi-fork clone + node_modules" \
"test -f /opt/pi-fork/package.json && test -d /opt/pi-fork/node_modules"
run "pi-observational-memory clone + node_modules" \
"test -f /opt/pi-observational-memory/package.json && test -d /opt/pi-observational-memory/node_modules"
# pi-studio is present only in the :latest-studio variant. Auto-detect by
# probing /opt/pi-studio so this one script covers both variants.
# STUDIO_VARIANT (1 = studio image, 0 = plain image) is consulted again by the
# later pi-studio registration check.
# The detection probe is not a pass/fail check itself, so it bypasses `run`
# and leaves the PASS/FAIL tallies untouched.
if docker run --rm --entrypoint="" "$IMAGE" sh -c 'test -d /opt/pi-studio' >/dev/null 2>&1; then
  STUDIO_VARIANT=1
  # The informational glyph is written out in full (ℹ + variation selector);
  # a bare variation selector renders as nothing in most terminals.
  echo " ℹ️ pi-studio detected — running studio assertions"
  run "pi-studio clone + node_modules" \
    "test -f /opt/pi-studio/package.json && test -d /opt/pi-studio/node_modules"
  run "pi-studio prebuilt client bundle" \
    "test -f /opt/pi-studio/client/studio-client.js"
else
  STUDIO_VARIANT=0
  echo " ℹ️ pi-studio not present (non-studio variant) — skipping studio clone checks"
fi
# ── Build provenance (manifest + OCI labels) ─────────────────────────
echo ""
echo "── Build provenance ──"
run "/etc/pi-devbox/build-manifest.json present" \
"test -f /etc/pi-devbox/build-manifest.json"
# Key presence is checked as a quoted fixed substring of the raw JSON text;
# the manifest is not parsed.
run_expect "manifest records pi-extensions component" \
"cat /etc/pi-devbox/build-manifest.json" '"pi-extensions"'
run_expect "manifest records pi_version" \
"cat /etc/pi-devbox/build-manifest.json" '"pi_version"'
# Every component must be a resolved commit (or null for pi-studio in the
# non-studio variant) — 'unknown' means a clone silently failed to resolve.
run "manifest has no unresolved ('unknown') components" \
"! grep -q '\"unknown\"' /etc/pi-devbox/build-manifest.json"
# OCI labels live in the image config, not the container fs — inspect them
# from the host docker rather than via `docker run`.
# `|| true` keeps a failing `docker inspect` from aborting the script under
# `set -e`; the resulting empty LBL is then reported as a failed check.
LBL=$(docker inspect --format '{{ index .Config.Labels "se.jordbo.pi-devbox.pi-extensions-ref" }}' "$IMAGE" 2>/dev/null || true)
# Both an empty result and the template placeholder `<no value>` count as
# "label missing".
if [ -n "$LBL" ] && [ "$LBL" != "<no value>" ]; then
printf " ✅ OCI label se.jordbo.pi-devbox.pi-extensions-ref=%s\n" "$LBL"; PASS=$((PASS+1))
else
printf " ❌ OCI label se.jordbo.pi-devbox.pi-extensions-ref missing or empty\n"; FAIL=$((FAIL+1))
fi
# ── Runtime deployment (needs entrypoint to run) ──────────────────────
echo ""
echo "── Runtime deployment ──"

# Start a detached container with the image's own entrypoint left in place,
# so the baked chain (entrypoint.sh → entrypoint-user.sh) runs and deploys
# pi-toolkit + pi-extensions to ~/.pi/agent/. Only CMD is replaced, with
# `tail -f /dev/null`, to keep the container alive for the docker-exec checks.
CID=$(docker run -d --rm "$IMAGE" tail -f /dev/null)

# Force-remove the test container on every exit path; removal errors are
# ignored.
cleanup() {
  docker rm -f "$CID" >/dev/null 2>&1 || true
}
trap cleanup EXIT

# Readiness probe, executed inside the container. It succeeds only once the
# keybindings symlink (pi-toolkit), the mempalace.ts bridge (deployed last by
# entrypoint-user.sh), the three skill symlinks, and at least four *.ts
# extensions are all in place. Gating on every marker keeps parallel build
# load from sampling the *.ts count mid-deploy and producing a flake. See
# opencode-devbox c6f9d11 (2026-06-08) — same fix transplanted.
# The probe text is kept verbatim, hence the unindented lines.
DEPLOY_READY_PROBE='
test -L /home/developer/.pi/agent/keybindings.json && \
test -L /home/developer/.pi/agent/extensions/mempalace.ts && \
test -L /home/developer/.agents/skills/pi-devbox-environment && \
test -L /home/developer/.agents/skills/pi-extensions && \
test -L /home/developer/.agents/skills/mempalace && \
count=$(ls -1 /home/developer/.pi/agent/extensions/*.ts 2>/dev/null | wc -l) && \
[ "$count" -ge 4 ]
'

# Poll once per second, at most 45 times. Running out of attempts is not an
# error here: the assertions that follow report whatever is still missing.
for _ in {1..45}; do
  if docker exec "$CID" sh -c "$DEPLOY_READY_PROBE" >/dev/null 2>&1; then
    break
  fi
  sleep 1
done
# Run one probe as user `developer` inside the already-running test container
# and record whether it succeeded. The probe's stdout/stderr are discarded —
# only its exit status matters.
# Globals:   CID (read), PASS / FAIL (incremented)
# Arguments: $1 - label printed in the result line
#            $2 - command string handed to `sh -c` inside the container
# Outputs:   one ✅ / ❌ line on stdout
# Returns:   0 in both outcomes
exec_test() {
  local description=$1
  local probe=$2

  if ! docker exec -u developer "$CID" sh -c "$probe" >/dev/null 2>&1; then
    printf " ❌ %s\n" "$description"
    FAIL=$((FAIL + 1))
    return 0
  fi

  printf " ✅ %s\n" "$description"
  PASS=$((PASS + 1))
}
# Poll (once per second, at most 15 times) until settings.json in the test
# container mentions the given name. Always returns 0; the assertion that
# follows each call decides pass/fail.
# Globals:   CID (read)
# Arguments: $1 - text to look for in settings.json
_await_settings_entry() {
  local needle=$1
  for _ in {1..15}; do
    if docker exec "$CID" grep -q "$needle" \
      /home/developer/.pi/agent/settings.json 2>/dev/null; then
      return 0
    fi
    sleep 1
  done
  return 0
}

# Files and symlinks the entrypoint is expected to have deployed for the
# `developer` user.
exec_test "keybindings.json (pi-toolkit)" \
  'test -L $HOME/.pi/agent/keybindings.json && echo ok'
exec_test "extensions ≥ 4 (pi-extensions)" \
  'count=$(ls -1 $HOME/.pi/agent/extensions/*.ts 2>/dev/null | wc -l); [ $count -ge 4 ] && echo "$count extensions"'
exec_test "mempalace.ts bridge" \
  'test -L $HOME/.pi/agent/extensions/mempalace.ts && echo ok'
exec_test "settings.json bootstrapped" \
  'test -f $HOME/.pi/agent/settings.json && echo ok'
exec_test "pi-devbox-environment skill linked" \
  'test -L $HOME/.agents/skills/pi-devbox-environment && test -f $HOME/.agents/skills/pi-devbox-environment/SKILL.md && echo ok'
exec_test "pi-extensions skill linked (fallback)" \
  'test -L $HOME/.agents/skills/pi-extensions && test -f $HOME/.agents/skills/pi-extensions/SKILL.md && echo ok'
exec_test "mempalace skill linked (fallback)" \
  'test -L $HOME/.agents/skills/mempalace && test -f $HOME/.agents/skills/mempalace/SKILL.md && echo ok'

# pi-fork + pi-observational-memory are registered by entrypoint-user.sh via
# `pi install /opt/<pkg>`, which runs slightly after the keybindings marker,
# so give the registration a short grace period before asserting.
_await_settings_entry pi-observational-memory
exec_test "pi-fork registered (fork tool)" \
  'grep -q pi-fork $HOME/.pi/agent/settings.json && echo ok'
exec_test "pi-observational-memory registered (recall tool)" \
  'grep -q pi-observational-memory $HOME/.pi/agent/settings.json && echo ok'

# pi-studio registration (studio variant only) — registered by the same
# entrypoint-user.sh local-path install loop as fork/obsmem.
if [[ "${STUDIO_VARIANT:-0}" == "1" ]]; then
  _await_settings_entry pi-studio
  exec_test "pi-studio registered (/studio command + studio_* tools)" \
    'grep -q pi-studio $HOME/.pi/agent/settings.json && echo ok'
fi

# ── /tmp/sshcm directory created by entrypoint ────────────────────────
exec_test "/tmp/sshcm dir mode 700 (ssh ControlMaster)" \
  'test -d /tmp/sshcm && [ "$(stat -c %a /tmp/sshcm)" = "700" ] && echo ok'
# ── Image size ────────────────────────────────────────────────────────
echo ""
echo "── Image size ──"

# Convert `docker history --format '{{.Size}}'` lines on stdin (e.g. "0B",
# "12.3kB", "77.8MB", "1.2GB") into one truncated whole-MB total on stdout.
# The sizes are decimal SI values (kB/MB/GB/TB are powers of 1000), so every
# unit is scaled by powers of 1000; scaling some units by 1024 while taking MB
# as-is would mix unit systems and overstate GB-sized layers by 2.4 %.
# Lines that are blank or do not look like "<number><unit>" are ignored.
# Outputs: the total as an integer (0 when nothing was parsed).
_layer_sizes_to_mb() {
  awk '
    {
      gsub(/^[ \t\r]+|[ \t\r]+$/, "")
      if (!match($0, /^[0-9.]+/)) next
      amount = substr($0, 1, RLENGTH) + 0
      unit = substr($0, RLENGTH + 1)
      if (unit == "B") total += amount / 1000000
      else if (unit == "kB") total += amount / 1000
      else if (unit == "MB") total += amount
      else if (unit == "GB") total += amount * 1000
      else if (unit == "TB") total += amount * 1000000
    }
    END { printf "%d\n", total }
  '
}

# Sum all layers via `docker history`. Docker's `image inspect --format='{{.Size}}'`
# returns ONLY the variant-unique layer when the base is content-addressed and
# shared (the case in this repo's two-phase build), which understates the
# user-facing image size by 2+ GB. Summing layer sizes from history is the
# metric Hub displays to users and the one we actually want to gate on.
SIZE_MB=$(docker history --format '{{.Size}}' "$IMAGE" | _layer_sizes_to_mb)

# A total of 0 means no layer size could be parsed: warn and skip the gate
# without counting it as a pass or a failure.
if [ -z "$SIZE_MB" ] || [ "$SIZE_MB" = "0" ]; then
  printf " ⚠️ image size: could not parse — skipping check\n"
elif [ "$SIZE_MB" -le "$SIZE_THRESHOLD_MB" ]; then
  printf " ✅ size: %d MB (threshold %d MB)\n" "$SIZE_MB" "$SIZE_THRESHOLD_MB"
  PASS=$((PASS + 1))
else
  printf " ❌ size: %d MB exceeds threshold %d MB\n" "$SIZE_MB" "$SIZE_THRESHOLD_MB"
  FAIL=$((FAIL + 1))
fi
# ── Summary ───────────────────────────────────────────────────────────
echo ""
echo "=== Results: ${PASS} passed, ${FAIL} failed ==="
# Last command of the script: its status becomes the exit code, so the run
# succeeds only when no check failed.
[ "$FAIL" -eq 0 ]