#!/usr/bin/env bash
# smoke-test.sh — sanity checks for the pi-devbox image
#
# Usage: ./scripts/smoke-test.sh <image>
#
# Environment:
#   EXPECTED_PI_VERSION  (optional) substring that `pi --version` must print
#
# Verifies:
#   - pi binary present and (if EXPECTED_PI_VERSION set) matches CI's resolved version
#   - new v1.0.0 base additions (pandoc, graphviz, imagemagick, yq, tealdeer)
#   - tmux 0-indexing baked in /etc/tmux.conf (required for pi-studio variants)
#   - pi-toolkit cloned at /opt/pi-toolkit
#   - pi-extensions cloned at /opt/pi-extensions
#   - pi-fork + pi-observational-memory cloned with node_modules baked
#   - entrypoint deploys pi-toolkit keybindings symlink
#   - entrypoint deploys ≥4 extensions
#   - mempalace bridge symlink present
#   - settings.json bootstrapped
#   - pi-fork + pi-observational-memory registered via `pi install`
#   - (studio variant only, auto-detected) pi-studio cloned + prebuilt
#     client bundle present + registered via `pi install`
#   - image size within threshold

set -euo pipefail

# The image reference is mandatory; `:?` aborts with the usage hint when absent.
IMAGE="${1:?usage: $0 <image>}"
PASS=0; FAIL=0

# pi-devbox v1.0.0 (decoupled from opencode-devbox) added pandoc, graphviz,
# imagemagick, yq, tealdeer, and a baked /etc/tmux.conf. Local arm64 build
# observed 3.20 GB. CI amd64 builds may differ slightly; threshold below
# carries +300 MB margin to absorb arch differences without false reds.
# Tighten in a follow-up release once amd64 actuals are observed in CI logs.
readonly SIZE_THRESHOLD_MB=3500

#######################################
# Run a command in a throwaway container and record pass/fail by exit status.
# The image entrypoint is cleared, so only the baked filesystem is exercised.
# Command output is discarded.
# Globals:   IMAGE (read), PASS / FAIL (incremented)
# Arguments: $1 - label printed next to the result
#            $2 - command string handed to `sh -c` inside the container
#######################################
run() {
  local label="$1" cmd="$2"
  if docker run --rm --entrypoint="" "$IMAGE" sh -c "$cmd" >/dev/null 2>&1; then
    printf " ✅ %s\n" "$label"
    PASS=$((PASS + 1))
  else
    printf " ❌ %s\n" "$label"
    FAIL=$((FAIL + 1))
  fi
}

# Stricter version of `run` that asserts an expected substring in stdout.
# Catches the "image bytes silently identical to previous release" class of
# regression — Docker layer cache hit on `npm install -g <pkg>` because the
# bare command string is identical across builds, even when `latest` would
# resolve differently. Discovered 2026-05-23 — every pi-devbox release
# v0.74.0..v0.75.5 had been shipping the same image bytes.
#######################################
# Run a command in a throwaway container (entrypoint cleared) and require a
# literal substring in its combined stdout+stderr. The command's own exit
# status is ignored; only the captured text decides pass/fail.
# Globals:   IMAGE (read), PASS / FAIL (incremented)
# Arguments: $1 - label printed next to the result
#            $2 - command string handed to `sh -c` inside the container
#            $3 - literal (non-regex) substring that must appear in the output
#######################################
run_expect() {
  local label="$1" cmd="$2" expect="$3"
  local out
  out=$(docker run --rm --entrypoint="" "$IMAGE" sh -c "$cmd" 2>&1) || true
  # Builtin literal substring match instead of `echo | grep -Fq`:
  #  - an expectation starting with "-" can no longer be parsed as a grep option
  #  - no pipeline, so `pipefail` cannot turn a SIGPIPE on the writer (grep -q
  #    exits at the first match) into a false failure on large output
  if [[ "$out" == *"$expect"* ]]; then
    printf " ✅ %s (got %s)\n" "$label" "$expect"
    PASS=$((PASS + 1))
  else
    printf " ❌ %s — expected substring %q, got: %s\n" "$label" "$expect" "$out"
    FAIL=$((FAIL + 1))
  fi
}

echo "=== pi-devbox smoke test: $IMAGE ==="
echo ""

# ── Binaries ─────────────────────────────────────────────────────────
echo "── Binaries ──"
# When CI exports the version it resolved at build time, insist on it;
# otherwise just confirm the binary runs.
if [ -n "${EXPECTED_PI_VERSION:-}" ]; then
  run_expect "pi version matches build arg" "pi --version" "$EXPECTED_PI_VERSION"
else
  run "pi" "pi --version"
fi
run "node" "node --version"
run "git" "git --version"
run "aws" "aws --version"
run "uv" "uv --version"
run "nvim" "nvim --version"
run "mempalace-mcp" "mempalace-mcp --help"

# v1.0.0 base additions — verify presence and basic functionality.
run "pandoc" "pandoc --version"
run "graphviz (dot)" "dot -V"
run "imagemagick" "magick --version"
run "yq" "yq --version"
run "tldr (tealdeer)" "tldr --version"
run "socat" "socat -V"
run "studio-expose helper" "test -x /usr/local/bin/studio-expose"

# ── tmux 0-indexing (required for pi-studio variants) ─────────────────
echo ""
echo "── tmux config ──"
run_expect "/etc/tmux.conf has base-index 0" \
  "cat /etc/tmux.conf" "set -g base-index 0"
run_expect "/etc/tmux.conf has pane-base-index 0" \
  "cat /etc/tmux.conf" "set -g pane-base-index 0"

# ── Repo clones ───────────────────────────────────────────────────────
echo ""
echo "── Repo clones ──"
run "pi-toolkit clone" "test -d /opt/pi-toolkit && git -C /opt/pi-toolkit rev-parse --short HEAD"
run "pi-extensions clone" "test -d /opt/pi-extensions && git -C /opt/pi-extensions rev-parse --short HEAD"
run "pi-fork clone + node_modules" \
  "test -f /opt/pi-fork/package.json && test -d /opt/pi-fork/node_modules"
run "pi-observational-memory clone + node_modules" \
  "test -f /opt/pi-observational-memory/package.json && test -d /opt/pi-observational-memory/node_modules"

# pi-studio is present only in the :latest-studio variant. Auto-detect by
# probing /opt/pi-studio so this one script covers both variants.
if docker run --rm --entrypoint="" "$IMAGE" sh -c 'test -d /opt/pi-studio' >/dev/null 2>&1; then
  STUDIO_VARIANT=1
  echo " ℹ️ pi-studio detected — running studio assertions"
  run "pi-studio clone + node_modules" \
    "test -f /opt/pi-studio/package.json && test -d /opt/pi-studio/node_modules"
  run "pi-studio prebuilt client bundle" \
    "test -f /opt/pi-studio/client/studio-client.js"
else
  STUDIO_VARIANT=0
  echo " ℹ️ pi-studio not present (non-studio variant) — skipping studio clone checks"
fi

# ── Runtime deployment (needs entrypoint to run) ──────────────────────
echo ""
echo "── Runtime deployment ──"
# Spin up a long-running container WITHOUT overriding the entrypoint, so
# the baked entrypoint chain (entrypoint.sh → entrypoint-user.sh) runs and
# deploys pi-toolkit + pi-extensions to ~/.pi/agent/. Override CMD to
# tail -f /dev/null so the container stays alive while we docker-exec.
CID=$(docker run -d --rm "$IMAGE" tail -f /dev/null)
# Best-effort removal on every exit path; errors are ignored because the
# container may already be gone (it was started with --rm).
cleanup() { docker rm -f "$CID" >/dev/null 2>&1 || true; }
trap cleanup EXIT

# Wait for entrypoint-user.sh to finish deploying pi-toolkit + extensions.
# Gate on BOTH the keybindings symlink (deployed by pi-toolkit) AND the
# mempalace.ts bridge (deployed last by entrypoint-user.sh) AND ≥4 *.ts
# extensions present. Parallel build load can otherwise sample the *.ts
# count mid-deploy and produce a flake. See opencode-devbox c6f9d11
# (2026-06-08) — same fix transplanted.
#######################################
# Poll a probe inside the long-running container once per second.
# Globals:   CID (read)
# Arguments: $1 - maximum number of attempts
#            $2 - probe command string handed to `sh -c` via `docker exec`
# Returns:   0 as soon as the probe succeeds, 1 if every attempt failed
#######################################
wait_in_container() {
  local tries="$1" probe="$2" attempt
  for ((attempt = 0; attempt < tries; attempt++)); do
    if docker exec "$CID" sh -c "$probe" >/dev/null 2>&1; then
      return 0
    fi
    sleep 1
  done
  return 1
}

# Deployment readiness: keybindings symlink, mempalace bridge symlink and at
# least four *.ts extensions must all be present before assertions start.
# A timeout is not fatal here (the assertions below report the failure), but
# it is surfaced on stderr so a red run is easy to diagnose.
wait_in_container 45 '
  test -L /home/developer/.pi/agent/keybindings.json && \
  test -L /home/developer/.pi/agent/extensions/mempalace.ts && \
  count=$(ls -1 /home/developer/.pi/agent/extensions/*.ts 2>/dev/null | wc -l) && \
  [ "$count" -ge 4 ]
' || printf ' ⚠️ timed out waiting for entrypoint deployment\n' >&2

#######################################
# Run a command as user `developer` inside the long-running container and
# record pass/fail by exit status. Command output is discarded.
# Globals:   CID (read), PASS / FAIL (incremented)
# Arguments: $1 - label printed next to the result
#            $2 - command string handed to `sh -c` via `docker exec`
#######################################
exec_test() {
  local label="$1" cmd="$2"
  if docker exec -u developer "$CID" sh -c "$cmd" >/dev/null 2>&1; then
    printf " ✅ %s\n" "$label"
    PASS=$((PASS + 1))
  else
    printf " ❌ %s\n" "$label"
    FAIL=$((FAIL + 1))
  fi
}

# The command strings are single-quoted on purpose: $HOME and $count must be
# expanded by the shell inside the container, not by this script.
exec_test "keybindings.json (pi-toolkit)" 'test -L $HOME/.pi/agent/keybindings.json && echo ok'
exec_test "extensions ≥ 4 (pi-extensions)" 'count=$(ls -1 $HOME/.pi/agent/extensions/*.ts 2>/dev/null | wc -l); [ $count -ge 4 ] && echo "$count extensions"'
exec_test "mempalace.ts bridge" 'test -L $HOME/.pi/agent/extensions/mempalace.ts && echo ok'
exec_test "settings.json bootstrapped" 'test -f $HOME/.pi/agent/settings.json && echo ok'

# pi-fork + pi-observational-memory are registered by entrypoint-user.sh via
# `pi install /opt/<pkg>`, which runs slightly after the keybindings marker.
# Gate on BOTH names: both are asserted immediately below, so waiting for only
# one of them would leave the other exposed to a race.
wait_in_container 15 '
  grep -q pi-fork /home/developer/.pi/agent/settings.json && \
  grep -q pi-observational-memory /home/developer/.pi/agent/settings.json
' || printf ' ⚠️ timed out waiting for pi-fork / pi-observational-memory registration\n' >&2

exec_test "pi-fork registered (fork tool)" 'grep -q pi-fork $HOME/.pi/agent/settings.json && echo ok'
exec_test "pi-observational-memory registered (recall tool)" 'grep -q pi-observational-memory $HOME/.pi/agent/settings.json && echo ok'

# pi-studio registration (studio variant only) — registered by the same
# entrypoint-user.sh local-path install loop as fork/obsmem.
# Studio variant only: give the entrypoint up to 15 s to register pi-studio
# in settings.json, then assert it.
if [ "${STUDIO_VARIANT:-0}" = "1" ]; then
  for ((i = 0; i < 15; i++)); do
    if docker exec "$CID" grep -q pi-studio \
      /home/developer/.pi/agent/settings.json 2>/dev/null; then
      break
    fi
    sleep 1
  done
  exec_test "pi-studio registered (/studio command + studio_* tools)" \
    'grep -q pi-studio $HOME/.pi/agent/settings.json && echo ok'
fi

# ── /tmp/sshcm directory created by entrypoint ────────────────────────
exec_test "/tmp/sshcm dir mode 700 (ssh ControlMaster)" \
  'test -d /tmp/sshcm && [ "$(stat -c %a /tmp/sshcm)" = "700" ] && echo ok'

# ── Image size ────────────────────────────────────────────────────────
echo ""
echo "── Image size ──"

#######################################
# Print the total size of all layers of an image in whole decimal megabytes.
#
# Sum all layers via `docker history`. Docker's `image inspect --format='{{.Size}}'`
# returns ONLY the variant-unique layer when the base is content-addressed and
# shared (the case in this repo's two-phase build), which understates the
# user-facing image size by 2+ GB. Summing layer sizes from history is the
# metric Hub displays to users and the one we actually want to gate on.
#
# `--human=false` makes `{{.Size}}` an exact byte count, so there is no unit
# parsing and no per-layer rounding. The sum is divided by 10^6 because
# Docker's human-readable sizes (kB/MB/GB) are decimal units.
# Arguments: $1 - image reference
# Outputs:   integer MB on stdout ("0" when no layer sizes were read)
#######################################
image_size_mb() {
  local image="$1"
  docker history --human=false --format '{{.Size}}' "$image" \
    | awk '{ total += $1 } END { printf "%d\n", total / 1000000 }'
}

SIZE_MB=$(image_size_mb "$IMAGE")

if [ -z "$SIZE_MB" ] || [ "$SIZE_MB" = "0" ]; then
  printf " ⚠️ image size: could not parse — skipping check\n"
elif [ "$SIZE_MB" -le "$SIZE_THRESHOLD_MB" ]; then
  printf " ✅ size: %d MB (threshold %d MB)\n" "$SIZE_MB" "$SIZE_THRESHOLD_MB"
  PASS=$((PASS + 1))
else
  printf " ❌ size: %d MB exceeds threshold %d MB\n" "$SIZE_MB" "$SIZE_THRESHOLD_MB"
  FAIL=$((FAIL + 1))
fi

# ── Summary ───────────────────────────────────────────────────────────
echo ""
echo "=== Results: ${PASS} passed, ${FAIL} failed ==="
# Last command: its status is the script's exit code (non-zero on any failure).
[ "$FAIL" -eq 0 ]