Files
pi-devbox/scripts/smoke-test.sh
T
pi a78e59fb5b feat(studio): add :latest-studio variant (PR-3)
Bundle pi-studio (omaclaren/pi-studio) as a new -studio image variant:
browser prompt editor, KaTeX/Mermaid preview, tmux-backed literate REPLs,
/studio command + studio_* agent tools.

- Dockerfile.variant: INSTALL_STUDIO + PI_STUDIO_REPO/REF args; vendor
  pi-studio to /opt/pi-studio (no build step — prebuilt client in git;
  npm install --omit=dev for 3 prod deps). STUDIO_PORT=8765 advisory.
- entrypoint-user.sh: register /opt/pi-studio via the existing pi install
  local-path loop (auto-skips in non-studio variant).
- smoke-test.sh: auto-detected studio assertions (clone + prebuilt client
  + pi install registration).
- CI: resolve PI_STUDIO_REF to a SHA; independent smoke-studio +
  build-variant-studio jobs that gate ONLY the -studio tags, so a studio
  failure never blocks the core :latest release.
- README: 'Using pi-studio' section documenting the container access
  reality — pi-studio hard-binds 127.0.0.1 (index.ts .listen(port,
  '127.0.0.1'), no --host flag), so -p publish alone can't reach it.
  Documents host-networking and loopback-bridge paths, the remote ssh -L
  forward, and the mosh caveat (no port forwarding; run parallel ssh -L).
- CHANGELOG/AGENTS/DOCKER_HUB updated. Will tag as v1.1.0 (minor).

No tag created — stopping for review.
2026-06-10 23:15:29 +02:00

220 lines
10 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# smoke-test.sh — sanity checks for the pi-devbox image
#
# Usage: ./scripts/smoke-test.sh <image>
#
# Verifies:
# - pi binary present and (if EXPECTED_PI_VERSION set) matches CI's resolved version
# - new v1.0.0 base additions (pandoc, graphviz, imagemagick, yq, tealdeer)
# - tmux 0-indexing baked in /etc/tmux.conf (required for pi-studio variants)
# - pi-toolkit cloned at /opt/pi-toolkit
# - pi-extensions cloned at /opt/pi-extensions
# - pi-fork + pi-observational-memory cloned with node_modules baked
# - entrypoint deploys pi-toolkit keybindings symlink
# - entrypoint deploys ≥4 extensions
# - mempalace bridge symlink present
# - settings.json bootstrapped
# - pi-fork + pi-observational-memory registered via `pi install`
# - (studio variant only, auto-detected) pi-studio cloned + prebuilt
# client bundle present + registered via `pi install`
# - image size within threshold
# Strict mode: stop on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Image under test — mandatory first positional argument.
IMAGE="${1:?usage: $0 <image>}"

# Running tallies of passed / failed assertions, printed in the summary.
PASS=0
FAIL=0

# Upper bound (in MB) used by the image-size check.
# pi-devbox v1.0.0 (decoupled from opencode-devbox) added pandoc, graphviz,
# imagemagick, yq, tealdeer, and a baked /etc/tmux.conf. A local arm64 build
# was observed at 3.20 GB; CI amd64 builds may differ slightly, so the value
# carries a +300 MB margin to absorb arch differences without false reds.
# Tighten in a follow-up release once amd64 actuals are observed in CI logs.
SIZE_THRESHOLD_MB=3500
# run LABEL CMD
# Execute CMD via `sh -c` in a throwaway container of $IMAGE with the image
# entrypoint cleared. Output is discarded; only the exit status decides the
# verdict. Prints one ✅/❌ line and bumps the global PASS or FAIL counter.
# Always returns 0 so a failed check never trips `set -e`.
run() {
  local label="$1"
  local cmd="$2"
  local status=0

  docker run --rm --entrypoint="" "$IMAGE" sh -c "$cmd" >/dev/null 2>&1 || status=$?

  case "$status" in
    0)
      printf " ✅ %s\n" "$label"
      PASS=$((PASS+1))
      ;;
    *)
      printf " ❌ %s\n" "$label"
      FAIL=$((FAIL+1))
      ;;
  esac
  return 0
}
# run_expect LABEL CMD EXPECT
# Stricter version of `run` that asserts an expected substring in the
# command's combined stdout+stderr instead of looking at its exit status.
# Catches the "image bytes silently identical to previous release" class of
# regression — Docker layer cache hit on `npm install -g <pkg>` because the
# bare command string is identical across builds, even when `latest` would
# resolve differently. Discovered 2026-05-23 — every pi-devbox release
# v0.74.0..v0.75.5 had been shipping the same image bytes.
#
# Globals:   IMAGE (read), PASS / FAIL (incremented)
# Arguments: $1 label, $2 shell command run in the container, $3 literal
#            substring that must appear in the output
# Returns:   always 0; the verdict is recorded in PASS / FAIL
run_expect() {
  local label="$1"
  local cmd="$2"
  local expect="$3"
  local out

  # An empty needle matches any output, which would make the check vacuous.
  if [ -z "$expect" ]; then
    printf " ❌ %s — empty expected substring (check would pass vacuously)\n" "$label"
    FAIL=$((FAIL+1))
    return 0
  fi

  # The command's own exit status is deliberately ignored: only its output
  # is judged.
  out=$(docker run --rm --entrypoint="" "$IMAGE" sh -c "$cmd" 2>&1) || true

  # Feed grep from a here-string rather than `echo … | grep -q`: under
  # `pipefail`, grep -q exiting on the first match can SIGPIPE the writer and
  # turn a real match into a failure. `--` keeps a needle that starts with
  # "-" from being parsed as grep options.
  if grep -Fq -- "$expect" <<<"$out"; then
    printf " ✅ %s (got %s)\n" "$label" "$expect"
    PASS=$((PASS+1))
  else
    printf " ❌ %s — expected substring %q, got: %s\n" "$label" "$expect" "$out"
    FAIL=$((FAIL+1))
  fi
  return 0
}
printf '%s\n' "=== pi-devbox smoke test: $IMAGE ===" ""

# ── Binaries ─────────────────────────────────────────────────────────
echo "── Binaries ──"

# With EXPECTED_PI_VERSION set, pin the reported pi version to it; otherwise
# just confirm that the binary runs.
if [ -z "${EXPECTED_PI_VERSION:-}" ]; then
  run "pi" "pi --version"
else
  run_expect "pi version matches build arg" "pi --version" "$EXPECTED_PI_VERSION"
fi

# "label|command" pairs, checked in order. The entries from pandoc onwards are
# the v1.0.0 base additions — verify presence and basic functionality.
binary_checks=(
  "node|node --version"
  "git|git --version"
  "aws|aws --version"
  "uv|uv --version"
  "nvim|nvim --version"
  "mempalace-mcp|mempalace-mcp --help"
  "pandoc|pandoc --version"
  "graphviz (dot)|dot -V"
  "imagemagick|magick --version"
  "yq|yq --version"
  "tldr (tealdeer)|tldr --version"
)
for check in "${binary_checks[@]}"; do
  # Text before the first "|" is the label, the remainder is the command.
  run "${check%%|*}" "${check#*|}"
done
# ── tmux 0-indexing (required for pi-studio variants) ─────────────────
printf '\n%s\n' "── tmux config ──"
# Both window and pane numbering must be declared 0-based in /etc/tmux.conf.
for tmux_option in base-index pane-base-index; do
  run_expect "/etc/tmux.conf has ${tmux_option} 0" \
    "cat /etc/tmux.conf" "set -g ${tmux_option} 0"
done
# ── Repo clones ───────────────────────────────────────────────────────
printf '\n%s\n' "── Repo clones ──"

# Plain git checkouts: the directory exists and HEAD resolves.
for repo in pi-toolkit pi-extensions; do
  run "${repo} clone" "test -d /opt/${repo} && git -C /opt/${repo} rev-parse --short HEAD"
done

# Node packages: package.json is present and node_modules was baked in.
for pkg in pi-fork pi-observational-memory; do
  run "${pkg} clone + node_modules" \
    "test -f /opt/${pkg}/package.json && test -d /opt/${pkg}/node_modules"
done
# pi-studio is present only in the :latest-studio variant. By default the
# variant is auto-detected by probing /opt/pi-studio so this one script covers
# both variants.
#
# Auto-detection alone cannot catch a studio image that was built WITHOUT
# pi-studio: the studio assertions would simply be skipped and the run would
# stay green. Callers that know they are testing a studio image can export
# EXPECT_STUDIO=1 to turn a missing /opt/pi-studio into a failure. Leaving it
# unset (or set to anything else) keeps the plain auto-detect behaviour.
#
# Sets STUDIO_VARIANT (1 = studio present, 0 = absent) for later sections.
if docker run --rm --entrypoint="" "$IMAGE" sh -c 'test -d /opt/pi-studio' >/dev/null 2>&1; then
  STUDIO_VARIANT=1
  echo " ️ pi-studio detected — running studio assertions"
  run "pi-studio clone + node_modules" \
    "test -f /opt/pi-studio/package.json && test -d /opt/pi-studio/node_modules"
  run "pi-studio prebuilt client bundle" \
    "test -f /opt/pi-studio/client/studio-client.js"
elif [ "${EXPECT_STUDIO:-0}" = "1" ]; then
  STUDIO_VARIANT=0
  printf " ❌ %s\n" "pi-studio expected (EXPECT_STUDIO=1) but /opt/pi-studio is missing"
  FAIL=$((FAIL+1))
else
  STUDIO_VARIANT=0
  echo " ️ pi-studio not present (non-studio variant) — skipping studio clone checks"
fi
# ── Runtime deployment (needs entrypoint to run) ──────────────────────
printf '\n%s\n' "── Runtime deployment ──"

# Start a detached container WITHOUT overriding the entrypoint, so the baked
# entrypoint chain (entrypoint.sh → entrypoint-user.sh) runs and deploys
# pi-toolkit + pi-extensions to ~/.pi/agent/. CMD is replaced by
# `tail -f /dev/null` so the container stays alive for the docker-exec checks.
CID=$(docker run -d --rm "$IMAGE" tail -f /dev/null)

# Force-remove the helper container on every exit path; errors are ignored
# because the container may already be gone.
cleanup() {
  docker rm -f "$CID" >/dev/null 2>&1 || true
}
trap cleanup EXIT

# Readiness probe evaluated inside the container. It requires ALL of: the
# keybindings symlink (deployed by pi-toolkit), the mempalace.ts bridge
# (deployed last by entrypoint-user.sh), and ≥4 *.ts extensions. Parallel
# build load can otherwise sample the *.ts count mid-deploy and produce a
# flake. See opencode-devbox c6f9d11 (2026-06-08) — same fix transplanted.
readiness_probe='
test -L /home/developer/.pi/agent/keybindings.json && \
test -L /home/developer/.pi/agent/extensions/mempalace.ts && \
count=$(ls -1 /home/developer/.pi/agent/extensions/*.ts 2>/dev/null | wc -l) && \
[ "$count" -ge 4 ]
'

# Poll once per second, 45 attempts at most. On timeout the script simply
# carries on and lets the assertions that follow report what is missing.
for ((attempt = 1; attempt <= 45; attempt++)); do
  if docker exec "$CID" sh -c "$readiness_probe" >/dev/null 2>&1; then
    break
  fi
  sleep 1
done
# exec_test LABEL CMD
# Run CMD via `sh -c` as user `developer` inside the already-running
# container $CID. Output is discarded; the exit status alone decides the
# verdict. Prints one ✅/❌ line and bumps the global PASS or FAIL counter.
# Always returns 0 so a failed check never trips `set -e`.
exec_test() {
  local label="$1"
  local cmd="$2"
  local status=0

  docker exec -u developer "$CID" sh -c "$cmd" >/dev/null 2>&1 || status=$?

  case "$status" in
    0)
      printf " ✅ %s\n" "$label"
      PASS=$((PASS+1))
      ;;
    *)
      printf " ❌ %s\n" "$label"
      FAIL=$((FAIL+1))
      ;;
  esac
  return 0
}
# Files the entrypoint is expected to have deployed under the developer's
# pi agent directory. $HOME is left unexpanded here on purpose: it is resolved
# by the shell inside the container.
agent_dir='$HOME/.pi/agent'
exec_test "keybindings.json (pi-toolkit)" \
  "test -L ${agent_dir}/keybindings.json && echo ok"
exec_test "extensions ≥ 4 (pi-extensions)" \
  'count=$(ls -1 '"${agent_dir}"'/extensions/*.ts 2>/dev/null | wc -l); [ $count -ge 4 ] && echo "$count extensions"'
exec_test "mempalace.ts bridge" \
  "test -L ${agent_dir}/extensions/mempalace.ts && echo ok"
exec_test "settings.json bootstrapped" \
  "test -f ${agent_dir}/settings.json && echo ok"
# wait_for_registration NEEDLE
# Poll the container's settings.json once per second, up to 15 times, until
# NEEDLE shows up in it. Always returns 0: a timeout is not an error here,
# the exec_test assertions that follow deliver the verdict.
wait_for_registration() {
  local needle="$1"
  local attempt
  for ((attempt = 1; attempt <= 15; attempt++)); do
    if docker exec "$CID" grep -q "$needle" \
      /home/developer/.pi/agent/settings.json 2>/dev/null; then
      return 0
    fi
    sleep 1
  done
  return 0
}

# pi-fork + pi-observational-memory are registered by entrypoint-user.sh via
# `pi install /opt/<pkg>`, which runs slightly after the keybindings marker,
# so wait for the entry to appear before asserting.
wait_for_registration pi-observational-memory
exec_test "pi-fork registered (fork tool)" \
  'grep -q pi-fork $HOME/.pi/agent/settings.json && echo ok'
exec_test "pi-observational-memory registered (recall tool)" \
  'grep -q pi-observational-memory $HOME/.pi/agent/settings.json && echo ok'

# pi-studio registration (studio variant only) — registered by the same
# entrypoint-user.sh local-path install loop as fork/obsmem.
case "${STUDIO_VARIANT:-0}" in
  1)
    wait_for_registration pi-studio
    exec_test "pi-studio registered (/studio command + studio_* tools)" \
      'grep -q pi-studio $HOME/.pi/agent/settings.json && echo ok'
    ;;
esac
# ── /tmp/sshcm directory created by entrypoint ────────────────────────
# The directory must exist with permissions exactly 700.
sshcm_probe='test -d /tmp/sshcm && [ "$(stat -c %a /tmp/sshcm)" = "700" ] && echo ok'
exec_test "/tmp/sshcm dir mode 700 (ssh ControlMaster)" "$sshcm_probe"
# ── Image size ────────────────────────────────────────────────────────
echo ""
echo "── Image size ──"

# layer_sizes_to_mb
# Reads one human-readable size per line on stdin (the `{{.Size}}` column of
# `docker history`, e.g. "0B", "1.5kB", "45.3MB", "1.2GB") and prints the
# total as a whole number of MB (truncated). Lines that do not look like a
# size (e.g. "<missing>") are ignored.
#
# Docker formats these sizes with DECIMAL units (1 GB = 1000 MB,
# 1 kB = 0.001 MB), so the conversion factors are powers of 1000, not 1024.
layer_sizes_to_mb() {
  python3 -c '
import re
import sys

MB_PER_UNIT = {"B": 1 / 1000000, "kB": 1 / 1000, "MB": 1, "GB": 1000, "TB": 1000000}
SIZE_RE = re.compile(r"^([0-9.]+)(B|kB|MB|GB|TB)$")

total = 0.0
for line in sys.stdin:
    m = SIZE_RE.match(line.strip())
    if m:
        total += float(m.group(1)) * MB_PER_UNIT[m.group(2)]
print(int(total))
'
}

# Sum all layers via `docker history`. Docker's `image inspect --format='{{.Size}}'`
# returns ONLY the variant-unique layer when the base is content-addressed and
# shared (the case in this repo's two-phase build), which understates the
# user-facing image size by 2+ GB. Summing layer sizes from history is the
# metric Hub displays to users and the one we actually want to gate on.
#
# `|| SIZE_MB=""`: under `set -euo pipefail` a failing `docker history` or a
# missing python3 would otherwise abort the script right here, before the
# summary, and the "could not parse" branch below could never be reached.
SIZE_MB=$(docker history --format '{{.Size}}' "$IMAGE" | layer_sizes_to_mb) || SIZE_MB=""

if [ -z "$SIZE_MB" ] || [ "$SIZE_MB" = "0" ]; then
  printf " ⚠️ image size: could not parse — skipping check\n"
elif [ "$SIZE_MB" -le "$SIZE_THRESHOLD_MB" ]; then
  printf " ✅ size: %d MB (threshold %d MB)\n" "$SIZE_MB" "$SIZE_THRESHOLD_MB"
  PASS=$((PASS+1))
else
  printf " ❌ size: %d MB exceeds threshold %d MB\n" "$SIZE_MB" "$SIZE_THRESHOLD_MB"
  FAIL=$((FAIL+1))
fi
# ── Summary ───────────────────────────────────────────────────────────
printf '\n=== Results: %s passed, %s failed ===\n' "${PASS}" "${FAIL}"
# Final status of the script: success only when no assertion failed.
test "$FAIL" -eq 0