Files
pi-devbox/scripts/smoke-test.sh
T
pi 7d8ee4cea1 feat(studio): bundle studio-expose bridge + socat (opt-in STUDIO_EXPOSE)
pi-studio binds the container's 127.0.0.1, which a published Docker port
can't reach. Add a robust, portable bridge rather than a doc-only one-liner:

- Dockerfile.base: add socat (~1 MB, generally useful TCP relay).
- rootfs/usr/local/bin/studio-expose: socat TCP relay listening on the
  container's egress IPv4 (not 0.0.0.0 — that would EADDRINUSE against
  Studio's loopback listener) forwarding to 127.0.0.1:PORT on the SAME
  port, so Studio's printed token URL works verbatim. Robust egress-IP
  detection (hostname -I, loopback-filtered; ip route get fallback),
  --help, port validation, foreground.
- entrypoint-user.sh: opt-in STUDIO_EXPOSE=1 auto-starts the bridge in the
  background (studio variant only). Default OFF — Studio stays loopback-only
  (its secure default) unless explicitly opted in.
- README: 'Using pi-studio' now documents host-networking (A) and the
  studio-expose/STUDIO_EXPOSE bridge (B) with a security note; ssh -L for
  remote, mosh caveat retained.
- smoke-test: assert socat + studio-expose present (base-level).
- CHANGELOG/AGENTS updated.

No tag — stopping for review.
2026-06-10 23:33:44 +02:00

222 lines
10 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# smoke-test.sh — sanity checks for the pi-devbox image
#
# Usage: ./scripts/smoke-test.sh <image>
#
# Verifies:
# - pi binary present and (if EXPECTED_PI_VERSION set) matches CI's resolved version
# - new v1.0.0 base additions (pandoc, graphviz, imagemagick, yq, tealdeer)
# - socat and the studio-expose helper present (base-level)
# - tmux 0-indexing baked in /etc/tmux.conf (required for pi-studio variants)
# - pi-toolkit cloned at /opt/pi-toolkit
# - pi-extensions cloned at /opt/pi-extensions
# - pi-fork + pi-observational-memory cloned with node_modules baked
# - entrypoint deploys pi-toolkit keybindings symlink
# - entrypoint deploys ≥4 extensions
# - mempalace bridge symlink present
# - settings.json bootstrapped
# - pi-fork + pi-observational-memory registered via `pi install`
# - (studio variant only, auto-detected) pi-studio cloned + prebuilt
# client bundle present + registered via `pi install`
# - image size within threshold
set -euo pipefail

# Image reference under test: first positional argument, mandatory. The
# script aborts with a usage message when it is missing.
IMAGE="${1:?usage: $0 <image>}"

# Tallies of passed / failed assertions; the check helpers increment these
# and the final summary reads them.
PASS=0
FAIL=0

# Upper bound for the image-size gate, in MB.
# pi-devbox v1.0.0 (decoupled from opencode-devbox) added pandoc, graphviz,
# imagemagick, yq, tealdeer, and a baked /etc/tmux.conf. A local arm64 build
# was observed at 3.20 GB. CI amd64 builds may differ slightly, so the value
# below carries a +300 MB margin to absorb arch differences without false
# reds. Tighten in a follow-up release once amd64 actuals show up in CI logs.
SIZE_THRESHOLD_MB=3500
# Run a shell snippet in a throwaway container (entrypoint disabled) and
# record the outcome.
#   $1 - human-readable label printed next to the result mark
#   $2 - command string handed to `sh -c` inside the container
# Globals: reads IMAGE; increments PASS on exit status 0, FAIL otherwise.
# The container's own stdout/stderr are discarded; only the result line is
# printed.
run() {
  local title="$1"
  local probe="$2"
  local mark=" ✅"

  if docker run --rm --entrypoint="" "$IMAGE" sh -c "$probe" >/dev/null 2>&1; then
    PASS=$((PASS + 1))
  else
    mark=" ❌"
    FAIL=$((FAIL + 1))
  fi

  printf "%s %s\n" "$mark" "$title"
}
# Stricter version of `run` that asserts an expected substring in the
# command's combined stdout/stderr instead of trusting the exit status.
# Catches the "image bytes silently identical to previous release" class of
# regression — Docker layer cache hit on `npm install -g <pkg>` because the
# bare command string is identical across builds, even when `latest` would
# resolve differently. Discovered 2026-05-23 — every pi-devbox release
# v0.74.0..v0.75.5 had been shipping the same image bytes.
#
#   $1 - human-readable label printed next to the result mark
#   $2 - command string handed to `sh -c` inside a throwaway container
#   $3 - literal substring that must appear in the captured output
# Globals: reads IMAGE; increments PASS on a match, FAIL otherwise.
# The command's exit status is deliberately ignored; only the output counts.
run_expect() {
  local label="$1"; local cmd="$2"; local expect="$3"
  local out
  out=$(docker run --rm --entrypoint="" "$IMAGE" sh -c "$cmd" 2>&1) || true
  # Literal substring match done in-shell. The previous `echo | grep -Fq`
  # pipeline could report a false failure under `set -o pipefail` (grep -q
  # exits on the first hit and echo dies of SIGPIPE on large output), and it
  # parsed an expectation starting with "-" as a grep option.
  if [[ "$out" == *"$expect"* ]]; then
    printf " ✅ %s (got %s)\n" "$label" "$expect"; PASS=$((PASS+1))
  else
    printf " ❌ %s — expected substring %q, got: %s\n" "$label" "$expect" "$out"; FAIL=$((FAIL+1))
  fi
}
printf '%s\n' "=== pi-devbox smoke test: $IMAGE ===" ""

# ── Binaries ─────────────────────────────────────────────────────────
printf '%s\n' "── Binaries ──"

# pi: pin to the CI-resolved version when EXPECTED_PI_VERSION is provided,
# otherwise only require that the binary runs.
if [ -z "${EXPECTED_PI_VERSION:-}" ]; then
  run "pi" "pi --version"
else
  run_expect "pi version matches build arg" "pi --version" "$EXPECTED_PI_VERSION"
fi

# Flat list of (label, command) pairs, executed in order. Entries from
# "pandoc" onwards are the v1.0.0 base additions plus socat and the
# studio-expose helper.
binary_checks=(
  "node"                 "node --version"
  "git"                  "git --version"
  "aws"                  "aws --version"
  "uv"                   "uv --version"
  "nvim"                 "nvim --version"
  "mempalace-mcp"        "mempalace-mcp --help"
  "pandoc"               "pandoc --version"
  "graphviz (dot)"       "dot -V"
  "imagemagick"          "magick --version"
  "yq"                   "yq --version"
  "tldr (tealdeer)"      "tldr --version"
  "socat"                "socat -V"
  "studio-expose helper" "test -x /usr/local/bin/studio-expose"
)
for ((idx = 0; idx < ${#binary_checks[@]}; idx += 2)); do
  run "${binary_checks[idx]}" "${binary_checks[idx + 1]}"
done
# ── tmux 0-indexing (required for pi-studio variants) ─────────────────
printf '\n%s\n' "── tmux config ──"

# Both window and pane numbering must start at 0 in the baked config.
for tmux_option in base-index pane-base-index; do
  run_expect "/etc/tmux.conf has ${tmux_option} 0" \
    "cat /etc/tmux.conf" "set -g ${tmux_option} 0"
done
# ── Repo clones ───────────────────────────────────────────────────────
printf '\n%s\n' "── Repo clones ──"

# Plain git checkouts: directory exists and HEAD resolves.
for repo in pi-toolkit pi-extensions; do
  run "${repo} clone" "test -d /opt/${repo} && git -C /opt/${repo} rev-parse --short HEAD"
done

# Node packages: package.json present and dependencies baked in.
for pkg in pi-fork pi-observational-memory; do
  run "${pkg} clone + node_modules" \
    "test -f /opt/${pkg}/package.json && test -d /opt/${pkg}/node_modules"
done

# pi-studio ships only in the :latest-studio variant. Probe /opt/pi-studio
# so this single script covers both variants; STUDIO_VARIANT is consulted
# again later for the registration check.
STUDIO_VARIANT=0
if docker run --rm --entrypoint="" "$IMAGE" sh -c 'test -d /opt/pi-studio' >/dev/null 2>&1; then
  STUDIO_VARIANT=1
fi

case "$STUDIO_VARIANT" in
  1)
    echo " ️ pi-studio detected — running studio assertions"
    run "pi-studio clone + node_modules" \
      "test -f /opt/pi-studio/package.json && test -d /opt/pi-studio/node_modules"
    run "pi-studio prebuilt client bundle" \
      "test -f /opt/pi-studio/client/studio-client.js"
    ;;
  *)
    echo " ️ pi-studio not present (non-studio variant) — skipping studio clone checks"
    ;;
esac
# ── Runtime deployment (needs entrypoint to run) ──────────────────────
printf '\n%s\n' "── Runtime deployment ──"

# Start a detached container with the baked entrypoint left in place, so
# the entrypoint chain (entrypoint.sh → entrypoint-user.sh) runs and deploys
# pi-toolkit + pi-extensions to ~/.pi/agent/. Only CMD is replaced, with
# `tail -f /dev/null`, to keep the container alive for `docker exec`.
CID=$(docker run -d --rm "$IMAGE" tail -f /dev/null)

# Force-remove the long-running container on any exit path; failures of the
# removal itself are ignored.
cleanup() {
  docker rm -f "$CID" >/dev/null 2>&1 || true
}
trap cleanup EXIT

# Readiness probe, evaluated by `sh -c` inside the container. It requires
# ALL of: the keybindings symlink (deployed by pi-toolkit), the mempalace.ts
# bridge (deployed last by entrypoint-user.sh), and ≥4 *.ts extensions.
# Gating on all three avoids sampling the *.ts count mid-deploy under
# parallel build load, which produced flakes. See opencode-devbox c6f9d11
# (2026-06-08) — same fix transplanted.
readiness_probe='
test -L /home/developer/.pi/agent/keybindings.json && \
test -L /home/developer/.pi/agent/extensions/mempalace.ts && \
count=$(ls -1 /home/developer/.pi/agent/extensions/*.ts 2>/dev/null | wc -l) && \
[ "$count" -ge 4 ]
'

# Poll up to 45 times, one second apart; continue regardless once the
# budget is spent (the assertions that follow report what is missing).
polls_left=45
until docker exec "$CID" sh -c "$readiness_probe" >/dev/null 2>&1; do
  polls_left=$((polls_left - 1))
  sleep 1
  [ "$polls_left" -gt 0 ] || break
done
# Run a shell snippet as user `developer` inside the already-running
# container and record the outcome.
#   $1 - human-readable label printed next to the result mark
#   $2 - command string handed to `sh -c` inside the container
# Globals: reads CID; increments PASS on exit status 0, FAIL otherwise.
# The command's own output is discarded.
exec_test() {
  local title="$1"
  local script="$2"

  if ! docker exec -u developer "$CID" sh -c "$script" >/dev/null 2>&1; then
    printf " ❌ %s\n" "$title"
    FAIL=$((FAIL + 1))
    return 0
  fi

  printf " ✅ %s\n" "$title"
  PASS=$((PASS + 1))
}
# Path of the agent directory as seen INSIDE the container. Kept literal
# (single-quoted) so $HOME is expanded by the container's shell, not here.
agent_dir='$HOME/.pi/agent'

# Poll (up to 15 tries, one second apart) until settings.json mentions the
# given package name; gives up silently so the assertion that follows can
# report the failure.
#   $1 - package name to look for
await_registration() {
  local pkg="$1"
  local tries_left=15
  while [ "$tries_left" -gt 0 ]; do
    if docker exec "$CID" grep -q "$pkg" \
      /home/developer/.pi/agent/settings.json 2>/dev/null; then
      return 0
    fi
    sleep 1
    tries_left=$((tries_left - 1))
  done
  return 0
}

exec_test "keybindings.json (pi-toolkit)" "test -L ${agent_dir}/keybindings.json && echo ok"
exec_test "extensions ≥ 4 (pi-extensions)" 'count=$(ls -1 $HOME/.pi/agent/extensions/*.ts 2>/dev/null | wc -l); [ $count -ge 4 ] && echo "$count extensions"'
exec_test "mempalace.ts bridge" "test -L ${agent_dir}/extensions/mempalace.ts && echo ok"
exec_test "settings.json bootstrapped" "test -f ${agent_dir}/settings.json && echo ok"

# pi-fork + pi-observational-memory are registered by entrypoint-user.sh via
# `pi install /opt/<pkg>`, which runs slightly after the keybindings marker,
# so wait for the registration to land before asserting on it.
await_registration pi-observational-memory
exec_test "pi-fork registered (fork tool)" "grep -q pi-fork ${agent_dir}/settings.json && echo ok"
exec_test "pi-observational-memory registered (recall tool)" "grep -q pi-observational-memory ${agent_dir}/settings.json && echo ok"

# pi-studio registration (studio variant only) — registered by the same
# entrypoint-user.sh local-path install loop as fork/obsmem.
if [ "${STUDIO_VARIANT:-0}" = "1" ]; then
  await_registration pi-studio
  exec_test "pi-studio registered (/studio command + studio_* tools)" \
    "grep -q pi-studio ${agent_dir}/settings.json && echo ok"
fi

# ── /tmp/sshcm directory created by entrypoint ────────────────────────
exec_test "/tmp/sshcm dir mode 700 (ssh ControlMaster)" \
  'test -d /tmp/sshcm && [ "$(stat -c %a /tmp/sshcm)" = "700" ] && echo ok'
# ── Image size ────────────────────────────────────────────────────────

# Sum human-readable layer sizes (one per line on stdin, as printed by
# `docker history --format '{{.Size}}'`) and print the total as a whole
# number of MB, truncated.
# Docker prints these sizes in decimal SI units (kB/MB/GB are powers of
# 1000), so the conversion uses 1000-based multipliers; 1024-based ones
# overstate a multi-GB image by ~2.4%. Blank or unrecognised lines are
# skipped. Implemented in awk so the host running the smoke test does not
# need python3.
layer_sizes_to_mb() {
  awk '
    BEGIN {
      mult["B"]  = 1 / 1000000
      mult["kB"] = 1 / 1000
      mult["MB"] = 1
      mult["GB"] = 1000
      mult["TB"] = 1000000
    }
    {
      gsub(/^[ \t]+|[ \t\r]+$/, "")
      if (match($0, /^[0-9.]+/)) {
        unit = substr($0, RLENGTH + 1)
        if (unit in mult) total += substr($0, 1, RLENGTH) * mult[unit]
      }
    }
    END { printf "%d\n", total }
  '
}

echo ""
echo "── Image size ──"
# Sum all layers via `docker history`. Docker's `image inspect --format='{{.Size}}'`
# returns ONLY the variant-unique layer when the base is content-addressed and
# shared (the case in this repo's two-phase build), which understates the
# user-facing image size by 2+ GB. Summing layer sizes from history is the
# metric we actually want to gate on.
# A failing `docker history` must not abort the script under
# `set -e -o pipefail`; fall through to the "could not parse" branch instead
# so the summary is still printed.
SIZE_MB=$(docker history --format '{{.Size}}' "$IMAGE" | layer_sizes_to_mb) || SIZE_MB=""
if [ -z "$SIZE_MB" ] || [ "$SIZE_MB" = "0" ]; then
  printf " ⚠️ image size: could not parse — skipping check\n"
elif [ "$SIZE_MB" -le "$SIZE_THRESHOLD_MB" ]; then
  printf " ✅ size: %d MB (threshold %d MB)\n" "$SIZE_MB" "$SIZE_THRESHOLD_MB"; PASS=$((PASS+1))
else
  printf " ❌ size: %d MB exceeds threshold %d MB\n" "$SIZE_MB" "$SIZE_THRESHOLD_MB"; FAIL=$((FAIL+1))
fi
# ── Summary ───────────────────────────────────────────────────────────
# Print the totals; the script's exit status is non-zero as soon as at
# least one assertion failed.
printf '\n=== Results: %s passed, %s failed ===\n' "$PASS" "$FAIL"
test "$FAIL" -eq 0