7d8ee4cea1
pi-studio binds the container's 127.0.0.1, which a published Docker port can't reach. Add a robust, portable bridge rather than a doc-only one-liner: - Dockerfile.base: add socat (~1 MB, generally useful TCP relay). - rootfs/usr/local/bin/studio-expose: socat TCP relay listening on the container's egress IPv4 (not 0.0.0.0 — that would EADDRINUSE against Studio's loopback listener) forwarding to 127.0.0.1:PORT on the SAME port, so Studio's printed token URL works verbatim. Robust egress-IP detection (hostname -I, loopback-filtered; ip route get fallback), --help, port validation, foreground. - entrypoint-user.sh: opt-in STUDIO_EXPOSE=1 auto-starts the bridge in the background (studio variant only). Default OFF — Studio stays loopback-only (its secure default) unless explicitly opted in. - README: 'Using pi-studio' now documents host-networking (A) and the studio-expose/STUDIO_EXPOSE bridge (B) with a security note; ssh -L for remote, mosh caveat retained. - smoke-test: assert socat + studio-expose present (base-level). - CHANGELOG/AGENTS updated. No tag — stopping for review.
222 lines
10 KiB
Bash
Executable File
222 lines
10 KiB
Bash
Executable File
#!/usr/bin/env bash
# smoke-test.sh — sanity checks for the pi-devbox image
#
# Usage: ./scripts/smoke-test.sh <image>
#
# Verifies:
# - pi binary present and (if EXPECTED_PI_VERSION is set) matches CI's resolved version
# - new v1.0.0 base additions (pandoc, graphviz, imagemagick, yq, tealdeer)
# - socat and the /usr/local/bin/studio-expose helper present
# - tmux 0-indexing baked in /etc/tmux.conf (required for pi-studio variants)
# - pi-toolkit cloned at /opt/pi-toolkit
# - pi-extensions cloned at /opt/pi-extensions
# - pi-fork + pi-observational-memory cloned with node_modules baked
# - entrypoint deploys pi-toolkit keybindings symlink
# - entrypoint deploys ≥4 extensions
# - mempalace bridge symlink present
# - settings.json bootstrapped
# - pi-fork + pi-observational-memory registered via `pi install`
# - (studio variant only, auto-detected) pi-studio cloned + prebuilt
#   client bundle present + registered via `pi install`
# - /tmp/sshcm directory created with mode 700
# - image size within threshold

# Strict mode: stop on the first failing command, on any unset variable,
# and when any stage of a pipeline fails.
set -euo pipefail

# Image reference under test. The script aborts with a usage message when
# the first positional argument is missing or empty.
IMAGE="${1:?usage: $0 <image>}"
# Running tallies, bumped by every check and reported in the final summary.
PASS=0; FAIL=0
# pi-devbox v1.0.0 (decoupled from opencode-devbox) added pandoc, graphviz,
# imagemagick, yq, tealdeer, and a baked /etc/tmux.conf. Local arm64 build
# observed 3.20 GB. CI amd64 builds may differ slightly; threshold below
# carries +300 MB margin to absorb arch differences without false reds.
# Tighten in a follow-up release once amd64 actuals are observed in CI logs.
SIZE_THRESHOLD_MB=3500

#######################################
# Run a command in a throwaway container and record pass/fail.
# The image entrypoint is overridden with an empty one, so only CMD runs
# (via `sh -c`). The command's stdout and stderr are discarded; only its
# exit status decides the outcome.
# Globals:   IMAGE (read), PASS / FAIL (incremented)
# Arguments: $1 - label printed in the result line
#            $2 - shell command string handed to `sh -c` in the container
# Outputs:   one ✅ / ❌ line on stdout
#######################################
run() {
  local label="$1"
  local cmd="$2"
  if docker run --rm --entrypoint="" "$IMAGE" sh -c "$cmd" >/dev/null 2>&1; then
    printf " ✅ %s\n" "$label"
    PASS=$((PASS + 1))
  else
    printf " ❌ %s\n" "$label"
    FAIL=$((FAIL + 1))
  fi
}

#######################################
# Stricter version of `run` that asserts an expected substring in the
# command's combined stdout/stderr.
# Catches the "image bytes silently identical to previous release" class of
# regression — Docker layer cache hit on `npm install -g <pkg>` because the
# bare command string is identical across builds, even when `latest` would
# resolve differently. Discovered 2026-05-23 — every pi-devbox release
# v0.74.0..v0.75.5 had been shipping the same image bytes.
# Globals:   IMAGE (read), PASS / FAIL (incremented)
# Arguments: $1 - label printed in the result line
#            $2 - shell command string handed to `sh -c` in the container
#            $3 - literal (fixed-string) substring that must appear in output
# Outputs:   one ✅ / ❌ line on stdout; the ❌ line includes captured output
#######################################
run_expect() {
  local label="$1"
  local cmd="$2"
  local expect="$3"
  local out
  # Deliberately best-effort: a non-zero exit from the container must not
  # abort the script under `set -e`; the substring check below decides.
  out=$(docker run --rm --entrypoint="" "$IMAGE" sh -c "$cmd" 2>&1) || true
  # Here-string instead of `echo | grep -q`: under `pipefail`, grep -q exiting
  # on the first match can SIGPIPE the writer and turn a match into a failure.
  # `--` keeps an expectation that starts with "-" from being read as an option.
  if grep -Fq -- "$expect" <<<"$out"; then
    printf " ✅ %s (got %s)\n" "$label" "$expect"
    PASS=$((PASS + 1))
  else
    printf " ❌ %s — expected substring %q, got: %s\n" "$label" "$expect" "$out"
    FAIL=$((FAIL + 1))
  fi
}

echo "=== pi-devbox smoke test: $IMAGE ==="
echo ""

# ── Binaries ─────────────────────────────────────────────────────────
echo "── Binaries ──"
# When EXPECTED_PI_VERSION is set, the reported version must contain it;
# otherwise only check that `pi --version` exits successfully.
if [ -n "${EXPECTED_PI_VERSION:-}" ]; then
  run_expect "pi version matches build arg" "pi --version" "$EXPECTED_PI_VERSION"
else
  run "pi" "pi --version"
fi
run "node" "node --version"
run "git" "git --version"
run "aws" "aws --version"
run "uv" "uv --version"
run "nvim" "nvim --version"
run "mempalace-mcp" "mempalace-mcp --help"
# v1.0.0 base additions — verify presence and basic functionality.
run "pandoc" "pandoc --version"
run "graphviz (dot)" "dot -V"
run "imagemagick" "magick --version"
run "yq" "yq --version"
run "tldr (tealdeer)" "tldr --version"
run "socat" "socat -V"
run "studio-expose helper" "test -x /usr/local/bin/studio-expose"

# ── tmux 0-indexing (required for pi-studio variants) ─────────────────
echo ""
echo "── tmux config ──"
run_expect "/etc/tmux.conf has base-index 0" \
  "cat /etc/tmux.conf" "set -g base-index 0"
run_expect "/etc/tmux.conf has pane-base-index 0" \
  "cat /etc/tmux.conf" "set -g pane-base-index 0"

# ── Repo clones ───────────────────────────────────────────────────────
echo ""
echo "── Repo clones ──"
run "pi-toolkit clone" "test -d /opt/pi-toolkit && git -C /opt/pi-toolkit rev-parse --short HEAD"
run "pi-extensions clone" "test -d /opt/pi-extensions && git -C /opt/pi-extensions rev-parse --short HEAD"
run "pi-fork clone + node_modules" \
  "test -f /opt/pi-fork/package.json && test -d /opt/pi-fork/node_modules"
run "pi-observational-memory clone + node_modules" \
  "test -f /opt/pi-observational-memory/package.json && test -d /opt/pi-observational-memory/node_modules"

# pi-studio is present only in the :latest-studio variant. Auto-detect by
# probing /opt/pi-studio so this one script covers both variants.
# STUDIO_VARIANT (1/0) is read again later to gate the registration check.
if docker run --rm --entrypoint="" "$IMAGE" sh -c 'test -d /opt/pi-studio' >/dev/null 2>&1; then
  STUDIO_VARIANT=1
  echo " ℹ️ pi-studio detected — running studio assertions"
  run "pi-studio clone + node_modules" \
    "test -f /opt/pi-studio/package.json && test -d /opt/pi-studio/node_modules"
  run "pi-studio prebuilt client bundle" \
    "test -f /opt/pi-studio/client/studio-client.js"
else
  STUDIO_VARIANT=0
  echo " ℹ️ pi-studio not present (non-studio variant) — skipping studio clone checks"
fi

# ── Runtime deployment (needs entrypoint to run) ──────────────────────
echo ""
echo "── Runtime deployment ──"
# Spin up a long-running container WITHOUT overriding the entrypoint, so
# the baked entrypoint chain (entrypoint.sh → entrypoint-user.sh) runs and
# deploys pi-toolkit + pi-extensions to ~/.pi/agent/. Override CMD to
# tail -f /dev/null so the container stays alive while we docker-exec.
CID=$(docker run -d --rm "$IMAGE" tail -f /dev/null)
# Force-remove the helper container on any exit path; errors are ignored
# because the container may already be gone.
cleanup() { docker rm -f "$CID" >/dev/null 2>&1 || true; }
trap cleanup EXIT

# Wait for entrypoint-user.sh to finish deploying pi-toolkit + extensions.
# Gate on BOTH the keybindings symlink (deployed by pi-toolkit) AND the
# mempalace.ts bridge (deployed last by entrypoint-user.sh) AND ≥4 *.ts
# extensions present. Parallel build load can otherwise sample the *.ts
# count mid-deploy and produce a flake. See opencode-devbox c6f9d11
# (2026-06-08) — same fix transplanted.
# Polls once per second, up to 45 attempts; if the gate never passes the
# loop simply ends and the assertions that follow report the failure.
for _ in $(seq 1 45); do
  if docker exec "$CID" sh -c '
    test -L /home/developer/.pi/agent/keybindings.json && \
    test -L /home/developer/.pi/agent/extensions/mempalace.ts && \
    count=$(ls -1 /home/developer/.pi/agent/extensions/*.ts 2>/dev/null | wc -l) && \
    [ "$count" -ge 4 ]
  ' >/dev/null 2>&1; then
    break
  fi
  sleep 1
done

#######################################
# Run a command inside the already-running container and record pass/fail.
# The command runs via `sh -c` as user `developer`; its stdout and stderr
# are discarded, so only the exit status decides the outcome.
# Globals:   CID (read), PASS / FAIL (incremented)
# Arguments: $1 - label printed in the result line
#            $2 - shell command string handed to `sh -c` in the container
# Outputs:   one ✅ / ❌ line on stdout
#######################################
exec_test() {
  local label="$1"
  local cmd="$2"
  if docker exec -u developer "$CID" sh -c "$cmd" >/dev/null 2>&1; then
    printf " ✅ %s\n" "$label"
    PASS=$((PASS + 1))
  else
    printf " ❌ %s\n" "$label"
    FAIL=$((FAIL + 1))
  fi
}

# The command strings are single-quoted on purpose: $HOME and $count must be
# expanded by the shell inside the container, not by this script.
exec_test "keybindings.json (pi-toolkit)" 'test -L $HOME/.pi/agent/keybindings.json && echo ok'
exec_test "extensions ≥ 4 (pi-extensions)" 'count=$(ls -1 $HOME/.pi/agent/extensions/*.ts 2>/dev/null | wc -l); [ $count -ge 4 ] && echo "$count extensions"'
exec_test "mempalace.ts bridge" 'test -L $HOME/.pi/agent/extensions/mempalace.ts && echo ok'
exec_test "settings.json bootstrapped" 'test -f $HOME/.pi/agent/settings.json && echo ok'

# pi-fork + pi-observational-memory are registered by entrypoint-user.sh via
# `pi install /opt/<pkg>`, which runs slightly after the keybindings marker.
# Poll once per second (up to 15 attempts) until the entry shows up.
for _ in $(seq 1 15); do
  if docker exec "$CID" grep -q pi-observational-memory \
    /home/developer/.pi/agent/settings.json 2>/dev/null; then
    break
  fi
  sleep 1
done
exec_test "pi-fork registered (fork tool)" 'grep -q pi-fork $HOME/.pi/agent/settings.json && echo ok'
exec_test "pi-observational-memory registered (recall tool)" 'grep -q pi-observational-memory $HOME/.pi/agent/settings.json && echo ok'

# pi-studio registration (studio variant only) — registered by the same
# entrypoint-user.sh local-path install loop as fork/obsmem.
if [ "${STUDIO_VARIANT:-0}" = "1" ]; then
  for _ in $(seq 1 15); do
    if docker exec "$CID" grep -q pi-studio \
      /home/developer/.pi/agent/settings.json 2>/dev/null; then
      break
    fi
    sleep 1
  done
  exec_test "pi-studio registered (/studio command + studio_* tools)" \
    'grep -q pi-studio $HOME/.pi/agent/settings.json && echo ok'
fi

# ── /tmp/sshcm directory created by entrypoint ────────────────────────
exec_test "/tmp/sshcm dir mode 700 (ssh ControlMaster)" \
  'test -d /tmp/sshcm && [ "$(stat -c %a /tmp/sshcm)" = "700" ] && echo ok'

# ── Image size ────────────────────────────────────────────────────────
echo ""
echo "── Image size ──"
# Sum all layers via `docker history`. Docker's `image inspect --format='{{.Size}}'`
# returns ONLY the variant-unique layer when the base is content-addressed and
# shared (the case in this repo's two-phase build), which understates the
# user-facing image size by 2+ GB. Summing layer sizes from history is the
# metric Hub displays to users and the one we actually want to gate on.
#
# The inline Python reads one human-readable size per line, skips empty and
# "0B" entries and anything not matching <number><B|kB|MB|GB>, converts each
# to MB (B/1048576, kB/1024, MB*1, GB*1024) and prints the truncated total.
SIZE_MB=$(docker history --format '{{.Size}}' "$IMAGE" | python3 -c '
import sys, re
total=0.0
for line in sys.stdin:
    s=line.strip()
    if s in ("0B", ""): continue
    m=re.match(r"^([0-9.]+)(B|kB|MB|GB)$", s)
    if not m: continue
    v=float(m.group(1)); u=m.group(2)
    mult={"B":1/1048576,"kB":1/1024,"MB":1,"GB":1024}[u]
    total+=v*mult
print(int(total))
')
# An empty or zero total means nothing could be parsed: warn and skip the
# check (neither PASS nor FAIL is incremented in that case).
if [ -z "$SIZE_MB" ] || [ "$SIZE_MB" = "0" ]; then
  printf " ⚠️ image size: could not parse — skipping check\n"
elif [ "$SIZE_MB" -le "$SIZE_THRESHOLD_MB" ]; then
  printf " ✅ size: %d MB (threshold %d MB)\n" "$SIZE_MB" "$SIZE_THRESHOLD_MB"; PASS=$((PASS+1))
else
  printf " ❌ size: %d MB exceeds threshold %d MB\n" "$SIZE_MB" "$SIZE_THRESHOLD_MB"; FAIL=$((FAIL+1))
fi

# ── Summary ───────────────────────────────────────────────────────────
echo ""
echo "=== Results: ${PASS} passed, ${FAIL} failed ==="
# Last command sets the script's exit status: 0 only when nothing failed.
[ "$FAIL" -eq 0 ]