diff --git a/.gitea/README.md b/.gitea/README.md index 97d98e5..b444882 100644 --- a/.gitea/README.md +++ b/.gitea/README.md @@ -75,7 +75,13 @@ The split-base architecture is what the `docker-publish-split.yml` workflow exer └──────────────────────────┘ ``` -### Step 1: `base-decide` (and `resolve-versions` in parallel) +### Step 1: `resolve-versions`, then `base-decide` + +**`resolve-versions`** resolves floating refs to concrete values: `omos_version` +(npm `latest`) and `mempalace_toolkit_ref` (the `mempalace-toolkit` `main` HEAD +resolved to a commit SHA via the gitea commits API). **`base-decide`** now +**depends on `resolve-versions`** (they no longer run in parallel) because it +folds `mempalace_toolkit_ref` into the base hash — see below. **`base-decide`** computes a SHA-256 hash over the inputs that determine the base image's content: @@ -90,6 +96,9 @@ the base image's content: ! -name '._*' \ -print0 | sort -z | xargs -0 cat cat entrypoint.sh entrypoint-user.sh + echo "$mempalace_toolkit_ref" # CI-resolved SHA; mempalace-toolkit is + # cloned in Dockerfile.base, so a moved + # toolkit must force a base rebuild } | sha256sum | cut -c1-12 ``` diff --git a/.gitea/workflows/docker-publish-split.yml b/.gitea/workflows/docker-publish-split.yml index b743a84..8018a47 100644 --- a/.gitea/workflows/docker-publish-split.yml +++ b/.gitea/workflows/docker-publish-split.yml @@ -48,6 +48,7 @@ env: jobs: # ── Phase 1: decide whether base needs rebuilding ────────────────── base-decide: + needs: [resolve-versions] runs-on: ubuntu-latest container: image: catthehacker/ubuntu:act-latest @@ -77,6 +78,10 @@ jobs: ! -name '._*' \ -print0 2>/dev/null | sort -z | xargs -0 cat 2>/dev/null cat entrypoint.sh entrypoint-user.sh + # mempalace-toolkit is cloned in Dockerfile.base at a ref CI + # resolves to a SHA; fold it in so base_tag changes when the + # toolkit moves (otherwise a toolkit-only fix never lands). 
+ echo "${{ needs.resolve-versions.outputs.mempalace_toolkit_ref }}" } | sha256sum | cut -c1-12 ) BASE_TAG="base-${HASH}" @@ -121,6 +126,7 @@ jobs: image: catthehacker/ubuntu:act-latest outputs: omos_version: ${{ steps.resolve.outputs.omos_version }} + mempalace_toolkit_ref: ${{ steps.resolve.outputs.mempalace_toolkit_ref }} steps: - name: Resolve omos version from npm registry id: resolve @@ -134,10 +140,24 @@ jobs: OMOS_VERSION=$(curl -sf "https://registry.npmjs.org/oh-my-opencode-slim/latest" | jq -r '.version') echo "omos_version=${OMOS_VERSION}" >> "$GITHUB_OUTPUT" echo "Resolved OMOS_VERSION=${OMOS_VERSION}" + # Resolve mempalace-toolkit main HEAD to a commit SHA. Unlike omos + # (an npm pkg baked into the VARIANT), mempalace-toolkit is cloned + # in Dockerfile.base, so this SHA is ALSO folded into the + # base-decide hash to force a base rebuild when the toolkit moves + # (without it, a toolkit-only fix silently fails to land unless + # Dockerfile.base itself changes). gitea allows unauthenticated + # public-repo commit listing; the token header is harmless if the + # env vars are unset (degrades to anon, still HTTP 200). 
+ MEMPALACE_TOOLKIT_REF=$(curl -sf -H "Authorization: token ${GITEA_BUILD_TOKEN:-${GITHUB_TOKEN:-}}" \ + "https://gitea.jordbo.se/api/v1/repos/joakimp/mempalace-toolkit/commits?limit=1&sha=main" \ + | jq -r '.[0].sha // "main"' 2>/dev/null || echo "main") + [ -n "$MEMPALACE_TOOLKIT_REF" ] || MEMPALACE_TOOLKIT_REF=main + echo "mempalace_toolkit_ref=${MEMPALACE_TOOLKIT_REF}" >> "$GITHUB_OUTPUT" + echo "Resolved MEMPALACE_TOOLKIT_REF=${MEMPALACE_TOOLKIT_REF}" # ── Phase 2: build & push base (multi-arch), only when needed ────── build-base: - needs: [base-decide] + needs: [base-decide, resolve-versions] if: needs.base-decide.outputs.need_build == 'true' runs-on: ubuntu-latest container: @@ -185,6 +205,7 @@ jobs: shell: bash env: BASE_TAG_FULL: ${{ env.IMAGE }}:${{ needs.base-decide.outputs.base_tag }} + MEMPALACE_TOOLKIT_REF: ${{ needs.resolve-versions.outputs.mempalace_toolkit_ref }} run: | set -euo pipefail # 3-attempt retry around `docker buildx build --push` for transient @@ -205,6 +226,7 @@ jobs: if docker buildx build \ --platform linux/amd64,linux/arm64 \ --file Dockerfile.base \ + --build-arg MEMPALACE_TOOLKIT_REF="${MEMPALACE_TOOLKIT_REF}" \ --push \ --tag "${BASE_TAG_FULL}" \ .; then diff --git a/AGENTS.md b/AGENTS.md index c96fef2..08d0bd3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -101,6 +101,7 @@ curl -s https://api.github.com/repos/anomalyco/opencode/releases/tags/v1.15.10 | - **GitHub/Gitea-sourced binaries float by default** — gosu, fzf, git-lfs, gitleaks, nvim, bat, eza, zoxide, uv, gitea-mcp, Go, oh-my-opencode-slim all default to `latest`. Each build-time install step reads the `/releases/latest` Location redirect (or the go.dev JSON feed for Go) and derives the concrete version. Use the same `ARCH` case-switch pattern for multi-arch support (amd64/arm64) — mind project-specific arch-name deviations (gitleaks uses `x64`, bat/eza/zoxide use `x86_64`/`aarch64`, gosu uses `amd64`/`arm64`). 
Intentional pins: `OPENCODE_VERSION` (drives the image tag), `NODE_VERSION=22` (major pin), `DEBIAN_VERSION=trixie-slim` (OS base). Adding a new upstream tool: follow the existing floated-version pattern, don't hardcode a specific tag. - **Resolved versions are logged by the smoke test** — `scripts/smoke-test.sh` prints a "Resolved component versions" table as its first step. CI logs always capture what got baked into a given image even when ARGs default to `latest`. - **`OMOS_VERSION` MUST be passed by CI as a concrete version**, not left at the `latest` default. The npm install step in `Dockerfile.variant` (`oh-my-opencode-slim@${OMOS_VERSION}`) produces an identical layer-hash when the ARG value is byte-identical across builds; combined with the registry buildcache (`base-buildcache`) the layer gets reused even when `latest` would have resolved to a newer upstream. This is the same class of bug that bit pi-devbox v0.74.0 → v0.75.5 (silent same-bytes-across-releases regression discovered 2026-05-23, fixed in pi-devbox v0.75.5b). It is currently *masked* in opencode-devbox by `OPENCODE_VERSION` being a hard-coded ARG that bumps every release — that bump invalidates the parent-chain cache key for the downstream omos layer — but the masking would fail the moment a `vN.N.Nb` opencode-version-unchanged release ships that only bumps omos. Preventative fix: `.gitea/workflows/docker-publish-split.yml` has a `resolve-versions` job that runs `npm view oh-my-opencode-slim version`, exposing the concrete value as an output that the omos smoke + build jobs consume via build-args. Smoke tests assert via the `EXPECTED_OMOS_VERSION` env var — would catch the regression on the next release rather than several releases later. 
**If you change the variant build-args list, the resolve-versions job, or the smoke EXPECTED_*_VERSION wiring, audit all affected jobs in lockstep.** +- **`resolve-versions` also pins `mempalace-toolkit` to a SHA** — `resolve-versions` resolves the `mempalace-toolkit` `main` HEAD to a commit SHA (`mempalace_toolkit_ref` output) via the gitea commits API (`/api/v1/repos/joakimp/mempalace-toolkit/commits?limit=1&sha=main`; gitea allows **unauthenticated** public-repo listing, so no secret is required). Unlike every other dependency, `mempalace-toolkit` is cloned in **`Dockerfile.base`**, not the variant — so the resolve→build-arg→variant plumbing bypasses it. To make a moved toolkit actually land, the resolved SHA is **folded into the `base-decide` hash** (so `base_tag` changes → base rebuilds) AND passed to `build-base` as `--build-arg MEMPALACE_TOOLKIT_REF`. Consequently **`base-decide` now depends on `resolve-versions`** (they no longer run in parallel), and the base clone uses a SHA-capable `git fetch + checkout FETCH_HEAD` (a `git clone --branch <40-char-SHA>` would fail). Trade-off: `base_tag` now reflects a live gitea lookup — on an API blip it falls back to `main`. The first such fallback triggers one *extra* base rebuild; a repeated fallback with otherwise-unchanged hash inputs reproduces the same `main`-keyed `base_tag`, which already exists on the Hub, so `base-decide` skips the rebuild and a toolkit move can be *missed* until the next successful lookup. If you touch `resolve-versions`, `base-decide`'s hash inputs, or the `build-base` build-args, audit `.gitea/README.md` Step 1 in lockstep. - **Registry buildkit cache-export is currently disabled** — do NOT re-add `cache-from`/`cache-to` to the `build-base` step in `.gitea/workflows/docker-publish-split.yml` without first verifying that buildkit's `mode=max` cache-export to `registry-1.docker.io` no longer returns HTTP 400 from the Hub CDN edge. The regression surfaced ~2026-05-23 and broke five consecutive opencode-devbox publish attempts (runs #332/333/334/336 + a rerun); root-caused on 2026-05-28 by a manual host-side publish that reproduced the same 400 only on `--cache-to` while image push worked fine. 
Failure shape is stable (`Offset:0` in the `_state` token, HTML response body = CDN-tier rejection, not registry backend), repo-specific (we're the only repo writing `:base-buildcache` mode=max), and explains why pinning `setup-buildx-action@v4.0.0` didn't help (action pin doesn't change the bundled buildkit version on the catthehacker runner image). Trade-off: dockerfile.base changes pay a full ~3 min rebuild instead of pulling cached layers; unchanged bases short-circuit at the Hub-probe step in `base-decide` and never re-build anyway. Variants don't use registry cache so they're unaffected. Re-enable condition: upstream moby/buildkit fix lands AND a low-risk test run succeeds without 400s. See CHANGELOG v1.15.12 `Unreleased` block for the full diagnostic chain. Manual escape-hatch publish procedure: `docs/manual-host-publish.md`. - **Push steps wrap `docker buildx build --push` in a 3-attempt retry loop** (15s, 30s backoff) for transient `registry-1.docker.io` blips — rate limits, brief 5xx, CDN flap. Implemented as inline `shell: bash` steps with `docker buildx build` raw rather than `docker/build-push-action@v7` so the loop is visible and tweakable. Affects the 1 base + 5 variant push steps in `.gitea/workflows/docker-publish-split.yml`; smoke-test builds (`load: true`, no push) are untouched. **This does NOT mask deterministic failures** — a true regression (like the cache-export 400 of 2026-05-23..28) fails all 3 attempts identically and the job still fails. Orthogonal to the cache-export disablement above: cache-export was about a deterministic protocol mismatch, retry is about absorbing genuine transients. Both are belt-and-braces with the `ci-release-watcher` skill's transient-rerun heuristic. If you change the matrix of push steps, keep the retry wrapper consistent across them — the pattern is duplicated rather than factored out because Gitea Actions doesn't support reusable composite shell steps cleanly. 
- **Shell scripts use `set -euo pipefail`** — both entrypoints are strict. Errors in volume chown or SSH permission operations are intentionally suppressed with `|| true`. diff --git a/CHANGELOG.md b/CHANGELOG.md index a73c793..b7c75a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,32 @@ Tags follow `v{opencode_version}[letter]` — bare tag for the first build on a ## Unreleased +### Changed + +- **`mempalace-toolkit` is now CI-resolved to a commit SHA.** It is the only + dependency cloned in `Dockerfile.base` (everything else is in the variant), + so it bypassed the `resolve-versions` → build-arg plumbing and its ref stayed + a literal `main` — meaning a toolkit-only fix would silently fail to land + unless `Dockerfile.base` itself changed. Now `resolve-versions` resolves the + `mempalace-toolkit` `main` HEAD to a SHA (new `mempalace_toolkit_ref` output, + via the gitea commits API — unauthenticated, no secret needed), `base-decide` + folds that SHA into the base-tag hash (so a moved toolkit forces a base + rebuild) and now **depends on `resolve-versions`**, and `build-base` passes it + as `--build-arg MEMPALACE_TOOLKIT_REF`. The base clone switched from + `git clone --branch` to a SHA-capable `git fetch + checkout FETCH_HEAD`. + Trade-off: `base_tag` now reflects a live gitea lookup — an API blip falls + back to `main`: the first fallback costs one *extra* rebuild, but a repeat with unchanged inputs reuses the `main`-keyed tag and can *miss* a toolkit move. Updated + `.gitea/README.md` Step 1 and `AGENTS.md` Critical conventions in lockstep. + ### Docs (no image change) +- Correct the MemPalace `diary_write` anyOf workaround watch-target: upstream + PR #1735 was **closed unmerged** (2026-06-11), so the old “remove once #1735 + ships” TODO pointed at a dead PR. Issue #1728 is still open; PR #1717 is the + current live candidate; mempalace PyPI latest is still 3.4.0 (== our pin), so + the workaround stays. Rewrote the `Dockerfile.base` tracking comment and added + a durable note under `AGENTS.md` Critical conventions. 
+ - Fix the quick-start description in `README.md` and the Hub `HUB_TEMPLATE` (`scripts/generate-dockerhub-md.py`, regenerated `DOCKER_HUB.md`): bare `docker compose run --rm devbox` lands in a **login shell** (default `CMD` is diff --git a/Dockerfile.base b/Dockerfile.base index 5944ff0..43b8534 100644 --- a/Dockerfile.base +++ b/Dockerfile.base @@ -312,9 +312,22 @@ RUN if [ "${INSTALL_MEMPALACE}" = "true" ]; then \ # ── mempalace-toolkit — bash wrappers for session/docs mining ──────── ARG INSTALL_MEMPALACE_TOOLKIT=true ARG MEMPALACE_TOOLKIT_REF=main +# MEMPALACE_TOOLKIT_REF accepts EITHER a branch name OR a commit SHA. CI +# resolves it to a SHA (resolve-versions job) and folds that SHA into the +# base-decide hash so the base rebuilds when the toolkit moves. `git clone +# --branch <40-char-SHA>` fails ("Remote branch not found"), so use +# `git fetch + checkout FETCH_HEAD`, which works for name and SHA. RUN if [ "${INSTALL_MEMPALACE}" = "true" ] && [ "${INSTALL_MEMPALACE_TOOLKIT}" = "true" ]; then \ - git clone --depth 1 --branch "${MEMPALACE_TOOLKIT_REF}" \ - https://gitea.jordbo.se/joakimp/mempalace-toolkit.git /opt/mempalace-toolkit && \ + rm -rf /opt/mempalace-toolkit && mkdir -p /opt/mempalace-toolkit && \ + git -C /opt/mempalace-toolkit init -q && \ + git -C /opt/mempalace-toolkit remote add origin https://gitea.jordbo.se/joakimp/mempalace-toolkit.git && \ + ok=0; for i in 1 2 3 4 5; do \ + if git -C /opt/mempalace-toolkit fetch --depth 1 origin "${MEMPALACE_TOOLKIT_REF}" && \ + git -C /opt/mempalace-toolkit checkout -q FETCH_HEAD; then ok=1; break; fi; \ + echo "git fetch mempalace-toolkit@${MEMPALACE_TOOLKIT_REF} failed (attempt $i/5), retrying in $((i*5))s..."; \ + sleep $((i*5)); \ + done; \ + [ "$ok" = "1" ] && \ ln -sf /opt/mempalace-toolkit/bin/mempalace-session /usr/local/bin/mempalace-session && \ ln -sf /opt/mempalace-toolkit/bin/mempalace-docs /usr/local/bin/mempalace-docs && \ chmod +x /opt/mempalace-toolkit/bin/mempalace-session 
/opt/mempalace-toolkit/bin/mempalace-docs && \