diff --git a/.gitea/workflows/docker-publish.yml b/.gitea/workflows/docker-publish.yml
index 73bc515..bc89fec 100644
--- a/.gitea/workflows/docker-publish.yml
+++ b/.gitea/workflows/docker-publish.yml
@@ -112,15 +112,32 @@ jobs:
           echo "pi_version=${PI_VERSION}" >> "$GITHUB_OUTPUT"
           echo "Resolved PI_VERSION=${PI_VERSION} from tag ${TAG}"
 
-      - name: Build and push (amd64 + arm64)
-        uses: docker/build-push-action@v7
-        with:
-          context: .
-          platforms: linux/amd64,linux/arm64
-          push: true
-          tags: ${{ steps.tags.outputs.tags }}
-          build-args: |
-            PI_VERSION=${{ steps.resolve.outputs.pi_version }}
+      - name: Build and push (amd64 + arm64) — with retry
+        shell: bash
+        env:
+          PI_VERSION: ${{ steps.resolve.outputs.pi_version }}
+          TAGS: ${{ steps.tags.outputs.tags }}
+        run: |
+          set -euo pipefail
+          # Convert newline-delimited TAGS env var (build-push-action's native
+          # format from the `Compute tags` step) into a bash array of -t flags.
+          TAG_FLAGS=()
+          while IFS= read -r t; do
+            if [[ -n "$t" ]]; then
+              TAG_FLAGS+=( -t "$t" )
+            fi
+          done <<< "${TAGS}"
+          # Fail fast when no tags were resolved: a --push without any -t flag
+          # is a deterministic failure, so retrying it only burns backoff time.
+          if [[ "${#TAG_FLAGS[@]}" -eq 0 ]]; then
+            echo "==> No image tags resolved from TAGS; refusing to build" >&2
+            exit 1
+          fi
+          # 3-attempt retry around `docker buildx build --push` for transient
+          # registry-1.docker.io blips (rate limits, CDN flap, brief 5xx).
+          # Does NOT mask deterministic failures: a true regression (e.g. the
+          # cache-export 400 hit 2026-05-23..28) will fail all 3 attempts
+          # identically and the job still fails — by design.
           # Registry cache disabled: buildkit's mode=max cache-export to
           # registry-1.docker.io reproducibly returns HTTP 400 on resumable-
           # upload PUT (Hub-CDN protocol mismatch with buildx 0.34.x, surfaced
@@ -130,6 +147,26 @@ jobs:
           # root-cause analysis. Re-enable when buildkit upstream resolves.
           # Single-stage Dockerfile + tiny diff (npm install pi only) means
           # build is fast even without cache (~30-60s).
+          for attempt in 1 2 3; do
+            echo "==> Build+push attempt ${attempt}/3"
+            if docker buildx build \
+              --platform linux/amd64,linux/arm64 \
+              --push \
+              --build-arg "PI_VERSION=${PI_VERSION}" \
+              "${TAG_FLAGS[@]}" \
+              .; then
+              echo "==> Attempt ${attempt} succeeded"
+              exit 0
+            fi
+            # Linear backoff: 15s after attempt 1, 30s after attempt 2.
+            if [[ "${attempt}" -lt 3 ]]; then
+              backoff=$(( attempt * 15 ))
+              echo "==> Attempt ${attempt} failed, sleeping ${backoff}s before retry"
+              sleep "${backoff}"
+            fi
+          done
+          echo "==> All 3 build+push attempts failed"
+          exit 1
 
   update-description:
     needs: publish
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 01db228..69c7970 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,7 +8,13 @@ Tags follow the pi npm version: `v{pi_version}[letter]` — bare tag for the fir
 
 ## Unreleased
 
-_(no changes since v0.76.0)_
+### CI: workflow-level retry around `docker buildx build --push`
+
+The single push step in `.gitea/workflows/docker-publish.yml` is now wrapped in a 3-attempt retry loop with backoff (15s, 30s) as belt-and-braces against transient `registry-1.docker.io` blips (rate limits, brief 5xx, CDN flap). Replaces the `docker/build-push-action@v7` invocation with a `shell: bash` step that runs `docker buildx build --push` directly so the loop is visible and tweakable. The step also fails fast, before the first attempt, when the resolved tag list is empty.
+
+Does **not** mask deterministic failures: a true regression (e.g. the cache-export 400 we hit 2026-05-23..28) will fail all 3 attempts identically and the job still fails by design. Only intended to absorb genuinely transient failures that single-attempt CI was vulnerable to.
+
+No image-side change.
 
 ## v0.76.0 — 2026-05-28
 