#!/usr/bin/env python3
"""
Generate DOCKER_HUB.md from README.md.

Rationale
---------
README.md is the authoritative source. DOCKER_HUB.md is a subset intended
for users pulling the pre-built image from Docker Hub — so
build-from-source instructions, developer setup (git hooks, gitleaks),
and CI/contribution content are dropped. Docker Hub enforces a 25 kB
limit on the full description field.

Usage
-----
Regenerate in place:

    python3 scripts/generate-dockerhub-md.py

Fail if DOCKER_HUB.md is out of sync with what this script would emit
(run this in CI):

    python3 scripts/generate-dockerhub-md.py --check

Design
------
Sections are selected and in some cases rewritten via `SECTION_RULES`
below. This keeps the transformation explicit and easy to audit — if a
new section is added to README.md that should also appear on Docker Hub,
extend SECTION_RULES rather than inventing implicit heuristics.
"""

from __future__ import annotations

import argparse
import re
import sys
from pathlib import Path

# This script lives one directory below the repository root (scripts/),
# so the root is the parent of this file's directory.
REPO_ROOT = Path(__file__).resolve().parent.parent
README = REPO_ROOT / "README.md"
DOCKER_HUB = REPO_ROOT / "DOCKER_HUB.md"

# Max size for Docker Hub full_description (bytes, UTF-8).
MAX_SIZE_BYTES = 25_000

# Per-section transformation.
#
# Each key is a top-level section title as it appears in README.md
# (without the leading "## ").
#
# The value is one of:
#   "keep"    — include verbatim.
#   "drop"    — exclude entirely.
#   "replace" — substitute a custom body (see REPLACEMENTS).
#   "trim"    — keep but drop selected level-3 sub-sections listed
#               in TRIM_SUBSECTIONS[title].
#
# Unknown sections are a hard error: generate() lists them on stderr and
# exits with status 2 — forcing an explicit decision whenever README
# gains a new section.
SECTION_RULES: dict[str, str] = {
    "Why?": "drop",  # build-motivation, not user-facing
    "Quick Start": "replace",  # swap docker compose clone flow for docker run
    "Features": "keep",
    "Usage": "keep",
    "Configuration": "trim",  # drop dev-build sub-sections
    "oh-my-opencode-slim (Multi-Agent Orchestration)": "keep",
    "AWS Bedrock Authentication": "keep",
    "MemPalace — persistent AI memory": "keep",
    "Gitea MCP server": "keep",
    "Context7 MCP server": "drop",
    "Shell defaults": "drop",  # detail, full README covers it
    "Secret Scanning": "drop",  # dev-only — gitleaks is for committers
    "Architecture": "keep",
    "License": "replace",  # point at source repo instead
}

# Level-3 sub-section titles (without the leading "### ") to drop from
# sections flagged as "trim". These are dev/build-oriented — Docker Hub
# users already have the image and don't need rebuild or multi-user
# compose instructions.
TRIM_SUBSECTIONS: dict[str, set[str]] = {
    "Configuration": {
        "Multi-user setup",
        "Rebuilding the Image",
        "Build Args",
    },
}

# Replacement bodies. Keys match SECTION_RULES entries marked "replace".
# Each value is the full section including the "## Title" heading.
#
# NOTE(review): the "License" replacement ends with "templates:" and no
# link follows it here — the URL looks like it was lost; confirm and
# restore it.
REPLACEMENTS: dict[str, str] = {
    "Quick Start": """## Quick Start

```bash
docker run -it --rm \\
  -e ANTHROPIC_API_KEY=your-key \\
  -e OPENCODE_PROVIDER=anthropic \\
  -e GIT_USER_NAME="Your Name" \\
  -e GIT_USER_EMAIL="you@example.com" \\
  -v ~/projects:/workspace \\
  -v ~/.ssh:/home/developer/.ssh:ro \\
  joakimp/opencode-devbox:latest
```

This drops you straight into opencode with your project mounted at `/workspace`.

For an interactive shell first (useful for AWS SSO login):

```bash
docker run -it --rm \\
  -e ANTHROPIC_API_KEY=your-key \\
  -e OPENCODE_PROVIDER=anthropic \\
  -v ~/projects:/workspace \\
  -v ~/.ssh:/home/developer/.ssh:ro \\
  joakimp/opencode-devbox:latest bash
```

Then run `opencode` when ready.

For docker-compose users, see the source repo for `docker-compose.yml` and `.env.example` templates.
""",
    "License": """## Source

MIT licensed. Source, issues, and `docker-compose.yml` templates:
""",
}

# Prepended to the generated file.
HEADER = """# opencode-devbox — Docker Hub

Portable AI developer environment for [opencode](https://opencode.ai). Debian-based, with git, SSH, Node.js, AWS CLI v2, and common dev tools pre-installed.

## Image Variants

Two image variants are published for each release:

| Tag | Description |
|---|---|
| `latest` / `vX.Y.Z` | Base image — opencode, Node.js, AWS CLI, dev tools |
| `latest-omos` / `vX.Y.Z-omos` | Base + [oh-my-opencode-slim](https://github.com/alvinunreal/oh-my-opencode-slim) multi-agent orchestration and Bun |

Both variants support `linux/amd64` and `linux/arm64`.

> **NOTE:** This file is auto-generated from `README.md` by `scripts/generate-dockerhub-md.py`. Edit README.md and regenerate rather than editing this file directly.

"""

# One physical line, keeping its trailing "\n" when it has one, so that
# joining the pieces reproduces the input exactly.
_LINE_RE = re.compile(r"[^\n]*\n|[^\n]+")

# Start of a fenced-code delimiter: up to three spaces of indentation,
# then a run of three or more backticks or tildes.
_FENCE_RE = re.compile(r" {0,3}(`{3,}|~{3,})")


def _scan_lines(md: str) -> list[tuple[str, bool]]:
    """Split `md` into lines and flag the ones inside fenced code blocks.

    Returns ``(line, in_code)`` pairs in document order. ``in_code`` is
    true for the opening and closing fence lines and for everything in
    between, so callers can ignore heading-like text such as a shell
    comment ``## note`` inside a ```` ```bash ```` block. An unclosed
    fence runs to the end of the input.
    """
    flagged: list[tuple[str, bool]] = []
    open_fence = ""  # marker that opened the current fence; "" when outside
    for line in _LINE_RE.findall(md):
        match = _FENCE_RE.match(line)
        marker = match.group(1) if match else ""
        rest = line[match.end():] if match else ""
        if open_fence:
            flagged.append((line, True))
            # A closing fence uses the same character, is at least as long
            # as the opener, and carries nothing but whitespace after it.
            if (
                marker[:1] == open_fence[0]
                and len(marker) >= len(open_fence)
                and not rest.strip()
            ):
                open_fence = ""
        elif marker and not (marker[0] == "`" and "`" in rest):
            # A backtick run followed by more backticks on the same line
            # is inline code, not a fence opener.
            open_fence = marker
            flagged.append((line, True))
        else:
            flagged.append((line, False))
    return flagged


def split_sections(md: str) -> list[tuple[str, str]]:
    """Split markdown on level-2 headings, returning (title, body) pairs.

    The body includes the heading line and everything up to (but not
    including) the next level-2 heading or EOF. Content before the first
    ``## `` is returned with an empty title (the document preamble); a
    blank preamble is omitted. Lines starting with ``## `` inside fenced
    code blocks are not treated as headings.
    """
    # Seed with the preamble bucket; each heading opens a new bucket.
    pieces: list[tuple[str, list[str]]] = [("", [])]
    for line, in_code in _scan_lines(md):
        if not in_code and line.startswith("## "):
            # Guarantee the heading line is newline-terminated even when
            # it is the very last line of the document.
            heading = line if line.endswith("\n") else line + "\n"
            pieces.append((line[3:].strip(), [heading]))
        else:
            pieces[-1][1].append(line)

    (_, preamble_lines), *rest = pieces
    preamble = "".join(preamble_lines)
    sections: list[tuple[str, str]] = []
    if preamble.strip():
        sections.append(("", preamble))
    sections.extend((title, "".join(lines)) for title, lines in rest)
    return sections


def trim_subsections(body: str, drop: set[str]) -> str:
    """Remove level-3 sub-sections whose title is in `drop`.

    A sub-section starts at a line beginning with "### " and ends at the
    next "### " or "## " (or EOF). Heading-like lines inside fenced code
    blocks are ignored, so a dropped sub-section is removed together with
    any code samples it contains. Deeper headings ("#### ") belong to the
    enclosing level-3 sub-section.
    """
    if not drop:
        return body

    kept: list[str] = []
    dropping = False
    for line, in_code in _scan_lines(body):
        if not in_code:
            if line.startswith("### "):
                dropping = line[4:].strip() in drop
            elif line.startswith("## "):
                # A level-2 heading always terminates a dropped sub-section.
                dropping = False
        if not dropping:
            kept.append(line)
    return "".join(kept)


def generate() -> str:
    """Produce the DOCKER_HUB.md content string.

    Reads README.md, applies SECTION_RULES to every level-2 section and
    prepends HEADER. The README preamble is discarded in favour of HEADER.

    Raises:
        SystemExit: with status 2 when README.md contains a section that
            has no entry in SECTION_RULES (the titles are listed on
            stderr first).
        KeyError: when a section is marked "replace" but has no entry in
            REPLACEMENTS.
    """
    readme = README.read_text(encoding="utf-8")

    out: list[str] = [HEADER]
    unknown: list[str] = []

    for title, body in split_sections(readme):
        if not title:
            # README preamble is replaced by our HEADER; skip.
            continue

        rule = SECTION_RULES.get(title)
        if rule is None:
            unknown.append(title)
            continue

        if rule == "drop":
            continue
        if rule == "keep":
            out.append(body.rstrip() + "\n\n")
        elif rule == "trim":
            trimmed = trim_subsections(body, TRIM_SUBSECTIONS.get(title, set()))
            out.append(trimmed.rstrip() + "\n\n")
        elif rule == "replace":
            out.append(REPLACEMENTS[title].rstrip() + "\n\n")
        else:  # pragma: no cover — programmer error
            raise AssertionError(f"unknown rule {rule!r} for section {title!r}")

    if unknown:
        print(
            "ERROR: README.md contains sections not classified in "
            "SECTION_RULES:\n  - " + "\n  - ".join(unknown) + "\n\nAdd each to SECTION_RULES in "
            "scripts/generate-dockerhub-md.py (choose keep/drop/replace/trim).",
            file=sys.stderr,
        )
        raise SystemExit(2)

    # Exactly one trailing newline, regardless of how the last section ended.
    return "".join(out).rstrip() + "\n"


def main() -> int:
    """Command-line entry point; returns the process exit status.

    Without flags the generated content is written to DOCKER_HUB.md.
    With ``--check`` nothing is written; the committed file is compared
    against the generated content and a short diff is printed on
    mismatch. Returns 0 on success and 1 when the output exceeds
    MAX_SIZE_BYTES or the check fails.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Keep the module docstring's headings and indented examples
        # intact in --help instead of letting argparse re-wrap them.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="Fail if DOCKER_HUB.md differs from generated content.",
    )
    args = parser.parse_args()

    content = generate()
    size = len(content.encode("utf-8"))

    if size > MAX_SIZE_BYTES:
        print(
            f"ERROR: generated DOCKER_HUB.md is {size} bytes, exceeding the "
            f"Docker Hub limit of {MAX_SIZE_BYTES} bytes.",
            file=sys.stderr,
        )
        return 1

    if args.check:
        # A missing file counts as "out of sync" rather than an exception.
        existing = DOCKER_HUB.read_text(encoding="utf-8") if DOCKER_HUB.exists() else ""
        if existing != content:
            print(
                "ERROR: DOCKER_HUB.md is out of sync with README.md.\n"
                "Run: python3 scripts/generate-dockerhub-md.py",
                file=sys.stderr,
            )
            # Show a small diff hint.
            import difflib

            diff = difflib.unified_diff(
                existing.splitlines(keepends=True),
                content.splitlines(keepends=True),
                fromfile="DOCKER_HUB.md (committed)",
                tofile="DOCKER_HUB.md (generated)",
                n=2,
            )
            # Cap the output so a large drift does not flood the CI log.
            sys.stderr.writelines(list(diff)[:80])
            return 1
        print(
            f"OK: DOCKER_HUB.md is in sync with README.md "
            f"({size} bytes, {MAX_SIZE_BYTES} limit).",
        )
        return 0

    DOCKER_HUB.write_text(content, encoding="utf-8")
    print(
        f"Wrote {DOCKER_HUB} ({size} bytes, {MAX_SIZE_BYTES} limit).",
    )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())