Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f8da7890df | |||
| b17dc1fa1f | |||
| 3eec9bc23c | |||
| 4744f05232 |
@@ -47,6 +47,7 @@ env:
|
|||||||
jobs:
|
jobs:
|
||||||
# ── Phase 1: decide whether base needs rebuilding ──────────────────
|
# ── Phase 1: decide whether base needs rebuilding ──────────────────
|
||||||
base-decide:
|
base-decide:
|
||||||
|
needs: [resolve-versions]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
container:
|
container:
|
||||||
image: catthehacker/ubuntu:act-latest
|
image: catthehacker/ubuntu:act-latest
|
||||||
@@ -75,6 +76,10 @@ jobs:
|
|||||||
! -name '._*' \
|
! -name '._*' \
|
||||||
-print0 2>/dev/null | sort -z | xargs -0 cat 2>/dev/null
|
-print0 2>/dev/null | sort -z | xargs -0 cat 2>/dev/null
|
||||||
cat entrypoint.sh entrypoint-user.sh
|
cat entrypoint.sh entrypoint-user.sh
|
||||||
|
# mempalace-toolkit is cloned in Dockerfile.base at a ref CI
|
||||||
|
# resolves to a SHA; fold it in so base_tag changes when the
|
||||||
|
# toolkit moves (otherwise a toolkit-only fix never lands).
|
||||||
|
echo "${{ needs.resolve-versions.outputs.mempalace_toolkit_ref }}"
|
||||||
} | sha256sum | cut -c1-12
|
} | sha256sum | cut -c1-12
|
||||||
)
|
)
|
||||||
BASE_TAG="base-${HASH}"
|
BASE_TAG="base-${HASH}"
|
||||||
@@ -117,6 +122,7 @@ jobs:
|
|||||||
toolkit_ref: ${{ steps.resolve.outputs.toolkit_ref }}
|
toolkit_ref: ${{ steps.resolve.outputs.toolkit_ref }}
|
||||||
extensions_ref: ${{ steps.resolve.outputs.extensions_ref }}
|
extensions_ref: ${{ steps.resolve.outputs.extensions_ref }}
|
||||||
studio_ref: ${{ steps.resolve.outputs.studio_ref }}
|
studio_ref: ${{ steps.resolve.outputs.studio_ref }}
|
||||||
|
mempalace_toolkit_ref: ${{ steps.resolve.outputs.mempalace_toolkit_ref }}
|
||||||
steps:
|
steps:
|
||||||
- name: Resolve pi version + companion refs
|
- name: Resolve pi version + companion refs
|
||||||
id: resolve
|
id: resolve
|
||||||
@@ -151,6 +157,16 @@ jobs:
|
|||||||
[ -n "$EXTENSIONS_REF" ] || EXTENSIONS_REF=main
|
[ -n "$EXTENSIONS_REF" ] || EXTENSIONS_REF=main
|
||||||
echo "toolkit_ref=${TOOLKIT_REF}" >> "$GITHUB_OUTPUT"
|
echo "toolkit_ref=${TOOLKIT_REF}" >> "$GITHUB_OUTPUT"
|
||||||
echo "extensions_ref=${EXTENSIONS_REF}" >> "$GITHUB_OUTPUT"
|
echo "extensions_ref=${EXTENSIONS_REF}" >> "$GITHUB_OUTPUT"
|
||||||
|
# Resolve mempalace-toolkit main HEAD to a SHA. UNLIKE the others,
|
||||||
|
# mempalace-toolkit is cloned in Dockerfile.base, so this SHA is
|
||||||
|
# ALSO folded into the base-decide hash to force a base rebuild
|
||||||
|
# when the toolkit moves (without it, a toolkit-only fix silently
|
||||||
|
# fails to land unless Dockerfile.base itself changes).
|
||||||
|
MEMPALACE_TOOLKIT_REF=$(curl -sf -H "Authorization: token ${GITEA_BUILD_TOKEN:-${GITHUB_TOKEN:-}}" \
|
||||||
|
"https://gitea.jordbo.se/api/v1/repos/joakimp/mempalace-toolkit/commits?limit=1&sha=main" \
|
||||||
|
| jq -r '.[0].sha // "main"' 2>/dev/null || echo "main")
|
||||||
|
[ -n "$MEMPALACE_TOOLKIT_REF" ] || MEMPALACE_TOOLKIT_REF=main
|
||||||
|
echo "mempalace_toolkit_ref=${MEMPALACE_TOOLKIT_REF}" >> "$GITHUB_OUTPUT"
|
||||||
# Resolve pi-studio (omaclaren/pi-studio) main HEAD to a SHA for
|
# Resolve pi-studio (omaclaren/pi-studio) main HEAD to a SHA for
|
||||||
# the :latest-studio variant — same cache-busting rationale.
|
# the :latest-studio variant — same cache-busting rationale.
|
||||||
STUDIO_REF=$(curl -sf -H "Accept: application/vnd.github.sha" \
|
STUDIO_REF=$(curl -sf -H "Accept: application/vnd.github.sha" \
|
||||||
@@ -161,10 +177,11 @@ jobs:
|
|||||||
echo "Resolved PI_FORK_REF=${FORK_REF}, PI_OBSMEM_REF=${OBSMEM_REF}"
|
echo "Resolved PI_FORK_REF=${FORK_REF}, PI_OBSMEM_REF=${OBSMEM_REF}"
|
||||||
echo "Resolved PI_TOOLKIT_REF=${TOOLKIT_REF}, PI_EXTENSIONS_REF=${EXTENSIONS_REF}"
|
echo "Resolved PI_TOOLKIT_REF=${TOOLKIT_REF}, PI_EXTENSIONS_REF=${EXTENSIONS_REF}"
|
||||||
echo "Resolved PI_STUDIO_REF=${STUDIO_REF}"
|
echo "Resolved PI_STUDIO_REF=${STUDIO_REF}"
|
||||||
|
echo "Resolved MEMPALACE_TOOLKIT_REF=${MEMPALACE_TOOLKIT_REF}"
|
||||||
|
|
||||||
# ── Phase 2: build & push base (multi-arch), only when needed ──────
|
# ── Phase 2: build & push base (multi-arch), only when needed ──────
|
||||||
build-base:
|
build-base:
|
||||||
needs: [base-decide]
|
needs: [base-decide, resolve-versions]
|
||||||
if: needs.base-decide.outputs.need_build == 'true'
|
if: needs.base-decide.outputs.need_build == 'true'
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
container:
|
container:
|
||||||
@@ -206,6 +223,7 @@ jobs:
|
|||||||
shell: bash
|
shell: bash
|
||||||
env:
|
env:
|
||||||
BASE_TAG_FULL: ${{ env.IMAGE }}:${{ needs.base-decide.outputs.base_tag }}
|
BASE_TAG_FULL: ${{ env.IMAGE }}:${{ needs.base-decide.outputs.base_tag }}
|
||||||
|
MEMPALACE_TOOLKIT_REF: ${{ needs.resolve-versions.outputs.mempalace_toolkit_ref }}
|
||||||
run: |
|
run: |
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
# 3-attempt retry around `docker buildx build --push` for transient
|
# 3-attempt retry around `docker buildx build --push` for transient
|
||||||
@@ -219,6 +237,7 @@ jobs:
|
|||||||
if docker buildx build \
|
if docker buildx build \
|
||||||
--platform linux/amd64,linux/arm64 \
|
--platform linux/amd64,linux/arm64 \
|
||||||
--file Dockerfile.base \
|
--file Dockerfile.base \
|
||||||
|
--build-arg MEMPALACE_TOOLKIT_REF="${MEMPALACE_TOOLKIT_REF}" \
|
||||||
--push \
|
--push \
|
||||||
--tag "${BASE_TAG_FULL}" \
|
--tag "${BASE_TAG_FULL}" \
|
||||||
.; then
|
.; then
|
||||||
|
|||||||
@@ -11,6 +11,43 @@ Pre-v1.0.0 tags followed the pi npm version (`v{pi_version}[letter]`).
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## Unreleased
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
- **`mempalace-toolkit` is now CI-resolved to a commit SHA**, closing a
|
||||||
|
silent-staleness footgun. It is the only companion cloned in
|
||||||
|
`Dockerfile.base` (all others are cloned in `Dockerfile.variant`), so it
|
||||||
|
was never run through the `resolve-versions` → build-arg plumbing. Its
|
||||||
|
ref stayed a literal `main`, and because the base only rebuilds when the
|
||||||
|
hash of `Dockerfile.base + rootfs/* + entrypoints` changes, a
|
||||||
|
toolkit-only fix would *not* land in the image unless `Dockerfile.base`
|
||||||
|
itself happened to change (as it did, incidentally, in v1.1.1).
|
||||||
|
|
||||||
|
Now `resolve-versions` resolves `mempalace-toolkit` `main` HEAD to a SHA
|
||||||
|
(new `mempalace_toolkit_ref` output), `base-decide` folds that SHA into
|
||||||
|
the base-tag hash (so a moved toolkit forces a base rebuild), and
|
||||||
|
`build-base` passes it as `--build-arg MEMPALACE_TOOLKIT_REF`. The base
|
||||||
|
clone switched from `git clone --branch` to a SHA-capable
|
||||||
|
`git fetch <ref> + checkout FETCH_HEAD` (the `--branch <40-char-SHA>`
|
||||||
|
footgun previously fixed in `Dockerfile.variant`, run 374).
|
||||||
|
|
||||||
|
Note: `base-decide` now depends on `resolve-versions`, so the base tag
|
||||||
|
reflects a live gitea API lookup. On an API blip it falls back to `main`
|
||||||
|
— which hashes differently than a SHA and triggers one *extra* rebuild,
|
||||||
|
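never a *missed* one (fail-toward-rebuild). Caveat to verify: if a base tag hashed from `main` during an *earlier* blip already exists in the registry, a later blip would resolve to that same tag and could reuse the older base.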
|
||||||
|
|
||||||
|
### Docs (no image change)
|
||||||
|
|
||||||
|
- Correct the MemPalace `diary_write` anyOf workaround watch-target in
|
||||||
|
`Dockerfile.base`: upstream PR #1735 was **closed unmerged** (2026-06-11),
|
||||||
|
so the old “remove once #1735 ships” TODO pointed at a dead PR. Issue #1728
|
||||||
|
is still open; PR #1717 is the current live candidate; mempalace PyPI latest
|
||||||
|
is still 3.4.0 (== our pin), so the workaround stays. Removal trigger is now
|
||||||
|
a PyPI release > 3.4.0 that actually strips the root-level anyOf.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## v1.1.1 — 2026-06-13
|
## v1.1.1 — 2026-06-13
|
||||||
|
|
||||||
Patch release: pi `0.79.1` → `0.79.3` (auto-resolved at build) plus the
|
Patch release: pi `0.79.1` → `0.79.3` (auto-resolved at build) plus the
|
||||||
|
|||||||
+27
-7
@@ -316,12 +316,18 @@ RUN if [ "${INSTALL_MEMPALACE}" = "true" ]; then \
|
|||||||
# kwarg alias so existing callers still work.
|
# kwarg alias so existing callers still work.
|
||||||
#
|
#
|
||||||
# Idempotent and self-deactivating: once upstream releases the fix the
|
# Idempotent and self-deactivating: once upstream releases the fix the
|
||||||
# regex no longer matches and this RUN is a silent no-op.
|
# regex no longer matches (and the WARN below fires) — that's the signal
|
||||||
# Upstream tracking:
|
# to delete this RUN.
|
||||||
|
# Upstream status (last checked 2026-06-14):
|
||||||
|
# issue #1728 — STILL OPEN (root-level anyOf rejected by Anthropic/Codex)
|
||||||
|
# PR #1735 — CLOSED UNMERGED 2026-06-11; do NOT watch it (dead)
|
||||||
|
# PR #1717 — open; the current live fix candidate to watch
|
||||||
|
# mempalace PyPI latest = 3.4.0 (== our pin) → no release contains the fix yet
|
||||||
# https://github.com/MemPalace/mempalace/issues/1728
|
# https://github.com/MemPalace/mempalace/issues/1728
|
||||||
# https://github.com/MemPalace/mempalace/pull/1735
|
# https://github.com/MemPalace/mempalace/pull/1717
|
||||||
# TODO: remove this RUN once a mempalace release containing PR #1735 is on
|
# TODO: remove this RUN once a mempalace release > 3.4.0 that actually strips
|
||||||
# PyPI and installed by the line above.
|
# the root-level anyOf ships on PyPI and is installed by the line above.
|
||||||
|
# Keep MEMPALACE_VERSION in lockstep with opencode-devbox when bumping.
|
||||||
RUN if [ "${INSTALL_MEMPALACE}" = "true" ]; then \
|
RUN if [ "${INSTALL_MEMPALACE}" = "true" ]; then \
|
||||||
MP_FILE="$(find /opt/uv-tools/mempalace -path '*/mempalace/mcp_server.py' | head -n1)" && \
|
MP_FILE="$(find /opt/uv-tools/mempalace -path '*/mempalace/mcp_server.py' | head -n1)" && \
|
||||||
if [ -z "$MP_FILE" ]; then echo "mempalace mcp_server.py not found" >&2; exit 1; fi && \
|
if [ -z "$MP_FILE" ]; then echo "mempalace mcp_server.py not found" >&2; exit 1; fi && \
|
||||||
@@ -336,9 +342,23 @@ RUN if [ "${INSTALL_MEMPALACE}" = "true" ]; then \
|
|||||||
# ── mempalace-toolkit — bash wrappers for session/docs mining ────────
|
# ── mempalace-toolkit — bash wrappers for session/docs mining ────────
|
||||||
ARG INSTALL_MEMPALACE_TOOLKIT=true
|
ARG INSTALL_MEMPALACE_TOOLKIT=true
|
||||||
ARG MEMPALACE_TOOLKIT_REF=main
|
ARG MEMPALACE_TOOLKIT_REF=main
|
||||||
|
# MEMPALACE_TOOLKIT_REF accepts EITHER a branch name OR a commit SHA. CI
|
||||||
|
# resolves it to a SHA (resolve-versions job) and folds that SHA into the
|
||||||
|
# base-decide hash so the base rebuilds when the toolkit moves. `git clone
|
||||||
|
# --branch <40-char-SHA>` fails ("Remote branch not found") — the same
|
||||||
|
# footgun fixed in Dockerfile.variant (v1.0.0-rerun, run 374) — so use
|
||||||
|
# `git fetch <ref> + checkout FETCH_HEAD`, which works for name and SHA.
|
||||||
RUN if [ "${INSTALL_MEMPALACE}" = "true" ] && [ "${INSTALL_MEMPALACE_TOOLKIT}" = "true" ]; then \
|
RUN if [ "${INSTALL_MEMPALACE}" = "true" ] && [ "${INSTALL_MEMPALACE_TOOLKIT}" = "true" ]; then \
|
||||||
git clone --depth 1 --branch "${MEMPALACE_TOOLKIT_REF}" \
|
rm -rf /opt/mempalace-toolkit && mkdir -p /opt/mempalace-toolkit && \
|
||||||
https://gitea.jordbo.se/joakimp/mempalace-toolkit.git /opt/mempalace-toolkit && \
|
git -C /opt/mempalace-toolkit init -q && \
|
||||||
|
git -C /opt/mempalace-toolkit remote add origin https://gitea.jordbo.se/joakimp/mempalace-toolkit.git && \
|
||||||
|
ok=0; for i in 1 2 3 4 5; do \
|
||||||
|
if git -C /opt/mempalace-toolkit fetch --depth 1 origin "${MEMPALACE_TOOLKIT_REF}" && \
|
||||||
|
git -C /opt/mempalace-toolkit checkout -q FETCH_HEAD; then ok=1; break; fi; \
|
||||||
|
echo "git fetch mempalace-toolkit@${MEMPALACE_TOOLKIT_REF} failed (attempt $i/5), retrying in $((i*5))s..."; \
|
||||||
|
sleep $((i*5)); \
|
||||||
|
done; \
|
||||||
|
[ "$ok" = "1" ] && \
|
||||||
ln -sf /opt/mempalace-toolkit/bin/mempalace-session /usr/local/bin/mempalace-session && \
|
ln -sf /opt/mempalace-toolkit/bin/mempalace-session /usr/local/bin/mempalace-session && \
|
||||||
ln -sf /opt/mempalace-toolkit/bin/mempalace-docs /usr/local/bin/mempalace-docs && \
|
ln -sf /opt/mempalace-toolkit/bin/mempalace-docs /usr/local/bin/mempalace-docs && \
|
||||||
chmod +x /opt/mempalace-toolkit/bin/mempalace-session /opt/mempalace-toolkit/bin/mempalace-docs && \
|
chmod +x /opt/mempalace-toolkit/bin/mempalace-session /opt/mempalace-toolkit/bin/mempalace-docs && \
|
||||||
|
|||||||
@@ -0,0 +1,302 @@
|
|||||||
|
# Design: single-writer MemPalace broker (cross-host serialization)
|
||||||
|
|
||||||
|
> **Status:** DRAFT / RFC — not yet implemented. Captures the design so it can be
|
||||||
|
> picked up later. Authored 2026-06-14.
|
||||||
|
> **Owner:** unassigned. **Tracking:** queue item #4 ("host-side mempalace-mcp
|
||||||
|
> daemon over a UNIX/shared socket").
|
||||||
|
|
||||||
|
## Problem
|
||||||
|
|
||||||
|
The pi-devbox container's `~/.mempalace` (`/home/developer/.mempalace`) is a
|
||||||
|
**virtiofs bind-mount of the host's `/Users/joakim/.mempalace`** (verified
|
||||||
|
2026-06-14 via `/proc/mounts`: `mac /home/developer/.mempalace virtiofs rw`).
|
||||||
|
Container pi and host-native pi therefore **read and write ONE shared palace** —
|
||||||
|
full memory parity already exists; nothing needs to be built to *enable* sharing.
|
||||||
|
|
||||||
|
The actual hazard is the opposite of sharing: **concurrency**. Two pi processes
|
||||||
|
(one native on the host, one in the container) can open the same
|
||||||
|
`chroma.sqlite3` / `knowledge_graph.sqlite3` and write at the same time. The
|
||||||
|
palace directory already shows the scars of this:
|
||||||
|
|
||||||
|
- `chroma.sqlite3.broken-20260505`
|
||||||
|
- many `*.corrupt-20260528`
|
||||||
|
- a long run of `*.drift-2026*`
|
||||||
|
- `locks/` with `mine_palace_*.lock` files, including a **stale** one.
|
||||||
|
|
||||||
|
These are mempalace's defensive lock + auto-snapshot/repair machinery firing
|
||||||
|
under concurrent access.
|
||||||
|
|
||||||
|
### Why a shared lock file is NOT sufficient
|
||||||
|
|
||||||
|
The container runs inside a Linux VM (OrbStack / Docker Desktop on macOS); the
|
||||||
|
palace bytes live on the macOS host, surfaced into the VM via virtiofs.
|
||||||
|
Consequences:
|
||||||
|
|
||||||
|
- A **UNIX-domain socket file** visible at `~/.mempalace/broker.sock` inside the
|
||||||
|
container is a *host-kernel* object. The container's kernel can see the inode
|
||||||
|
but **cannot connect to it** across the VM boundary.
|
||||||
|
- **flock / advisory lockfiles are not coherent across the host↔VM boundary.**
|
||||||
|
A lock taken on the host is not reliably seen in the container and vice-versa.
|
||||||
|
(The stale `mine_palace_*.lock` is direct evidence the existing lock scheme is
|
||||||
|
not bulletproof across this boundary.)
|
||||||
|
|
||||||
|
**Therefore the only trustworthy serialization is to route every write through a
|
||||||
|
single process.** That single process is the broker. The design question is *not*
|
||||||
|
"how do we lock" — it's "**where does the one writer live, and how does every pi
|
||||||
|
(host or container) reach it across the VM boundary?**"
|
||||||
|
|
||||||
|
## Goals
|
||||||
|
|
||||||
|
1. Exactly one process (the broker) opens the palace SQLite files at any time.
Writes are serialized through it; concurrent reads are fine, but they are also served by that one process.
|
||||||
|
2. Works in all three topologies on a given host:
|
||||||
|
- native pi only,
|
||||||
|
- native pi + container pi,
|
||||||
|
- container pi only.
|
||||||
|
3. pi configuration is **identical** in every topology (no per-environment MCP
|
||||||
|
config divergence).
|
||||||
|
4. No new corruption pathway introduced; degrade safely when the broker is
|
||||||
|
genuinely unreachable and there are no peers.
|
||||||
|
|
||||||
|
### Non-goals (for this iteration)
|
||||||
|
|
||||||
|
- opencode / opencode-devbox co-existence (see "Co-existence with opencode"
|
||||||
|
below — deferred until the pi case is solved).
|
||||||
|
- Multi-host palace replication. This is about one host's local palace.
|
||||||
|
- Changing mempalace's on-disk format or its public MCP tool surface.
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
pi (host) ─stdio─► mp-shim ─┐
                            ├─► mempalace-broker ─► chroma.sqlite3
pi (ctr)  ─stdio─► mp-shim ─┘   (SINGLE owner;      knowledge_graph.sqlite3
                                 serialized writer,  + in-memory HNSW index
                                 concurrent readers)
|
||||||
|
```
|
||||||
|
|
||||||
|
### `mempalace-broker`
|
||||||
|
|
||||||
|
A long-lived process that is the **only** opener of the palace SQLite files. It:
|
||||||
|
|
||||||
|
- runs the real mempalace engine,
|
||||||
|
- holds the HNSW index in memory,
|
||||||
|
- pushes all mutations through a single writer queue (reads may fan out),
|
||||||
|
- exposes the mempalace MCP JSON-RPC surface over one or more transports,
|
||||||
|
- is the canonical owner of palace state for the lifetime of the host session.
|
||||||
|
|
||||||
|
**Bonus:** a single always-resident owner also eliminates the stale-HNSW-index
|
||||||
|
problem that `mempalace_reconnect` exists to work around — there is never an
|
||||||
|
external writer to desync the in-memory index against.
|
||||||
|
|
||||||
|
### `mp-shim`
|
||||||
|
|
||||||
|
A tiny stdio↔transport adapter. pi's mempalace MCP config points at the shim
|
||||||
|
**everywhere, unchanged**. pi still believes it is speaking stdio MCP to a local
|
||||||
|
server; the shim forwards JSON-RPC to the broker over whichever transport is
|
||||||
|
available, and handles all discovery / startup / election complexity. Keeping
|
||||||
|
pi's config identical across topologies is a hard requirement (goal #3) and the
|
||||||
|
shim is what makes it possible.
|
||||||
|
|
||||||
|
## Canonical owner = the host
|
||||||
|
|
||||||
|
The broker's home is **always the host**, because:
|
||||||
|
|
||||||
|
1. The palace bytes physically live there (`/Users/joakim/.mempalace`).
|
||||||
|
2. The host outlives any container — ownership does not evaporate on
|
||||||
|
`docker compose down`.
|
||||||
|
3. Containers already have a route back to it (`host.docker.internal` and the
|
||||||
|
verified dssh ControlMaster bridge).
|
||||||
|
|
||||||
|
The broker binds **two listeners feeding one queue**:
|
||||||
|
|
||||||
|
- **AF_UNIX** at `$MEMPALACE_PATH/broker.sock` — for host-native pi (fast,
|
||||||
|
filesystem-perms-secured).
|
||||||
|
- a **cross-boundary** transport for container clients (below).
|
||||||
|
|
||||||
|
## Transport matrix
|
||||||
|
|
||||||
|
| Topology | Broker runs on | Host pi reaches it via | Container pi reaches it via |
|
||||||
|
|---|---|---|---|
|
||||||
|
| native only | host | AF_UNIX socket | — |
|
||||||
|
| native + container | host | AF_UNIX socket | SSH-forwarded socket (preferred) or TCP |
|
||||||
|
| container only | host (started via bridge) | — | SSH-forwarded socket or TCP |
|
||||||
|
|
||||||
|
### Cross-boundary transport options
|
||||||
|
|
||||||
|
**(a) SSH-forwarded UNIX socket over the existing dssh ControlMaster — PREFERRED.**
|
||||||
|
The container's `setup-lan-access.sh` already establishes a ControlMaster to the
|
||||||
|
host with `ControlPersist 4h`. The container shim forwards the host broker socket
|
||||||
|
over that master:
|
||||||
|
|
||||||
|
```
|
||||||
|
ssh -F ~/.ssh-local/config \
|
||||||
|
-L "$XDG_RUNTIME_DIR/mp.sock:/Users/joakim/.mempalace/broker.sock" host   # remote side is the HOST path; "$HOME" here would expand inside the container
|
||||||
|
```
|
||||||
|
|
||||||
|
then connects to the local forwarded socket. Auth = SSH key; nothing is
|
||||||
|
LAN-exposed; no extra shared secret needed; rides the persistent master so setup
|
||||||
|
cost is near-zero. Most portable across non-OrbStack hosts.
|
||||||
|
|
||||||
|
**(b) TCP on `host.docker.internal:PORT` — fallback.** Simpler, but the broker
|
||||||
|
must bind a routable interface (not just `127.0.0.1`), which requires a
|
||||||
|
**shared-secret token** to prevent other local/LAN processes from talking to it.
|
||||||
|
The token is written to `broker.json` in the virtiofs-mounted palace dir
|
||||||
|
(readable from both sides). More care required to get the bind + auth right.
|
||||||
|
|
||||||
|
## Discovery + on-demand start (the shim's algorithm)
|
||||||
|
|
||||||
|
Run by the shim on every pi session start, so it is correct regardless of who is
|
||||||
|
already running:
|
||||||
|
|
||||||
|
```
|
||||||
|
1. If $MEMPALACE_BROKER is set → use it verbatim (escape hatch).
|
||||||
|
2. Read $MEMPALACE_PATH/broker.json → endpoint + pid + token.
|
||||||
|
Try to connect (UNIX if host; forwarded-sock / TCP if container).
|
||||||
|
If connected & healthy → done.
|
||||||
|
3. Broker not reachable → START IT:
|
||||||
|
- On host: flock($MEMPALACE_PATH/broker.lock, non-blocking)
|
||||||
|
win → exec broker, wait for broker.json, connect.
|
||||||
|
lose → someone else is starting it; backoff + retry connect.
|
||||||
|
- In container: run `ssh host 'mempalace-broker --ensure'` (idempotent;
|
||||||
|
performs the SAME flock election ON THE HOST), then forward +
|
||||||
|
connect.
|
||||||
|
4. Last-resort fallback (no broker, cannot start one):
|
||||||
|
open the palace DIRECTLY — but ONLY after asserting this process is the sole
|
||||||
|
writer (no other live broker/pid recorded in broker.json). Degrades to
|
||||||
|
today's behaviour for the genuinely-alone case; never used when a broker
|
||||||
|
exists.
|
||||||
|
```
|
||||||
|
|
||||||
|
**Key trick:** host-side election uses `flock` on the host, where it is coherent
|
||||||
|
(same kernel) — bulletproof. The cross-boundary case **never relies on cross-VM
|
||||||
|
locking**; it relies on `ssh host 'broker --ensure'`, which runs the election on
|
||||||
|
the host where flock works. That is what makes the design topology-independent.
|
||||||
|
|
||||||
|
### Lifecycle
|
||||||
|
|
||||||
|
- Broker writes `broker.json` (endpoint + pid + token) **atomically** after
|
||||||
|
binding.
|
||||||
|
- Broker holds `broker.lock` for its entire lifetime → at most one host broker.
|
||||||
|
- Idle-exit after N minutes with no connected clients; the next client
|
||||||
|
re-elects. (Or keep-alive; idle-exit is friendlier on resources.)
|
||||||
|
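- Clients treat `broker.json` as stale (and re-run the election) if the pid it records is dead — checked on the host, where that pid is meaningful. The `flock` on `broker.lock` itself is released by the kernel when the broker exits, so only the metadata needs reclaiming.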
|
||||||
|
- Clients retry with backoff while a broker is mid-startup.
|
||||||
|
|
||||||
|
## Engine vs. shim — what the image must still ship
|
||||||
|
|
||||||
|
The component bundled in the images today is really **two separable pieces**:
|
||||||
|
|
||||||
|
- the **mempalace engine** — opens the SQLite files, computes embeddings, owns
|
||||||
|
the HNSW index (the heavy part: chromadb, embedding model, etc.), and
|
||||||
|
- the thin client surface pi actually talks to.
|
||||||
|
|
||||||
|
In the brokered design these split cleanly:
|
||||||
|
|
||||||
|
- the **broker** is the only thing that runs the *engine*;
|
||||||
|
- the **shim** is **engine-free** — it just forwards MCP JSON-RPC. It needs no
|
||||||
|
chromadb, no embedding model, no heavy deps. Embeddings/search happen
|
||||||
|
broker-side. (Potential image-slimming opportunity, though see below for why
|
||||||
|
we keep the engine bundled anyway.)
|
||||||
|
|
||||||
|
Whether the bundled engine is "used as-is" or merely fronted by the broker
|
||||||
|
**depends on who owns the broker**:
|
||||||
|
|
||||||
|
**A) Host runs the broker (native, or native+container — the common case).**
|
||||||
|
The *host's* engine is authoritative and used as-is. The broker is purely an
|
||||||
|
intermediate step so writes can't collide; the host engine does the read/write.
|
||||||
|
The container's **bundled engine is dormant** — the container uses only its shim
|
||||||
|
to reach the host broker. The engine in the image is not needed for this path.
|
||||||
|
|
||||||
|
**B) Container lands on a host with no mempalace (fresh-host case).**
|
||||||
|
The bundled engine earns its keep — you cannot conjure an engine onto the host
|
||||||
|
without installing one. Either the container runs the broker *itself*
|
||||||
|
(in-container ownership, bundled engine used as-is) or it falls back to degraded
|
||||||
|
direct mode (single writer, bundled engine used directly).
|
||||||
|
|
||||||
|
**Decision: keep shipping the engine in the images** — but for three specific
|
||||||
|
reasons, not because the brokered path needs it:
|
||||||
|
|
||||||
|
1. **Self-containedness** — pi-devbox's promise is "works on any host." A
|
||||||
|
container with no memory unless the host pre-installed mempalace breaks that,
|
||||||
|
especially for the Docker Hub audience.
|
||||||
|
2. **Fresh-host bootstrap** (case B) — no host engine to borrow.
|
||||||
|
3. **Degraded fallback** — the no-broker-reachable path opens the DB locally and
|
||||||
|
needs the engine present.
|
||||||
|
|
||||||
|
In the host-managed common case the bundled engine is just dormant insurance;
|
||||||
|
the shim is the only piece the container actively uses.
|
||||||
|
|
||||||
|
### Version-coherence note
|
||||||
|
|
||||||
|
Because **only the broker's engine ever writes**, its version defines the
|
||||||
|
on-disk format. Host-vs-bundled engine version skew is therefore **harmless in
|
||||||
|
the brokered path** (only one engine ever touches the bytes). Skew only bites in
|
||||||
|
**degraded direct mode**, where the container writes with a possibly-different
|
||||||
|
engine version than the host would. This argues for the broker pinning/owning
|
||||||
|
the authoritative engine version and treating the bundled engine as
|
||||||
|
fallback-only.
|
||||||
|
|
||||||
|
> Partially resolves the "where the broker binary ships" open question below:
|
||||||
|
> the **shim** must ship on both sides; the **engine** must ship on the host
|
||||||
|
> (to run the broker) and stays bundled in the image as fallback/bootstrap
|
||||||
|
> insurance, not as the authoritative writer in the common case.
|
||||||
|
|
||||||
|
## The genuinely hard case
|
||||||
|
|
||||||
|
**Container-only with no SSH bridge configured** (e.g. plain Linux Docker,
|
||||||
|
`HOST_SSH_USER` unset, no `host.docker.internal`). The container cannot start or
|
||||||
|
reach a host broker. Options, none free:
|
||||||
|
|
||||||
|
1. **Require the bridge** for multi-writer container setups, and document it as a
|
||||||
|
precondition. Reasonable: pi-devbox already ships `setup-lan-access.sh` and
|
||||||
|
the bridge is the supported path.
|
||||||
|
2. **Run the broker inside the container**, publishing a Docker port the host can
|
||||||
|
later reach. Works, but inverts ownership and the broker dies with the
|
||||||
|
container — only acceptable if containers are the *sole* writers on that host.
|
||||||
|
3. **Accept degraded mode** (algorithm step 4): a lone container with no peers
|
||||||
|
has no concurrency, so direct access is safe *as long as* nothing else opens
|
||||||
|
the palace concurrently. The host shim also checks `broker.json` before
|
||||||
|
opening directly, so a later host pi will not silently start a second
|
||||||
|
uncoordinated writer.
|
||||||
|
|
||||||
|
**Summary:** fully robust for native-only, native+container, and
|
||||||
|
container-only-with-bridge. The only residual sharp edge is container-only
|
||||||
|
*without* a bridge *and* a future concurrent host writer — intrinsic (no shared
|
||||||
|
coherent lock exists across that boundary), best handled by mandating the bridge
|
||||||
|
rather than pretending file locks work.
|
||||||
|
|
||||||
|
## Co-existence with opencode / opencode-devbox (DEFERRED — context only)
|
||||||
|
|
||||||
|
The palace is shared by more than pi. opencode (native) and opencode-devbox
|
||||||
|
(container) also write to the same `~/.mempalace`. **Assumption to verify:**
|
||||||
|
opencode sessions write to **different wings** than pi sessions (pi uses
|
||||||
|
`wing_pi`, diaries per-agent, etc.), so cross-tool intermixing into the *same*
|
||||||
|
destination may be a non-issue at the application level.
|
||||||
|
|
||||||
|
However, the corruption risk here is at the **SQLite-file level, not the wing
|
||||||
|
level** — two processes writing different wings of the *same* `chroma.sqlite3`
|
||||||
|
concurrently is still a concurrent write to one file. So the broker, once it
|
||||||
|
exists, is the right serialization point for opencode too: opencode's mempalace
|
||||||
|
client would route through the same broker via the same shim mechanism.
|
||||||
|
|
||||||
|
**Decision:** do not design for opencode co-existence yet. Resolve the pi case
|
||||||
|
first; then revisit whether opencode clients adopt the same shim. The residual
|
||||||
|
risk in the interim is native + container *opencode* sessions writing the same
|
||||||
|
palace simultaneously — explicitly deferred ("cross that bridge later").
|
||||||
|
|
||||||
|
## Open questions / TODO before implementation
|
||||||
|
|
||||||
|
- Does the mempalace engine expose an embeddable entrypoint suitable for running
|
||||||
|
inside a long-lived broker, or does the broker wrap the existing MCP server
|
||||||
|
binary and multiplex stdio clients onto it? (Affects whether reads can truly
|
||||||
|
fan out or are also serialized.)
|
||||||
|
- Idle-exit timeout default + whether to expose it via env.
|
||||||
|
- `broker.json` schema + atomic-write + stale-pid-reclaim details.
|
||||||
|
- TCP-path token handling and safe bind interface selection on Linux Docker
|
||||||
|
(`--add-host=host.docker.internal:host-gateway`).
|
||||||
|
- Where the broker binary ships: baked into `Dockerfile.base`? host install via
|
||||||
|
pi-toolkit / mempalace-toolkit? Both, since both sides need the shim and the
|
||||||
|
host needs the broker.
|
||||||
|
- Smoke-test plan: prove single-writer invariant under a deliberate concurrent
|
||||||
|
host+container write storm (should produce zero `.corrupt`/`.drift` snapshots).
|
||||||
Reference in New Issue
Block a user