From 7a8de0463f4c7ca5d512184f21d99e7685ab6cdc Mon Sep 17 00:00:00 2001 From: Joakim Persson Date: Mon, 27 Apr 2026 20:05:45 +0200 Subject: [PATCH] Separate ChromaDB model cache into its own named volume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ONNX embedding model (~79 MB) downloads to ~/.cache/chroma/ on first mempalace search. Without persistence it re-downloads on every container recreation. Add a separate devbox-chroma-cache volume rather than mixing it into the palace data volume — model cache is disposable (delete and re-download), palace data is precious (back up and migrate). Both volumes are commented out by default (opt-in). Updated README.md storage section to explain the two-volume split and the air-gapped pre-population path. Added chroma cache row to DOCKER_HUB.md data storage table. --- DOCKER_HUB.md | 1 + README.md | 17 ++++++++++++++--- docker-compose.shared.yml | 4 ++++ docker-compose.yml | 7 +++++++ 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/DOCKER_HUB.md b/DOCKER_HUB.md index 6041b12..4a9fdd4 100644 --- a/DOCKER_HUB.md +++ b/DOCKER_HUB.md @@ -231,6 +231,7 @@ Understanding what survives container restarts and what doesn't: | `/home/developer/.local/share/zoxide` | Named volume `devbox-zoxide` | ✅ Yes — Docker volume | Zoxide directory history (`z ` jump targets) | | `/home/developer/.local/share/nvim` | Named volume `devbox-nvim-data` | ✅ Yes — Docker volume | Neovim plugins, Mason LSP installs, Lazy plugin cache | | `/home/developer/.mempalace` | Named volume `devbox-palace` (if configured) | ✅ Yes — Docker volume | MemPalace conversation memory, knowledge graph, embeddings | +| `/home/developer/.cache/chroma` | Named volume `devbox-chroma-cache` (if configured) | ✅ Yes — Docker volume | ChromaDB ONNX embedding model (~79 MB, downloaded on first use) | | `/home/developer/.local/share/uv` | Named volume (if configured) | ✅ Yes — Docker volume | Python installs, uv tool 
installs | | `/home/developer/.rustup` | Named volume (if configured) | ✅ Yes — Docker volume | Rust toolchains | | `/home/developer/.cargo` | Named volume (if configured) | ✅ Yes — Docker volume | Cargo binaries, registry cache | diff --git a/README.md b/README.md index 9febd99..af25774 100644 --- a/README.md +++ b/README.md @@ -495,9 +495,20 @@ Each workspace gets its own isolated "wing" — memories never leak between proj ### Storage -- **Palace data** (ChromaDB vectors, SQLite knowledge graph, drawers): `~/.mempalace/` — persists via the `devbox-palace` named volume -- **Embedding model** (~300 MB): downloaded on first use, cached inside the palace directory -- **No API keys required** for core functionality (local embeddings via ONNX) +Two separate named volumes keep different data classes apart: + +- **Palace data** (`~/.mempalace/`): ChromaDB vectors, SQLite knowledge graph, drawers. This is your memory — back it up, treat it as precious. Persists via the `devbox-palace` named volume. +- **Embedding model cache** (`~/.cache/chroma/`): ONNX model (~79 MB), downloaded automatically on first search. Disposable — blow it away and it re-downloads in ~4 seconds. Persists via the `devbox-chroma-cache` named volume so you don't re-download on every container recreation. +- **No API keys required** for core functionality (local embeddings via ONNX). + +Both volumes are commented out by default in `docker-compose.yml` — uncomment to enable: + +```yaml +- devbox-palace:/home/developer/.mempalace +- devbox-chroma-cache:/home/developer/.cache/chroma +``` + +**Air-gapped environments:** pre-populate the `devbox-chroma-cache` volume with the `onnx_models/all-MiniLM-L6-v2/` directory, so that the model ends up at `~/.cache/chroma/onnx_models/all-MiniLM-L6-v2/` inside the container. The palace volume needs no pre-population.
## Shell defaults diff --git a/docker-compose.shared.yml b/docker-compose.shared.yml index c76d3e9..25a915a 100644 --- a/docker-compose.shared.yml +++ b/docker-compose.shared.yml @@ -64,6 +64,9 @@ services: # Optional: persist MemPalace data (conversation memory, knowledge graph) # - devbox-palace:/home/developer/.mempalace + # Optional: persist ChromaDB embedding model cache (~79 MB) + # - devbox-chroma-cache:/home/developer/.cache/chroma + # Optional: AWS credentials (per-user if available) # - ${HOME}/${SIGNUM}/.aws:/home/developer/.aws @@ -74,3 +77,4 @@ volumes: devbox-nvim-data: devbox-uv: # devbox-palace: + # devbox-chroma-cache: diff --git a/docker-compose.yml b/docker-compose.yml index f4c7d1c..5c7b83d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -96,6 +96,12 @@ services: # embeddings). Without this, palace data is lost on container recreation. # - devbox-palace:/home/developer/.mempalace + # Optional: persist ChromaDB embedding model cache (~79 MB, downloaded on + # first mempalace search). Without this, the model re-downloads on every + # container recreation. Separate from palace data — model cache is + # disposable, palace data is precious. + # - devbox-chroma-cache:/home/developer/.cache/chroma + # Optional: AWS credentials/SSO config (not read-only — SSO writes token cache) # - ~/.aws:/home/developer/.aws @@ -107,6 +113,7 @@ volumes: devbox-nvim-data: devbox-uv: # devbox-palace: + # devbox-chroma-cache: # devbox-rustup: # devbox-cargo: # devbox-vscode: