From 5145b9a7dbbceef4cd1ef04fe58f0505c45b0f2c Mon Sep 17 00:00:00 2001 From: Storme-bit Date: Fri, 17 Apr 2026 03:46:17 -0700 Subject: [PATCH] update documentation --- .vs/slnx.sqlite | Bin 0 -> 90112 bytes .vs/slnx.sqlite-journal | Bin 0 -> 21032 bytes docs/README.md | 28 +- docs/architecture/overview.md | 80 +++-- docs/deployment/homelab.md | 116 +++++-- .../{overview.md => homelab-overview.md} | 15 +- docs/services/API-routes.md | 283 ++++++++++++++++++ docs/services/Memory-isolation.md | 128 ++++++++ docs/services/chat-client.md | 198 +++++------- docs/services/embedding-service.md | 83 ++--- docs/services/inference-service.md | 140 ++------- docs/services/memory-service.md | 276 ++++------------- docs/services/orchestration-service.md | 269 ++++------------- 13 files changed, 822 insertions(+), 794 deletions(-) create mode 100644 .vs/slnx.sqlite create mode 100644 .vs/slnx.sqlite-journal rename docs/homelab/{overview.md => homelab-overview.md} (92%) create mode 100644 docs/services/API-routes.md create mode 100644 docs/services/Memory-isolation.md diff --git a/.vs/slnx.sqlite b/.vs/slnx.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..918f871d0e8435d715fe3a687c50fe24966d80b7 GIT binary patch literal 90112 zcmeI4&u`mC7RN=~F>Tq3)25Dq^`Sfzh`F(nHbM5#qQJ@$ov@K*SwEZ@UO}McaY9t0 z5{XV@pnFiV4X_PRp#Q?2dfoqG|A$`pu-MC9iUo>ZIzw{!V@O4+v+HcNUm~_e&YPL{ zKJ(t=8`4f~w`AHxIqvnlhOOL6T}nw(>i3G0N~I+Fb%lQM<1+m)$4}@_);{i#QrD0F zd4WnvD?c&WcUOK|{%-lw(!KOIi?0^GmuKg9XTP2Kai%)+*|?$5{y$G(foVAW;G&$p zpOc2WrgcP~)lR#Iy-txFo0e&tJ*(w@v>tAm9ikt#^dzB`LRHJxHAOEMwFk<Qh`qY@KrDuta=iP>7-DdcOK z`qih)ayFNfzRuakVTXvOi{}|nnRndu8a$?yqB*Q?&hoOH*eooeT4{&AB@QbCWisCK&hP$ zY|`}+ikBElCFp$?mZe_XVDVY)eKFbqE>!4sPP$fHE|XS=Hwja*)?gAJZ(!GZmkL zM_%3Rl7Ve>A2$`2JnH&3&7?G6F;C1b%Gn#L^nAtfL_RGWuf@Lc3$9{mZ&@2 z@<84YUZe>;&-DynVb`1>&3-sG%kp`BQzz-nmh59$&T2WnWTGRtDn_4Jc8jHLv27u7 z{Y0to2%)7E#aO|Rba&pMw=+itu>YT z-NuYI#GCOgCN&Ad5qOl>^MV7@MalaA22peWmAurfhA*L?;H+D|0$tmQIL7P zYEda_Tlq$*uC!13PQ=)o 
z?{*j!v}$E$K6~w&wC{M%;0dkKNNYfzoDhpvZPC-&h~Ef}>lStX4cb#3-jU?&+M4v7 zIAeOcc|T?%V;p!_LoVlCIX6$+Gdd@99R=@f(7oC_=?wiYt^6ZJ|KR`u5C8!X009sH z0T2KI5C8!X009vAl@quk&84npYxE6F*YKXoesulT_1o_L|A*Ad55IB)p*{$J00@8p z2!H?xfB*=900@8p2!KF@Kw6qzTl9VjAx&GPm4Bt^KO7(c0w4eaAOHd&00JNY0w4ea zAOHf>LEzHt)y2p!0kHS~^pE~=fB*=900@8p2!H?xfB*=900@8p2uv{n_WmFH|0!Nz zm<9qM00JNY0w4eaAOHd&00JNY0+ayu|8N5k009sH0T2KI5C8!X009sH0T7se0@(jg z{~E(W5C8!X009sH0T2KI5C8!X009ud-~Yo0KmY_l00ck)1V8`;KmY_l00cl_`U&9u z|MaghECc}%009sH0T2KI5C8!X009sH0lfc*4}bs&fB*=900@8p2!H?xfB*=9!1NQq z`~T@*V^|0RAOHd&00JNY0w4eaAOHd&00Mab4<7&l5C8!X009sH0T2KI5C8!X0D2w(>fy0YNy@9UZ+ToP0O^+p4Db`PM+gI|9`i@?v!)|NkI@OAPHAE_tZQh z{cvwC?nsUHwMbJ@D``O! zSH=Zc-pf}Dck|Wto40SPzQO`m-ev2g(>dKeF*@e4Nsf{Rh^`0kcaM5Wz>8bVTl4KU z84S!rv%^gEv5I1X560vIlfwX6pwvzWHtG5Z#Y+sO67)U`%Tljxu=uR@z8Gx)7b^5R zCtWKpmr1L`8%uJwMdPaF#Fe9I+psLsFBt7dq!m})jWSViIT~eR^T=^RF(>w+z89ap zslI5WorrtcbRtgdC7v%uBTj6l^Ye%ko5C?OLBx3rnxu2gb0QJv%sbv;-ppTkSci?` z{vOeW3;NE>FT~0D$-p+ckDCfh9(8@2W>T83m?!2I(uxn0`W6_4XA2cq=fKM+s6+brcMb=n|CG?OD9F5CwWt)et$d?YSK23iCt_`$rfHD(X65WhYtrz2 zCroO@woU7y6VqP3?OvIS`cquaaU-YPcRP#@PyWAq%|N|>#BYSgb&ERx2JNX1??`fXZB2SkoH0G!ydN`>F%G<|A(!*6oSUcZ8J!clj)Hf( zH%VvzpZYqrQeOUM>EBB`3peFI&i`$0d-gB%;<+9#SKpIYbGe0A##57g!RqviVLx*2 zekK`+=N{udUAwP{ZHh6-r0?tXyEJABjcQHb)8aDunpdW=*s9U(MWL?5bAghtDVxS1 zQh)158_KBQW%32r=Uox^?#;Te6cK5*A1Pbao$c7HLDbS(g!*@$RWnRWcDR}b=j_^6 z?Rs2uuRvn$?4UBgT<6#{jtyUasa*ZTRfYbjT=#9I#)M6`9|e6~BZc*3(v?6)FPG^y z^fS8gjNxFCTQ$0`52OYtvy>r}9Rz9!tHS*M@_J6nw=$!KFJS4JDomNJ7ed{u*ctarx>oa+qt**DyJ z^J+P49pC3fZu?x&HRE-UQeL-}3ho2r3;8qDHQwE(u04o8IjS4|hr~|04UKqIQ|MVX zc{WK8wsW}o4t?x!EArUEsSm5=KWsgz%@_3~ahZ?K2(5tDw{LPA-y>&}^bsz+J#60(bm4E5d zsBrfs5@X(PjR)>8-^M0|S8~`-9%sM!k)??9KbwdGbI$Sz(5LPBXQ12L-H1f=Vntz7 z@w3Z_-0@#bHvW^R=yNIWnbQXsWxV{***l$3W)WXnjNGZ=0{H*`&J{N%0|5{K0T2KI5C8!X009sH0T2Lzw?Y8#|KAFBObP-Z00JNY U0w4eaAOHd&00JNY0_RHLXNWs!LjV8( literal 0 HcmV?d00001 diff --git a/.vs/slnx.sqlite-journal b/.vs/slnx.sqlite-journal new file mode 100644 index 
0000000000000000000000000000000000000000..39237b15308d25a7cf9681f38b9c73fec0d50fcd GIT binary patch literal 21032 zcmeI(y$QoG5C+h5jPqkKNtGsb7RV4;ATxyE#+CalodF_&z&jjI2qdBE-l_UP5!-m& zpWkx2M(oOR*)Hrd3mnH2oNAZfB*pk1PBly&~kubsucbOcn|~#5FkK+009C7 z2oNAZAhkft0s5K)bmw1`X%HYlfB*pk1PBlyK!5-N0*M61uD_jL?{%G*Pt5`T-~kMM literal 0 HcmV?d00001 diff --git a/docs/README.md b/docs/README.md index 86e10c8..f6af35a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,13 +1,23 @@ # NexusAI Documentation -## Contents +## Architecture - [Architecture Overview](architecture/overview.md) -- [Services](services/) - - [Shared Package](services/shared.md) - - [Memory Service](services/memory-service.md) - - [Embedding Service](services/embedding-service.md) - - [Inference Service](services/inference-service.md) - - [Orchestration Service](services/orchestration-service.md) - - [Chat Client](services/chat-client.md) -- [Deployment](deployment/homelab.md) \ No newline at end of file + +## Services + +- [Shared Package](services/shared.md) +- [Memory Service](services/memory-service.md) +- [Embedding Service](services/embedding-service.md) +- [Inference Service](services/inference-service.md) +- [Orchestration Service](services/orchestration-service.md) +- [Chat Client](services/chat-client.md) + +## Reference + +- [API Routes](services/API-routes.md) — all HTTP endpoints across all services +- [Memory Isolation](services/Memory-isolation.md) — project-scoped memory model + +## Deployment + +- [Homelab](deployment/homelab.md) \ No newline at end of file diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md index 19bf479..25e54aa 100644 --- a/docs/architecture/overview.md +++ b/docs/architecture/overview.md @@ -1,56 +1,80 @@ # Architecture Overview -NexusAI is a modular, memory-centric AI system designed for persistent, context-aware conversations. It separates concerns across different services that can be independently deployed and evolved. 
+NexusAI is a modular, memory-centric AI assistant designed for persistent, +context-aware conversations. It separates concerns across independent services +that can be evolved and deployed separately. ## Core Design Principles -- **Decoupled layers:** memory, inference, and orchestration are independent of each other -- **Hybrid retrieval:** semantic similarity (Qdrant) combined with structured storage (SQLite) for flexible, ranked context assembly -- **Home lab:** services are distributed across nodes according to available hardware and resources +- **Decoupled layers** — memory, inference, and orchestration are independent of each other +- **Hybrid retrieval** — semantic similarity (Qdrant) combined with structured storage (SQLite) for flexible, ranked context assembly +- **Project-scoped memory** — sessions can be grouped into projects with shared or isolated memory pools +- **Home lab first** — services are distributed across nodes according to available hardware ## Memory Model -Memory is split between SQLite and Qdrant, which work together as a pair: +Memory is split between SQLite and Qdrant, which always work as a pair: -- **SQLite:** episodic interactions, entities, relationships, summaries -- **Qdrant:** vector embeddings for semantic similarity search +- **SQLite** — episodic interactions, entities, relationships, summaries, sessions, projects +- **Qdrant** — vector embeddings for semantic similarity search -When recalling memory, Qdrant returns IDs and similarity scores, which are used to fetch -full content from SQLite. Neither SQLite nor Qdrant work in isolation. +When recalling memory, Qdrant returns IDs and similarity scores, which are used +to fetch full content from SQLite. Neither store works in isolation. + +Episode embeddings carry a `{ sessionId, createdAt }` payload in Qdrant, +enabling per-session and per-project filtering at search time. See +`../services/Memory-isolation.md` for how project-scoped retrieval works. 
## Hardware Layout | Node | Address | Role | |---|---|---| -| Main PC | local | Primary inference (RTX A4000 16GB) | -| Mini PC 1 | 192.168.0.81 | Memory service, Embedding service, Qdrant | -| Mini PC 2 | 192.168.0.205 | Orchestration service, Chat Client, Gitea | +| Main PC | 192.168.0.79 | Primary inference — RTX A4000 16GB | +| Mini PC 1 | 192.168.0.81 | Memory service, Embedding service, Qdrant, Ollama | +| Mini PC 2 | 192.168.0.205 | Orchestration service, Chat Client, Caddy, Authelia, Gitea | ## Service Communication -All services expose a REST HTTP API. The orchestration service is the single entry point — -clients do not talk directly to the memory or inference services. +All services expose a REST HTTP API. The orchestration service is the single +entry point — clients never talk directly to memory or inference services. ``` -Client -└─► Orchestration (:4000) - ├─► Chat Client (static files, /srv/nexusai) - ├─► Memory Service (:3002) - │ ├─► Qdrant (:6333) - │ └─► SQLite - ├─► Embedding Service (:3003) - │ └─► Ollama - └─► Inference Service (:3001) - └─► Ollama +Client (browser) +└─► Caddy (HTTPS + Authelia SSO) + └─► Orchestration (:4000) — Mini PC 2 + ├─► Memory Service (:3002) — Mini PC 1 + │ ├─► SQLite (local file) + │ └─► Qdrant (:6333) — Mini PC 1 + ├─► Embedding Service (:3003) — Mini PC 1 + │ └─► Ollama (:11434) — Mini PC 1 + ├─► Inference Service (:3001) — Main PC + │ └─► llama-server (:8080) — Main PC + └─► Qdrant (:6333) — Mini PC 1 (direct — semantic search) ``` +Note: Orchestration queries Qdrant directly for semantic search (bypassing +the memory service) but always fetches full episode content from the memory +service by ID after the vector search. 
+ ## Technology Choices | Concern | Choice | Reason | |---|---|---| -| Language | Node.js (JavaScript) | Familiar stack, async I/O suits service architecture | +| Language | Node.js (CommonJS) | Familiar stack, async I/O suits service architecture | | Package management | npm workspaces | Monorepo with shared code, no publishing needed | | Vector store | Qdrant | Mature, Docker-native, excellent Node.js client | -| Relational store | SQLite (better-sqlite3) | Zero-ops, fast, sufficient for single-user | -| LLM runtime | Ollama | Easiest local LLM management, serves embeddings too | -| Version control | Gitea (self-hosted) | Code stays on local network | \ No newline at end of file +| Relational store | SQLite (better-sqlite3) | Zero-ops, fast, sufficient for single-user scale | +| LLM inference | llama.cpp (`llama-server`) | Maximum GPU utilisation on RTX A4000, OpenAI-compatible API | +| Embeddings | Ollama (`nomic-embed-text`) | Co-located with memory service on Mini PC 1, 768-dim Cosine | +| Reverse proxy | Caddy + Authelia | Automatic HTTPS, SSO/MFA for all exposed services | +| Version control | Gitea (self-hosted) | Code stays on local network | + +## Current State + +The core four-service architecture is complete and operational. Key capabilities: + +- **Hybrid memory retrieval** — recent episodes + semantic search combined into every prompt +- **Projects** — sessions grouped with shared or isolated memory pools +- **Auto-naming** — sessions named automatically from first exchange via inference +- **Project-scoped semantic search** — Qdrant filtered by project session IDs +- **Chat client** — view-based UI with sidebar navigation, project views, session management \ No newline at end of file diff --git a/docs/deployment/homelab.md b/docs/deployment/homelab.md index cc43870..565a9a4 100644 --- a/docs/deployment/homelab.md +++ b/docs/deployment/homelab.md @@ -7,50 +7,73 @@ services appropriate for its hardware. 
## Mini PC 1 — 192.168.0.81 -Runs: Qdrant, Memory Service, Embedding Service +Runs: Qdrant, Memory Service, Embedding Service, Ollama + ```bash -ssh username@192.168.0.81 -cd ~/nexusai +ssh storme@192.168.0.81 docker compose -f docker-compose.mini1.yml up -d # Qdrant -npm run memory -npm run embedding +npm run memory # port 3002 +npm run embedding # port 3003 +ollama serve # port 11434 — must bind 0.0.0.0 (OLLAMA_HOST=0.0.0.0) ``` +> Ollama must be started with `OLLAMA_HOST=0.0.0.0` to accept connections +> from other services on the LAN. Without this, embedding requests from the +> memory service will be refused. + ## Mini PC 2 — 192.168.0.205 -Runs: Gitea, Orchestration Service, Chat Client (via Caddy) -```bash -ssh username@192.168.0.205 +Runs: Orchestration Service, Chat Client (via Caddy), Gitea, Caddy, Authelia -cd ~/gitea -docker compose up -d # Gitea +```bash +ssh storme@192.168.0.205 cd /opt/stacks/network docker compose up -d # Caddy, Authelia, and other network services -cd ~/nexusai -npm run orchestration +cd ~/nexusAI +npm run orchestration # port 4000 ``` -## Main PC +## Main PC — 192.168.0.79 -Runs: Ollama, Inference Service -```bash -ollama serve -npm run inference +Runs: Inference Service, llama-server + +```powershell +# Start llama-server first — inference service depends on it +.\llama-gpu\llama-server.exe ` + -m .\models\gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf ` + -ngl 99 --reasoning off --host 0.0.0.0 --port 8080 -c 64000 + +# Then start inference service +npm run inference # port 3001 ``` ## Chat Client Deployment -The chat client is a React + Vite app build to static files and served by Caddy on Mini PC 2 (Infrastructure node). It does not run as a Node process +The chat client is a React + Vite app built to static files and served by +Caddy on Mini PC 2. It does not run as a Node process. 
+ ```bash -# On dev machine or Mini PC 2 after git pull +# On Mini PC 2 after git pull cd ~/nexusAI/packages/chat-client -npm run build + +# Set production URL before building +VITE_ORCHESTRATION_URL=https://nexus.jellystorm.com npm run build + # Output lands in packages/chat-client/dist/ -# Caddy serves this directory directly via volume mount +# Caddy serves this directory directly via Docker volume mount ``` -Caddy config (`/opt/docker/caddy/Caddyfile`): + +> Do NOT set `VITE_ORCHESTRATION_URL` during local dev — Vite's proxy handles +> routing and setting the HTTPS domain will cause Authelia to intercept API +> requests, producing confusing JSON parse errors. + +## Caddy Configuration + +The Caddyfile on Mini PC 2 must include a handle block for each route prefix +the client needs to reach. Current required blocks for NexusAI: + ```caddy nexus.jellystorm.com { import authelia @@ -63,6 +86,14 @@ nexus.jellystorm.com { reverse_proxy 192.168.0.205:4000 } + handle /models* { + reverse_proxy 192.168.0.205:4000 + } + + handle /projects* { + reverse_proxy 192.168.0.205:4000 + } + handle { root * /srv/nexusai try_files {path} /index.html @@ -71,18 +102,45 @@ nexus.jellystorm.com { } ``` -The Caddy container mounts the dist directory via Docker volume: +When adding new top-level routes to the orchestration service, add a matching +handle block here and reload Caddy: + +```bash +caddy reload --config /path/to/Caddyfile +``` + +The Caddy container mounts the `dist` directory via Docker volume: + ```yaml - /home/storme/nexusAI/packages/chat-client/dist:/srv/nexusai ``` > After adding or changing volume mounts, a full `docker compose down caddy && docker compose up -d caddy` -> is required. Caddyfile-only changes only need `docker compose restart caddy`. - - +> is required. Caddyfile-only changes only need `caddy reload`. ## Environment Files -Each node needs a `.env` file in the relevant service package directory. -These are not committed to git. 
See each service's documentation for -required variables. \ No newline at end of file +Each service needs a `.env` file in its package directory. These are not +committed to git. See each service's documentation for required variables. + +| Service | Location | Key Variables | +|---|---|---| +| Memory | `packages/memory-service/.env` | `SQLITE_PATH`, `QDRANT_URL`, `EMBEDDING_SERVICE_URL` | +| Embedding | `packages/embedding-service/.env` | `OLLAMA_URL`, `EMBEDDING_MODEL` | +| Inference | `packages/inference-service/.env` | `INFERENCE_PROVIDER`, `INFERENCE_URL`, `DEFAULT_MODEL` | +| Orchestration | `packages/orchestration-service/src/.env` | `MEMORY_SERVICE_URL`, `EMBEDDING_SERVICE_URL`, `INFERENCE_SERVICE_URL`, `QDRANT_URL`, `MODELS_MANIFEST_PATH` | +| Chat client | `packages/chat-client/.env` | `VITE_ORCHESTRATION_URL` (production builds only) | + +## Models Manifest + +The models manifest (`models.json`) lives on the Main PC alongside the model +files, accessible to orchestration via an SMB mount at `/mnt/nexus-models`. + +```json +[ + { "value": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf", "label": "Gemma 4 26B Claude Distill" } +] +``` + +`value` must exactly match the model name as reported by `llama-server` +(including `.gguf` extension). No service restart needed to pick up changes. 
\ No newline at end of file diff --git a/docs/homelab/overview.md b/docs/homelab/homelab-overview.md similarity index 92% rename from docs/homelab/overview.md rename to docs/homelab/homelab-overview.md index 9ff6344..04739f8 100644 --- a/docs/homelab/overview.md +++ b/docs/homelab/homelab-overview.md @@ -39,21 +39,21 @@ All external access is routed through **Caddy** (reverse proxy) with **Authelia* |------|--------| | GPU | NVIDIA RTX A4000 | | Role | Primary AI inference node | -| Key Services | Ollama (inference) | +| Key Services | llama-server (llama.cpp), Inference Service | ### Mini PC 1 — Media Node (`192.168.0.81`) | Spec | Detail | |------|--------| | GPU | NVIDIA RTX 5050 | | Role | Media services, embeddings, vector storage | -| Key Services | Jellyfin, Nextcloud, Qdrant, arr stack, NexusAI memory/embedding | +| Key Services | Jellyfin, Nextcloud, Qdrant, arr stack, NexusAI memory/embedding, Ollama | | Storage | NVMe (OS) + 3x external HDDs (see [Storage Layout](#storage-layout)) | ### Mini PC 2 — Infrastructure Node (`192.168.0.205`) | Spec | Detail | |------|--------| -| Role | Network management, monitoring, auth, DNS, git | -| Key Services | Caddy, Authelia, Tailscale, Pihole, Grafana, Gitea | +| Role | Network management, monitoring, auth, DNS, git, NexusAI orchestration | +| Key Services | Caddy, Authelia, Tailscale, Pihole, Grafana, Gitea, NexusAI orchestration | | Storage | NVMe (OS only) | --- @@ -155,7 +155,8 @@ All external access is routed through **Caddy** (reverse proxy) with **Authelia* | Service | Notes | |---------|-------| -| Ollama | Runs LLM inference using the RTX A4000. Also serves `nomic-embed-text` embeddings (768-dim vectors) consumed by NexusAI's embedding service on Mini PC 1. | +| llama-server (llama.cpp) | Primary LLM inference using the RTX A4000. Started manually before the inference service. Serves the OpenAI-compatible API on port 8080. 
| +| Ollama | Runs on Mini PC 1, where it serves `nomic-embed-text` embeddings (768-dim vectors) consumed by NexusAI's embedding service on the same node. | --- @@ -234,7 +235,7 @@ Phase 1 focused on establishing a stable, secure, and observable foundation: - ✅ Self-hosted git (Gitea) - ✅ Media stack fully operational (Jellyfin, arr stack, Nextcloud) - ✅ Download pipeline with VPN isolation (Gluetun + qBittorrent) -- ✅ NexusAI foundation services running (Qdrant, Ollama) +- ✅ NexusAI foundation services running (Qdrant, Ollama, llama.cpp) - ✅ Container management across nodes (Portainer + agent) --- @@ -249,6 +250,6 @@ Phase 2 shifts focus to resilience, security hardening, and smart home integrati - **Additional security hardening** — Audit exposed services, tighten firewall rules, review Authelia policies - **IP webcam integration** — Add camera feeds into the homelab ecosystem - **Home Assistant** — Integrate smart home automation and sensor data -- **Continued NexusAI development** — Entities layer, embedding service, inference and orchestration buildout +- **Continued NexusAI development** — Entity extraction pipeline, summaries layer, SettingsView implementation > This section will be expanded as Phase 2 planning matures. \ No newline at end of file diff --git a/docs/services/API-routes.md b/docs/services/API-routes.md new file mode 100644 index 0000000..876cbfc --- /dev/null +++ b/docs/services/API-routes.md @@ -0,0 +1,283 @@ +# API Routes + +All HTTP endpoints across NexusAI services. Clients communicate only with +the orchestration service (port 4000) — memory, embedding, and inference +service routes are listed here for reference and direct debugging use. 
+ +--- + +## Orchestration Service — port 4000 + +### Health + +| Method | Path | Description | +|---|---|---| +| GET | /health | Service health check | + +### Chat + +| Method | Path | Description | +|---|---|---| +| POST | /chat | Send a message, receive full response | +| POST | /chat/stream | Send a message, receive SSE token stream | + +**POST /chat and POST /chat/stream — request body:** +```json +{ + "sessionId": "your-session-uuid", + "message": "Hello, my name is Tim.", + "model": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf", + "temperature": 0.7 +} +``` +`model` and `temperature` are optional. + +**POST /chat — response:** +```json +{ + "sessionId": "your-session-uuid", + "response": "Hello Tim! How can I help you today?", + "model": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf", + "tokenCount": 87 +} +``` + +**POST /chat/stream — response (SSE):** +``` +data: {"text":"Hello"} +data: {"text":" Tim"} +data: {"done":true,"model":"gemma-4-26B...gguf","tokenCount":87} +``` + +### Sessions + +| Method | Path | Description | +|---|---|---| +| GET | /sessions | Paginated session list | +| GET | /sessions/:sessionId/history | Paginated episode history for a session | +| PATCH | /sessions/:sessionId | Update session name and/or project assignment | +| DELETE | /sessions/:sessionId | Delete session and all its episodes | + +**GET /sessions — query params:** + +| Param | Default | Description | +|---|---|---| +| limit | 20 | Sessions per page | +| offset | 0 | Pagination offset | +| projectId | — | Filter by project (integer ID) | + +**PATCH /sessions/:sessionId — body:** +```json +{ "name": "My Session", "projectId": 3 } +``` +Either `name` or `projectId` is required. Both can be sent together. +Returns the updated session object. + +**GET /sessions/:sessionId/history — query params:** + +| Param | Default | Description | +|---|---|---| +| limit | 20 | Episodes per page | +| offset | 0 | Pagination offset | + +Returns `{ sessionId, episodes: [...] }`. 
Episodes ordered newest first. + +### Projects + +| Method | Path | Description | +|---|---|---| +| GET | /projects | Get all projects | +| POST | /projects | Create a new project | +| PATCH | /projects/:id | Update a project | +| DELETE | /projects/:id | Delete a project (nulls session assignments) | + +**POST /projects — body:** +```json +{ + "name": "My Project", + "description": "Optional description", + "colour": "#3d3a79", + "icon": null, + "isolated": 0 +} +``` +`name` is required. All other fields optional. `isolated` is `0` or `1`. +Returns `201` with the created project object. + +**PATCH /projects/:id — body:** same fields as POST, all optional. + +### Models + +| Method | Path | Description | +|---|---|---| +| GET | /models | Available models from `models.json` manifest | + +Returns array: `[{ "value": "model-name.gguf", "label": "Display Name" }]` + +--- + +## Memory Service — port 3002 + +Direct access is for debugging only. All client traffic goes through +orchestration. + +### Health + +| Method | Path | Description | +|---|---|---| +| GET | /health | Service health check | + +### Sessions + +| Method | Path | Description | +|---|---|---| +| POST | /sessions | Create a new session | +| GET | /sessions | Paginated session list with optional projectId filter | +| GET | /sessions/:id | Get session by internal ID | +| GET | /sessions/by-external/:externalId | Get session by external ID | +| PATCH | /sessions/by-external/:externalId | Update session fields | +| DELETE | /sessions/by-external/:externalId | Delete session (cascades to episodes) | + +> Route ordering: `by-external/:externalId` must be defined before `/:id` +> to prevent `by-external` being captured as an ID param. + +**POST /sessions — body:** +```json +{ "externalId": "unique-uuid", "metadata": {} } +``` + +**PATCH /sessions/by-external/:externalId — body:** +```json +{ "name": "Session Name", "projectId": 3 } +``` +Both fields are optional. 
Only provided fields are updated — other fields +are not touched. + +### Episodes + +| Method | Path | Description | +|---|---|---| +| POST | /episodes | Create episode + auto-embed into Qdrant | +| GET | /episodes/search?q=&limit= | FTS keyword search across all episodes | +| GET | /episodes/:id | Get episode by ID | +| GET | /sessions/:id/episodes?limit=&offset= | Paginated episodes for a session | +| DELETE | /episodes/:id | Delete an episode | + +> Route ordering: `/episodes/search` must be defined before `/episodes/:id`. + +**POST /episodes — body:** +```json +{ + "sessionId": 1, + "userMessage": "Hello", + "aiResponse": "Hi there!", + "tokenCount": 10 +} +``` + +### Projects + +| Method | Path | Description | +|---|---|---| +| POST | /projects | Create a new project | +| GET | /projects | Get all projects | +| GET | /projects/:id | Get project by ID | +| PATCH | /projects/:id | Update a project | +| DELETE | /projects/:id | Delete project + null session assignments | + +Same request/response shape as orchestration `/projects` above. + +### Entities + +| Method | Path | Description | +|---|---|---| +| POST | /entities | Upsert entity (creates or updates by name + type) | +| GET | /entities/by-type/:type | All entities of a given type | +| GET | /entities/:id | Get entity by ID | +| DELETE | /entities/:id | Delete entity (cascades to relationships) | + +> Route ordering: `/entities/by-type/:type` must be before `/entities/:id`. 
+ +**POST /entities — body:** +```json +{ + "name": "NexusAI", + "type": "project", + "notes": "My AI memory project", + "metadata": {} +} +``` + +### Relationships + +| Method | Path | Description | +|---|---|---| +| POST | /relationships | Upsert a relationship between two entities | +| GET | /entities/:id/relationships | All relationships for an entity | +| DELETE | /relationships | Delete a specific relationship | + +**POST /relationships — body:** +```json +{ "fromId": 1, "toId": 2, "label": "uses", "metadata": {} } +``` + +**DELETE /relationships — body:** +```json +{ "fromId": 1, "toId": 2, "label": "uses" } +``` + +Relationships are identified by the composite key `(fromId, toId, label)`. +Delete uses request body rather than URL params since this three-part key +is awkward to encode in a path. + +--- + +## Embedding Service — port 3003 + +| Method | Path | Description | +|---|---|---| +| GET | /health | Service health check | +| POST | /embed | Embed a single text string | +| POST | /embed/batch | Embed an array of text strings | + +**POST /embed — body:** +```json +{ "text": "Hello from NexusAI" } +``` + +**POST /embed — response:** +```json +{ "embedding": [0.123, -0.456, ...], "model": "nomic-embed-text", "dimensions": 768 } +``` + +--- + +## Inference Service — port 3001 + +| Method | Path | Description | +|---|---|---| +| GET | /health | Health check — reports active provider and model | +| POST | /complete | Full completion — awaits entire response | +| POST | /complete/stream | Streaming completion via SSE | + +**POST /complete — body:** +```json +{ + "prompt": "What is the capital of France?", + "model": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf", + "temperature": 0.7, + "maxTokens": 1024 +} +``` +All fields except `prompt` are optional. 
+ +**POST /complete — response:** +```json +{ + "text": "The capital of France is Paris.", + "model": "gemma-4-26B...gguf", + "done": true, + "evalCount": 8, + "promptEvalCount": 41 +} +``` \ No newline at end of file diff --git a/docs/services/Memory-isolation.md b/docs/services/Memory-isolation.md new file mode 100644 index 0000000..3b2f1f4 --- /dev/null +++ b/docs/services/Memory-isolation.md @@ -0,0 +1,128 @@ +# Memory Isolation + +NexusAI implements project-scoped memory — sessions belonging to the same +project can share semantic context, and isolated projects can be restricted +from drawing on memory outside the project. This document describes how the +system works end-to-end. + +## Concepts + +**Session** — a single conversation thread. Identified by `external_id`. + +**Project** — a named grouping of sessions. Has an `isolated` flag (0 or 1). + +**Semantic search** — at inference time, the user's message is embedded and +compared against past episodes in Qdrant to surface relevant context. The +scope of this search is controlled by the project context. + +## Semantic Search Scope + +| Session state | Semantic search scope | +|---|---| +| No project | Own session's episodes only | +| Assigned to a non-isolated project | All episodes across all sessions in the project | +| Assigned to an isolated project | All episodes within the project only | +| Removed from a project | Own session's episodes only (from that point) | + +Sessions with no project assigned behave the same as they always have — +only their own past episodes are searched. 
+ +## How It Works + +### Step 1 — Project context resolution (orchestration) + +In `chat/index.js`, immediately after session resolution: + +```js +let projectSessionIds = null; +if (session.project_id) { + const project = await memory.getProject(session.project_id); + if (project) { + const projectSessions = await memory.getProjectSessions(session.project_id); + projectSessionIds = projectSessions.map(s => s.id); + } +} +``` + +If the session belongs to any project (isolated or not), `projectSessionIds` +is populated with the internal integer IDs of all sessions in that project. + +For **non-isolated projects**, this expands the search to all project sessions. +For **isolated projects**, the same set is used but the intent is restriction +— since `projectSessionIds` only contains project sessions, no external +episodes can appear. + +Both cases use the same code path — the `isolated` flag does not change the +query logic, only the conceptual meaning. + +### Step 2 — Qdrant filter construction + +In `services/qdrant.js`, `searchEpisodes` builds the filter: + +```js +if (projectSessionIds) { + body.filter = { + should: projectSessionIds.map(id => ({ + key: 'sessionId', match: { value: id } + })) + }; +} else if (sessionId) { + body.filter = { must: [{ key: 'sessionId', match: { value: sessionId } }] }; +} +``` + +`should` is Qdrant's "match any of" operator — equivalent to SQL +`WHERE sessionId IN (...)`. When `projectSessionIds` is set, the single-session +filter is not used. + +### Step 3 — Episode payloads + +Every episode upserted into Qdrant carries `{ sessionId, createdAt }` in its +payload. `sessionId` here is the **internal integer ID** from SQLite. This +is what the Qdrant filter matches against. + +This means the filter works correctly regardless of when episodes were created +or when a session was added to a project — the payload is immutable. 
+ +## Important Behaviours + +**Pre-existing episodes are included immediately.** When a session is added +to a project and a new message is sent, Qdrant can match all of that session's +existing episodes since the filter only requires the `sessionId` to be in the +project's session list. + +**Removing a session from a project takes effect immediately.** On the next +message, `getProjectSessions` will not include that session's ID, so its +episodes disappear from the semantic search scope. + +**New sessions created from ProjectView are assigned after the first message.** +The `useChat` hook writes the `project_id` assignment via `updateSession` after +`onDone` fires. There is a brief window during the first message where the +session has no project assigned. The project is correctly applied from the +second message onward. + +## Isolated vs Non-Isolated + +The `isolated` flag is stored on the project but does not currently change the +query logic — both isolated and non-isolated projects result in a +`projectSessionIds` filter. The distinction is purely conceptual and follows +from the project's membership: + +- **Non-isolated** — intentionally draws from all sessions in the project, + creating a shared memory pool for related conversations +- **Isolated** — by design contains only sessions explicitly added to it, + so the same filter naturally restricts context to project-only episodes + +If cross-project contamination becomes a concern (e.g. a session accidentally +added to the wrong project), removing it from the project immediately restores +isolation. + +## Qdrant Payload Structure + +Episodes are stored with this payload: +```json +{ "sessionId": 42, "createdAt": 1776080188 } +``` + +`sessionId` is the SQLite `sessions.id` integer, not the `external_id` UUID. +This is important when building filters — always use internal IDs. 
\ No newline at end of file diff --git a/docs/services/chat-client.md b/docs/services/chat-client.md index f8796a8..ab7a0f8 100644 --- a/docs/services/chat-client.md +++ b/docs/services/chat-client.md @@ -55,10 +55,6 @@ VITE_ORCHESTRATION_URL=https://nexus.jellystorm.com during local development, bypassing Caddy and Authelia entirely: ```js -// vite.config.js -import { defineConfig } from 'vite'; -import react from '@vitejs/plugin-react'; - export default defineConfig({ plugins: [react()], server: { @@ -72,7 +68,8 @@ export default defineConfig({ }); ``` -If new routes are added to the orchestration service, add them here too. +When adding new top-level routes to the orchestration service, add a matching +entry here too. ## Internal Structure @@ -93,12 +90,13 @@ src/ │ ├── Sidebar.jsx # Left sidebar — projects, recent chats, navigation │ ├── ChatWindow.jsx # Centre panel — message thread and input bar │ ├── MessageBubble.jsx # Individual message bubble (user or assistant) -│ ├── InfoPanel.jsx # Right panel — model selector and session metadata -│ ├── SessionModal.jsx # Modal for session rename and delete confirmation -│ ├── ProjectModal.jsx # Modal for project create, edit, and delete confirmation +│ ├── InfoPanel.jsx # Right panel — model selector and session metadata (slide-in) +│ ├── SessionModal.jsx # Modal for session rename, project assignment, delete +│ ├── ProjectModal.jsx # Modal for project create, edit, delete │ ├── AllChatsView.jsx # Full paginated session list with multi-select bulk delete │ ├── AllProjectsView.jsx # Project tile grid with create/edit/delete -│ └── SettingsView.jsx # Settings placeholder (sections: Appearance, Memory, Models, About) +│ ├── ProjectView.jsx # Individual project — session list, new chat button +│ └── SettingsView.jsx # Settings placeholder (Appearance, Memory, Models, About) ├── index.css # Global reset, CSS variables, utility classes └── main.jsx # React entry point ``` @@ -107,9 +105,9 @@ src/ ## Layout -The app uses a 
view-based layout. `App.jsx` manages a `view` state -(`'chat' | 'all-chats' | 'all-projects' | 'settings'`) that controls which -main panel is rendered. The left sidebar and right info panel are always present. +The app uses a view-based layout. `App.jsx` manages a `view` state string +that controls which main panel is rendered. The left sidebar and right info +panel are persistent across all views. ``` ┌──────────────────┬──────────────────────────────┐ @@ -117,9 +115,9 @@ main panel is rendered. The left sidebar and right info panel are always present │ (collapsible) │ │ │ │ chat → ChatWindow │ │ + New Chat │ all-chats → AllChatsView │ -│ ⊞ New Project │ all-projects → AllProjectsView│ -│ │ settings → SettingsView │ -│ PROJECTS ▾ │ │ +│ ⊞ View Projects │ all-projects → AllProjectsView│ +│ │ project → ProjectView │ +│ PROJECTS ▾ │ settings → SettingsView │ │ [tile] [tile] │ │ │ All Projects → │ │ │ │ │ @@ -132,10 +130,22 @@ main panel is rendered. The left sidebar and right info panel are always present └──────────────────┴──────────────────────────────┘ ``` -The sidebar collapses to a 48px icon rail. The right info panel (`InfoPanel`) -slides in from the right over the main area using `transform: translateX()` — -it is hidden by default (`rightOpen` starts `false`) and toggled via a button -in the `ChatWindow` header. +The sidebar collapses to a 48px icon rail. The right `InfoPanel` slides in +from the right using `transform: translateX()` — hidden by default, toggled +via the `⊹` button in the `ChatWindow` header. 
+ +## View Routing + +| View | Component | Trigger | +|---|---|---| +| `'chat'` | `ChatWindow` | Default; selecting a session; new chat | +| `'all-chats'` | `AllChatsView` | "All Chats →" or ☰ icon in collapsed rail | +| `'all-projects'` | `AllProjectsView` | "View Projects" button or ⊞ icon | +| `'project'` | `ProjectView` | Clicking a project tile in the sidebar | +| `'settings'` | `SettingsView` | Settings button or ⚙ icon | + +`activeProject` state in `App.jsx` tracks which project `ProjectView` is +displaying. Set via `onSelectProject` before navigating to `'project'`. ## CSS Architecture @@ -181,91 +191,47 @@ rules, inline styles for dynamic prop-driven values. | `.label-upper` | Uppercase section label style | | `.truncate` | Text overflow ellipsis | -## API Layer - -All orchestration calls are centralised in `src/api/orchestration.js`: - -| Function | Method | Path | Description | -|---|---|---|---| -| `fetchSessions` | GET | /sessions | Load session list for sidebar | -| `fetchSessionHistory` | GET | /sessions/:id/history | Load episode history on session select | -| `sendMessage` | POST | /chat | Send message, await full response | -| `streamMessage` | POST | /chat/stream | Send message, receive SSE token stream | -| `fetchModels` | GET | /models | Load available models from manifest | -| `renameSession` | PATCH | /sessions/:id | Rename a session | -| `deleteSession` | DELETE | /sessions/:id | Delete a session | -| `fetchProjects` | GET | /projects | Load project list | -| `createProject` | POST | /projects | Create a new project | -| `updateProject` | PATCH | /projects/:id | Update a project | -| `deleteProject` | DELETE | /projects/:id | Delete a project | - -`streamMessage` returns an abort function — call it to cancel a stream mid-flight. -Uses a buffer pattern to handle SSE chunks that may span multiple network packets. - ## Streaming -The chat input sends messages via `POST /chat/stream`. 
Tokens arrive as SSE events: +Messages are sent via `POST /chat/stream`. Tokens arrive as SSE events and +are written into the active assistant bubble token by token via +`updateLastMessage`. The blinking cursor in `MessageBubble` is shown while +`message.streaming === true`. -``` -data: {"text":"Hello"} -data: {"text":" Tim"} -data: {"done":true,"model":"gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf","tokenCount":87} -``` - -An empty assistant bubble is appended immediately when the stream opens, then -updated token by token using `updateLastMessage`. The blinking cursor in -`MessageBubble` is shown while `message.streaming === true` and disappears -when the done event is received. Model name and token count from the done -event are stored in `useChat` state and displayed in the InfoPanel. - -## Dynamic Model Selector - -Available models are fetched from `GET /models` on mount via the `useModels` hook. -The hook initialises with `FALLBACK_MODELS` from `constants.js` and replaces them -with the server response on success. If the fetch fails, the fallback list is used -silently — a warning is logged to the console. - -To add a model, update `models.json` on the main PC — no client rebuild needed. - -`FALLBACK_MODELS` in `constants.js` should be kept in sync with `models.json` -as a reasonable last-resort list in case the endpoint is unreachable. +`useChat` accepts an optional `projectId` parameter in `sendMessage`. After +the first message completes in a new session, if `projectId` is set, +`updateSession` is called to write the project assignment to the backend. ## Session Management -Sessions are identified by `external_id` — a UUID generated client-side via the -`uuid` package. New sessions are created locally and auto-registered in the memory -service on the first message. The session list refreshes after each completed -response to surface newly created sessions. +Sessions are identified by `external_id` — a UUID generated client-side via +the `uuid` package. 
New sessions are created locally and auto-registered in +the memory service on the first message. The session list refreshes after +each completed response to surface newly created sessions. -### Session Name Display +### Auto-naming -The chat header and session rows both display `session.name` if set, falling back -to `session.external_id` if no name has been assigned: +After the first exchange completes, orchestration fires a secondary inference +call with a short naming prompt (max 20 tokens, temperature 0.3). The result +is written back as `session.name`. The client fires a second `refreshSessions` +after a 3-second delay to pick up the name once written. -```js -activeSession.name || activeSession.external_id -``` +Manually renamed sessions are never overwritten — the `!session.name` guard +in `chat/index.js` prevents this. ### Session Actions -Session rows in the sidebar support rename and delete via two entry points: +Session rows support rename, project assignment, and delete via: +- **Hover** — reveals ✎ and ✕ icon buttons alongside the row +- **Right-click** — context menu with the same actions -- **Hover** — reveals ✎ (rename) and ✕ (delete) icon buttons alongside the row -- **Right-click** — opens a context menu with the same actions - -Both trigger `SessionModal` — a shared modal component with two modes: - -| Mode | Trigger | Behaviour | -|---|---|---| -| `settings` | Rename button / context menu rename | Shows name input, saves on Enter or Save button | -| `confirm-delete` | Delete button / context menu delete | Shows confirmation dialog, requires explicit Delete click | - -Actions are disabled on unsaved (new) sessions that haven't had a first message sent yet. +`SessionModal` handles rename and project assignment together in `settings` +mode, and delete confirmation in `confirm-delete` mode. 
### Active Session Clearing on Delete -When the deleted session is the currently active one, `App.jsx` detects the match -and calls `selectSession(null)` to clear the chat window before refreshing the list: +When the deleted session is the currently active one, `App.jsx` clears the +chat window before refreshing the list: ```js function handleSessionsChange(deletedSession) { @@ -276,53 +242,23 @@ function handleSessionsChange(deletedSession) { } ``` -### Context Menu +### Key Patterns -Implemented via `useContextMenu` hook — tracks `{ x, y, session }` state and -attaches a `window` click listener to dismiss on any outside click. Rendered -outside the sidebar div via a React fragment to avoid being clipped by -`overflow: hidden`. - -### Button Nesting - -Session row action icons (✎ ✕) are rendered as siblings of the session -`