From 5145b9a7dbbceef4cd1ef04fe58f0505c45b0f2c Mon Sep 17 00:00:00 2001
From: Storme-bit <tk.stomre@gmail.com>
Date: Fri, 17 Apr 2026 03:46:17 -0700
Subject: [PATCH] update documentation

---
 .vs/slnx.sqlite                               | Bin 0 -> 90112 bytes
 .vs/slnx.sqlite-journal                       | Bin 0 -> 21032 bytes
 docs/README.md                                |  28 +-
 docs/architecture/overview.md                 |  80 +++--
 docs/deployment/homelab.md                    | 116 +++++--
 .../{overview.md => homelab-overview.md}      |  15 +-
 docs/services/API-routes.md                   | 283 ++++++++++++++++++
 docs/services/Memory-isolation.md             | 128 ++++++++
 docs/services/chat-client.md                  | 198 +++++-------
 docs/services/embedding-service.md            |  83 ++---
 docs/services/inference-service.md            | 140 ++-------
 docs/services/memory-service.md               | 276 ++++-------------
 docs/services/orchestration-service.md        | 269 ++++-------------
 13 files changed, 822 insertions(+), 794 deletions(-)
 create mode 100644 .vs/slnx.sqlite
 create mode 100644 .vs/slnx.sqlite-journal
 rename docs/homelab/{overview.md => homelab-overview.md} (92%)
 create mode 100644 docs/services/API-routes.md
 create mode 100644 docs/services/Memory-isolation.md
diff --git a/.vs/slnx.sqlite b/.vs/slnx.sqlite
new file mode 100644
index 0000000000000000000000000000000000000000..918f871d0e8435d715fe3a687c50fe24966d80b7
GIT binary patch
literal 90112
zcmeI4&u`mC7RN=~F>Tq3)25Dq^`Sfzh`F(nHbM5#qQJ@$ov@K*SwEZ@UO}McaY9t0
z5{XV@pnFiV4X_PRp#Q?2dfoqG|A$`pu-MC9iUo>ZIzw{!V@O4+v+HcNUm~_e&YPL{
zKJ(t=8`4f~w`AHxIqvnlhOOL6T}nw(>i3G0N~I+Fb%lQM<1+m)$4}@_);{i#QrD0F
zd4WnvD?c&WcUOK|{%-lw(!KOIi?0^GmuKg9XTP2Kai%)+*|?$5{y$G(foVAW;G&$p
zpOc2WrgcP~)lR#Iy-txFo0e&tJ*(w@v>tAm9ikt#^dzB`LRHJxHAOEMwFk<<L}N5F
zX@wp;<+yxPSwDzQ+f;|E897^^YTij!vut!pvT_&4C>Qh`qY@KrDuta=iP>7-DdcOK
z`qih)ayFNfzRuakVTXvOi{}|nnR<S+q$OCMNw2$HVi5JCCgoe#?r2q|Qq{Nf)qUlj
zwy)$H^&P!Thuzl7b*dHpYLrSF-YAuR@2Po2`r+PO#uZ_UP}R1ys#Y#&HRXW!&@9`u
zPn+xh1v-QpbfN3ks1z9pilLA%YLTX*R?>ndu8a$?yqB*Q?&hoOH*eooeT4<Cyvx=}
zr*pb{Vsy-7lN==t5M2-6?;iD%fETxzx8~bzG8mYLW`~*RV->{&AB@QbCWisCK&hP$
zY|`}+ikBElCFp$?mZe_XVDVY)eKFbqE>!4sPP$fHE|XS=H<sjVi^f&Ui7Q9bwqaSM
zUohH_NGq<o8)c&4ax}`s=8@xsVovNseJ?(HQ+?4$I}!J?=|r5^OFUnSMx5A8=jRb8
zHictmf{61LG)d=}=R_jTnRmRyyqUl7unrr={XL=$7xbN%Ux>wja*)?gAJZ(!GZmkL
zM_%3Rl7Ve>A2$`2JnH&3&7?G6F;C1b%Gn#L^nAtfL_RGWuf@Lc3$9{mZ&@2<eYfG?
zdAN+<Ddb+f%`=SNWB3(UWg(IHt$U<Dpn(?4{LfPha#mHP7oRwO$Om(WkDe_?eK6V>
z@<84YUZe>;&-DynVb`1>&3-sG%kp`BQzz-nmh59$&T2WnWTGRtDn_4Jc8jHLv27u7
z{Y0to2%)7E#aO|Rba&pMw=+itu><qv;pV)Y{hVgL(;)MOqR^W<-ZPZ{LgKP#k5>YT
z-NuYI#<tfOv3em~7R9DzRFV~*AER*4bBuCH;MEs*=iDS+2$FP&mZ;~2Xn=^#p%4+9
z&K4@J&ViRtP>GCOgCN&Ad5qOl>^MV7@MalaA22peWmAurfhA*L?;H+D|0$tmQIL7P
zYEda_Tlq$*uC!13PQ=<gP17Ln&C1!2)}-P4PMFk)ZJX9ZC#Jo6+r2Uu^{2R;<3>)o
z?{*j!v}$E$K6~w&wC{M%;0dkKNNYfzoDhpvZPC-&h~Ef}>lStX4cb#3-jU?&+M4v7
zIAeOcc|T?%V;p!_LoVlCIX6$+Gdd@99R=@f(7oC_=?wiYt^6ZJ|KR`u5C8!X009sH
z0T2KI5C8!X009vAl@quk&84npYxE6F*YKXoesulT_1o_L|A*Ad55IB)p*{$J00@8p
z2!H?xfB*=900@8p2!KF@Kw6qzTl9VjAx&GPm4Bt^KO7(c0w4eaAOHd&00JNY0w4ea
zAOHf>LEzHt)y2p!0kHS~^pE~=fB*=900@8p2!H?xfB*=900@8p2uv{n_WmFH|0!Nz
zm<9qM00JNY0w4eaAOHd&00JNY0+ayu|8N5k009sH0T2KI5C8!X009sH0T7se0@(jg
z{~E(W5C8!X009sH0T2KI5C8!X009ud-~Yo0KmY_l00ck)1V8`;KmY_l00cl_`U&9u
z|MaghECc}%009sH0T2KI5C8!X009sH0lfc*4}bs&fB*=900@8p2!H?xfB*=9!1NQq
z`~T@*V^|0RAOHd&00JNY0w4eaAOHd&00Mab4<7&l5C8!X009sH0T2KI5C8!X0D<Wz
zfcO8?zs9f-1V8`;KmY_l00ck)1V8`;KmY`=|A!BN00@8p2!H?xfB*=900@8p2!O!!
z6Tt8PPyZUjLJ$A}5C8!X009sH0T2KI5C8!XVEg~HJeyki-NnCO94vpgd}--k`kTd9
z3*XDL^SiU(&ipu2o%sx$KwxqLbAQd=&q>2w(>fy0YNy@9UZ+ToP0O^+p4D<cS`W9(
z4$+TVdXmsep{nKUnxdDB+5_caqA{A8v_g-aa$LTttRF<DZK}i7jGQe{HSZ*=SvI;P
zS-Fd2lnZ)}QHcpQmBLP`#B43^6!JAq{p!<YIh)H#U*~M&utP-C#q*4(Og+C@(h@As
zq}N?8F^Kw6lk%-=ceJWfsp{ML>b`PM+gI|9`i@?v!)|NkI@OAPHA<xoZ<I>E_tZQh
z{cvwC<BG6FsA^kURVx>?nsUH<XqIi-r_FW$0v$pPy3loNREi7)#Zbr>wMbJ@D``O!
zSH=Zc-pf}Dck|Wto40SPzQO`m-ev2g(>dKeF*@e4Nsf{Rh^`0kcaM5Wz>8bVTl4KU
z84S!rv%^gEv5I1X560vIlfwX6pwvzWHtG5Z#Y+sO67)U`%Tljxu=uR@z8Gx)7b^5R
zCtWKpmr1L`8%uJwMdPaF#Fe9I+psLsFBt7dq!m})jWSViIT~eR^T=^RF(>w+z89ap
zslI5WorrtcbRtgdC7v%uBTj6l^Ye%ko5C?OLBx3rnxu2gb0QJv%sbv;-ppTkSci?`
z{vOeW3;NE>FT~<MImqj$k7<_VnTk)rBd>0D$-p+ckDCfh9(8@2W>T83m?!2I<?Ibr
zdcNX#BA=Fx*J9uJ1y`}Ox2%n_zT5EcJY2@_6ml=#<{3usG5m_FvXDsp);-c6&_Ii2
z{^zL$IjgGDi%%Rs<b%1xN6!|cJ{WBbc_42HFVX~_=X!>(uxn0`W<MO8W%<0msgrbO
zOZKrWXSEz(GSQJ+6{AlqyT#JB*tU?kexg)(gwRroVys|Dx;t;s+nJ++*n#=-aC2VH
zeoiyrX^{CsQRqz_?-|N}A#vHW$18!vZevCpW7}(tSiKM~i(=C<D#;4Zk5M@2IYzl8
z@al`Zb8eC@1W7tXOVsm1G(g1WP>6_4XA2cq=fKM+s6<AtL6Ga5JjUuNcATLkc(V+z
z4;Y)KvZ=?*z>+brcMb=n|CG?OD9F5CwWt)et$d?YSK23iCt_`$rfHD(X65WhYtrz2
zCroO@woU7y6VqP3?OvIS`cquaaU-YPcRP#<TD3AWpS^ZX+IKu>@PyWAq%|N<PKZUT
zw&>|>#BYSgb&ERx2JNX1??`fXZB2SkoH0G!ydN`>F%G<|A(!*6oSUcZ8J!clj)Hf(
zH%VvzpZYqrQeOUM>EBB`3peFI&i`$0d-gB%;<+9#SKpIYbGe0A##57g!RqviVLx*2
zekK`+=N{udUAwP{ZHh6-r0?tXyEJABjcQHb)8aDunpdW=*s9U(MWL?5bAghtDVxS1
zQh)158_KBQW%32r=Uox^?#;Te6cK5*A1Pbao$c7HLDbS(g!*@$RWnRWcDR}b=j_^6
z?Rs2uuRvn$?4UBgT<6#{jtyUasa*ZTRfYbjT=#9I#)M6`9|e6~BZc*3(v?6)FPG^y
z^fS8gjNxFCTQ$0`52O<fRz3~u!?F%%PjgNt&6l`#{kI!}-UHt|GU?YTJBgCXi{a=N
z_ao}1cj-Pk*<%#F=2Wu7^~-YhK$V93LRnEXY-8j$C~ycSb|soiqm3iSNW4$8;aH{(
z#^9Kn>Ytvy>r}9Rz9!tHS*M@_J6nw=$!KFJS4JDomNJ7ed{u*ctarx>oa+qt**DyJ
z^J+P49pC3fZu?x&HRE-UQeL-}3ho2r3;8qDHQwE(u04o8IjS4|hr~|04UKqIQ|MVX
zc{WK8wsW}o4t?x!EArUEsSm5<n9yX<CN2LH^9TOqG4Kt)`MpuW;!7kCl^8C5AG%up
zXIBBbW4L)~oYF<|m{>=KWsgz%@_3~ahZ?K2(5tDw{LPA-y>&}^bsz+J#60(bm4E5d
zsBrfs5@X(PjR)>8-^M0|S8~`-9%sM!k)??9KbwdGbI$Sz(5LPBXQ12L-H1f=Vntz7
z@w3Z_-0@#bHvW^R=yNIWnbQXsWxV{***l$3W)WXnjNGZ=<p*AVoasr&8&+Way~h9F
z%Mbkh|G7Toz+@l*0w4eaAOHd&00JNY0w4eaAn;ZQVE_MCuwzmX009sH0T2KI5C8!X
z009sH0T4J>0{H*`&J{N%0|5{K0T2KI5C8!X009sH0T2Lzw?Y8#|KAFBObP-Z00JNY
U0w4eaAOHd&00JNY0_RHLXNWs!LjV8(

literal 0
HcmV?d00001

diff --git a/.vs/slnx.sqlite-journal b/.vs/slnx.sqlite-journal
new file mode 100644
index 0000000000000000000000000000000000000000..39237b15308d25a7cf9681f38b9c73fec0d50fcd
GIT binary patch
literal 21032
zcmeI(y$QoG5C+h5jPqkKNtGsb7RV4;ATxyE#+CalodF_&z&jjI2qdBE-l_UP5!-m&
zpWkx2M(oOR*)Hrd3mnH<opxvLvDyd_AV7cs0RjXF5FkK+K(yTFAP4Xm2oNAZfB*pk
z1PBlyK!8AgftCa8<pBA6twI6>2oNAZfB*pk1PBly&~kubsucbOcn|~#5FkK+009C7
z2oNAZAhkft0s5K)bmw1`X%HYlfB*pk1PBlyK!5-N0*M61uD_jL?{%G*Pt5`T-~kMM

literal 0
HcmV?d00001

diff --git a/docs/README.md b/docs/README.md
index 86e10c8..f6af35a 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,13 +1,23 @@
 # NexusAI Documentation
 
-## Contents
+## Architecture
 
 - [Architecture Overview](architecture/overview.md)
-- [Services](services/)
-  - [Shared Package](services/shared.md)
-  - [Memory Service](services/memory-service.md)
-  - [Embedding Service](services/embedding-service.md)
-  - [Inference Service](services/inference-service.md)
-  - [Orchestration Service](services/orchestration-service.md)
-  - [Chat Client](services/chat-client.md)
-- [Deployment](deployment/homelab.md)
\ No newline at end of file
+
+## Services
+
+- [Shared Package](services/shared.md)
+- [Memory Service](services/memory-service.md)
+- [Embedding Service](services/embedding-service.md)
+- [Inference Service](services/inference-service.md)
+- [Orchestration Service](services/orchestration-service.md)
+- [Chat Client](services/chat-client.md)
+
+## Reference
+
+- [API Routes](services/API-routes.md) — all HTTP endpoints across all services
+- [Memory Isolation](services/Memory-isolation.md) — project-scoped memory model
+
+## Deployment
+
+- [Homelab](deployment/homelab.md)
\ No newline at end of file
diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md
index 19bf479..25e54aa 100644
--- a/docs/architecture/overview.md
+++ b/docs/architecture/overview.md
@@ -1,56 +1,80 @@
 # Architecture Overview
 
-NexusAI is a modular, memory-centric AI system designed for persistent, context-aware conversations. It separates concerns across different services that can be independently deployed and evolved.
+NexusAI is a modular, memory-centric AI assistant designed for persistent,
+context-aware conversations. It separates concerns across independent services
+that can be evolved and deployed separately.
 
 ## Core Design Principles
 
-- **Decoupled layers:** memory, inference, and orchestration are independent of each other
-- **Hybrid retrieval:** semantic similarity (Qdrant) combined with structured storage (SQLite) for flexible, ranked context assembly
-- **Home lab:** services are distributed across nodes according to available hardware and resources
+- **Decoupled layers** — memory, inference, and orchestration are independent of each other
+- **Hybrid retrieval** — semantic similarity (Qdrant) combined with structured storage (SQLite) for flexible, ranked context assembly
+- **Project-scoped memory** — sessions can be grouped into projects with shared or isolated memory pools
+- **Home lab first** — services are distributed across nodes according to available hardware
 
 ## Memory Model
 
-Memory is split between SQLite and Qdrant, which work together as a pair:
+Memory is split between SQLite and Qdrant, which always work as a pair:
 
-- **SQLite:** episodic interactions, entities, relationships, summaries
-- **Qdrant:** vector embeddings for semantic similarity search
+- **SQLite** — episodic interactions, entities, relationships, summaries, sessions, projects
+- **Qdrant** — vector embeddings for semantic similarity search
 
-When recalling memory, Qdrant returns IDs and similarity scores, which are used to fetch
-full content from SQLite. Neither SQLite nor Qdrant work in isolation.
+When recalling memory, Qdrant returns IDs and similarity scores, which are used
+to fetch full content from SQLite. Neither store works in isolation.
+
+Episode embeddings carry a `{ sessionId, createdAt }` payload in Qdrant,
+enabling per-session and per-project filtering at search time. See
+[Memory Isolation](../services/Memory-isolation.md) for how project-scoped retrieval works.
 
 ## Hardware Layout
 
 | Node | Address | Role |
 |---|---|---|
-| Main PC | local | Primary inference (RTX A4000 16GB) |
-| Mini PC 1 | 192.168.0.81 | Memory service, Embedding service, Qdrant |
-| Mini PC 2 | 192.168.0.205 | Orchestration service, Chat Client, Gitea |
+| Main PC | 192.168.0.79 | Primary inference — RTX A4000 16GB |
+| Mini PC 1 | 192.168.0.81 | Memory service, Embedding service, Qdrant, Ollama |
+| Mini PC 2 | 192.168.0.205 | Orchestration service, Chat Client, Caddy, Authelia, Gitea |
 
 ## Service Communication
 
-All services expose a REST HTTP API. The orchestration service is the single entry point —
-clients do not talk directly to the memory or inference services.
+All services expose a REST HTTP API. The orchestration service is the single
+entry point — clients never talk directly to memory or inference services.
 
 ```
-Client
-└─► Orchestration (:4000)
-    ├─► Chat Client (static files, /srv/nexusai)
-    ├─► Memory Service (:3002)
-    │     ├─► Qdrant (:6333)
-    │     └─► SQLite
-    ├─► Embedding Service (:3003)
-    │     └─► Ollama
-    └─► Inference Service (:3001)
-          └─► Ollama
+Client (browser)
+└─► Caddy (HTTPS + Authelia SSO)
+    └─► Orchestration (:4000) — Mini PC 2
+        ├─► Memory Service (:3002) — Mini PC 1
+        │     ├─► SQLite (local file)
+        │     └─► Qdrant (:6333) — Mini PC 1
+        ├─► Embedding Service (:3003) — Mini PC 1
+        │     └─► Ollama (:11434) — Mini PC 1
+        ├─► Inference Service (:3001) — Main PC
+        │     └─► llama-server (:8080) — Main PC
+        └─► Qdrant (:6333) — Mini PC 1 (direct — semantic search)
 ```
 
+Note: Orchestration queries Qdrant directly for semantic search (bypassing
+the memory service) but always fetches full episode content from the memory
+service by ID after the vector search.
+
 ## Technology Choices
 
 | Concern | Choice | Reason |
 |---|---|---|
-| Language | Node.js (JavaScript) | Familiar stack, async I/O suits service architecture |
+| Language | Node.js (CommonJS) | Familiar stack, async I/O suits service architecture |
 | Package management | npm workspaces | Monorepo with shared code, no publishing needed |
 | Vector store | Qdrant | Mature, Docker-native, excellent Node.js client |
-| Relational store | SQLite (better-sqlite3) | Zero-ops, fast, sufficient for single-user |
-| LLM runtime | Ollama | Easiest local LLM management, serves embeddings too |
-| Version control | Gitea (self-hosted) | Code stays on local network |
\ No newline at end of file
+| Relational store | SQLite (better-sqlite3) | Zero-ops, fast, sufficient for single-user scale |
+| LLM inference | llama.cpp (`llama-server`) | Maximum GPU utilisation on RTX A4000, OpenAI-compatible API |
+| Embeddings | Ollama (`nomic-embed-text`) | Co-located with memory service on Mini PC 1, 768-dim Cosine |
+| Reverse proxy | Caddy + Authelia | Automatic HTTPS, SSO/MFA for all exposed services |
+| Version control | Gitea (self-hosted) | Code stays on local network |
+
+## Current State
+
+The core four-service architecture is complete and operational. Key capabilities:
+
+- **Hybrid memory retrieval** — recent episodes + semantic search combined into every prompt
+- **Projects** — sessions grouped with shared or isolated memory pools
+- **Auto-naming** — sessions named automatically from first exchange via inference
+- **Project-scoped semantic search** — Qdrant filtered by project session IDs
+- **Chat client** — view-based UI with sidebar navigation, project views, session management
\ No newline at end of file
diff --git a/docs/deployment/homelab.md b/docs/deployment/homelab.md
index cc43870..565a9a4 100644
--- a/docs/deployment/homelab.md
+++ b/docs/deployment/homelab.md
@@ -7,50 +7,73 @@ services appropriate for its hardware.
 
 ## Mini PC 1 — 192.168.0.81
 
-Runs: Qdrant, Memory Service, Embedding Service
+Runs: Qdrant, Memory Service, Embedding Service, Ollama
+
 ```bash
-ssh username@192.168.0.81
-cd ~/nexusai
+ssh storme@192.168.0.81
 docker compose -f docker-compose.mini1.yml up -d  # Qdrant
-npm run memory
-npm run embedding
+npm run memory      # port 3002
+npm run embedding   # port 3003
+ollama serve        # port 11434 — must bind 0.0.0.0 (OLLAMA_HOST=0.0.0.0)
 ```
 
+> Ollama must be started with `OLLAMA_HOST=0.0.0.0` to accept connections
+> from other services on the LAN. Without this, embedding requests from the
+> embedding service will be refused.
+
 ## Mini PC 2 — 192.168.0.205
 
-Runs: Gitea, Orchestration Service, Chat Client (via Caddy)
-```bash
-ssh username@192.168.0.205
+Runs: Orchestration Service, Chat Client (via Caddy), Gitea, Caddy, Authelia
 
-cd ~/gitea
-docker compose up -d        # Gitea
+```bash
+ssh storme@192.168.0.205
 
 cd /opt/stacks/network
 docker compose up -d        # Caddy, Authelia, and other network services
 
-cd ~/nexusai
-npm run orchestration
+cd ~/nexusAI
+npm run orchestration       # port 4000
 ```
 
-## Main PC
+## Main PC — 192.168.0.79
 
-Runs: Ollama, Inference Service
-```bash
-ollama serve
-npm run inference
+Runs: Inference Service, llama-server
+
+```powershell
+# Start llama-server first — inference service depends on it
+.\llama-gpu\llama-server.exe `
+  -m .\models\gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf `
+  -ngl 99 --reasoning off --host 0.0.0.0 --port 8080 -c 64000
+
+# Then start inference service
+npm run inference            # port 3001
 ```
 
 ## Chat Client Deployment
 
-The chat client is a React + Vite app build to static files and served by Caddy on Mini PC 2 (Infrastructure node).  It does not run as a Node process
+The chat client is a React + Vite app built to static files and served by
+Caddy on Mini PC 2. It does not run as a Node process.
+
 ```bash
-# On dev machine or Mini PC 2 after git pull
+# On Mini PC 2 after git pull
 cd ~/nexusAI/packages/chat-client
-npm run build
+
+# Set production URL before building
+VITE_ORCHESTRATION_URL=https://nexus.jellystorm.com npm run build
+
 # Output lands in packages/chat-client/dist/
-# Caddy serves this directory directly via volume mount
+# Caddy serves this directory directly via Docker volume mount
 ```
-Caddy config (`/opt/docker/caddy/Caddyfile`):
+
+> Do NOT set `VITE_ORCHESTRATION_URL` during local dev — Vite's proxy handles
+> routing and setting the HTTPS domain will cause Authelia to intercept API
+> requests, producing confusing JSON parse errors.
+
+## Caddy Configuration
+
+The Caddyfile on Mini PC 2 must include a handle block for each route prefix
+the client needs to reach. Current required blocks for NexusAI:
+
 ```caddy
 nexus.jellystorm.com {
     import authelia
@@ -63,6 +86,14 @@ nexus.jellystorm.com {
         reverse_proxy 192.168.0.205:4000
     }
 
+    handle /models* {
+        reverse_proxy 192.168.0.205:4000
+    }
+
+    handle /projects* {
+        reverse_proxy 192.168.0.205:4000
+    }
+
     handle {
         root * /srv/nexusai
         try_files {path} /index.html
@@ -71,18 +102,45 @@ nexus.jellystorm.com {
 }
 ```
 
-The Caddy container mounts the dist directory via Docker volume:
+When adding new top-level routes to the orchestration service, add a matching
+handle block here and reload Caddy:
+
+```bash
+caddy reload --config /path/to/Caddyfile
+```
+
+The Caddy container mounts the `dist` directory via Docker volume:
+
 ```yaml
 - /home/storme/nexusAI/packages/chat-client/dist:/srv/nexusai
 ```
 
 > After adding or changing volume mounts, a full `docker compose down caddy && docker compose up -d caddy`
-> is required. Caddyfile-only changes only need `docker compose restart caddy`.
-
-
+> is required. Caddyfile-only changes only need `caddy reload`.
 
 ## Environment Files
 
-Each node needs a `.env` file in the relevant service package directory.
-These are not committed to git. See each service's documentation for
-required variables.
\ No newline at end of file
+Each service needs a `.env` file in its package directory. These are not
+committed to git. See each service's documentation for required variables.
+
+| Service | Location | Key Variables |
+|---|---|---|
+| Memory | `packages/memory-service/.env` | `SQLITE_PATH`, `QDRANT_URL`, `EMBEDDING_SERVICE_URL` |
+| Embedding | `packages/embedding-service/.env` | `OLLAMA_URL`, `EMBEDDING_MODEL` |
+| Inference | `packages/inference-service/.env` | `INFERENCE_PROVIDER`, `INFERENCE_URL`, `DEFAULT_MODEL` |
+| Orchestration | `packages/orchestration-service/src/.env` | `MEMORY_SERVICE_URL`, `EMBEDDING_SERVICE_URL`, `INFERENCE_SERVICE_URL`, `QDRANT_URL`, `MODELS_MANIFEST_PATH` |
+| Chat client | `packages/chat-client/.env` | `VITE_ORCHESTRATION_URL` (production builds only) |
+
+## Models Manifest
+
+The models manifest (`models.json`) lives on the Main PC alongside the model
+files, accessible to orchestration via an SMB mount at `/mnt/nexus-models`.
+
+```json
+[
+  { "value": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf", "label": "Gemma 4 26B Claude Distill" }
+]
+```
+
+`value` must exactly match the model name as reported by `llama-server`
+(including `.gguf` extension). No service restart needed to pick up changes.
\ No newline at end of file
diff --git a/docs/homelab/overview.md b/docs/homelab/homelab-overview.md
similarity index 92%
rename from docs/homelab/overview.md
rename to docs/homelab/homelab-overview.md
index 9ff6344..04739f8 100644
--- a/docs/homelab/overview.md
+++ b/docs/homelab/homelab-overview.md
@@ -39,21 +39,21 @@ All external access is routed through **Caddy** (reverse proxy) with **Authelia*
 |------|--------|
 | GPU | NVIDIA RTX A4000 |
 | Role | Primary AI inference node |
-| Key Services | Ollama (inference) |
+| Key Services | llama-server (llama.cpp), Inference Service |
 
 ### Mini PC 1 — Media Node (`192.168.0.81`)
 | Spec | Detail |
 |------|--------|
 | GPU | NVIDIA RTX 5050 |
 | Role | Media services, embeddings, vector storage |
-| Key Services | Jellyfin, Nextcloud, Qdrant, arr stack, NexusAI memory/embedding |
+| Key Services | Jellyfin, Nextcloud, Qdrant, arr stack, NexusAI memory/embedding, Ollama |
 | Storage | NVMe (OS) + 3x external HDDs (see [Storage Layout](#storage-layout)) |
 
 ### Mini PC 2 — Infrastructure Node (`192.168.0.205`)
 | Spec | Detail |
 |------|--------|
-| Role | Network management, monitoring, auth, DNS, git |
-| Key Services | Caddy, Authelia, Tailscale, Pihole, Grafana, Gitea |
+| Role | Network management, monitoring, auth, DNS, git, NexusAI orchestration |
+| Key Services | Caddy, Authelia, Tailscale, Pihole, Grafana, Gitea, NexusAI orchestration |
 | Storage | NVMe (OS only) |
 
 ---
@@ -155,7 +155,8 @@ All external access is routed through **Caddy** (reverse proxy) with **Authelia*
 
 | Service | Notes |
 |---------|-------|
-| Ollama | Runs LLM inference using the RTX A4000. Also serves `nomic-embed-text` embeddings (768-dim vectors) consumed by NexusAI's embedding service on Mini PC 1. |
+| llama-server (llama.cpp) | Primary LLM inference using the RTX A4000. Started manually before the inference service. Serves the OpenAI-compatible API on port 8080. |
+| Ollama | Serves `nomic-embed-text` embeddings (768-dim vectors) consumed by NexusAI's embedding service on Mini PC 1. |
 
 ---
 
@@ -234,7 +235,7 @@ Phase 1 focused on establishing a stable, secure, and observable foundation:
 - ✅ Self-hosted git (Gitea)
 - ✅ Media stack fully operational (Jellyfin, arr stack, Nextcloud)
 - ✅ Download pipeline with VPN isolation (Gluetun + qBittorrent)
-- ✅ NexusAI foundation services running (Qdrant, Ollama)
+- ✅ NexusAI foundation services running (Qdrant, Ollama, llama.cpp)
 - ✅ Container management across nodes (Portainer + agent)
 
 ---
@@ -249,6 +250,6 @@ Phase 2 shifts focus to resilience, security hardening, and smart home integrati
 - **Additional security hardening** — Audit exposed services, tighten firewall rules, review Authelia policies
 - **IP webcam integration** — Add camera feeds into the homelab ecosystem
 - **Home Assistant** — Integrate smart home automation and sensor data
-- **Continued NexusAI development** — Entities layer, embedding service, inference and orchestration buildout
+- **Continued NexusAI development** — Entity extraction pipeline, summaries layer, SettingsView implementation
 
 > This section will be expanded as Phase 2 planning matures.
\ No newline at end of file
diff --git a/docs/services/API-routes.md b/docs/services/API-routes.md
new file mode 100644
index 0000000..876cbfc
--- /dev/null
+++ b/docs/services/API-routes.md
@@ -0,0 +1,283 @@
+# API Routes
+
+All HTTP endpoints across NexusAI services. Clients communicate only with
+the orchestration service (port 4000) — the memory, embedding, and inference
+service routes are listed here for reference and direct debugging use.
+
+---
+
+## Orchestration Service — port 4000
+
+### Health
+
+| Method | Path | Description |
+|---|---|---|
+| GET | /health | Service health check |
+
+### Chat
+
+| Method | Path | Description |
+|---|---|---|
+| POST | /chat | Send a message, receive full response |
+| POST | /chat/stream | Send a message, receive SSE token stream |
+
+**POST /chat and POST /chat/stream — request body:**
+```json
+{
+  "sessionId": "your-session-uuid",
+  "message": "Hello, my name is Tim.",
+  "model": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf",
+  "temperature": 0.7
+}
+```
+`model` and `temperature` are optional.
+
+**POST /chat — response:**
+```json
+{
+  "sessionId": "your-session-uuid",
+  "response": "Hello Tim! How can I help you today?",
+  "model": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf",
+  "tokenCount": 87
+}
+```
+
+**POST /chat/stream — response (SSE):**
+```
+data: {"text":"Hello"}
+data: {"text":" Tim"}
+data: {"done":true,"model":"gemma-4-26B...gguf","tokenCount":87}
+```
+
+### Sessions
+
+| Method | Path | Description |
+|---|---|---|
+| GET | /sessions | Paginated session list |
+| GET | /sessions/:sessionId/history | Paginated episode history for a session |
+| PATCH | /sessions/:sessionId | Update session name and/or project assignment |
+| DELETE | /sessions/:sessionId | Delete session and all its episodes |
+
+**GET /sessions — query params:**
+
+| Param | Default | Description |
+|---|---|---|
+| limit | 20 | Sessions per page |
+| offset | 0 | Pagination offset |
+| projectId | — | Filter by project (integer ID) |
+
+**PATCH /sessions/:sessionId — body:**
+```json
+{ "name": "My Session", "projectId": 3 }
+```
+At least one of `name` or `projectId` is required; both can be sent together.
+Returns the updated session object.
+
+**GET /sessions/:sessionId/history — query params:**
+
+| Param | Default | Description |
+|---|---|---|
+| limit | 20 | Episodes per page |
+| offset | 0 | Pagination offset |
+
+Returns `{ sessionId, episodes: [...] }`. Episodes ordered newest first.
+
+### Projects
+
+| Method | Path | Description |
+|---|---|---|
+| GET | /projects | Get all projects |
+| POST | /projects | Create a new project |
+| PATCH | /projects/:id | Update a project |
+| DELETE | /projects/:id | Delete a project (nulls session assignments) |
+
+**POST /projects — body:**
+```json
+{
+  "name": "My Project",
+  "description": "Optional description",
+  "colour": "#3d3a79",
+  "icon": null,
+  "isolated": 0
+}
+```
+`name` is required. All other fields optional. `isolated` is `0` or `1`.
+Returns `201` with the created project object.
+
+**PATCH /projects/:id — body:** same fields as POST, all optional.
+
+### Models
+
+| Method | Path | Description |
+|---|---|---|
+| GET | /models | Available models from `models.json` manifest |
+
+Returns array: `[{ "value": "model-name.gguf", "label": "Display Name" }]`
+
+---
+
+## Memory Service — port 3002
+
+Direct access is for debugging only. All client traffic goes through
+orchestration.
+
+### Health
+
+| Method | Path | Description |
+|---|---|---|
+| GET | /health | Service health check |
+
+### Sessions
+
+| Method | Path | Description |
+|---|---|---|
+| POST | /sessions | Create a new session |
+| GET | /sessions | Paginated session list with optional projectId filter |
+| GET | /sessions/:id | Get session by internal ID |
+| GET | /sessions/by-external/:externalId | Get session by external ID |
+| PATCH | /sessions/by-external/:externalId | Update session fields |
+| DELETE | /sessions/by-external/:externalId | Delete session (cascades to episodes) |
+
+> Route ordering: `by-external/:externalId` must be defined before `/:id`
+> to prevent `by-external` being captured as an ID param.
+
+**POST /sessions — body:**
+```json
+{ "externalId": "unique-uuid", "metadata": {} }
+```
+
+**PATCH /sessions/by-external/:externalId — body:**
+```json
+{ "name": "Session Name", "projectId": 3 }
+```
+Both fields are optional. Only provided fields are updated — other fields
+are not touched.
+
+### Episodes
+
+| Method | Path | Description |
+|---|---|---|
+| POST | /episodes | Create episode + auto-embed into Qdrant |
+| GET | /episodes/search?q=&limit= | FTS keyword search across all episodes |
+| GET | /episodes/:id | Get episode by ID |
+| GET | /sessions/:id/episodes?limit=&offset= | Paginated episodes for a session |
+| DELETE | /episodes/:id | Delete an episode |
+
+> Route ordering: `/episodes/search` must be defined before `/episodes/:id`.
+
+**POST /episodes — body:**
+```json
+{
+  "sessionId": 1,
+  "userMessage": "Hello",
+  "aiResponse": "Hi there!",
+  "tokenCount": 10
+}
+```
+
+### Projects
+
+| Method | Path | Description |
+|---|---|---|
+| POST | /projects | Create a new project |
+| GET | /projects | Get all projects |
+| GET | /projects/:id | Get project by ID |
+| PATCH | /projects/:id | Update a project |
+| DELETE | /projects/:id | Delete project + null session assignments |
+
+Same request/response shape as orchestration `/projects` above.
+
+### Entities
+
+| Method | Path | Description |
+|---|---|---|
+| POST | /entities | Upsert entity (creates or updates by name + type) |
+| GET | /entities/by-type/:type | All entities of a given type |
+| GET | /entities/:id | Get entity by ID |
+| DELETE | /entities/:id | Delete entity (cascades to relationships) |
+
+> Route ordering: `/entities/by-type/:type` must be before `/entities/:id`.
+
+**POST /entities — body:**
+```json
+{
+  "name": "NexusAI",
+  "type": "project",
+  "notes": "My AI memory project",
+  "metadata": {}
+}
+```
+
+### Relationships
+
+| Method | Path | Description |
+|---|---|---|
+| POST | /relationships | Upsert a relationship between two entities |
+| GET | /entities/:id/relationships | All relationships for an entity |
+| DELETE | /relationships | Delete a specific relationship |
+
+**POST /relationships — body:**
+```json
+{ "fromId": 1, "toId": 2, "label": "uses", "metadata": {} }
+```
+
+**DELETE /relationships — body:**
+```json
+{ "fromId": 1, "toId": 2, "label": "uses" }
+```
+
+Relationships are identified by the composite key `(fromId, toId, label)`.
+Delete uses request body rather than URL params since this three-part key
+is awkward to encode in a path.
+
+---
+
+## Embedding Service — port 3003
+
+| Method | Path | Description |
+|---|---|---|
+| GET | /health | Service health check |
+| POST | /embed | Embed a single text string |
+| POST | /embed/batch | Embed an array of text strings |
+
+**POST /embed — body:**
+```json
+{ "text": "Hello from NexusAI" }
+```
+
+**POST /embed — response:**
+```json
+{ "embedding": [0.123, -0.456, ...], "model": "nomic-embed-text", "dimensions": 768 }
+```
+
+---
+
+## Inference Service — port 3001
+
+| Method | Path | Description |
+|---|---|---|
+| GET | /health | Health check — reports active provider and model |
+| POST | /complete | Full completion — awaits entire response |
+| POST | /complete/stream | Streaming completion via SSE |
+
+**POST /complete — body:**
+```json
+{
+  "prompt": "What is the capital of France?",
+  "model": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf",
+  "temperature": 0.7,
+  "maxTokens": 1024
+}
+```
+All fields except `prompt` are optional.
+
+**POST /complete — response:**
+```json
+{
+  "text": "The capital of France is Paris.",
+  "model": "gemma-4-26B...gguf",
+  "done": true,
+  "evalCount": 8,
+  "promptEvalCount": 41
+}
+```
\ No newline at end of file
diff --git a/docs/services/Memory-isolation.md b/docs/services/Memory-isolation.md
new file mode 100644
index 0000000..3b2f1f4
--- /dev/null
+++ b/docs/services/Memory-isolation.md
@@ -0,0 +1,128 @@
+# Memory Isolation
+
+NexusAI implements project-scoped memory — sessions belonging to the same
+project can share semantic context, and isolated projects can be restricted
+from drawing on memory outside the project. This document describes how the
+system works end-to-end.
+
+## Concepts
+
+**Session** — a single conversation thread. Identified by `external_id`.
+
+**Project** — a named grouping of sessions. Has an `isolated` flag (0 or 1).
+
+**Semantic search** — at inference time, the user's message is embedded and
+compared against past episodes in Qdrant to surface relevant context. The
+scope of this search is controlled by the project context.
+
+## Semantic Search Scope
+
+| Session state | Semantic search scope |
+|---|---|
+| No project | Own session's episodes only |
+| Assigned to a non-isolated project | All episodes across all sessions in the project |
+| Assigned to an isolated project | All episodes within the project only |
+| Removed from a project | Own session's episodes only (from that point) |
+
+Sessions with no project assigned behave the same as they always have —
+only their own past episodes are searched.
+
+## How It Works
+
+### Step 1 — Project context resolution (orchestration)
+
+In `chat/index.js`, immediately after session resolution:
+
+```js
+let projectSessionIds = null;
+if (session.project_id) {
+  const project = await memory.getProject(session.project_id);
+  if (project) {
+    const projectSessions = await memory.getProjectSessions(session.project_id);
+    projectSessionIds = projectSessions.map(s => s.id);
+  }
+}
+```
+
+If the session belongs to any project (isolated or not), `projectSessionIds`
+is populated with the internal integer IDs of all sessions in that project.
+
+For **non-isolated projects**, this expands the search to all project sessions.  
+For **isolated projects**, the same set is used but the intent is restriction
+— since `projectSessionIds` only contains project sessions, no external
+episodes can appear.
+
+Both cases use the same code path — the `isolated` flag does not change the
+query logic, only the conceptual meaning.
+
+### Step 2 — Qdrant filter construction
+
+In `services/qdrant.js`, `searchEpisodes` builds the filter:
+
+```js
+if (projectSessionIds) {
+  body.filter = {
+    should: projectSessionIds.map(id => ({
+      key: 'sessionId', match: { value: id }
+    }))
+  };
+} else if (sessionId) {
+  body.filter = { must: [{ key: 'sessionId', match: { value: sessionId } }] };
+}
+```
+
+`should` is Qdrant's "match any of" operator — equivalent to SQL
+`WHERE sessionId IN (...)`. When `projectSessionIds` is set, the single-session
+filter is not used.
+
+### Step 3 — Episode payloads
+
+Every episode upserted into Qdrant carries `{ sessionId, createdAt }` in its
+payload. `sessionId` here is the **internal integer ID** from SQLite. This
+is what the Qdrant filter matches against.
+
+This means the filter works correctly regardless of when episodes were created
+or when a session was added to a project — the payload is immutable.
+
+## Important Behaviours
+
+**Pre-existing episodes are included immediately.** When a session is added
+to a project and a new message is sent, Qdrant can match all of that session's
+existing episodes since the filter only requires the `sessionId` to be in the
+project's session list.
+
+**Removing a session from a project takes effect immediately.** On the next
+message, `getProjectSessions` will not include that session's ID, so its
+episodes disappear from the semantic search scope.
+
+**New sessions created from ProjectView are assigned after the first message.**
+The `useChat` hook writes the `project_id` assignment via `updateSession` after
+`onDone` fires. There is a brief window during the first message where the
+session has no project assigned. The project is correctly applied from the
+second message onward.
+
+## Isolated vs Non-Isolated
+
+The `isolated` flag is stored on the project but does not currently change the
+query logic — both isolated and non-isolated projects result in a
+`projectSessionIds` filter. The distinction is semantic and enforced by
+the project's membership:
+
+- **Non-isolated** — intentionally draws from all sessions in the project,
+  creating a shared memory pool for related conversations
+- **Isolated** — by design contains only sessions explicitly added to it,
+  so the same filter naturally restricts context to project-only episodes
+
+If cross-project contamination becomes a concern (e.g. a session accidentally
+added to the wrong project), removing that session from the project immediately
+restores isolation.
+
+## Qdrant Payload Structure
+
+Episodes are stored with this payload:
+```json
+{ "sessionId": 42, "createdAt": 1776080188 }
+```
+
+`sessionId` is the SQLite `sessions.id` integer, not the `external_id` UUID.
+This is important when building filters — always use internal IDs.
\ No newline at end of file
diff --git a/docs/services/chat-client.md b/docs/services/chat-client.md
index f8796a8..ab7a0f8 100644
--- a/docs/services/chat-client.md
+++ b/docs/services/chat-client.md
@@ -55,10 +55,6 @@ VITE_ORCHESTRATION_URL=https://nexus.jellystorm.com
 during local development, bypassing Caddy and Authelia entirely:
 
 ```js
-// vite.config.js
-import { defineConfig } from 'vite';
-import react from '@vitejs/plugin-react';
-
 export default defineConfig({
   plugins: [react()],
   server: {
@@ -72,7 +68,8 @@ export default defineConfig({
 });
 ```
 
-If new routes are added to the orchestration service, add them here too.
+When adding new top-level routes to the orchestration service, add a matching
+entry here too.
 
 ## Internal Structure
 
@@ -93,12 +90,13 @@ src/
 │   ├── Sidebar.jsx          # Left sidebar — projects, recent chats, navigation
 │   ├── ChatWindow.jsx       # Centre panel — message thread and input bar
 │   ├── MessageBubble.jsx    # Individual message bubble (user or assistant)
-│   ├── InfoPanel.jsx        # Right panel — model selector and session metadata
-│   ├── SessionModal.jsx     # Modal for session rename and delete confirmation
-│   ├── ProjectModal.jsx     # Modal for project create, edit, and delete confirmation
+│   ├── InfoPanel.jsx        # Right panel — model selector and session metadata (slide-in)
+│   ├── SessionModal.jsx     # Modal for session rename, project assignment, delete
+│   ├── ProjectModal.jsx     # Modal for project create, edit, delete
 │   ├── AllChatsView.jsx     # Full paginated session list with multi-select bulk delete
 │   ├── AllProjectsView.jsx  # Project tile grid with create/edit/delete
-│   └── SettingsView.jsx     # Settings placeholder (sections: Appearance, Memory, Models, About)
+│   ├── ProjectView.jsx      # Individual project — session list, new chat button
+│   └── SettingsView.jsx     # Settings placeholder (Appearance, Memory, Models, About)
 ├── index.css                # Global reset, CSS variables, utility classes
 └── main.jsx                 # React entry point
 ```
@@ -107,9 +105,9 @@ src/
 
 ## Layout
 
-The app uses a view-based layout. `App.jsx` manages a `view` state
-(`'chat' | 'all-chats' | 'all-projects' | 'settings'`) that controls which
-main panel is rendered. The left sidebar and right info panel are always present.
+The app uses a view-based layout. `App.jsx` manages a `view` state string
+that controls which main panel is rendered. The left sidebar and right info
+panel are persistent across all views.
 
 ```
 ┌──────────────────┬──────────────────────────────┐
@@ -117,9 +115,9 @@ main panel is rendered. The left sidebar and right info panel are always present
 │  (collapsible)   │                               │
 │                  │  chat         → ChatWindow    │
 │ + New Chat       │  all-chats    → AllChatsView  │
-│ ⊞ New Project    │  all-projects → AllProjectsView│
-│                  │  settings     → SettingsView  │
-│ PROJECTS ▾       │                               │
+│ ⊞ View Projects  │  all-projects → AllProjectsView│
+│                  │  project      → ProjectView   │
+│ PROJECTS ▾       │  settings     → SettingsView  │
 │  [tile] [tile]   │                               │
 │  All Projects →  │                               │
 │                  │                               │
@@ -132,10 +130,22 @@ main panel is rendered. The left sidebar and right info panel are always present
 └──────────────────┴──────────────────────────────┘
 ```
 
-The sidebar collapses to a 48px icon rail. The right info panel (`InfoPanel`)
-slides in from the right over the main area using `transform: translateX()` —
-it is hidden by default (`rightOpen` starts `false`) and toggled via a button
-in the `ChatWindow` header.
+The sidebar collapses to a 48px icon rail. The right `InfoPanel` slides in
+from the right using `transform: translateX()` — hidden by default, toggled
+via the `⊹` button in the `ChatWindow` header.
+
+## View Routing
+
+| View | Component | Trigger |
+|---|---|---|
+| `'chat'` | `ChatWindow` | Default; selecting a session; new chat |
+| `'all-chats'` | `AllChatsView` | "All Chats →" or ☰ icon in collapsed rail |
+| `'all-projects'` | `AllProjectsView` | "View Projects" button or ⊞ icon |
+| `'project'` | `ProjectView` | Clicking a project tile in the sidebar |
+| `'settings'` | `SettingsView` | Settings button or ⚙ icon |
+
+`activeProject` state in `App.jsx` tracks which project `ProjectView` is
+displaying. It is set via `onSelectProject` before navigating to `'project'`.
 
 ## CSS Architecture
 
@@ -181,91 +191,47 @@ rules, inline styles for dynamic prop-driven values.
 | `.label-upper` | Uppercase section label style |
 | `.truncate` | Text overflow ellipsis |
 
-## API Layer
-
-All orchestration calls are centralised in `src/api/orchestration.js`:
-
-| Function | Method | Path | Description |
-|---|---|---|---|
-| `fetchSessions` | GET | /sessions | Load session list for sidebar |
-| `fetchSessionHistory` | GET | /sessions/:id/history | Load episode history on session select |
-| `sendMessage` | POST | /chat | Send message, await full response |
-| `streamMessage` | POST | /chat/stream | Send message, receive SSE token stream |
-| `fetchModels` | GET | /models | Load available models from manifest |
-| `renameSession` | PATCH | /sessions/:id | Rename a session |
-| `deleteSession` | DELETE | /sessions/:id | Delete a session |
-| `fetchProjects` | GET | /projects | Load project list |
-| `createProject` | POST | /projects | Create a new project |
-| `updateProject` | PATCH | /projects/:id | Update a project |
-| `deleteProject` | DELETE | /projects/:id | Delete a project |
-
-`streamMessage` returns an abort function — call it to cancel a stream mid-flight.
-Uses a buffer pattern to handle SSE chunks that may span multiple network packets.
-
 ## Streaming
 
-The chat input sends messages via `POST /chat/stream`. Tokens arrive as SSE events:
+Messages are sent via `POST /chat/stream`. Tokens arrive as SSE events and
+are written into the active assistant bubble token by token via
+`updateLastMessage`. The blinking cursor in `MessageBubble` is shown while
+`message.streaming === true`.
 
-```
-data: {"text":"Hello"}
-data: {"text":" Tim"}
-data: {"done":true,"model":"gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf","tokenCount":87}
-```
-
-An empty assistant bubble is appended immediately when the stream opens, then
-updated token by token using `updateLastMessage`. The blinking cursor in
-`MessageBubble` is shown while `message.streaming === true` and disappears
-when the done event is received. Model name and token count from the done
-event are stored in `useChat` state and displayed in the InfoPanel.
-
-## Dynamic Model Selector
-
-Available models are fetched from `GET /models` on mount via the `useModels` hook.
-The hook initialises with `FALLBACK_MODELS` from `constants.js` and replaces them
-with the server response on success. If the fetch fails, the fallback list is used
-silently — a warning is logged to the console.
-
-To add a model, update `models.json` on the main PC — no client rebuild needed.
-
-`FALLBACK_MODELS` in `constants.js` should be kept in sync with `models.json`
-as a reasonable last-resort list in case the endpoint is unreachable.
+`sendMessage` in `useChat` accepts an optional `projectId` parameter. After
+the first message completes in a new session, if `projectId` is set,
+`updateSession` is called to write the project assignment to the backend.
 
 ## Session Management
 
-Sessions are identified by `external_id` — a UUID generated client-side via the
-`uuid` package. New sessions are created locally and auto-registered in the memory
-service on the first message. The session list refreshes after each completed
-response to surface newly created sessions.
+Sessions are identified by `external_id` — a UUID generated client-side via
+the `uuid` package. New sessions are created locally and auto-registered in
+the memory service on the first message. The session list refreshes after
+each completed response to surface newly created sessions.
 
-### Session Name Display
+### Auto-naming
 
-The chat header and session rows both display `session.name` if set, falling back
-to `session.external_id` if no name has been assigned:
+After the first exchange completes, orchestration fires a secondary inference
+call with a short naming prompt (max 20 tokens, temperature 0.3). The result
+is written back as `session.name`. The client fires a second `refreshSessions`
+after a 3-second delay to pick up the name once written.
 
-```js
-activeSession.name || activeSession.external_id
-```
+Manually renamed sessions are never overwritten — the `!session.name` guard
+in `chat/index.js` prevents this.
 
 ### Session Actions
 
-Session rows in the sidebar support rename and delete via two entry points:
+Session rows support rename, project assignment, and delete via:
+- **Hover** — reveals ✎ and ✕ icon buttons alongside the row
+- **Right-click** — context menu with the same actions
 
-- **Hover** — reveals ✎ (rename) and ✕ (delete) icon buttons alongside the row
-- **Right-click** — opens a context menu with the same actions
-
-Both trigger `SessionModal` — a shared modal component with two modes:
-
-| Mode | Trigger | Behaviour |
-|---|---|---|
-| `settings` | Rename button / context menu rename | Shows name input, saves on Enter or Save button |
-| `confirm-delete` | Delete button / context menu delete | Shows confirmation dialog, requires explicit Delete click |
-
-Actions are disabled on unsaved (new) sessions that haven't had a first message sent yet.
+`SessionModal` handles rename and project assignment together in `settings`
+mode, and delete confirmation in `confirm-delete` mode.
 
 ### Active Session Clearing on Delete
 
-When the deleted session is the currently active one, `App.jsx` detects the match
-and calls `selectSession(null)` to clear the chat window before refreshing the list:
+When the deleted session is the currently active one, `App.jsx` clears the
+chat window before refreshing the list:
 
 ```js
 function handleSessionsChange(deletedSession) {
@@ -276,53 +242,23 @@ function handleSessionsChange(deletedSession) {
 }
 ```
 
-### Context Menu
+### Key Patterns
 
-Implemented via `useContextMenu` hook — tracks `{ x, y, session }` state and
-attaches a `window` click listener to dismiss on any outside click. Rendered
-outside the sidebar div via a React fragment to avoid being clipped by
-`overflow: hidden`.
-
-### Button Nesting
-
-Session row action icons (✎ ✕) are rendered as siblings of the session
-`<button>`, not children — HTML does not allow `<button>` inside `<button>`.
-The outer `<div>` owns hover state and context menu; the inner `<button>` handles
-session selection; action icon buttons sit alongside it in the same flex row.
+- Button nesting: action icons are siblings of row buttons, not children — HTML forbids `<button>` inside `<button>`
+- Context menu rendered outside sidebar via React fragment to avoid `overflow: hidden` clipping
+- `useContextMenu` dismisses on a `window` click listener
+- Dynamic `updateSession` SQL builds `SET` clause from only the fields passed — prevents accidental overwrites
 
 ## Project Management
 
-Projects are a first-class concept in the UI. The `useProjects` hook fetches
-the project list from `GET /projects` on mount and exposes a `refreshProjects`
-callback for keeping the sidebar in sync after mutations.
+`useProjects` fetches the project list from `GET /projects` on mount and
+exposes `refreshProjects` for keeping the sidebar in sync after mutations.
 
-### Project Actions
+`ProjectModal` handles create, edit, and delete confirmation. Fields: name
+(required), description (optional), colour picker, isolated toggle.
 
-Projects are managed from `AllProjectsView` via `ProjectModal`:
+`ProjectView` shows the project's name, description, isolated badge (if set),
+and a filtered session list. The "+ New Chat" button creates a new session,
+navigates to `'chat'`, and writes the project assignment after the first message.
 
-| Mode | Behaviour |
-|---|---|
-| `create` | Name (required), description (optional), colour picker |
-| `edit` | Same fields as create, pre-populated |
-| `confirm-delete` | Confirmation dialog — sessions in the project are not deleted |
-
-The sidebar Projects section shows up to 6 project tiles as coloured badge buttons.
-Clicking any tile navigates to `AllProjectsView`. The "All Projects →" link is
-always shown below the tiles.
-
-After any create, edit, or delete in `AllProjectsView`, `onProjectsChange` is called
-to trigger `refreshProjects` in `App.jsx`, keeping the sidebar tiles in sync.
-
-## View Routing
-
-`App.jsx` manages a `view` state string that controls which main panel renders:
-
-| View | Component | Trigger |
-|---|---|---|
-| `'chat'` | `ChatWindow` | Default; selecting a session from sidebar or AllChatsView |
-| `'all-chats'` | `AllChatsView` | "All Chats →" link or ☰ icon in collapsed rail |
-| `'all-projects'` | `AllProjectsView` | "All Projects →" link, ⊞ icon, or New Project button |
-| `'settings'` | `SettingsView` | Settings button or ⚙ icon in collapsed rail |
-
-`AllChatsView` navigates back to `'chat'` on session row click, passing the selected
-session to `selectSession` so history loads immediately.
\ No newline at end of file
+For memory isolation behaviour, see `Memory-isolation.md`.
\ No newline at end of file
diff --git a/docs/services/embedding-service.md b/docs/services/embedding-service.md
index 1a4870a..e9b7d1e 100644
--- a/docs/services/embedding-service.md
+++ b/docs/services/embedding-service.md
@@ -27,80 +27,43 @@ minimizing network hops on the memory write path.
 | OLLAMA_URL | No | http://localhost:11434 | Ollama instance URL |
 | EMBEDDING_MODEL | No | nomic-embed-text | Ollama embedding model to use |
 
+> Ollama must be running with `OLLAMA_HOST=0.0.0.0` to accept LAN connections
+> from other services.
+
 ## Model
 
-**nomic-embed-text** via Ollama produces **768-dimension** vectors using **Cosine similarity**.
-This must match the `QDRANT.VECTOR_SIZE` constant in `@nexusai/shared`.
+**nomic-embed-text** via Ollama produces **768-dimension** vectors, compared
+using **Cosine similarity**. The dimension must match `QDRANT.VECTOR_SIZE` in `@nexusai/shared`.
 
 If the embedding model is changed, the Qdrant collections must be reinitialized
-with the new vector dimension — updating `QDRANT.VECTOR_SIZE` in `constants.js` is
-the single change required to keep everything consistent.
+with the new vector dimension. Updating `QDRANT.VECTOR_SIZE` in `constants.js`
+is the single change required to keep everything consistent.
 
 ## Ollama API
 
-Uses the `/api/embed` endpoint (Ollama v0.4+). Request shape:
+Uses the `/api/embed` endpoint (Ollama v0.4+):
+
 ```json
+// Request
 { "model": "nomic-embed-text", "input": "text to embed" }
-```
-Response key is `embeddings[0]` — an array of 768 floats.
 
-## Endpoints
-
-### Health
-
-| Method | Path | Description |
-|---|---|---|
-| GET | /health | Service health check |
-
-### Embed
-
-| Method | Path | Description |
-|---|---|---|
-| POST | /embed | Embed a single text string |
-| POST | /embed/batch | Embed an array of text strings |
-
----
-
-**POST /embed**
-
-Embeds a single text string and returns the vector.
-
-Request body:
-```json
-{
-  "text": "Hello from NexusAI"
-}
+// Response key
+embeddings[0]  // array of 768 floats
 ```
 
-Response:
-```json
-{
-  "embedding": [0.123, -0.456, ...],
-  "model": "nomic-embed-text",
-  "dimensions": 768
-}
-```
+> Earlier Ollama versions used `/api/embeddings` with a `prompt` key and
+> returned `embedding` (singular). Use `/api/embed`, `input`, and
+> `embeddings[0]` for Ollama v0.4+.
 
----
+## Usage in NexusAI
 
-**POST /embed/batch**
+The embedding service is called in two places:
 
-Embeds an array of strings sequentially and returns all vectors in the same order.
-Ollama does not natively parallelize embeddings, so requests are processed one at a time.
+1. **Memory service** — after each episode is saved to SQLite, the combined
+   `User: ...\nAssistant: ...` text is embedded and upserted into Qdrant.
+   This is fire-and-forget — failures are logged but don't affect the response.
 
-Request body:
-```json
-{
-  "texts": ["first sentence", "second sentence"]
-}
-```
+2. **Orchestration service** — the user's message is embedded at the start of
+   the chat pipeline to perform semantic search against past episodes.
 
-Response:
-```json
-{
-  "embeddings": [[0.123, ...], [0.456, ...]],
-  "model": "nomic-embed-text",
-  "dimensions": 768,
-  "count": 2
-}
-```
\ No newline at end of file
+For all HTTP endpoints, see `API-routes.md`.
\ No newline at end of file
diff --git a/docs/services/inference-service.md b/docs/services/inference-service.md
index 49d668c..bdcd686 100644
--- a/docs/services/inference-service.md
+++ b/docs/services/inference-service.md
@@ -24,20 +24,19 @@ to switch inference backends without changes to the rest of the system.
 | Variable | Required | Default | Description |
 |---|---|---|---|
 | PORT | No | 3001 | Port to listen on |
-| INFERENCE_PROVIDER | No | llamacpp | Active inference provider (`ollama` or `llamacpp`) |
+| INFERENCE_PROVIDER | No | llamacpp | Active provider (`ollama` or `llamacpp`) |
 | INFERENCE_URL | No | http://localhost:8080 | URL of the inference runtime |
 | DEFAULT_MODEL | No | local-model | Default model name passed to the provider |
 
 > `INFERENCE_URL` points to `llama-server` directly (port 8080), not to this
-> service itself. The orchestration service uses `INFERENCE_SERVICE_URL` to
-> reach this service on port 3001.
+> service. The orchestration service uses `INFERENCE_SERVICE_URL` to reach
+> this service on port 3001.
 
 ## Provider Architecture
 
-The inference service uses a provider pattern to abstract the underlying
-LLM runtime. The active provider is selected at startup via `INFERENCE_PROVIDER`
-and loaded from `src/providers/`. Both providers expose identical function
-signatures, so the rest of the service is unaware of which backend is active.
+The active provider is selected at startup via `INFERENCE_PROVIDER` and
+loaded from `src/providers/`. Both providers expose identical function
+signatures.
 
 ### Supported Providers
 
@@ -46,28 +45,36 @@ signatures, so the rest of the service is unaware of which backend is active.
 | llama.cpp | `llamacpp` | llama.cpp server (OpenAI-compatible API) — **current default** |
 | Ollama | `ollama` | Ollama via the `ollama` npm package — available as fallback |
 
-Switching providers requires only a `.env` change — no code modifications needed:
+Switching providers requires only a `.env` change — no code modifications:
 ```
 INFERENCE_PROVIDER=llamacpp
 INFERENCE_URL=http://localhost:8080
 ```
 
-### Provider Validation
+The provider loader throws immediately on an unknown value, preventing silent
+misconfiguration.
+
+## Internal Structure
 
-The provider loader validates `INFERENCE_PROVIDER` at startup and throws immediately
-if an unknown value is set — prevents silent misconfiguration:
 ```
-Error: Unknown inference provider: "foo". Valid options: ollama, llamacpp
+src/
+├── providers/
+│   ├── ollama.js      # Ollama provider
+│   └── llamacpp.js    # llama.cpp provider (OpenAI-compatible REST)
+├── routes/
+│   └── inference.js   # /complete and /complete/stream route handlers
+├── infer.js           # Provider loader — selects and re-exports active provider
+└── index.js           # Express app + route definitions
 ```
 
 ## llama.cpp Provider
 
-The llama.cpp provider uses the OpenAI-compatible REST API exposed by `llama-server`.
+Uses the OpenAI-compatible REST API exposed by `llama-server`.
 
 ### Starting llama-server
 
-`llama-server` must be started manually on the main PC before the inference service
-can handle requests. It loads a single model at startup:
+`llama-server` must be started manually on the main PC before the inference
+service can handle requests:
 
 ```powershell
 .\llama-gpu\llama-server.exe `
@@ -79,40 +86,29 @@ can handle requests. It loads a single model at startup:
   -c 64000
 ```
 
-Key flags:
-
 | Flag | Description |
 |---|---|
-| `-m` | Path to the `.gguf` model file |
 | `-ngl 99` | Offload as many layers as possible to GPU |
-| `--reasoning off` | Disables thinking/reasoning delay on Gemma 4 models |
-| `--host 0.0.0.0` | Allows connections from other machines on the LAN |
-| `--port 8080` | Port for the llama-server HTTP API |
+| `--reasoning off` | Disables thinking delay on Gemma 4 models |
+| `--host 0.0.0.0` | Allows LAN connections |
 | `-c 64000` | Context window size in tokens |
 
-> `-c 64000` is intentionally large. Monitor VRAM usage — if pressure builds,
-> reduce this value. The NexusAI memory architecture handles context injection
-> so a smaller window (6–8K) is often sufficient.
+> `-c 64000` is intentionally large. If VRAM pressure builds, reduce it — NexusAI's
+> memory architecture handles context injection, so 6–8K is often sufficient.
 
 ### Model Naming
 
-The model name sent in API requests must match the name as reported by
-`llama-server` — including the `.gguf` extension. The reported name can be
-verified with:
+The model name in requests must match the name reported by `llama-server`,
+including the `.gguf` extension. List the reported names with:
 
 ```powershell
 Invoke-RestMethod -Uri "http://192.168.0.79:8080/v1/models"
 ```
 
-Set `DEFAULT_MODEL` in `.env` to the exact reported name:
-```
-DEFAULT_MODEL=gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf
-```
+Set `DEFAULT_MODEL` in `.env` to the exact reported name.
 
 ### Inference Parameters
 
-The llamacpp provider maps NexusAI options to OpenAI-compatible fields:
-
 | NexusAI option | API field | Default |
 |---|---|---|
 | `temperature` | `temperature` | 0.7 |
@@ -122,18 +118,6 @@ The llamacpp provider maps NexusAI options to OpenAI-compatible fields:
 | `repeatPenalty` | `repeat_penalty` | 1.1 |
 | `seed` | `seed` | null (random) |
 
-## Internal Structure
-```
-src/
-├── providers/
-│   ├── ollama.js      # Ollama provider — uses ollama npm package
-│   └── llamacpp.js    # llama.cpp provider — uses OpenAI-compatible REST API
-├── routes/
-│   └── inference.js   # /complete and /complete/stream route handlers
-├── infer.js           # Provider loader — selects and re-exports active provider
-└── index.js           # Express app + route definitions
-```
-
 ## Streaming Response Format
 
 The llama.cpp provider yields chunks in this shape:
@@ -143,7 +127,7 @@ The llama.cpp provider yields chunks in this shape:
 { response: '', done: true, model: "model-name.gguf", tokenCount: 42 }
 ```
 
-The inference route re-emits these as SSE events:
+The inference route re-emits these chunks as SSE events:
 ```
 data: {"response":"token text"}
 data: {"done":true,"model":"model-name.gguf","tokenCount":42}
@@ -151,66 +135,6 @@ data: [DONE]
 ```
 
 `model` and `tokenCount` are captured from the llama.cpp `finish_reason: stop`
-chunk (`usage.completion_tokens`) and emitted on the done event so the
-orchestration layer can forward them to the client.
+chunk and emitted on the done event.
 
-## Endpoints
-
-### Health
-
-| Method | Path | Description |
-|---|---|---|
-| GET | /health | Service health check — reports active provider and model |
-
-### Inference
-
-| Method | Path | Description |
-|---|---|---|
-| POST | /complete | Standard completion — returns full response when done |
-| POST | /complete/stream | Streaming completion via Server-Sent Events |
-
----
-
-**POST /complete**
-
-Request body:
-```json
-{
-  "prompt": "What is the capital of France?",
-  "model": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf",
-  "temperature": 0.7,
-  "maxTokens": 1024
-}
-```
-
-`model` is optional — falls back to `DEFAULT_MODEL` if omitted.  
-`maxTokens` is optional — defaults to 1024.  
-`temperature` is optional — defaults to 0.7.
-
-Response:
-```json
-{
-  "text": "The capital of France is Paris.",
-  "model": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf",
-  "done": true,
-  "evalCount": 8,
-  "promptEvalCount": 41
-}
-```
-
----
-
-**POST /complete/stream**
-
-Same request body as `/complete`.
-
-Response is a stream of Server-Sent Events:
-```
-data: {"response":"The"}
-data: {"response":" capital of France is Paris."}
-data: {"done":true,"model":"gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf","tokenCount":8}
-data: [DONE]
-```
-
-Clients should accumulate `response` fields to build the full response string.
-The `done` event carries `model` and `tokenCount` for display in the UI.
\ No newline at end of file
+For all HTTP endpoints, see `API-routes.md`.
\ No newline at end of file
diff --git a/docs/services/memory-service.md b/docs/services/memory-service.md
index 0ad494f..ccbc5ab 100644
--- a/docs/services/memory-service.md
+++ b/docs/services/memory-service.md
@@ -43,48 +43,34 @@ src/
 │   └── index.js       # Qdrant collection management, upsert, search, delete
 ├── entities/
 │   └── index.js       # Entity + relationship CRUD
-└── index.js           # Express app + route definitions
+└── index.js           # Express app + all route definitions
 ```
 
 ## SQLite Schema
 
 Six core tables:
 
-- **sessions** — top-level conversation containers, identified by an `external_id`, optional `name`, and optional `project_id`
+- **sessions** — top-level conversation containers. Fields: `external_id`, `name`, `project_id`, `metadata`
 - **episodes** — individual exchanges (user message + AI response) tied to a session
 - **entities** — named things the system learns about (people, places, concepts)
 - **relationships** — directional labeled links between entities
 - **summaries** — condensed episode groups for efficient context retrieval
-- **projects** — named groupings of sessions with optional description, colour, and icon
+- **projects** — named groupings of sessions with `name`, `description`, `colour`, `icon`, `isolated`
 
 ### Migrations
 
-Schema changes that cannot be expressed in `CREATE TABLE IF NOT EXISTS` are applied
-as migrations in `db/index.js` at startup, wrapped in try/catch to safely ignore
-already-applied changes:
+Schema changes that cannot use `CREATE TABLE IF NOT EXISTS` are applied as
+idempotent migrations in `db/index.js` at startup:
 
 ```js
-try {
-    db.exec(`ALTER TABLE sessions ADD COLUMN name TEXT`);
-} catch {}
-
-try {
-    db.exec(`ALTER TABLE sessions ADD COLUMN project_id INTEGER REFERENCES projects(id)`);
-} catch {}
-
-try {
-    db.exec(`CREATE INDEX IF NOT EXISTS idx_sessions_project ON sessions(project_id)`);
-} catch {}
+try { db.exec(`ALTER TABLE sessions ADD COLUMN name TEXT`); } catch {}
+try { db.exec(`ALTER TABLE sessions ADD COLUMN project_id INTEGER REFERENCES projects(id)`); } catch {}
+try { db.exec(`CREATE INDEX IF NOT EXISTS idx_sessions_project ON sessions(project_id)`); } catch {}
+try { db.exec(`ALTER TABLE projects ADD COLUMN isolated INTEGER NOT NULL DEFAULT 0`); } catch {}
 ```
 
-This pattern is idempotent — safe to run on every startup. New migrations should
-always be appended here rather than modifying the schema file, since `ALTER TABLE`
-and index creation on existing tables cannot use `IF NOT EXISTS` guards in SQLite.
-
-Current migrations:
-- `ALTER TABLE sessions ADD COLUMN name TEXT` — adds display name to sessions
-- `ALTER TABLE sessions ADD COLUMN project_id INTEGER` — links sessions to projects
-- `CREATE INDEX idx_sessions_project` — index on the new project_id column
+Always append new migrations here — never modify the schema file for existing
+tables, since SQLite's `ALTER TABLE ... ADD COLUMN` has no `IF NOT EXISTS` guard.
 
 ### FTS5 Full-Text Search
 
@@ -96,11 +82,27 @@ keep the FTS index automatically in sync with the episodes table.
 
 - `journal_mode = WAL` — non-blocking reads during writes
 - `foreign_keys = ON` — enforces referential integrity and cascade deletes
-- PRAGMAs are set via `db.pragma()` separately from `db.exec()`
+- PRAGMAs set via `db.pragma()`, not `db.exec()`
+
+### Dynamic Session Updates
+
+`updateSession` builds its `SET` clause dynamically from only the fields
+passed, so a partial update never overwrites fields that weren't
+touched:
+
+```js
+function updateSession(id, { name, projectId } = {}) {
+  const updates = [];
+  const values = [];
+  if (name !== undefined)      { updates.push('name = ?');       values.push(name ?? null); }
+  if (projectId !== undefined) { updates.push('project_id = ?'); values.push(projectId ?? null); }
+  // ...
+}
+```
 
 ## Qdrant / Semantic Layer
 
-Three collections are initialized on service startup (created if they don't already exist):
+Three Qdrant collections are initialized on service startup:
 
 | Collection | Purpose |
 |---|---|
@@ -108,208 +110,50 @@ Three collections are initialized on service startup (created if they don't alre
 | `entities` | Embeddings for named entities |
 | `summaries` | Embeddings for condensed episode summaries |
 
-All collections use **768-dimension vectors** with **Cosine similarity**, matching the
-output of the `nomic-embed-text` embedding model via Ollama.
+All collections use **768-dimension vectors** with **Cosine similarity**,
+matching `nomic-embed-text` via Ollama. Vector size and distance metric are
+defined in `@nexusai/shared` — not hardcoded here.
 
-Vector dimension and distance metric are defined in `@nexusai/shared` constants
-(`QDRANT.VECTOR_SIZE`, `QDRANT.DISTANCE_METRIC`) — not hardcoded in this service.
-
-### Semantic Layer Operations
-
-Each collection exposes three operations via helper functions in `src/semantic/index.js`:
-
-- **Upsert** — stores a vector with a payload containing the SQLite row ID, enabling
-  lookups back to the full content after a vector search
-- **Search** — returns the top-k most similar vectors, with optional Qdrant filter
-- **Delete** — removes a vector point by ID
-
-The `wait: true` flag is used on all write operations so the caller receives confirmation
-only after Qdrant has committed the change.
+Each collection exposes three operations in `src/semantic/index.js`:
+upsert, search (with optional Qdrant filter), and delete. The `wait: true`
+flag is used on all writes.
 
 ## Embedding Write Path
 
-When a new episode is created, the memory service automatically generates and stores
-a vector embedding in Qdrant via the embedding service:
+When a new episode is created:
 
-1. Episode is saved to SQLite synchronously — the response is returned immediately
-2. Both sides of the exchange are combined into a single text:
-   ```
-   User: {userMessage}
-   Assistant: {aiResponse}
-   ```
-3. This text is sent to the embedding service (`POST /embed`)
-4. The returned vector is upserted into the `episodes` Qdrant collection with a
-   payload of `{ sessionId, createdAt }` for filtering and lookups
+1. Episode saved to SQLite synchronously — response returned immediately
+2. User message + AI response combined: `User: ...\nAssistant: ...`
+3. Text sent to embedding service (`POST /embed`)
+4. Vector upserted into `episodes` Qdrant collection with payload `{ sessionId, createdAt }`
 
-The embedding step is **fire-and-forget** — it runs asynchronously after the SQLite
-insert succeeds. If embedding fails, the episode is still saved and searchable via
-FTS. The error is logged but does not affect the API response.
+This step is **fire-and-forget** — if embedding fails, the episode is still
+saved and searchable via FTS. The error is logged but not surfaced.
 
-### Hybrid Retrieval Pattern
-
-Qdrant and SQLite work as a pair — neither operates in isolation:
-
-1. Query is embedded and searched in Qdrant → returns IDs + similarity scores
-2. IDs are used to fetch full content from SQLite
-3. Results are ranked and assembled into a context package
+> The Qdrant payload stores `sessionId` (the internal integer ID). This is
+> used for per-session and per-project filtering during semantic search. See
+> `Memory-isolation.md` for how project-level filtering works.
 
 ## Entity Layer
 
-Entities and relationships are stored in SQLite with two key constraints:
+Entities and relationships use upsert semantics with composite unique
+constraints to prevent duplicates:
 
-- `UNIQUE(name, type)` on entities — ensures no duplicates; upsert updates existing records
-- `UNIQUE(from_id, to_id, label)` on relationships — prevents duplicate edges
-- `ON DELETE CASCADE` on both `from_id` and `to_id` — deleting an entity automatically
-  removes all relationships where it appears on either end
+- `UNIQUE(name, type)` on entities
+- `UNIQUE(from_id, to_id, label)` on relationships
+- `ON DELETE CASCADE` on relationship foreign keys
 
-## Endpoints
+## Project Delete Behaviour
 
-### Health
+Deleting a project runs as a transaction — it first nulls out `project_id`
+on all assigned sessions, then deletes the project. This avoids a foreign
+key constraint failure since `sessions.project_id` has no `ON DELETE` rule:
 
-| Method | Path | Description |
-|---|---|---|
-| GET | /health | Service health check |
-
-### Sessions
-
-| Method | Path | Description |
-|---|---|---|
-| POST | /sessions | Create a new session |
-| GET | /sessions | Get paginated list of all sessions |
-| GET | /sessions/:id | Get session by internal ID |
-| GET | /sessions/by-external/:externalId | Get session by external ID |
-| PATCH | /sessions/by-external/:externalId | Update session name |
-| DELETE | /sessions/by-external/:externalId | Delete session (cascades to episodes + summaries) |
-
-> Route ordering matters in Express: `by-external/:externalId` must be defined before
-> `/:id` to prevent the literal string `by-external` being captured as an ID parameter.
-
-**POST /sessions body:**
-```json
-{
-  "externalId": "unique-session-id",
-  "metadata": {}
-}
+```js
+const doDelete = db.transaction(() => {
+  db.prepare(`UPDATE sessions SET project_id = NULL WHERE project_id = ?`).run(id);
+  db.prepare(`DELETE FROM projects WHERE id = ?`).run(id);
+});
 ```
 
-**PATCH /sessions/by-external/:externalId body:**
-```json
-{
-  "name": "My Renamed Session"
-}
-```
-
-Returns the updated session object. `name` is required and must be non-empty.
-
-**DELETE /sessions/by-external/:externalId**
-
-Returns `204 No Content` on success. Cascades to delete all associated episodes
-and summaries via SQLite `ON DELETE CASCADE`.
-
-### Episodes
-
-| Method | Path | Description |
-|---|---|---|
-| POST | /episodes | Create episode + auto-embed into Qdrant |
-| GET | /episodes/search?q=&limit= | Full-text search across episodes |
-| GET | /episodes/:id | Get episode by ID |
-| GET | /sessions/:id/episodes?limit=&offset= | Get paginated episodes for a session |
-| DELETE | /episodes/:id | Delete an episode |
-
-**POST /episodes body:**
-```json
-{
-  "sessionId": 1,
-  "userMessage": "Hello",
-  "aiResponse": "Hi there!",
-  "tokenCount": 10,
-  "metadata": {}
-}
-```
-
-> Note: `/episodes/search` must be defined before `/episodes/:id` in Express to prevent
-> the word `search` being captured as an ID parameter.
-
-### Projects
-
-| Method | Path | Description |
-|---|---|---|
-| POST | /projects | Create a new project |
-| GET | /projects | Get all projects |
-| GET | /projects/:id | Get project by ID |
-| PATCH | /projects/:id | Update a project |
-| DELETE | /projects/:id | Delete a project |
-
-**POST /projects body:**
-```json
-{
-  "name": "My Project",
-  "description": "Optional description",
-  "colour": "#3d3a79",
-  "icon": null
-}
-```
-
-`name` is required. `description`, `colour`, and `icon` are optional.
-
-Returns `201` with the created project object on success.
-
-**PATCH /projects/:id body:** same fields as POST, all optional.
-
-**DELETE /projects/:id**
-
-Returns `204 No Content`. Sessions assigned to the project are not deleted —
-their `project_id` foreign key is left as-is (nullable, no cascade).
-
-### Entities
-
-| Method | Path | Description |
-|---|---|---|
-| POST | /entities | Upsert an entity (creates or updates by name + type) |
-| GET | /entities/by-type/:type | Get all entities of a given type |
-| GET | /entities/:id | Get entity by internal ID |
-| DELETE | /entities/:id | Delete entity (cascades to relationships) |
-
-**POST /entities body:**
-```json
-{
-  "name": "NexusAI",
-  "type": "project",
-  "notes": "My AI memory project",
-  "metadata": {}
-}
-```
-
-> Note: `/entities/by-type/:type` must be defined before `/entities/:id` in Express to
-> prevent `by-type` being captured as an ID parameter.
-
-### Relationships
-
-| Method | Path | Description |
-|---|---|---|
-| POST | /relationships | Upsert a relationship between two entities |
-| GET | /entities/:id/relationships | Get all relationships originating from an entity |
-| DELETE | /relationships | Delete a specific relationship |
-
-**POST /relationships body:**
-```json
-{
-  "fromId": 1,
-  "toId": 2,
-  "label": "uses",
-  "metadata": {}
-}
-```
-
-**DELETE /relationships body:**
-```json
-{
-  "fromId": 1,
-  "toId": 2,
-  "label": "uses"
-}
-```
-
-> Relationships are identified by the composite key `(fromId, toId, label)`. Delete uses
-> the request body rather than URL params as this three-part key is awkward to express
-> cleanly in a path.
\ No newline at end of file
+For all HTTP endpoints, see `API-routes.md`.
\ No newline at end of file
diff --git a/docs/services/orchestration-service.md b/docs/services/orchestration-service.md
index 676faac..3807e7e 100644
--- a/docs/services/orchestration-service.md
+++ b/docs/services/orchestration-service.md
@@ -39,56 +39,58 @@ src/
 │   ├── memory.js      # HTTP client for memory service
 │   ├── inference.js   # HTTP client for inference service
 │   ├── embedding.js   # HTTP client for embedding service
-│   └── qdrant.js      # HTTP client for Qdrant vector search
+│   └── qdrant.js      # HTTP client for Qdrant (direct vector search)
 ├── chat/
-│   └── index.js       # Core pipeline logic — context assembly and coordination
+│   └── index.js       # Core pipeline — context assembly, isolation, auto-naming
 ├── routes/
-│   ├── chat.js        # POST /chat and POST /chat/stream route handlers
-│   ├── sessions.js    # Session list, history, rename, and delete routes
-│   ├── projects.js    # Project CRUD routes — proxies to memory service
-│   └── models.js      # GET /models — reads models.json manifest from disk
+│   ├── chat.js        # POST /chat and POST /chat/stream
+│   ├── sessions.js    # Session CRUD proxy
+│   ├── projects.js    # Project CRUD proxy
+│   └── models.js      # GET /models — reads models.json from disk
 └── index.js           # Express app entry point
 ```
 
-The `services/` layer wraps all downstream HTTP calls in named functions,
-keeping the pipeline logic in `chat/index.js` readable and ensuring that
+The `services/` layer wraps all downstream HTTP calls in named functions.
 URL or endpoint changes have a single place to be updated.
 
 ## Chat Pipeline
 
-Both `POST /chat` and `POST /chat/stream` share the same context assembly
-steps. The only difference is how the inference response is delivered to
-the client.
+Both `POST /chat` and `POST /chat/stream` share the same steps. The only
+difference is how the inference response is delivered to the client.
 
-1. **Session resolution** — looks up the session by `externalId` in the memory
-   service. If not found, auto-creates a new session. Clients can generate a
-   UUID for new conversations and pass it directly — no pre-creation step needed.
+### Steps
 
-2. **Recent episode retrieval** — fetches the most recent episodes for the session
-   (default: 5) from the memory service.
+1. **Session resolution** — look up session by `externalId`. Auto-create if
+   not found. Clients generate a UUID for new conversations — no pre-creation
+   step needed.
 
-3. **Semantic search** — embeds the user message via the embedding service, then
-   queries Qdrant for the top-5 most similar past episodes (score threshold: 0.75).
-   Results are deduplicated against the recent episode set using a `Set` of IDs.
-   Full episode content is fetched from the memory service by ID. This step is
-   non-critical — if it fails, a warning is logged and the pipeline continues with
+2. **Project context resolution** — if the session has a `project_id`, fetch
+   the project and all its session IDs. Used to scope semantic search. See
+   `Memory-isolation.md` for full behaviour.
+
+3. **Recent episode retrieval** — fetch the most recent episodes for the
+   session (`RECENT_EPISODE_LIMIT`, default 5).
+
+4. **Semantic search** — embed the user message, query Qdrant for the top-5
+   most similar past episodes (`SCORE_THRESHOLD` 0.75). Deduplicated against
+   recent episodes. Non-critical — if it fails, pipeline continues with
    recency-only context.
 
-4. **Prompt assembly** — combines the system prompt, semantic episodes (if any),
-   recent episodes, and the current user message into a single prompt string.
+5. **Prompt assembly** — combine system prompt, semantic episodes, recent
+   episodes, and user message.
 
-5. **Inference** — sends the assembled prompt to the inference service. `/chat`
-   awaits the full response; `/chat/stream` opens an SSE connection and pipes
-   chunks to the client as they arrive.
+6. **Inference** — send to inference service. `/chat` awaits full response;
+   `/chat/stream` pipes SSE chunks to the client.
 
-6. **Episode write** — writes the new exchange (user message + AI response)
-   back to the memory service as a fire-and-forget operation. For streaming,
-   the full response text is accumulated across chunks before writing.
+7. **Episode write** — write the exchange back to memory. Fire-and-forget
+   for `/chat`; awaited for `/chat/stream` to ensure the full text is
+   accumulated before saving.
 
-7. **Response** — returns the AI response, model name, session ID, and token
-   count to the client.
+8. **Auto-naming** — on `isFirstMessage && !session.name`, fire a secondary
+   inference call with a naming prompt (max 20 tokens, temperature 0.3) and
+   write the result back as `session.name`. Fully fire-and-forget.
 
-## Prompt Structure
+### Prompt Structure
 
 ```
 [System prompt]
@@ -108,212 +110,67 @@ User: {current message}
 Assistant:
 ```
 
-Semantic episodes appear before recent episodes so the model encounters
-long-range relevant context before the immediate conversation flow.
+Semantic episodes appear before recent episodes so the model sees
+long-range context before the immediate conversation flow.
 
 ## SSE Stream Format
 
-The inference service emits chunks from the llama.cpp provider in this format:
+Inference service → orchestration:
 ```
 data: {"response":"Hello","done":false}
-data: {"response":"!","done":false}
-data: {"done":true,"model":"gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf","tokenCount":42}
+data: {"done":true,"model":"gemma-4-26B...gguf","tokenCount":42}
 data: [DONE]
 ```
 
-The orchestration service re-emits to the client as:
+Orchestration → client:
 ```
 data: {"text":"Hello"}
-data: {"text":"!"}
-data: {"done":true,"model":"gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf","tokenCount":42}
+data: {"done":true,"model":"gemma-4-26B...gguf","tokenCount":42}
 ```
 
-The `[DONE]` sentinel from the inference service is consumed internally
-and not forwarded. The client stream is terminated by `res.end()` after
-the done event. Model name and token count are included on the done event
-so the client can display them in the UI.
+The `[DONE]` sentinel is consumed internally and not forwarded. The stream
+is terminated by `res.end()` after the done event.
 
 ## Models Manifest
 
-The `/models` endpoint reads a `models.json` file from disk at the path
-specified by `MODELS_MANIFEST_PATH`. The file lives on the main PC alongside
-the model files, and is accessible to orchestration via a network share
-mounted at `/mnt/nexus-models`.
+`GET /models` reads `models.json` fresh on each request from
+`MODELS_MANIFEST_PATH`. The file lives on the main PC alongside model files,
+accessible via an SMB mount at `/mnt/nexus-models`.
 
-The manifest is read fresh on each request — no restart needed when models
-are added or removed.
-
-**models.json format:**
 ```json
 [
   { "value": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf", "label": "Gemma 4 26B Claude Distill" }
 ]
 ```
 
-- `value` — must match the model name as reported by `llama-server` (including `.gguf` extension)
-- `label` — display name shown in the UI
+`value` must match the model name as reported by `llama-server` (including
+`.gguf` extension). No service restart needed when models are added or removed.
 
-## Endpoints
+## Sessions Route Behaviour
 
-### Health
+`PATCH /sessions/:sessionId` accepts `name`, `projectId`, or both. The validation
+guard rejects only requests with a missing or blank `name` and no `projectId`:
 
-| Method | Path | Description |
-|---|---|---|
-| GET | /health | Service health check — reports downstream service URLs |
-
-### Chat
-
-| Method | Path | Description |
-|---|---|---|
-| POST | /chat | Send a message and receive a complete response |
-| POST | /chat/stream | Send a message and receive a streaming SSE response |
-
-### Sessions
-
-| Method | Path | Description |
-|---|---|---|
-| GET | /sessions | Get paginated list of all sessions |
-| GET | /sessions/:sessionId/history | Get paginated episode history for a session |
-| PATCH | /sessions/:sessionId | Rename a session |
-| DELETE | /sessions/:sessionId | Delete a session and all its episodes |
-
-### Projects
-
-Projects are proxied directly from the memory service with no transformation.
-
-| Method | Path | Description |
-|---|---|---|
-| GET | /projects | Get all projects |
-| POST | /projects | Create a new project |
-| PATCH | /projects/:id | Update a project |
-| DELETE | /projects/:id | Delete a project |
-
-### Models
-
-| Method | Path | Description |
-|---|---|---|
-| GET | /models | Get list of available models from manifest file |
-
----
-
-**POST /chat**
-
-Request body:
-```json
-{
-  "sessionId": "your-session-uuid",
-  "message": "Hello, my name is Tim.",
-  "model": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf",
-  "temperature": 0.7
+```js
+if (!name?.trim() && projectId === undefined) {
+  return res.status(400).json({ error: 'name or projectId is required' });
 }
 ```
 
-`model` and `temperature` are optional — fall back to inference service defaults
-if omitted.
-
-Response:
-```json
-{
-  "sessionId": "your-session-uuid",
-  "response": "Hello Tim! How can I help you today?",
-  "model": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf",
-  "tokenCount": 87
-}
-```
-
----
-
-**POST /chat/stream**
-
-Same request body as `POST /chat`.
-
-Response is a stream of Server-Sent Events:
-```
-data: {"text":"Hello"}
-data: {"text":" Tim"}
-data: {"done":true,"model":"gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf","tokenCount":87}
-```
-
----
-
-**PATCH /sessions/:sessionId**
-
-Request body:
-```json
-{ "name": "My Renamed Session" }
-```
-
-Returns the updated session object. `name` is required and trimmed of whitespace.
-
----
-
-**DELETE /sessions/:sessionId**
-
-Returns `204 No Content`. Cascades to delete all episodes for the session.
-
----
-
-**GET /sessions/:sessionId/history**
-
-Query parameters:
-
-| Parameter | Default | Description |
-|---|---|---|
-| limit | 20 | Maximum number of episodes to return |
-| offset | 0 | Number of episodes to skip (for pagination) |
-
-Response:
-```json
-{
-  "sessionId": "your-session-uuid",
-  "episodes": [
-    {
-      "id": 42,
-      "session_id": 1,
-      "user_message": "Hello, my name is Tim.",
-      "ai_response": "Hello Tim! How can I help you today?",
-      "token_count": 87,
-      "created_at": 1712345678,
-      "metadata": null
-    }
-  ]
-}
-```
-
-Episodes are ordered newest first.
-
----
-
-**GET /models**
-
-Returns the parsed contents of `models.json`:
-```json
-[
-  { "value": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf", "label": "Gemma 4 26B Claude Distill" }
-]
-```
-
-Returns `500` if the manifest file cannot be read or parsed.
+This allows `useChat` to write project assignment separately from rename
+operations.
 
 ## Caddy Configuration
 
-The Caddy reverse proxy on Mini PC 2 must have a handle block for each route
-prefix the client needs to reach. Current required blocks:
+Each route prefix needs a handle block in the Caddyfile on Mini PC 2:
 
 ```
-handle /chat* {
-    reverse_proxy localhost:4000
-}
-handle /sessions* {
-    reverse_proxy localhost:4000
-}
-handle /models* {
-    reverse_proxy localhost:4000
-}
-handle /projects* {
-    reverse_proxy localhost:4000
-}
+handle /chat*     { reverse_proxy localhost:4000 }
+handle /sessions* { reverse_proxy localhost:4000 }
+handle /models*   { reverse_proxy localhost:4000 }
+handle /projects* { reverse_proxy localhost:4000 }
 ```
 
-When adding new top-level routes to the orchestration service, add a matching
-block here and reload Caddy: `caddy reload --config /path/to/Caddyfile`
\ No newline at end of file
+After updating: `caddy reload --config /path/to/Caddyfile`
+
+For all HTTP endpoints, see `API-routes.md`.
\ No newline at end of file