Compare commits
194 Commits
2ff3276462
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e4908193bd | ||
|
|
b58a4e4692 | ||
|
|
055683424d | ||
|
|
27ad614130 | ||
|
|
8ade5c68ca | ||
|
|
49982a85de | ||
|
|
9c6c5c9a42 | ||
|
|
c9cbac87ac | ||
|
|
1a97b19280 | ||
|
|
9fe8e568cf | ||
|
|
5ad01c6ad8 | ||
|
|
aac0923351 | ||
|
|
54218894c0 | ||
|
|
66a95f4479 | ||
| 78476e166f | |||
|
|
696ead29f8 | ||
|
|
45db47a584 | ||
|
|
095c9a623e | ||
|
|
f5011fddca | ||
|
|
86e78cc4c6 | ||
|
|
c86b565eed | ||
|
|
be1c38b654 | ||
|
|
4f3b18de08 | ||
|
|
43fa12899c | ||
|
|
84f01ef209 | ||
|
|
a50a748bcf | ||
|
|
32e8a83233 | ||
|
|
855de6d0af | ||
|
|
fcaf0e651f | ||
|
|
6cdee72af2 | ||
|
|
4c6bd1df2d | ||
|
|
2429fedb2c | ||
|
|
bdc5947fcb | ||
|
|
785047a824 | ||
|
|
acda21317b | ||
|
|
32365e67f4 | ||
|
|
59918d5733 | ||
|
|
01f35b7b82 | ||
|
|
21a7e5f3b5 | ||
|
|
c81a1cb20e | ||
|
|
781bf8a615 | ||
|
|
b44d35e7cb | ||
|
|
22686fca3c | ||
|
|
588e8395f8 | ||
|
|
936b04742e | ||
|
|
9ab63cca19 | ||
|
|
528318b374 | ||
|
|
43dc800a0a | ||
|
|
143df71efa | ||
|
|
72b41056a5 | ||
|
|
5de64ba68e | ||
|
|
405676edb5 | ||
|
|
980053a0ee | ||
|
|
3636ef3ff9 | ||
|
|
d2352ea48b | ||
|
|
af04cef307 | ||
|
|
17e2fd8f14 | ||
|
|
c9f3f5bc79 | ||
|
|
2fc372815f | ||
|
|
395c06137c | ||
|
|
98b89d44a5 | ||
|
|
57edf97270 | ||
|
|
cb6428448d | ||
|
|
a674f4d774 | ||
|
|
7824404319 | ||
|
|
0619c4c7f3 | ||
|
|
225728e531 | ||
|
|
8c807fb35b | ||
|
|
4cc87d96b6 | ||
|
|
57e8c4c486 | ||
|
|
ef5bfd5757 | ||
|
|
a6e17e33a0 | ||
|
|
01ed60a547 | ||
|
|
2769f436fa | ||
|
|
15c1bec609 | ||
|
|
fa3b0859f0 | ||
|
|
a0154e15e6 | ||
|
|
9c903a56ae | ||
|
|
56355d232b | ||
|
|
ed57a0331a | ||
|
|
e1375e7d1b | ||
|
|
1fc6e8a66d | ||
|
|
ee8f5bb5f0 | ||
|
|
c87760cc01 | ||
|
|
e69ceb44e7 | ||
|
|
ad5ecb5ff3 | ||
|
|
44989a2b8b | ||
|
|
c198a00dde | ||
|
|
dd4013685b | ||
|
|
2d1f7176ff | ||
|
|
6935459428 | ||
|
|
4b75529806 | ||
|
|
daf5b9a8ae | ||
|
|
2b47b06563 | ||
|
|
616383e9bc | ||
|
|
8bd4836cd7 | ||
|
|
9950ea3b62 | ||
|
|
9fccc4809d | ||
|
|
68f2d758b1 | ||
|
|
072758df9c | ||
|
|
8a5caf7399 | ||
|
|
afae2af85b | ||
|
|
77275cf476 | ||
|
|
1cc7b62d79 | ||
|
|
fc864041c5 | ||
|
|
8ae12c8c50 | ||
|
|
bf074295eb | ||
|
|
b3fb936494 | ||
|
|
05f1fbb04e | ||
|
|
930a6dbd13 | ||
|
|
99a4914d66 | ||
|
|
91e4f68a8c | ||
|
|
7e50e82d8c | ||
|
|
cfa1358174 | ||
|
|
1ed76e4d95 | ||
|
|
06d7031e44 | ||
|
|
902725b7f7 | ||
|
|
cf7f387add | ||
|
|
b4fd3ed72c | ||
|
|
cef1803af6 | ||
|
|
0cad85d4a7 | ||
|
|
4070eb5559 | ||
|
|
ba1e6b32e7 | ||
|
|
940b636175 | ||
|
|
2d2164451d | ||
|
|
ec44b935d1 | ||
|
|
bb05d1508d | ||
|
|
ac1bd963ef | ||
|
|
5145b9a7db | ||
|
|
27e3c98304 | ||
|
|
e1c16a5714 | ||
|
|
0db2896b55 | ||
|
|
46f3013a51 | ||
|
|
5f5fec9d00 | ||
|
|
f83e37f5c7 | ||
|
|
e8b81554c7 | ||
|
|
3f79cd4a41 | ||
|
|
4f388faaef | ||
|
|
1d420789b3 | ||
|
|
11449bb207 | ||
|
|
eb702624c3 | ||
|
|
996db6d4f1 | ||
|
|
f8fcc99929 | ||
|
|
c892f54a04 | ||
|
|
cdd74b5902 | ||
|
|
271a396ef5 | ||
|
|
30aaad6f77 | ||
|
|
7598e8b9f4 | ||
|
|
8d4a553a2a | ||
|
|
649ed2b350 | ||
|
|
e3f6b9a9db | ||
|
|
70959e945a | ||
|
|
4e0f7d33aa | ||
|
|
0b9fedcd6e | ||
|
|
699592071f | ||
|
|
7501fc54f1 | ||
|
|
560e69bc3b | ||
|
|
c14426ecaf | ||
|
|
07bd6a21ad | ||
|
|
4024f187df | ||
|
|
630ec22d8a | ||
|
|
4fd7f9824b | ||
|
|
045da0d7f4 | ||
|
|
5f024093d1 | ||
|
|
3c6cfa9bf4 | ||
|
|
f6d538f68a | ||
|
|
1f0d9acea8 | ||
|
|
7e8d71c877 | ||
|
|
037a8d5d32 | ||
|
|
035c02be5a | ||
|
|
a1795c6f29 | ||
|
|
5c6e027fc1 | ||
|
|
541e664da1 | ||
|
|
107ee5755e | ||
|
|
2b75f75733 | ||
|
|
0aea052311 | ||
|
|
f6cdc65464 | ||
|
|
1e2ce7a761 | ||
|
|
461438e81b | ||
|
|
710107ce5a | ||
|
|
1f824c097d | ||
|
|
4bd84ded04 | ||
|
|
9af77438b3 | ||
|
|
aebea6c231 | ||
|
|
685da6530f | ||
|
|
157a08fa78 | ||
|
|
16354952f9 | ||
|
|
3b5f0afece | ||
|
|
8ee9438b1c | ||
|
|
b71005d2b1 | ||
|
|
8765dc3c2d | ||
|
|
6084efeea4 | ||
|
|
8a61952a85 | ||
|
|
8b0b864c03 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -5,4 +5,5 @@ data/
|
||||
.env
|
||||
.env.*
|
||||
*.db
|
||||
.claude/settings.local.json
|
||||
EOF
|
||||
108
CLAUDE.md
Normal file
108
CLAUDE.md
Normal file
@@ -0,0 +1,108 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
## Development Commands
|
||||
|
||||
```bash
|
||||
# Start individual services
|
||||
npm run memory # Memory Service (port 3002)
|
||||
npm run embedding # Embedding Service (port 3003)
|
||||
npm run inference # Inference Service (port 3001)
|
||||
npm run orchestration # Orchestration Service (port 4000)
|
||||
npm run mini1 # Start memory + embedding concurrently
|
||||
|
||||
# Per-service dev mode (with --watch)
|
||||
npm -w packages/<service-name> run dev
|
||||
|
||||
# Chat client
|
||||
npm -w packages/chat-client run dev # Vite dev server (port 5173)
|
||||
npm -w packages/chat-client run build # Production build
|
||||
```
|
||||
|
||||
No test framework or linter is configured.
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
NexusAI is a **modular AI assistant** with persistent, project-scoped memory. It's a Node.js monorepo (`npm workspaces`) with 4 independent backend services, 1 React frontend, and 1 shared package.
|
||||
|
||||
### Services
|
||||
|
||||
| Package | Port | Role |
|
||||
|---|---|---|
|
||||
| `orchestration-service` | 4000 | Central gateway; coordinates all others |
|
||||
| `memory-service` | 3002 | SQLite + Qdrant hybrid memory |
|
||||
| `embedding-service` | 3003 | Text embeddings via Ollama (`nomic-embed-text`, 768-dim) |
|
||||
| `inference-service` | 3001 | LLM inference (Ollama or llama.cpp) |
|
||||
| `chat-client` | 5173 | React/Vite frontend |
|
||||
| `shared` | — | Constants, env helpers, logger, formatters |
|
||||
|
||||
All inter-service communication is **REST HTTP only** — no message queues or WebSockets.
|
||||
|
||||
### Chat Request Flow
|
||||
|
||||
1. Client POSTs to orchestration `/chat/stream`
|
||||
2. Orchestration resolves session, fetches **recent episodes** (SQLite) + **semantic episodes** (Qdrant vector search) + **entities** (Qdrant, scoped by project)
|
||||
3. Embedding computed for user message (embedding-service)
|
||||
4. Prompt assembled: system message → entities → semantic memories → recent episodes → user message
|
||||
5. Inference streams response (inference-service)
|
||||
6. Episode stored in SQLite + Qdrant (fire-and-forget embedding)
|
||||
7. Entity extraction triggered async (qwen2.5:3b via inference-service)
|
||||
8. Auto-summarization checked (threshold: 200+ tokens, re-triggers every 5 episodes)
|
||||
9. Auto-naming on first message (temp 0.3, 20 tokens max)
|
||||
|
||||
### Memory Model
|
||||
|
||||
**Dual store — neither works alone:**
|
||||
- **SQLite** (`better-sqlite3`, synchronous) — Full content: sessions, episodes, entities, relationships, projects, summaries, FTS5 index
|
||||
- **Qdrant** — Vector embeddings for semantic search; IDs used to fetch full content from SQLite afterward
|
||||
|
||||
Orchestration queries Qdrant directly (bypasses memory-service) for performance, then fetches full episode content from memory-service by ID.
|
||||
|
||||
**Project-scoped isolation:** Sessions are grouped into projects; Qdrant queries use a `should` filter on session IDs to enforce memory boundaries. Non-project sessions share a common pool.
|
||||
|
||||
### Key File Locations
|
||||
|
||||
**Orchestration** (`packages/orchestration-service/src/`):
|
||||
- `chat/index.js` — Core prompt building and memory assembly
|
||||
- `routes/` — HTTP endpoints: chat, sessions, projects, episodes, models, settings, summaries
|
||||
- `services/` — Thin HTTP clients for memory, embedding, inference, and direct Qdrant access
|
||||
- `config/settings.js` — Loads/saves `data/settings.json` (user-tunable: model params, thresholds, system prompt)
|
||||
|
||||
**Memory** (`packages/memory-service/src/`):
|
||||
- `db/schema.js` — SQLite table definitions (source of truth for data model)
|
||||
- `episodic/` — Episode CRUD
|
||||
- `semantic/` — Qdrant operations
|
||||
- `entities/` — Entity extraction + CRUD
|
||||
- `summarization/` — Project summary generation
|
||||
|
||||
**Shared** (`packages/shared/src/`):
|
||||
- `config/constants.js` — All tunables (ports, thresholds, model names, vector size)
|
||||
- `config/env.js` — `getEnv()` helper with fallback to constants
|
||||
- `utils.js` — `parseRow()`, `formatEpisodeText()`, `logger`
|
||||
|
||||
**Frontend** (`packages/chat-client/src/`):
|
||||
- `App.jsx` — View router and top-level state (views: home, chat, all-chats, all-projects, project, memory, summaries, settings)
|
||||
- `hooks/` — `useChat`, `useSession`, `useModels`, `useProjects`, `useSettings`, `useContextMenu`
|
||||
- `api/orchestration.js` — Fetch wrapper for all API calls
|
||||
- Vite proxy points to `192.168.0.205:4000` (Mini PC 2 / orchestration)
|
||||
|
||||
### Configuration
|
||||
|
||||
Each service uses `.env` via `dotenv`, falling back to `packages/shared/src/config/constants.js`. The orchestration service also serves `data/settings.json` to the frontend via `/settings` — this is the single source of truth for user-facing inference parameters and system prompt.
|
||||
|
||||
### Deployment
|
||||
|
||||
Home lab across 3 nodes, managed with Docker Compose:
|
||||
- **Main PC** — RTX A4000 (inference via llama.cpp)
|
||||
- **Mini PC 1** — memory + embedding services, Qdrant, Ollama
|
||||
- **Mini PC 2** — orchestration + chat client, Caddy reverse proxy + Authelia SSO
|
||||
|
||||
Docker Compose files: `docker-compose.mini1.yml`, `docker-compose.mini2.yml`. All services expose `/health`. Deployment docs: `docs/deployment/homelab.md`.
|
||||
|
||||
## Key Development Principles
|
||||
|
||||
- **Layer-by-layer validation** — always build and test backend → orchestration → frontend in sequence, curl-testing each layer before proceeding
|
||||
- **New orchestration routes require changes in four places**: route file, `orchestration-service/src/index.js`, Caddyfile on Mini PC 2 (`192.168.0.205`), and `vite.config.js` in the chat client
|
||||
- **All services read settings on every request** — no restart required for config changes
|
||||
- **Backend-first development** — data layer → service endpoints → orchestration proxy → frontend
|
||||
@@ -1,12 +1,23 @@
|
||||
# NexusAI Documentation
|
||||
|
||||
## Contents
|
||||
## Architecture
|
||||
|
||||
- [Architecture Overview](architecture/overview.md)
|
||||
- [Services](services/)
|
||||
- [Shared Package](services/shared.md)
|
||||
- [Memory Service](services/memory-service.md)
|
||||
- [Embedding Service](services/embedding-service.md)
|
||||
- [Inference Service](services/inference-service.md)
|
||||
- [Orchestration Service](services/orchestration-service.md)
|
||||
- [Deployment](deployment/homelab.md)
|
||||
|
||||
## Services
|
||||
|
||||
- [Shared Package](services/shared.md)
|
||||
- [Memory Service](services/memory-service.md)
|
||||
- [Embedding Service](services/embedding-service.md)
|
||||
- [Inference Service](services/inference-service.md)
|
||||
- [Orchestration Service](services/orchestration-service.md)
|
||||
- [Chat Client](services/chat-client.md)
|
||||
|
||||
## Reference
|
||||
|
||||
- [API Routes](reference/API-routes.md) — all HTTP endpoints across all services
|
||||
- [Memory Isolation](reference/memory-isolation.md) — project-scoped memory model
|
||||
|
||||
## Deployment
|
||||
|
||||
- [Homelab](deployment/homelab.md)
|
||||
@@ -1,55 +1,81 @@
|
||||
# Architecture Overview
|
||||
|
||||
NexusAI is a modular, memory-centric AI system designed for persistent, context-aware conversations. It separates concerns across different services that can be independently deployed and evolved.
|
||||
NexusAI is a modular, memory-centric AI assistant designed for persistent,
|
||||
context-aware conversations. It separates concerns across independent services
|
||||
that can be evolved and deployed separately.
|
||||
|
||||
## Core Design Principles
|
||||
|
||||
- **Decoupled layers:** memory, inference, and orchestration are independent of each other
|
||||
- **Hybrid retrieval:** semantic similarity (Qdrant) combined with structured storage (SQLite) for flexible, ranked context assembly
|
||||
- **Home lab:** services are distributed across nodes according to available hardware and resources
|
||||
- **Decoupled layers** — memory, inference, and orchestration are independent of each other
|
||||
- **Hybrid retrieval** — semantic similarity (Qdrant) combined with structured storage (SQLite) for flexible, ranked context assembly
|
||||
- **Project-scoped memory** — sessions can be grouped into projects with shared or isolated memory pools
|
||||
- **Home lab first** — services are distributed across nodes according to available hardware
|
||||
|
||||
## Memory Model
|
||||
|
||||
Memory is split between SQLite and Qdrant, which work together as a pair:
|
||||
Memory is split between SQLite and Qdrant, which always work as a pair:
|
||||
|
||||
- **SQLite:** episodic interactions, entities, relationships, summaries
|
||||
- **Qdrant:** vector embeddings for semantic similarity search
|
||||
- **SQLite** — episodic interactions, entities, relationships, summaries, sessions, projects
|
||||
- **Qdrant** — vector embeddings for semantic similarity search
|
||||
|
||||
When recalling memory, Qdrant returns IDs and similarity scores, which are used to fetch
|
||||
full content from SQLite. Neither SQLite nor Qdrant work in isolation.
|
||||
When recalling memory, Qdrant returns IDs and similarity scores, which are used
|
||||
to fetch full content from SQLite. Neither store works in isolation.
|
||||
|
||||
Episode embeddings carry a `{ sessionId, createdAt }` payload in Qdrant,
|
||||
enabling per-session and per-project filtering at search time. See
|
||||
`memory-isolation.md` for how project-scoped retrieval works.
|
||||
|
||||
## Hardware Layout
|
||||
|
||||
| Node | Address | Role |
|
||||
|---|---|---|
|
||||
| Main PC | local | Primary inference (RTX A4000 16GB) |
|
||||
| Mini PC 1 | 192.168.0.81 | Memory service, Embedding service, Qdrant |
|
||||
| Mini PC 2 | 192.168.0.205 | Orchestration service, Gitea |
|
||||
| Main PC | 192.168.0.79 | Primary inference — RTX A4000 16GB |
|
||||
| Mini PC 1 | 192.168.0.81 | Memory service, Embedding service, Qdrant, Ollama |
|
||||
| Mini PC 2 | 192.168.0.205 | Orchestration service, Chat Client, Caddy, Authelia, Gitea |
|
||||
|
||||
## Service Communication
|
||||
|
||||
All services expose a REST HTTP API. The orchestration service is the single entry point —
|
||||
clients do not talk directly to the memory or inference services.
|
||||
All services expose a REST HTTP API. The orchestration service is the single
|
||||
entry point — clients never talk directly to memory or inference services.
|
||||
|
||||
```
|
||||
Client
|
||||
└─► Orchestration (:4000)
|
||||
├─► Memory Service (:3002)
|
||||
│ ├─► Qdrant (:6333)
|
||||
│ └─► SQLite
|
||||
├─► Embedding Service (:3003)
|
||||
│ └─► Ollama
|
||||
└─► Inference Service (:3001)
|
||||
└─► Ollama
|
||||
Client (browser)
|
||||
└─► Caddy (HTTPS + Authelia SSO)
|
||||
└─► Orchestration (:4000) — Mini PC 2
|
||||
├─► Memory Service (:3002) — Mini PC 1
|
||||
│ ├─► SQLite (local file)
|
||||
│ └─► Qdrant (:6333) — Mini PC 1
|
||||
├─► Embedding Service (:3003) — Mini PC 1
|
||||
│ └─► Ollama (:11434) — Mini PC 1
|
||||
├─► Inference Service (:3001) — Main PC
|
||||
│ └─► llama-server (:8080) — Main PC
|
||||
└─► Qdrant (:6333) — Mini PC 1 (direct — semantic search)
|
||||
```
|
||||
|
||||
Note: Orchestration queries Qdrant directly for semantic search (bypassing
|
||||
the memory service) but always fetches full episode content from the memory
|
||||
service by ID after the vector search.
|
||||
|
||||
## Technology Choices
|
||||
|
||||
| Concern | Choice | Reason |
|
||||
|---|---|---|
|
||||
| Language | Node.js (JavaScript) | Familiar stack, async I/O suits service architecture |
|
||||
| Language | Node.js (CommonJS) | Familiar stack, async I/O suits service architecture |
|
||||
| Package management | npm workspaces | Monorepo with shared code, no publishing needed |
|
||||
| Vector store | Qdrant | Mature, Docker-native, excellent Node.js client |
|
||||
| Relational store | SQLite (better-sqlite3) | Zero-ops, fast, sufficient for single-user |
|
||||
| LLM runtime | Ollama | Easiest local LLM management, serves embeddings too |
|
||||
| Version control | Gitea (self-hosted) | Code stays on local network |
|
||||
| Relational store | SQLite (better-sqlite3) | Zero-ops, fast, sufficient for single-user scale |
|
||||
| LLM inference | llama.cpp (`llama-server`) | Maximum GPU utilisation on RTX A4000, OpenAI-compatible API |
|
||||
| Embeddings | Ollama (`nomic-embed-text`) | Co-located with memory service on Mini PC 1, 768-dim Cosine |
|
||||
| Reverse proxy | Caddy + Authelia | Automatic HTTPS, SSO/MFA for all exposed services |
|
||||
| Version control | Gitea (self-hosted) | Code stays on local network |
|
||||
|
||||
## Current State
|
||||
|
||||
The core four-service architecture is complete and operational. Key capabilities:
|
||||
|
||||
- **Retrieval fusion** — Reciprocal Rank Fusion (RRF) merges semantic (Qdrant vector search) and keyword (SQLite FTS5) episode retrieval into a single ranked result set. Weights are configurable per strategy via settings; keyword search is off by default (`keywordWeight: 0`) and can be enabled without a service restart
|
||||
- **Entity layer + Knowledge graph** — automatic extraction of named entities and relationships from conversations via qwen2.5:3b. Entities and relationships are stored in SQLite with `mention_count` tracking. A graph traversal layer expands Qdrant entity search hits into a 1-hop neighborhood subgraph, injecting structured connected knowledge into every prompt
|
||||
- **Projects** — sessions grouped with shared or isolated memory pools
|
||||
- **Auto-naming** — sessions named automatically from first exchange via inference
|
||||
- **Project-scoped semantic search** — Qdrant filtered by project session IDs
|
||||
- **Chat client** — view-based UI with sidebar navigation, project views, session management
|
||||
@@ -7,36 +7,140 @@ services appropriate for its hardware.
|
||||
|
||||
## Mini PC 1 — 192.168.0.81
|
||||
|
||||
Runs: Qdrant, Memory Service, Embedding Service
|
||||
Runs: Qdrant, Memory Service, Embedding Service, Ollama
|
||||
|
||||
```bash
|
||||
ssh username@192.168.0.81
|
||||
cd ~/nexusai
|
||||
ssh storme@192.168.0.81
|
||||
docker compose -f docker-compose.mini1.yml up -d # Qdrant
|
||||
npm run memory
|
||||
npm run embedding
|
||||
npm run memory # port 3002
|
||||
npm run embedding # port 3003
|
||||
ollama serve # port 11434 — must bind 0.0.0.0 (OLLAMA_HOST=0.0.0.0)
|
||||
```
|
||||
|
||||
> Ollama must be started with `OLLAMA_HOST=0.0.0.0` to accept connections
|
||||
> from other services on the LAN. Without this, embedding requests from the
|
||||
> embedding service will be refused.
|
||||
|
||||
## Mini PC 2 — 192.168.0.205
|
||||
|
||||
Runs: Gitea, Orchestration Service
|
||||
Runs: Orchestration Service, Chat Client (via Caddy), Gitea, Caddy, Authelia
|
||||
|
||||
```bash
|
||||
ssh username@192.168.0.205
|
||||
cd ~/gitea
|
||||
docker compose up -d # Gitea
|
||||
cd ~/nexusai
|
||||
npm run orchestration
|
||||
ssh storme@192.168.0.205
|
||||
|
||||
cd /opt/stacks/network
|
||||
docker compose up -d # Caddy, Authelia, and other network services
|
||||
|
||||
cd ~/nexusAI
|
||||
npm run orchestration # port 4000
|
||||
```
|
||||
|
||||
## Main PC
|
||||
## Main PC — 192.168.0.79
|
||||
|
||||
Runs: Ollama, Inference Service
|
||||
```bash
|
||||
ollama serve
|
||||
npm run inference
|
||||
Runs: Inference Service, llama-server
|
||||
|
||||
```powershell
|
||||
# Start llama-server first — inference service depends on it
|
||||
.\llama-gpu\llama-server.exe `
|
||||
-m .\models\gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf `
|
||||
-ngl 99 --reasoning off --host 0.0.0.0 --port 8080 -c 64000
|
||||
|
||||
# Then start inference service
|
||||
npm run inference # port 3001
|
||||
```
|
||||
|
||||
## Chat Client Deployment
|
||||
|
||||
The chat client is a React + Vite app built to static files and served by
|
||||
Caddy on Mini PC 2. It does not run as a Node process.
|
||||
|
||||
```bash
|
||||
# On Mini PC 2 after git pull
|
||||
cd ~/nexusAI/packages/chat-client
|
||||
|
||||
# Set production URL before building
|
||||
VITE_ORCHESTRATION_URL=https://nexus.jellystorm.com npm run build
|
||||
|
||||
# Output lands in packages/chat-client/dist/
|
||||
# Caddy serves this directory directly via Docker volume mount
|
||||
```
|
||||
|
||||
> Do NOT set `VITE_ORCHESTRATION_URL` during local dev — Vite's proxy handles
|
||||
> routing and setting the HTTPS domain will cause Authelia to intercept API
|
||||
> requests, producing confusing JSON parse errors.
|
||||
|
||||
## Caddy Configuration
|
||||
|
||||
The Caddyfile on Mini PC 2 must include a handle block for each route prefix
|
||||
the client needs to reach. Current required blocks for NexusAI:
|
||||
|
||||
```caddy
|
||||
nexus.jellystorm.com {
|
||||
import authelia
|
||||
|
||||
handle /chat* {
|
||||
reverse_proxy 192.168.0.205:4000
|
||||
}
|
||||
|
||||
handle /sessions* {
|
||||
reverse_proxy 192.168.0.205:4000
|
||||
}
|
||||
|
||||
handle /models* {
|
||||
reverse_proxy 192.168.0.205:4000
|
||||
}
|
||||
|
||||
handle /projects* {
|
||||
reverse_proxy 192.168.0.205:4000
|
||||
}
|
||||
|
||||
handle {
|
||||
root * /srv/nexusai
|
||||
try_files {path} /index.html
|
||||
file_server
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
When adding new top-level routes to the orchestration service, add a matching
|
||||
handle block here and reload Caddy:
|
||||
|
||||
```bash
|
||||
caddy reload --config /path/to/Caddyfile
|
||||
```
|
||||
|
||||
The Caddy container mounts the `dist` directory via Docker volume:
|
||||
|
||||
```yaml
|
||||
- /home/storme/nexusAI/packages/chat-client/dist:/srv/nexusai
|
||||
```
|
||||
|
||||
> After adding or changing volume mounts, a full `docker compose down caddy && docker compose up -d caddy`
|
||||
> is required. Caddyfile-only changes only need `caddy reload`.
|
||||
|
||||
## Environment Files
|
||||
|
||||
Each node needs a `.env` file in the relevant service package directory.
|
||||
These are not committed to git. See each service's documentation for
|
||||
required variables.
|
||||
Each service needs a `.env` file in its package directory. These are not
|
||||
committed to git. See each service's documentation for required variables.
|
||||
|
||||
| Service | Location | Key Variables |
|
||||
|---|---|---|
|
||||
| Memory | `packages/memory-service/.env` | `SQLITE_PATH`, `QDRANT_URL`, `EMBEDDING_SERVICE_URL` |
|
||||
| Embedding | `packages/embedding-service/.env` | `OLLAMA_URL`, `EMBEDDING_MODEL` |
|
||||
| Inference | `packages/inference-service/.env` | `INFERENCE_PROVIDER`, `INFERENCE_URL`, `DEFAULT_MODEL` |
|
||||
| Orchestration | `packages/orchestration-service/src/.env` | `MEMORY_SERVICE_URL`, `EMBEDDING_SERVICE_URL`, `INFERENCE_SERVICE_URL`, `QDRANT_URL`, `MODELS_MANIFEST_PATH` |
|
||||
| Chat client | `packages/chat-client/.env` | `VITE_ORCHESTRATION_URL` (production builds only) |
|
||||
|
||||
## Models Manifest
|
||||
|
||||
The models manifest (`models.json`) lives on the Main PC alongside the model
|
||||
files, accessible to orchestration via an SMB mount at `/mnt/nexus-models`.
|
||||
|
||||
```json
|
||||
[
|
||||
{ "value": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf", "label": "Gemma 4 26B Claude Distill" }
|
||||
]
|
||||
```
|
||||
|
||||
`value` must exactly match the model name as reported by `llama-server`
|
||||
(including `.gguf` extension). No service restart needed to pick up changes.
|
||||
@@ -39,21 +39,21 @@ All external access is routed through **Caddy** (reverse proxy) with **Authelia*
|
||||
|------|--------|
|
||||
| GPU | NVIDIA RTX A4000 |
|
||||
| Role | Primary AI inference node |
|
||||
| Key Services | Ollama (inference) |
|
||||
| Key Services | llama-server (llama.cpp), Inference Service |
|
||||
|
||||
### Mini PC 1 — Media Node (`192.168.0.81`)
|
||||
| Spec | Detail |
|
||||
|------|--------|
|
||||
| GPU | NVIDIA RTX 5050 |
|
||||
| Role | Media services, embeddings, vector storage |
|
||||
| Key Services | Jellyfin, Nextcloud, Qdrant, arr stack, NexusAI memory/embedding |
|
||||
| Key Services | Jellyfin, Nextcloud, Qdrant, arr stack, NexusAI memory/embedding, Ollama |
|
||||
| Storage | NVMe (OS) + 3x external HDDs (see [Storage Layout](#storage-layout)) |
|
||||
|
||||
### Mini PC 2 — Infrastructure Node (`192.168.0.205`)
|
||||
| Spec | Detail |
|
||||
|------|--------|
|
||||
| Role | Network management, monitoring, auth, DNS, git |
|
||||
| Key Services | Caddy, Authelia, Tailscale, Pihole, Grafana, Gitea |
|
||||
| Role | Network management, monitoring, auth, DNS, git, NexusAI orchestration |
|
||||
| Key Services | Caddy, Authelia, Tailscale, Pihole, Grafana, Gitea, NexusAI orchestration |
|
||||
| Storage | NVMe (OS only) |
|
||||
|
||||
---
|
||||
@@ -155,7 +155,8 @@ All external access is routed through **Caddy** (reverse proxy) with **Authelia*
|
||||
|
||||
| Service | Notes |
|
||||
|---------|-------|
|
||||
| Ollama | Runs LLM inference using the RTX A4000. Also serves `nomic-embed-text` embeddings (768-dim vectors) consumed by NexusAI's embedding service on Mini PC 1. |
|
||||
| llama-server (llama.cpp) | Primary LLM inference using the RTX A4000. Started manually before the inference service. Serves the OpenAI-compatible API on port 8080. |
|
||||
| Ollama | Serves `nomic-embed-text` embeddings (768-dim vectors) consumed by NexusAI's embedding service on Mini PC 1. |
|
||||
|
||||
---
|
||||
|
||||
@@ -234,7 +235,7 @@ Phase 1 focused on establishing a stable, secure, and observable foundation:
|
||||
- ✅ Self-hosted git (Gitea)
|
||||
- ✅ Media stack fully operational (Jellyfin, arr stack, Nextcloud)
|
||||
- ✅ Download pipeline with VPN isolation (Gluetun + qBittorrent)
|
||||
- ✅ NexusAI foundation services running (Qdrant, Ollama)
|
||||
- ✅ NexusAI foundation services running (Qdrant, Ollama, llama.cpp)
|
||||
- ✅ Container management across nodes (Portainer + agent)
|
||||
|
||||
---
|
||||
@@ -249,6 +250,6 @@ Phase 2 shifts focus to resilience, security hardening, and smart home integrati
|
||||
- **Additional security hardening** — Audit exposed services, tighten firewall rules, review Authelia policies
|
||||
- **IP webcam integration** — Add camera feeds into the homelab ecosystem
|
||||
- **Home Assistant** — Integrate smart home automation and sensor data
|
||||
- **Continued NexusAI development** — Entities layer, embedding service, inference and orchestration buildout
|
||||
- **Continued NexusAI development** — Entity extraction pipeline, summaries layer, SettingsView implementation
|
||||
|
||||
> This section will be expanded as Phase 2 planning matures.
|
||||
447
docs/reference/API-routes.md
Normal file
447
docs/reference/API-routes.md
Normal file
@@ -0,0 +1,447 @@
|
||||
# API Routes
|
||||
|
||||
All HTTP endpoints across NexusAI services. Clients communicate only with
|
||||
the orchestration service (port 4000) — memory service routes are listed
|
||||
here for reference and direct debugging use.
|
||||
|
||||
---
|
||||
|
||||
## Orchestration Service — port 4000
|
||||
|
||||
### Health
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| GET | /health | Service health check |
|
||||
|
||||
### Chat
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| POST | /chat | Send a message, receive full response |
|
||||
| POST | /chat/stream | Send a message, receive SSE token stream |
|
||||
|
||||
**POST /chat and POST /chat/stream — request body:**
|
||||
```json
|
||||
{
|
||||
"sessionId": "your-session-uuid",
|
||||
"message": "Hello, my name is Tim.",
|
||||
"model": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf",
|
||||
"temperature": 0.7
|
||||
}
|
||||
```
|
||||
`model` and `temperature` are optional. Inference parameters (temperature,
|
||||
topP, topK, repeatPenalty) are read from `settings.json` on every request —
|
||||
controlled via `PATCH /settings`.
|
||||
|
||||
**POST /chat — response:**
|
||||
```json
|
||||
{
|
||||
"sessionId": "your-session-uuid",
|
||||
"response": "Hello Tim! How can I help you today?",
|
||||
"model": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf",
|
||||
"tokenCount": 87
|
||||
}
|
||||
```
|
||||
|
||||
**POST /chat/stream — response (SSE):**
|
||||
```
|
||||
data: {"text":"Hello"}
|
||||
data: {"text":" Tim"}
|
||||
data: {"done":true,"model":"gemma-4-26B...gguf","tokenCount":87}
|
||||
```
|
||||
|
||||
### Sessions
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| GET | /sessions | Paginated session list |
|
||||
| GET | /sessions/:sessionId/history | Paginated episode history for a session |
|
||||
| PATCH | /sessions/:sessionId | Update session name and/or project assignment |
|
||||
| DELETE | /sessions/:sessionId | Delete session and all its episodes |
|
||||
|
||||
**GET /sessions — query params:**
|
||||
|
||||
| Param | Default | Description |
|
||||
|---|---|---|
|
||||
| limit | 20 | Sessions per page |
|
||||
| offset | 0 | Pagination offset |
|
||||
| projectId | — | Filter by project (integer ID) |
|
||||
|
||||
**PATCH /sessions/:sessionId — body:**
|
||||
```json
|
||||
{ "name": "My Session", "projectId": 3 }
|
||||
```
|
||||
Either `name` or `projectId` is required. Both can be sent together.
|
||||
Returns the updated session object.
|
||||
|
||||
**GET /sessions/:sessionId/history — query params:**
|
||||
|
||||
| Param | Default | Description |
|
||||
|---|---|---|
|
||||
| limit | 20 | Episodes per page |
|
||||
| offset | 0 | Pagination offset |
|
||||
|
||||
Returns `{ sessionId, episodes: [...] }`. Episodes ordered newest first.
|
||||
|
||||
### Projects
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| GET | /projects | Get all projects |
|
||||
| POST | /projects | Create a new project |
|
||||
| PATCH | /projects/:id | Update a project (partial — any subset of fields) |
|
||||
| DELETE | /projects/:id | Delete a project (nulls session assignments) |
|
||||
|
||||
**POST /projects — body:**
|
||||
```json
|
||||
{
|
||||
"name": "My Project",
|
||||
"description": "Optional description",
|
||||
"colour": "#3d3a79",
|
||||
"icon": null,
|
||||
"isolated": 1
|
||||
}
|
||||
```
|
||||
`name` is required. All other fields optional. `isolated` is always `1` —
|
||||
all projects use isolated memory. Returns `201` with the created project object.
|
||||
|
||||
**PATCH /projects/:id — body:** any subset of fields, all optional.
|
||||
|
||||
| Field | Type | Description |
|
||||
|---|---|---|
|
||||
| `name` | string | Project name |
|
||||
| `description` | string | Project description |
|
||||
| `colour` | string | Hex colour for UI accent |
|
||||
| `icon` | string | Icon identifier |
|
||||
| `isolated` | integer | Memory isolation flag (always 1) |
|
||||
| `notes` | string | User-authored project notes |
|
||||
| `system_prompt` | string | Per-project system prompt override (null = use global) |
|
||||
|
||||
Only provided fields are updated — omitted fields are not touched.
|
||||
|
||||
### Summaries
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| GET | /summaries/session/:sessionId | Get all summaries for a session (by external UUID) |
|
||||
| GET | /summaries/project/:projectId | Get all summaries for a project |
|
||||
|
||||
**GET /summaries/session/:sessionId** — resolves the external UUID to an
|
||||
internal session ID, then fetches summaries from the memory service.
|
||||
Returns an array of summary objects ordered by `created_at` ascending.
|
||||
|
||||
**GET /summaries/project/:projectId** — proxies directly to the memory
|
||||
service project summaries endpoint.
|
||||
|
||||
**Summary object shape:**
|
||||
```json
|
||||
{
|
||||
"id": 8,
|
||||
"session_id": 72,
|
||||
"project_id": null,
|
||||
"content": "The user asked about...",
|
||||
"token_count": 579,
|
||||
"episode_range": "246-251",
|
||||
"created_at": 1776766518,
|
||||
"updated_at": 1776766518
|
||||
}
|
||||
```
|
||||
|
||||
> **Proxy requirement:** `/summaries` must be added to both the Caddyfile
|
||||
> reverse proxy and the Vite dev proxy config alongside the other route
|
||||
> prefixes. See `orchestration-service.md` for the Caddy block pattern.
|
||||
|
||||
### Models
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| GET | /models | Available models scanned live from models folder |
|
||||
| GET | /models/props | Live model props from llama-server (context window, loaded model) |
|
||||
|
||||
**GET /models** — returns array:
|
||||
```json
|
||||
[{ "value": "model-name.gguf", "label": "Display Name", "description": null, "size": "19.7 GB" }]
|
||||
```
|
||||
Scans `.gguf` files live from `modelsFolderPath` (set in settings). Merges
|
||||
with `models.json` in the same folder for label and description metadata.
|
||||
|
||||
**GET /models/props** — returns:
|
||||
```json
|
||||
{ "contextWindow": 64000, "modelAlias": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf" }
|
||||
```
|
||||
Fetches directly from llama-server `/props`. `n_ctx` is at
|
||||
`data.default_generation_settings.n_ctx` in the llama-server response.
|
||||
Returns `503` if llama-server is unreachable.
|
||||
|
||||
### Settings
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| GET | /settings | Get all current settings |
|
||||
| PATCH | /settings | Update one or more settings |
|
||||
|
||||
**GET /settings — response:**
|
||||
```json
|
||||
{
|
||||
"recentEpisodeLimit": 9,
|
||||
"semanticLimit": 5,
|
||||
"scoreThreshold": 0.6,
|
||||
"modelsFolderPath": "/mnt/nexus-models",
|
||||
"temperature": 0.65,
|
||||
"repeatPenalty": 1.3,
|
||||
"topP": 0.9,
|
||||
"topK": 41,
|
||||
"systemPrompt": "You are a helpful assistant..."
|
||||
}
|
||||
```
|
||||
|
||||
**PATCH /settings — body:** any subset of the fields listed below.
|
||||
|
||||
| Field | Type | Range | Description |
|
||||
|---|---|---|---|
|
||||
| `recentEpisodeLimit` | integer | 1–20 | Recent episodes injected into prompt |
|
||||
| `semanticLimit` | integer | 1–20 | Max semantic search results |
|
||||
| `scoreThreshold` | float | 0–1 | Minimum similarity score for Qdrant results |
|
||||
| `semanticWeight` | float | 0–5 | RRF weight for Qdrant semantic results |
|
||||
| `keywordWeight` | float | 0–5 | RRF weight for FTS5 keyword results (`0` = disabled) |
|
||||
| `modelsFolderPath` | string | — | Path to folder containing .gguf files |
|
||||
| `temperature` | float | 0–2 | Inference randomness |
|
||||
| `repeatPenalty` | float | 1–2 | Repeat token penalty |
|
||||
| `topP` | float | 0–1 | Nucleus sampling probability mass |
|
||||
| `topK` | integer | 1–100 | Top-K token candidates per step |
|
||||
| `systemPrompt` | string | — | Global system prompt (null reverts to hardcoded default) |
|
||||
|
||||
Settings are persisted to `data/settings.json` and read on every request —
|
||||
changes take effect immediately without a service restart.
|
||||
|
||||
### Episodes
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| GET | /episodes | Paginated episode list across all sessions |
|
||||
| DELETE | /episodes/:id | Delete an episode (SQLite + Qdrant) |
|
||||
|
||||
**GET /episodes — query params:**
|
||||
|
||||
| Param | Default | Description |
|
||||
|---|---|---|
|
||||
| limit | 20 | Episodes per page |
|
||||
| offset | 0 | Pagination offset |
|
||||
| q | — | Keyword search (FTS) |
|
||||
|
||||
---
|
||||
|
||||
## Memory Service — port 3002
|
||||
|
||||
Direct access is for debugging only. All client traffic goes through
|
||||
orchestration.
|
||||
|
||||
### Health
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| GET | /health | Service health check |
|
||||
|
||||
### Sessions
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| POST | /sessions | Create a new session |
|
||||
| GET | /sessions | Paginated session list with optional projectId filter |
|
||||
| GET | /sessions/:id | Get session by internal ID |
|
||||
| GET | /sessions/by-external/:externalId | Get session by external ID |
|
||||
| PATCH | /sessions/by-external/:externalId | Update session fields |
|
||||
| DELETE | /sessions/by-external/:externalId | Delete session (cascades to episodes) |
|
||||
|
||||
> Route ordering: `by-external/:externalId` must be defined before `/:id`
|
||||
> to prevent `by-external` being captured as an ID param.
|
||||
|
||||
**POST /sessions — body:**
|
||||
```json
|
||||
{ "externalId": "unique-uuid", "metadata": {} }
|
||||
```
|
||||
|
||||
**PATCH /sessions/by-external/:externalId — body:**
|
||||
```json
|
||||
{ "name": "Session Name", "projectId": 3 }
|
||||
```
|
||||
Both fields are optional. Only provided fields are updated.
|
||||
|
||||
### Episodes
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| POST | /episodes | Create episode + auto-embed into Qdrant |
|
||||
| GET | /episodes | Paginated episode list across all sessions |
|
||||
| GET | /episodes/search?q=&limit= | FTS keyword search across all episodes |
|
||||
| GET | /episodes/:id | Get episode by ID |
|
||||
| GET | /sessions/:id/episodes?limit=&offset= | Paginated episodes for a session |
|
||||
| DELETE | /episodes/:id | Delete episode (SQLite + Qdrant cleanup) |
|
||||
|
||||
> Route ordering: `/episodes/search` must be defined before `/episodes/:id`.
|
||||
|
||||
**POST /episodes — body:**
|
||||
```json
|
||||
{
|
||||
"sessionId": 1,
|
||||
"userMessage": "Hello",
|
||||
"aiResponse": "Hi there!",
|
||||
"tokenCount": 10
|
||||
}
|
||||
```
|
||||
|
||||
### Projects
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| POST | /projects | Create a new project |
|
||||
| GET | /projects | Get all projects |
|
||||
| GET | /projects/:id | Get project by ID |
|
||||
| PATCH | /projects/:id | Update a project (dynamic — any subset of fields) |
|
||||
| DELETE | /projects/:id | Delete project + null session assignments |
|
||||
|
||||
Same request/response shape as orchestration `/projects` above.
|
||||
|
||||
### Summaries
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| POST | /summaries | Create a new summary |
|
||||
| GET | /sessions/:id/summaries | Get all summaries for a session (internal ID) |
|
||||
| GET | /projects/:id/summaries | Get all summaries for a project |
|
||||
| PATCH | /summaries/:id | Update a summary (content, tokenCount, episodeRange) |
|
||||
| DELETE | /summaries/:id | Delete a summary |
|
||||
|
||||
**POST /summaries — body:**
|
||||
```json
|
||||
{
|
||||
"sessionId": 72,
|
||||
"content": "The user discussed...",
|
||||
"tokenCount": 579,
|
||||
"episodeRange": "246-251"
|
||||
}
|
||||
```
|
||||
`content` is required. Either `sessionId` or `projectId` is required.
|
||||
|
||||
**PATCH /summaries/:id — body:** any subset of `content`, `tokenCount`, `episodeRange`.
|
||||
|
||||
### Entities
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| POST | /entities | Upsert entity (creates or updates by name + type) |
|
||||
| GET | /entities/by-type/:type | All entities of a given type |
|
||||
| GET | /entities/:id | Get entity by ID |
|
||||
| DELETE | /entities/:id | Delete entity (cascades to relationships) |
|
||||
|
||||
> Route ordering: `/entities/by-type/:type` must be before `/entities/:id`.
|
||||
|
||||
**POST /entities — body:**
|
||||
```json
|
||||
{
|
||||
"name": "NexusAI",
|
||||
"type": "project",
|
||||
"notes": "My AI memory project",
|
||||
"metadata": {}
|
||||
}
|
||||
```
|
||||
|
||||
### Relationships
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| POST | /relationships | Upsert a relationship between two entities |
|
||||
| GET | /entities/:id/relationships | All relationships for an entity |
|
||||
| DELETE | /relationships | Delete a specific relationship |
|
||||
|
||||
**POST /relationships — body:**
|
||||
```json
|
||||
{ "fromId": 1, "toId": 2, "label": "uses", "metadata": {} }
|
||||
```
|
||||
|
||||
**DELETE /relationships — body:**
|
||||
```json
|
||||
{ "fromId": 1, "toId": 2, "label": "works_on", "notes": "Alice is the primary developer.", "metadata": {} }
|
||||
```
|
||||
`notes` is optional. `label` should be a snake_case verb. A relationship is identified by the composite key `(fromId, toId, label)` — re-submitting `POST /relationships` with the same key increments `mention_count` and preserves the existing `notes` if the new value is null.
|
||||
|
||||
Relationships are identified by the composite key `(fromId, toId, label)`.
|
||||
Delete uses request body rather than URL params since this three-part key
|
||||
is awkward to encode in a path.
|
||||
|
||||
### Graph
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| GET | /graph/neighborhood/:entityId | Entity neighborhood — nodes + edges within N hops |
|
||||
| POST | /graph/neighbors | Bulk 1-hop neighborhood for a set of entity IDs |
|
||||
|
||||
**GET /graph/neighborhood/:entityId — query params:**
|
||||
|
||||
| Param | Default | Max | Description |
|
||||
|---|---|---|---|
|
||||
| depth | 1 | 3 | Traversal depth |
|
||||
|
||||
Returns `{ entity, neighborhood: { nodes, edges } }`. Returns `404` if entity not found.
|
||||
|
||||
**POST /graph/neighbors — body:**
|
||||
```json
|
||||
{ "entityIds": [5, 8, 12] }
|
||||
```

Returns `{ nodes: [...], edges: [...] }`. Used internally by orchestration — not a client-facing endpoint.
|
||||
|
||||
---
|
||||
|
||||
## Embedding Service — port 3003
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| GET | /health | Service health check |
|
||||
| POST | /embed | Embed a single text string |
|
||||
| POST | /embed/batch | Embed an array of text strings |
|
||||
|
||||
**POST /embed — body:**
|
||||
```json
|
||||
{ "text": "Hello from NexusAI" }
|
||||
```
|
||||
|
||||
**POST /embed — response:**
|
||||
```json
|
||||
{ "embedding": [0.123, -0.456, ...], "model": "nomic-embed-text", "dimensions": 768 }
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Inference Service — port 3001
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| GET | /health | Health check — reports active provider and model |
|
||||
| POST | /complete | Full completion — awaits entire response |
|
||||
| POST | /complete/stream | Streaming completion via SSE |
|
||||
|
||||
**POST /complete — body:**
|
||||
```json
|
||||
{
|
||||
"prompt": "What is the capital of France?",
|
||||
"model": "gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf",
|
||||
"temperature": 0.7,
|
||||
"maxTokens": 1024,
|
||||
"topP": 0.9,
|
||||
"topK": 40,
|
||||
"repeatPenalty": 1.1
|
||||
}
|
||||
```
|
||||
All fields except `prompt` are optional. In normal usage these are forwarded
|
||||
from orchestration, which reads them from `settings.json`.
|
||||
|
||||
**POST /complete — response:**
|
||||
```json
|
||||
{
|
||||
"text": "The capital of France is Paris.",
|
||||
"model": "gemma-4-26B...gguf",
|
||||
"done": true,
|
||||
"evalCount": 8,
|
||||
"promptEvalCount": 41
|
||||
}
|
||||
```
|
||||
160
docs/reference/Memory-isolation.md
Normal file
160
docs/reference/Memory-isolation.md
Normal file
@@ -0,0 +1,160 @@
|
||||
# Memory Isolation
|
||||
|
||||
NexusAI implements project-scoped memory — sessions belonging to the same
|
||||
project share semantic context within that project's boundary. All projects
|
||||
are isolated by default.
|
||||
|
||||
## Concepts
|
||||
|
||||
**Session** — a single conversation thread. Identified by `external_id`.
|
||||
|
||||
**Project** — a named grouping of sessions. `isolated` is always `1` —
|
||||
the toggle has been removed from the UI and `isolated: 1` is hardcoded on
|
||||
project creation.
|
||||
|
||||
**Semantic search** — at inference time, the user's message is embedded and
|
||||
compared against past episodes and entities in Qdrant to surface relevant
|
||||
context. The scope of this search is controlled by the project context.
|
||||
|
||||
## Semantic Search Scope
|
||||
|
||||
| Session state | Episode search scope | Entity search scope |
|
||||
|---|---|---|
|
||||
| No project | All non-project episodes (shared pool) | No entity context |
|
||||
| Assigned to a project | All episodes across all sessions in that project | Entities tagged with that project |
|
||||
| Removed from a project | Back to shared non-project pool | Back to no entity context |
|
||||
|
||||
Non-project sessions share a common memory pool — they can draw on each
|
||||
other's episodes via semantic search, but cannot access episodes from any
|
||||
project session. Project sessions are fully isolated from all non-project
|
||||
sessions and from other projects.
|
||||
|
||||
## How It Works
|
||||
|
||||
### Step 1 — Project context resolution (orchestration)
|
||||
|
||||
In `chat/index.js`, immediately after session resolution:
|
||||
|
||||
```js
|
||||
let projectSessionIds = null;
|
||||
if (session.project_id) {
|
||||
const project = await memory.getProject(session.project_id);
|
||||
if (project) {
|
||||
const projectSessions = await memory.getProjectSessions(session.project_id);
|
||||
projectSessionIds = projectSessions.map(s => s.id);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If the session belongs to any project, `projectSessionIds` is populated with
|
||||
the internal integer IDs of all sessions in that project — creating a shared
|
||||
memory pool across all conversations in the project.
|
||||
|
||||
### Step 2 — Qdrant episode filter construction
|
||||
|
||||
In `services/qdrant.js`, `searchEpisodes` builds the filter:
|
||||
|
||||
```js
|
||||
if (projectSessionIds) {
|
||||
body.filter = {
|
||||
should: projectSessionIds.map(id => ({
|
||||
key: 'sessionId', match: { value: id }
|
||||
}))
|
||||
};
|
||||
} else if (sessionId) {
|
||||
body.filter = { must: [{ key: 'sessionId', match: { value: sessionId } }] };
|
||||
}
|
||||
```
|
||||
|
||||
`should` is Qdrant's "match any of" operator — equivalent to SQL
|
||||
`WHERE sessionId IN (...)`. When `projectSessionIds` is set, the single-session
|
||||
filter is not used.
|
||||
|
||||
### Step 3 — Entity search scoping
|
||||
|
||||
Entity search is also project-scoped. `searchEntities` in `services/qdrant.js`
|
||||
accepts a `projectId` parameter and filters accordingly:
|
||||
|
||||
```js
|
||||
if (projectId) {
|
||||
body.filter = {
|
||||
must: [{ key: 'projectId', match: { value: projectId } }]
|
||||
};
|
||||
}
|
||||
// No filter for non-project sessions — entity context not provided
|
||||
```
|
||||
|
||||
Non-project sessions receive no entity context. Project sessions only see
|
||||
entities extracted from conversations within that project.
|
||||
|
||||
### Step 4 — Episode payloads
|
||||
|
||||
Every episode upserted into Qdrant carries `{ sessionId, createdAt }` in its
|
||||
payload. `sessionId` here is the **internal integer ID** from SQLite.
|
||||
|
||||
### Step 5 — Entity payloads
|
||||
|
||||
Every entity upserted into Qdrant carries `{ name, type, notes, projectId }`
|
||||
in its payload. `projectId` is the integer project ID.
|
||||
|
||||
Entities are extracted and stored with `projectId` by `extraction.js`, which
|
||||
receives it from `createEpisode` in `episodic/index.js`, which receives it
|
||||
from the memory service episode route, which receives it from orchestration's
|
||||
`createEpisode` call in `chat/index.js`. The full chain:
|
||||
|
||||
```
|
||||
chat/index.js → memory.createEpisode(session.id, ..., session.project_id)
|
||||
→ POST /episodes { projectId }
|
||||
→ episodic.createEpisode(..., projectId)
|
||||
→ extractAndStoreEntities(userMessage, aiResponse, projectId)
|
||||
→ semantic.upsertEntity(id, vector, { name, type, notes, projectId })
|
||||
```
|
||||
|
||||
## Important Behaviours
|
||||
|
||||
**Pre-existing episodes are included immediately.** When a session is added
|
||||
to a project and a new message is sent, Qdrant can match all of that session's
|
||||
existing episodes since the filter only requires the `sessionId` to be in the
|
||||
project's session list.
|
||||
|
||||
**Removing a session from a project takes effect immediately.** On the next
|
||||
message, `getProjectSessions` will not include that session's ID, so its
|
||||
episodes disappear from the semantic search scope.
|
||||
|
||||
**Entity tags are immutable.** Entities extracted from a session's episodes
|
||||
are tagged with the `projectId` at extraction time. If a session is later
|
||||
moved to a different project, its previously extracted entities retain the
|
||||
original `projectId`. New entities extracted after the move will use the new
|
||||
`projectId`. Re-tagging existing entities requires a Qdrant payload update.
|
||||
|
||||
**New sessions created from ProjectView are assigned after the first message.**
|
||||
`handleNewProjectChat` in `App.jsx` calls `sendMessage` with the project ID,
|
||||
which is passed to `useChat`. After `onDone` fires, `useChat` calls
|
||||
`updateSession` to write the project assignment to the backend. There is a
|
||||
brief window during the first message where the session has no project assigned.
|
||||
The project is correctly applied from the second message onward.
|
||||
|
||||
## Verified Behaviours (tested April 2026)
|
||||
|
||||
- Project sessions cannot read episodes from non-project sessions ✓
|
||||
- Non-project sessions cannot read episodes from project sessions ✓
|
||||
- Non-project sessions can read each other's episodes ✓
|
||||
- Adding a session to a project — its history joins the project pool immediately ✓
|
||||
- Removing a session from a project — exits the project pool immediately ✓
|
||||
- Entity contamination across projects eliminated by `projectId` filter ✓
|
||||
|
||||
## Qdrant Payload Structures
|
||||
|
||||
**Episodes:**
|
||||
```json
|
||||
{ "sessionId": 42, "createdAt": 1776080188 }
|
||||
```
|
||||
|
||||
**Entities:**
|
||||
```json
|
||||
{ "name": "NexusAI", "type": "project", "notes": "...", "projectId": 3 }
|
||||
```
|
||||
|
||||
`sessionId` is the SQLite `sessions.id` integer, not the `external_id` UUID.
|
||||
`projectId` is the SQLite `projects.id` integer.
|
||||
Always use internal IDs when building Qdrant filters.
|
||||
228
docs/roadmap.md
Normal file
228
docs/roadmap.md
Normal file
@@ -0,0 +1,228 @@
|
||||
# NexusAI — Master Roadmap
|
||||
|
||||
> A modular, memory-centric AI assistant and personal second brain.
|
||||
> Built on Node.js, React/Vite, SQLite, Qdrant, and llama.cpp.
|
||||
> Repo: `https://gitea.jellystorm.com/storme/nexusAI`
|
||||
|
||||
---
|
||||
|
||||
## Current State (Completed)
|
||||
|
||||
### Backend — Core Four Services
|
||||
- ✅ **Shared package** — `getEnv`, constants (`QDRANT`, `COLLECTIONS`, `EPISODIC`, `SERVICES`)
|
||||
- ✅ **Memory service** (port 3002, Mini PC 1) — SQLite schema (sessions, episodes, entities, relationships, summaries), FTS5 search, full CRUD endpoints, Qdrant semantic layer (3 collections), embedding write path
|
||||
- ✅ **Embedding service** (port 3003, Mini PC 1) — `nomic-embed-text` via Ollama, 768-dim vectors, `/embed` and `/embed/batch`
|
||||
- ✅ **Inference service** (port 3001, Main PC) — provider pattern (`INFERENCE_PROVIDER`), llama.cpp provider, `/complete` and `/complete/stream` (SSE)
|
||||
- ✅ **Orchestration service** (port 4000, Mini PC 2) — `/chat` and `/chat/stream`, session auto-create, dual-layer context assembly (recency + semantic), episode write-back
|
||||
|
||||
### Memory System
|
||||
- ✅ Episodic memory — full conversation history in SQLite
|
||||
- ✅ Semantic memory — Qdrant vector search across episodes and entities
|
||||
- ✅ Entity extraction — background inference pass after each episode (qwen2.5:3b via Ollama)
|
||||
- ✅ Automatic summarization — triggered at context threshold, cumulative summary updates
|
||||
- ✅ Project memory isolation — project sessions fully isolated from each other and from non-project sessions
|
||||
|
||||
### Chat Client
|
||||
- ✅ React/Vite frontend served via Caddy
|
||||
- ✅ Sidebar navigation — recent chats, projects, settings
|
||||
- ✅ Project management — CRUD, colour coding, isolated flag, ProjectView
|
||||
- ✅ Session management — auto-naming, project assignment, SessionModal
|
||||
- ✅ Streaming chat interface — SSE token-by-token rendering
|
||||
- ✅ Memory viewer — episode browsing, deletion, health panel
|
||||
- ✅ Settings panel — models section, configuration
|
||||
|
||||
### Infrastructure
|
||||
- ✅ Caddy reverse proxy with Authelia SSO
|
||||
- ✅ Prometheus + Grafana monitoring (VRAM, CPU, RAM)
|
||||
- ✅ npm workspaces monorepo
|
||||
- ✅ Gitea self-hosted repo
|
||||
|
||||
---
|
||||
|
||||
## Phase 1 — Loose Ends & Stability - COMPLETE ✅
|
||||
*Target: Next development session (Saturday)*
|
||||
|
||||
### Bug Fixes
|
||||
- ✅ **Entity extraction JSON parsing** — robustify response parser in `extraction.js` to handle model returning markdown fences or preamble around JSON
|
||||
- ✅ **Qdrant entity search empty results** — verify entities embedded post-isolation-fix are surfacing correctly in project session searches
|
||||
|
||||
### Tech Debt
|
||||
- ✅ **Logging** — introduce `LOG_LEVEL` env var across all services; reduce noise in production
|
||||
- ✅ **Error response consistency** — audit all endpoints for uniform `{ error, detail }` shape
|
||||
- ✅ **Constants audit** — move any remaining inline magic numbers (limits, thresholds, timeouts) to shared config
|
||||
- ✅ **Orchestration `chat/index.js` review** — extract any logic that has grown beyond its intended scope into dedicated modules
|
||||
|
||||
---
|
||||
|
||||
## Phase 2 — Memory System Upgrades
|
||||
*The core intelligence layer*
|
||||
|
||||
### 1. Knowledge Graph (SQLite) ✅
|
||||
The highest-leverage memory upgrade. Transforms NexusAI from "remembers conversations" to "understands relationships between things."
|
||||
- [x] Graph schema — `nodes` and `edges` tables with typed relationships
|
||||
- [x] Entity → node promotion pipeline (`mention_count` tracked; threshold gating deferred to Phase 2)
|
||||
- [x] Relationship traversal queries
|
||||
- [x] Graph-aware context assembly in orchestration
|
||||
|
||||
### 2. Retrieval Fusion + Full-Text Search ✅
|
||||
Multi-strategy retrieval merged into a single ranked result set.
|
||||
- [x] Reciprocal Rank Fusion (RRF) — merge semantic (Qdrant) + keyword (FTS5) results
|
||||
- [x] Configurable weights per retrieval strategy (`semanticWeight`, `keywordWeight` via `PATCH /settings`)
|
||||
- [x] Score threshold retained per-strategy; FTS scoped to session/project sessions; `keywordWeight: 0` default (disabled until tuned)
|
||||
|
||||
### 3. Memory Consolidation Lifecycle
|
||||
Prevents long-term memory degradation and enables compression.
|
||||
- [ ] Episode aging — score/weight episodes by recency and access frequency
|
||||
- [ ] Consolidation pass — merge related low-weight episodes into summary nodes
|
||||
- [ ] Orphan cleanup — remove entities no longer referenced by active episodes
|
||||
|
||||
### 4. User Preference Model
|
||||
Automatically maintained profile injected into every system prompt.
|
||||
- [ ] Preference schema — communication style, interests, known facts, tone preferences
|
||||
- [ ] Auto-update from conversation history
|
||||
- [ ] Manual override / review UI
|
||||
|
||||
### 5. Confidence-Based Routing *(inspired by acid2lake)*
|
||||
Short-circuit simple requests before they reach the LLM.
|
||||
- [ ] Intent classifier in orchestration — categorise incoming messages
|
||||
- [ ] Confidence bands — FAST PATH (memory lookup only) vs FULL (LLM + context)
|
||||
- [ ] Fast-path handlers — direct memory queries, session lookups, factual recalls
|
||||
|
||||
### 6. Smarter Context Assembly *(inspired by acid2lake)*
|
||||
Budget-aware context selection instead of dumping all relevant memory into the prompt.
|
||||
- [ ] Token budget manager in orchestration
|
||||
- [ ] Priority scoring — recency × relevance × entity weight
|
||||
- [ ] Configurable context budget via env var
|
||||
|
||||
### 7. Procedural Memory Store *(inspired by acid2lake)*
|
||||
Learns "how NexusAI has successfully handled this type of request before."
|
||||
- [ ] Procedural memory schema — trigger pattern, steps, success count, confidence
|
||||
- [ ] Auto-population from successful interaction traces
|
||||
- [ ] Procedural context injection for matched request types
|
||||
|
||||
### 8. Reflection / Self-Summarization
|
||||
NexusAI periodically reviews and synthesises its own memory.
|
||||
- [ ] Scheduled reflection pass — background job, configurable interval
|
||||
- [ ] Cross-session insight extraction
|
||||
- [ ] Summary nodes written back to knowledge graph
|
||||
- *Requires: Knowledge graph + consolidation lifecycle*
|
||||
|
||||
### 9. Proactive Agent Loop
|
||||
The JARVIS moment — NexusAI reasons, plans, and acts across multiple steps.
|
||||
- [ ] Tool calling framework in orchestration
|
||||
- [ ] Built-in tools — memory search, entity lookup, summarize, web fetch
|
||||
- [ ] Reasoning loop — think → act → observe → respond
|
||||
- [ ] Agent mode toggle per session
|
||||
- *Requires: All Phase 2 items above*
|
||||
|
||||
---
|
||||
|
||||
## Phase 3 — Client Features
|
||||
*Making the daily driver experience excellent*
|
||||
|
||||
### Core Chat Enhancements
|
||||
- [ ] Message regeneration — re-roll last AI response
|
||||
- [ ] Edit & resend — edit a previous message, clear subsequent history
|
||||
- [ ] Copy message button — hover icon per message
|
||||
- [ ] Message timestamps — subtle, toggleable
|
||||
- [ ] Token count display — per-response usage indicator
|
||||
|
||||
### Memory Visibility
|
||||
- [ ] **"What I remember" panel** — show which episodes/entities were injected into context
|
||||
- [ ] Memory pinning — mark episodes as always-include
|
||||
- [x] Session summary view — on-demand or auto-generated session summary
|
||||
- [ ] Memory attribution — subtle indicator on responses that were memory-informed
|
||||
|
||||
### Session & Project Management
|
||||
- [ ] Session search — full-text search across all sessions
|
||||
- [ ] Session tagging — freeform tags beyond project assignment
|
||||
- [ ] Session export — download as markdown or JSON
|
||||
- [ ] Pinned sessions — pin frequently used sessions to sidebar top
|
||||
- [ ] Bulk session actions — delete, move to project
|
||||
|
||||
### Model & Persona Controls *(high priority — circles back to companion origins)*
|
||||
- [ ] Per-session model switching — override default model per session
|
||||
- [x] System prompt editor — per-project custom prompts
|
||||
- [ ] System prompt editor — per-session custom prompts
|
||||
- [ ] Persona profiles — saved configurations (model + system prompt + temperature)
|
||||
- Examples: "Daily Driver", "Creative Mode", "Concise Mode", "Coding Mode"
|
||||
- [ ] Temperature / parameter sliders — collapsible panel for power users
|
||||
|
||||
### Second Brain Features
|
||||
- [ ] **Quick capture** — minimal input to save a thought directly to memory without starting a chat
|
||||
- [ ] **Knowledge graph visualiser** — interactive node/edge view of entities and relationships
|
||||
- [ ] Memory search page — dedicated search UI across all episodes and entities
|
||||
- [ ] Daily digest — generated summary of recent activity and learned facts
|
||||
|
||||
### Quality of Life
|
||||
- [ ] Keyboard shortcuts — `Ctrl+K` command palette, `Ctrl+Enter` to send
|
||||
- [ ] Dark/light theme toggle
|
||||
- [ ] Mobile layout polish — collapsible sidebar, touch-friendly inputs
|
||||
- [ ] Notification support — browser notifications for long completions
|
||||
|
||||
---
|
||||
|
||||
## Phase 4 — Coding Copilot
|
||||
*After core is feature-complete*
|
||||
|
||||
### Project Directory Awareness
|
||||
- [ ] Directory watcher service — monitors a VS Code workspace for changes
|
||||
- [ ] Symbol indexer — AST parsing via Tree-sitter, file → symbol map in SQLite
|
||||
- [ ] Diagnostic indexer — compiler errors/warnings per file, triggered on save
|
||||
- [ ] Maps to existing project isolation — coding project = NexusAI project with `indexedDirectory` flag
|
||||
|
||||
### Coding-Specific Memory
|
||||
- [ ] Procedural patterns per language/framework — stored in procedural memory layer
|
||||
- [ ] Skill compilation — successful coding solutions abstracted into reusable patterns
|
||||
- [ ] Codebase semantic search — embed code chunks into Qdrant, search by intent
|
||||
|
||||
---
|
||||
|
||||
## Phase 5 — Stretch Goals
|
||||
|
||||
### Voice Layer
|
||||
- [ ] TTS output — text-to-speech for AI responses
|
||||
- [ ] STT input — speech-to-text for voice messages
|
||||
- [ ] Hardware-dependent — deferred until appropriate hardware available
|
||||
- *Architecturally clean addition — new input/output modality only*
|
||||
|
||||
### Homelab Enhancements
|
||||
- [ ] Backup improvements — automated, verified backups of SQLite + Qdrant data
|
||||
- [ ] Security hardening — network segmentation, service-level auth
|
||||
- [ ] IP webcam integration
|
||||
- [ ] Home Assistant integration
|
||||
|
||||
---
|
||||
|
||||
## Architecture Reference
|
||||
|
||||
### Services & Nodes
|
||||
|
||||
| Service | Host | Port | Role |
|
||||
|---|---|---|---|
|
||||
| Inference | Main PC `192.168.0.79` | 3001 | llama.cpp provider, `/complete`, `/complete/stream` |
|
||||
| Memory | Mini PC 1 `192.168.0.81` | 3002 | SQLite, episode/entity/summary CRUD |
|
||||
| Embedding | Mini PC 1 `192.168.0.81` | 3003 | nomic-embed-text via Ollama, vector generation |
|
||||
| Qdrant | Mini PC 1 `192.168.0.81` | 6333 | Vector store — episodes, entities, summaries collections |
|
||||
| Orchestration | Hub `192.168.0.205` | 4000 | Chat pipeline, context assembly, session management |
|
||||
| Chat Client | Hub `192.168.0.205` | — | React/Vite, served via Caddy |
|
||||
| Caddy + Authelia | Hub `192.168.0.205` | 443 | Reverse proxy, SSO |
|
||||
|
||||
### Primary Models
|
||||
|
||||
| Role | Model | Notes |
|
||||
|---|---|---|
|
||||
| Daily driver | Gemma 4 26B Claude Distill APEX I-Mini | `--reasoning off` flag critical |
|
||||
| Creative/worldbuilding | Gemma 4 21B REAP Q5_K_M | |
|
||||
| Coding | DeepSeek Coder V2 Lite Instruct Q6_K | |
|
||||
| Background tasks | qwen2.5:3b via Ollama | Entity extraction, summarization |
|
||||
|
||||
### Key Design Principles
|
||||
- **Layer-by-layer validation** — backend → orchestration → frontend, curl-test each layer
|
||||
- **Fire-and-forget async** — embedding and entity extraction never block the chat response
|
||||
- **All services read settings on every request** — no restart required for config changes
|
||||
- **Backend-first development** — data layer → endpoints → orchestration proxy → frontend
|
||||
|
||||
---
|
||||
|
||||
*Last updated: April 2026*
|
||||
374
docs/services/chat-client.md
Normal file
374
docs/services/chat-client.md
Normal file
@@ -0,0 +1,374 @@
|
||||
# Chat Client
|
||||
|
||||
**Package:** `@nexusai/chat-client`
|
||||
**Location:** `packages/chat-client`
|
||||
**Deployed on:** Mini PC 2 (192.168.0.205)
|
||||
**URL:** `https://nexus.jellystorm.com` (behind Authelia SSO)
|
||||
|
||||
## Purpose
|
||||
|
||||
Browser-based chat interface for NexusAI. Communicates exclusively with
|
||||
the orchestration service — no direct access to memory, embedding, or
|
||||
inference services. Served as static files by Caddy on Mini PC 2.
|
||||
|
||||
## Dependencies
|
||||
|
||||
- `react` + `react-dom` — UI framework
|
||||
- `react-markdown` — Markdown rendering in message bubbles and memory viewer
|
||||
- `uuid` — session ID generation
|
||||
- `vite` + `@vitejs/plugin-react` — build tooling
|
||||
|
||||
## Build
|
||||
|
||||
```bash
|
||||
cd packages/chat-client
|
||||
npm run dev # local dev server on port 5173
|
||||
npm run build # outputs to dist/ for production
|
||||
```
|
||||
|
||||
After building, copy `dist/` contents to `/srv/nexusai` on Mini PC 2 for Caddy to serve.
|
||||
|
||||
Vite bakes environment variables into the bundle at build time. The `.env`
|
||||
file is only needed on the machine running the build, not where files are served.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable | Required | Default | Description |
|
||||
|---|---|---|---|
|
||||
| VITE_ORCHESTRATION_URL | No | `''` (empty) | Orchestration base URL. Leave empty in dev (Vite proxy handles routing). Set to HTTPS domain for production builds. |
|
||||
|
||||
**Development:** leave `VITE_ORCHESTRATION_URL` unset — the Vite proxy routes
|
||||
API requests directly to orchestration, bypassing Caddy and Authelia.
|
||||
|
||||
**Production build:** set before running `npm run build`:
|
||||
```
|
||||
VITE_ORCHESTRATION_URL=https://nexus.jellystorm.com
|
||||
```
|
||||
|
||||
> Do not set `VITE_ORCHESTRATION_URL` to the HTTPS domain during local dev.
|
||||
> Requests from `localhost:5173` to `nexus.jellystorm.com` will hit Authelia,
|
||||
> which returns an HTML login page instead of JSON — causing `Unexpected token '<'`
|
||||
> parse errors in `useModels` and `useSession`.
|
||||
|
||||
## Vite Dev Proxy
|
||||
|
||||
`vite.config.js` proxies API routes directly to the orchestration service
|
||||
during local development, bypassing Caddy and Authelia entirely:
|
||||
|
||||
```js
|
||||
export default defineConfig({
|
||||
plugins: [react()],
|
||||
server: {
|
||||
proxy: {
|
||||
'/models': 'http://192.168.0.205:4000',
|
||||
'/sessions': 'http://192.168.0.205:4000',
|
||||
'/chat': 'http://192.168.0.205:4000',
|
||||
'/projects': 'http://192.168.0.205:4000',
|
||||
'/episodes': 'http://192.168.0.205:4000',
|
||||
'/settings': 'http://192.168.0.205:4000',
|
||||
'/health': 'http://192.168.0.205:4000',
|
||||
}
|
||||
}
|
||||
});
|
||||
```
|
||||
|
||||
When adding new top-level routes to the orchestration service, add a matching
|
||||
entry here and in the Caddy config.
|
||||
|
||||
## Internal Structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── api/
|
||||
│ └── orchestration.js # All fetch calls to the orchestration service
|
||||
├── config/
|
||||
│ └── constants.js # FALLBACK_MODELS, DEFAULT_MODEL, API_DEFAULTS, CLIENT_DEFAULTS
|
||||
├── hooks/
|
||||
│ ├── useSession.js # Session list, history loading, active session state
|
||||
│ ├── useChat.js # Message sending, SSE streaming, message state
|
||||
│ ├── useModels.js # Dynamic model list fetched from /models endpoint
|
||||
│ ├── useProjects.js # Project list fetched from /projects endpoint
|
||||
│ ├── useSettings.js # Settings fetch + saveSetting helper
|
||||
│ └── useContextMenu.js # Right-click context menu position and visibility
|
||||
├── components/
|
||||
│ ├── App.jsx # Root component — layout, shared state, view routing
|
||||
│ ├── Sidebar.jsx # Left sidebar — projects, grouped recent chats, navigation
|
||||
│ ├── HomeView.jsx # Landing screen — greeting, centred input, quick actions
|
||||
│ ├── ChatWindow.jsx # Centre panel — message thread, back button, model pill
|
||||
│ ├── MessageBubble.jsx # Individual message bubble — renders markdown via react-markdown
|
||||
│ ├── InfoPanel.jsx # Right panel — model selector and session metadata (slide-in)
|
||||
│ ├── SessionModal.jsx # Modal for session rename, project assignment, delete
|
||||
│ ├── ProjectModal.jsx # Modal for project create/edit — name, description, colour,
|
||||
│ │ # system prompt override; delete confirmation
|
||||
│ ├── AllChatsView.jsx # Paginated session list with project indicator column
|
||||
│ ├── AllProjectsView.jsx # Project tile grid with create/edit/delete; tile click navigates to ProjectView
|
||||
│ ├── ProjectView.jsx # Individual project — conversations, new chat input, memory
|
||||
│ │ # placeholder, user notes, ⋮ edit/delete menu
|
||||
│ ├── MemoryView.jsx # Paginated, searchable, expandable, deletable episode viewer
|
||||
│ └── SettingsView.jsx # Settings — Memory, Models, Behaviour (system prompt),
|
||||
│ # About, Appearance
|
||||
├── index.css # Global reset, CSS variables, utility classes
|
||||
└── main.jsx # React entry point
|
||||
```
|
||||
|
||||
## Layout
|
||||
|
||||
The app uses a view-based layout. `App.jsx` manages a `view` state string
|
||||
that controls which main panel is rendered. The left sidebar and right info
|
||||
panel are persistent across all views.
|
||||
|
||||
```
|
||||
┌──────────────────┬──────────────────────────────┐
|
||||
│ Sidebar │ Main Area (view-dependent) │
|
||||
│ (collapsible) │ │
|
||||
│ │ home → HomeView │
|
||||
│ + New Chat │ chat → ChatWindow │
|
||||
│ ⊞ View Projects │ all-chats → AllChatsView │
|
||||
│ │ all-projects → AllProjectsView│
|
||||
│ PROJECTS ▾ │ project → ProjectView │
|
||||
│ [tile] [tile] │ settings → SettingsView │
|
||||
│ All Projects → │ memory → MemoryView │
|
||||
│ │ │
|
||||
│ RECENT CHATS ▾ │ │
|
||||
│ ● Project A │ │
|
||||
│ Session 1 │ │
|
||||
│ Session 2 │ │
|
||||
│ ● Project B │ │
|
||||
│ Session 3 │ │
|
||||
│ Other │ │
|
||||
│ Session 4 │ │
|
||||
│ All Chats → │ │
|
||||
│ │ │
|
||||
│ ⚙ Settings │ │
|
||||
└──────────────────┴──────────────────────────────┘
|
||||
```
|
||||
|
||||
The sidebar collapses to a 48px icon rail and starts collapsed on the home
|
||||
view. The right `InfoPanel` slides in from the right using
|
||||
`transform: translateX()` — hidden by default, toggled via the `⊹` button
|
||||
in the `ChatWindow` header.
|
||||
|
||||
## View Routing
|
||||
|
||||
| View | Component | Trigger |
|
||||
|---|---|---|
|
||||
| `'home'` | `HomeView` | Initial load |
|
||||
| `'chat'` | `ChatWindow` | Selecting a session; new chat; sending from HomeView |
|
||||
| `'all-chats'` | `AllChatsView` | "All Chats →" or ☰ icon in collapsed rail |
|
||||
| `'all-projects'` | `AllProjectsView` | "View Projects" button or ⊞ icon |
|
||||
| `'project'` | `ProjectView` | Clicking a project tile in sidebar or AllProjectsView |
|
||||
| `'settings'` | `SettingsView` | Settings button or ⚙ icon |
|
||||
| `'memory'` | `MemoryView` | "Open →" button in Settings → Memory section |
|
||||
|
||||
`activeProject` state in `App.jsx` tracks which project `ProjectView` is
|
||||
displaying. Set via `onSelectProject` before navigating to `'project'`.
|
||||
|
||||
### View History Stack
|
||||
|
||||
`App.jsx` maintains a `viewHistory` array. Each `navigate(view)` call pushes
|
||||
the current view onto the stack. `goBack()` pops the last entry and restores
|
||||
it. All view components receive `onBack={goBack}` — no component hardcodes
|
||||
its own back destination. Navigating to `'home'` collapses the sidebar;
|
||||
leaving `'home'` expands it.
|
||||
|
||||
## Home View
|
||||
|
||||
`HomeView` is the landing screen shown on initial load. It displays:
|
||||
- Time-based greeting ("Morning / Afternoon / Evening, Tim")
|
||||
- Currently loaded model name (from `modelProps.modelAlias`, stripped of `.gguf`)
|
||||
- Centred textarea input — sending creates a new session and navigates to chat
|
||||
- Quick action pills that populate the input without auto-sending
|
||||
|
||||
`handleHomeSend` in `App.jsx` calls `createSession()` (which returns the new
|
||||
session object), then immediately calls `sendMessage` with the session passed
|
||||
directly — avoiding the React state settling race condition.
|
||||
|
||||
## CSS Architecture
|
||||
|
||||
Styles follow a hybrid approach — CSS utility classes for static reusable
|
||||
rules, inline styles for dynamic prop-driven values.
|
||||
|
||||
### CSS Variables (`:root`)
|
||||
|
||||
| Variable | Value | Description |
|
||||
|---|---|---|
|
||||
| `--bg-base` | `#0f1117` | Page background |
|
||||
| `--bg-surface` | `#0e0d0d` | Panel backgrounds |
|
||||
| `--bg-elevated` | `#222536` | Elevated elements (inputs, cards) |
|
||||
| `--border` | `#2e3150` | Border colour |
|
||||
| `--accent` | `#3d3a79` | Primary accent (buttons, highlights) |
|
||||
| `--accent-hover` | `#574fd6` | Accent hover state |
|
||||
| `--text-primary` | `#e8e8f0` | Primary text |
|
||||
| `--text-secondary` | `#8b8fa8` | Secondary text |
|
||||
| `--text-muted` | `#555870` | Muted / placeholder text |
|
||||
| `--bubble-user` | `#4742a8` | User message bubble background |
|
||||
| `--bubble-ai` | `#20264d` | AI message bubble background |
|
||||
| `--sidebar-width` | `180px` | Expanded sidebar width |
|
||||
| `--panel-width` | `200px` | Expanded info panel width |
|
||||
| `--header-height` | `40px` | Shared header height across all panels |
|
||||
| `--radius-sm` | `6px` | Small border radius |
|
||||
| `--radius-md` | `8px` | Medium border radius |
|
||||
| `--radius-lg` | `12px` | Large border radius |
|
||||
|
||||
### Utility Classes
|
||||
|
||||
| Class | Description |
|
||||
|---|---|
|
||||
| `.panel-header` | Shared header row — used across all panels |
|
||||
| `.btn-reset` | Resets button styles (no border, bg, cursor pointer) |
|
||||
| `.btn-icon` | Icon button with hover state |
|
||||
| `.btn-primary` | Accent-coloured action button with `:hover` and `:disabled` states |
|
||||
| `.flex` / `.flex-col` | Flex layout helpers |
|
||||
| `.flex-1` / `.flex-shrink` | Flex sizing helpers |
|
||||
| `.items-center` / `.justify-center` / `.justify-between` | Alignment helpers |
|
||||
| `.overflow-hidden` / `.scroll-y` | Overflow helpers |
|
||||
| `.text-xs` / `.text-sm` / `.text-base` | Font size helpers |
|
||||
| `.text-muted` / `.text-secondary` / `.text-accent` | Colour helpers |
|
||||
| `.label-upper` | Uppercase section label style |
|
||||
| `.truncate` | Text overflow ellipsis |
|
||||
|
||||
## Streaming
|
||||
|
||||
Messages are sent via `POST /chat/stream`. Tokens arrive as SSE events and
|
||||
are written into the active assistant bubble token by token via
|
||||
`updateLastMessage`. The blinking cursor in `MessageBubble` is shown while
|
||||
`message.streaming === true`.
|
||||
|
||||
`useChat.sendMessage` accepts an optional `session` parameter (4th arg) that
|
||||
overrides the closed-over `activeSession`. This is used by `handleHomeSend`
|
||||
and `handleNewProjectChat` in `App.jsx` to pass the newly created session
|
||||
object directly, avoiding React state settling races.
|
||||
|
||||
`useChat.sendMessage` also accepts an optional `projectId` parameter. After
|
||||
the first message completes in a new session, if `projectId` is set,
|
||||
`updateSession` is called to write the project assignment to the backend.
|
||||
|
||||
## Session Management
|
||||
|
||||
Sessions are identified by `external_id` — a UUID generated client-side via
|
||||
the `uuid` package. New sessions are created locally and auto-registered in
|
||||
the memory service on the first message. The session list refreshes after
|
||||
each completed response to surface newly created sessions.
|
||||
|
||||
`useSession.createSession` returns the new session object — callers can pass
|
||||
it directly to `sendMessage` rather than waiting for React state to update.
|
||||
|
||||
`useSession.selectSession` skips the history fetch for new (`isNew: true`)
|
||||
sessions — fetching history for an unsaved session would 404 since it doesn't
|
||||
exist in the backend yet.
|
||||
|
||||
### Auto-naming
|
||||
|
||||
After the first exchange completes, orchestration fires a secondary inference
|
||||
call with a short naming prompt (max 20 tokens, temperature 0.3). The result
|
||||
is written back as `session.name`. The client fires a second `refreshSessions`
|
||||
after a 3-second delay to pick up the name once written.
|
||||
|
||||
Manually renamed sessions are never overwritten — the `!session.name` guard
|
||||
in `chat/index.js` prevents this.
|
||||
|
||||
### Session Actions
|
||||
|
||||
Session rows support rename, project assignment, and delete via:
|
||||
- **Hover** — reveals ✎ and ✕ icon buttons alongside the row
|
||||
- **Right-click** — context menu with the same actions
|
||||
|
||||
`SessionModal` handles rename and project assignment together in `settings`
|
||||
mode, and delete confirmation in `confirm-delete` mode.
|
||||
|
||||
### Key Patterns
|
||||
|
||||
- Button nesting: action icons are siblings of row buttons, not children — HTML forbids `<button>` inside `<button>`
|
||||
- Context menu rendered outside sidebar via React fragment to avoid `overflow: hidden` clipping
|
||||
- `useContextMenu` dismisses the menu via a `window` click listener
|
||||
- Dynamic `updateSession` SQL builds `SET` clause from only the fields passed — prevents accidental overwrites
|
||||
- `AllChatsView` pagination uses `CLIENT_DEFAULTS.PAGE_SIZE` (not `API_DEFAULTS.PAGE_SIZE`, which doesn't exist)
|
||||
- `Sidebar` groups sessions by project — `key` must be passed directly to `<SessionRow key={...}>`, not included in the props spread object
|
||||
|
||||
## Sidebar — Session Grouping
|
||||
|
||||
Recent sessions in the sidebar are grouped by project under a colour dot +
|
||||
project name label. Unassigned sessions appear under "Other" if any project
|
||||
groups are present. The grouping is computed client-side from the `sessions`
|
||||
array and `projects` list already available in `App.jsx` — no extra API call.
|
||||
|
||||
`AllChatsView` receives `projects` as a prop from `App.jsx` and displays a
|
||||
project indicator column (colour dot + truncated name) in each session row.
|
||||
|
||||
## Project Management
|
||||
|
||||
All projects are isolated by default (`isolated: 1` hardcoded on create).
|
||||
The isolated toggle has been removed from `ProjectModal`.
|
||||
|
||||
`useProjects` fetches the project list from `GET /projects` on mount and
|
||||
exposes `refreshProjects` for keeping the sidebar in sync after mutations.
|
||||
|
||||
### ProjectModal Fields
|
||||
|
||||
- **Name** (required)
|
||||
- **Description** (optional)
|
||||
- **Colour** — picker from six preset hex values
|
||||
- **System Prompt** (optional) — overrides the global system prompt for all
|
||||
conversations in this project. Leave blank to use the global default.
|
||||
Stored as `system_prompt` (snake_case) matching the SQLite column.
|
||||
`Enter` key does not submit — textarea fields make it ambiguous. Save button only.
|
||||
|
||||
`handleSave` in `ProjectView` destructures `system_prompt` (snake_case) to
|
||||
match what `ProjectModal` sends. `updateProject` in `orchestration.js` uses
|
||||
a passthrough pattern — spreads all fields into the request body.
|
||||
|
||||
### System Prompt Hierarchy
|
||||
|
||||
System prompt resolution in `chat/index.js` (orchestration):
|
||||
|
||||
1. `project.system_prompt` — if set on the project (highest priority)
|
||||
2. `settings.systemPrompt` — global setting from `settings.json`
|
||||
3. `ORCHESTRATION.SYSTEM_PROMPT` — hardcoded constant in `@nexusai/shared` (last resort)
|
||||
|
||||
### ProjectView
|
||||
|
||||
`ProjectView` is a full project workspace with:
|
||||
- Colour accent bar + project title + description
|
||||
- ⋮ dropdown menu for edit (opens `ProjectModal` pre-filled) and delete
|
||||
- Conversations list — each session is a clickable row navigating to `'chat'`
|
||||
- `ChatInput` component below the list (or centred when no sessions exist) for
|
||||
starting new project-tied conversations without a separate button
|
||||
- **Project Memory** — placeholder section explaining upcoming auto-summary feature
|
||||
- **Project Notes** — textarea with Save button; notes saved to `projects.notes`
|
||||
column in SQLite; save button only appears when content has changed from last
|
||||
saved value (`savedNotes` state tracks the baseline, not `initialNotes`)
|
||||
|
||||
`updateProject` in `orchestration.js` uses a passthrough pattern — spreads
|
||||
all fields directly into the request body. This allows partial updates like
|
||||
`{ notes }` or `{ system_prompt }` without clobbering other fields.
|
||||
|
||||
For memory isolation behaviour, see `memory-isolation.md`.
|
||||
|
||||
## Settings
|
||||
|
||||
`useSettings` fetches from `GET /settings` on mount and exposes a
|
||||
`saveSetting(key, value)` helper that issues a `PATCH /settings` with a
|
||||
single key-value pair. The `saving` boolean is exposed for disabling save
|
||||
buttons during in-flight requests.
|
||||
|
||||
`SettingsView` receives `settings`/`saveSetting`/`saving` from a single
|
||||
`useSettings()` call at the top level and passes them as props to
|
||||
`ModelsSection`, `ModelsFolderSetting`, and `SystemPromptSetting` — avoiding
|
||||
a triple fetch on mount. `modelProps` (context window, loaded model) is fetched
|
||||
once in `App.jsx` and passed down as a prop.
|
||||
|
||||
`SettingsView` is organised into sections:
|
||||
|
||||
- **Memory** — recent episode limit, semantic limit, score threshold, link to MemoryView
|
||||
- **Models** — models folder path, temperature, repeat penalty, Top-P, Top-K,
|
||||
active model dropdown, read-only model info panel (file, size, context window,
|
||||
loaded model from llama-server)
|
||||
- **Behaviour** — global system prompt textarea (`SystemPromptSetting`). Save
|
||||
button appears only when content differs from `savedPrompt` state. Saving an
|
||||
empty string sends `null`, which reverts to the hardcoded default.
|
||||
- **About** — service health check panel, version
|
||||
- **Appearance** — theme (coming soon)
|
||||
|
||||
An error boundary (`SettingsSectionErrorBoundary`) wraps the Models section —
|
||||
if the models fetch fails, only that section shows an error with a Retry
|
||||
button rather than blanking the entire settings view.
|
||||
@@ -27,80 +27,43 @@ minimizing network hops on the memory write path.
|
||||
| OLLAMA_URL | No | http://localhost:11434 | Ollama instance URL |
|
||||
| EMBEDDING_MODEL | No | nomic-embed-text | Ollama embedding model to use |
|
||||
|
||||
> Ollama must be running with `OLLAMA_HOST=0.0.0.0` to accept LAN connections
|
||||
> from other services.
|
||||
|
||||
## Model
|
||||
|
||||
**nomic-embed-text** via Ollama produces **768-dimension** vectors using **Cosine similarity**.
|
||||
This must match the `QDRANT.VECTOR_SIZE` constant in `@nexusai/shared`.
|
||||
**nomic-embed-text** via Ollama produces **768-dimension** vectors with
|
||||
**Cosine similarity**. This must match `QDRANT.VECTOR_SIZE` in `@nexusai/shared`.
|
||||
|
||||
If the embedding model is changed, the Qdrant collections must be reinitialized
|
||||
with the new vector dimension — updating `QDRANT.VECTOR_SIZE` in `constants.js` is
|
||||
the single change required to keep everything consistent.
|
||||
with the new vector dimension. Updating `QDRANT.VECTOR_SIZE` in `constants.js`
|
||||
is the single change required to keep everything consistent.
|
||||
|
||||
## Ollama API
|
||||
|
||||
Uses the `/api/embed` endpoint (Ollama v0.4+). Request shape:
|
||||
Uses the `/api/embed` endpoint (Ollama v0.4+):
|
||||
|
||||
```json
|
||||
// Request
|
||||
{ "model": "nomic-embed-text", "input": "text to embed" }
|
||||
```
|
||||
Response key is `embeddings[0]` — an array of 768 floats.
|
||||
|
||||
## Endpoints
|
||||
|
||||
### Health
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| GET | /health | Service health check |
|
||||
|
||||
### Embed
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| POST | /embed | Embed a single text string |
|
||||
| POST | /embed/batch | Embed an array of text strings |
|
||||
|
||||
---
|
||||
|
||||
**POST /embed**
|
||||
|
||||
Embeds a single text string and returns the vector.
|
||||
|
||||
Request body:
|
||||
```json
|
||||
{
|
||||
"text": "Hello from NexusAI"
|
||||
}
|
||||
// Response key
|
||||
embeddings[0] // array of 768 floats
|
||||
```
|
||||
|
||||
Response:
|
||||
```json
|
||||
{
|
||||
"embedding": [0.123, -0.456, ...],
|
||||
"model": "nomic-embed-text",
|
||||
"dimensions": 768
|
||||
}
|
||||
```
|
||||
> Earlier Ollama versions used `/api/embeddings` with a `prompt` key and
|
||||
> returned `embedding` (singular). Use `/api/embed`, `input`, and
|
||||
> `embeddings[0]` for Ollama v0.4+.
|
||||
|
||||
---
|
||||
## Usage in NexusAI
|
||||
|
||||
**POST /embed/batch**
|
||||
The embedding service is called in two places:
|
||||
|
||||
Embeds an array of strings sequentially and returns all vectors in the same order.
|
||||
Ollama does not natively parallelize embeddings, so requests are processed one at a time.
|
||||
1. **Memory service** — after each episode is saved to SQLite, the combined
|
||||
`User: ..\nAssistant: ..` text is embedded and upserted into Qdrant.
|
||||
This is fire-and-forget — failures are logged but don't affect the response.
|
||||
|
||||
Request body:
|
||||
```json
|
||||
{
|
||||
"texts": ["first sentence", "second sentence"]
|
||||
}
|
||||
```
|
||||
2. **Orchestration service** — the user's message is embedded at the start of
|
||||
the chat pipeline to perform semantic search against past episodes.
|
||||
|
||||
Response:
|
||||
```json
|
||||
{
|
||||
"embeddings": [[0.123, ...], [0.456, ...]],
|
||||
"model": "nomic-embed-text",
|
||||
"dimensions": 768,
|
||||
"count": 2
|
||||
}
|
||||
```
|
||||
For all HTTP endpoints, see `api-routes.md`.
|
||||
140
docs/services/entity-extraction.md
Normal file
140
docs/services/entity-extraction.md
Normal file
@@ -0,0 +1,140 @@
|
||||
# Entity Extraction
|
||||
|
||||
**Location:** `packages/memory-service/src/entities/extraction.js`
|
||||
**Triggered by:** Episode creation (`POST /episodes`)
|
||||
**Model:** `qwen2.5:3b` via Ollama (configurable via `EXTRACTION_MODEL` env var)
|
||||
|
||||
## Purpose
|
||||
|
||||
After each episode is saved to SQLite, the extraction pipeline runs
|
||||
asynchronously in the background to identify named entities and the
|
||||
relationships between them. Results are written back to SQLite and
|
||||
embedded into Qdrant — the episode response is never delayed.
|
||||
|
||||
## Trigger
|
||||
|
||||
`createEpisode()` in `episodic/index.js` calls `extractAndStoreEntities()`
|
||||
immediately after the SQLite insert, without awaiting it:
|
||||
|
||||
```js
|
||||
extractAndStoreEntities(userMessage, aiResponse, episode.id, projectId)
|
||||
.catch(err => logger.error(`Failed to extract entities for episode ${episode.id}:`, err.message));
|
||||
```
|
||||
|
||||
If extraction throws, the episode is unaffected — the error is logged and
|
||||
swallowed.
|
||||
|
||||
## Model Settings
|
||||
|
||||
| Setting | Value | Notes |
|
||||
|---|---|---|
|
||||
| Model | `qwen2.5:3b` | Ollama, configurable via `EXTRACTION_MODEL` |
|
||||
| Temperature | 0.1 | Low for consistent, deterministic output |
|
||||
| `num_predict` | 1500 | Higher ceiling to accommodate entity + relationship JSON |
|
||||
| `format` | `'json'` | Ollama constrained decoding — enforces valid JSON output |
|
||||
| Prompt format | ChatML | `<\|im_start\|>` / `<\|im_end\|>` tokens |
|
||||
|
||||
## Prompt Structure
|
||||
|
||||
The prompt is built by `buildExtractionPrompt()`. It includes:
|
||||
|
||||
1. **System message** — declares the model's role as an entity and relationship extractor
|
||||
2. **Instructions** — entity types, field rules, relationship label format, required JSON schema
|
||||
3. **Known entities block** — the 20 most recent entities from SQLite (ordered by `rowid DESC`), used to encourage consistent name/type pairs across conversations
|
||||
4. **Conversation** — the raw user message and AI response, delimited clearly
|
||||
|
||||
```
|
||||
<|im_start|>system
|
||||
You are a named entity and relationship extractor. You output only valid JSON.
|
||||
<|im_end|>
|
||||
<|im_start|>user
|
||||
Read the conversation below and extract all named entities and the relationships between them.
|
||||
Entity types: person, place, project, technology, concept, organization
|
||||
...
|
||||
Return this exact JSON structure:
|
||||
{ "entities": [...], "relationships": [...] }
|
||||
|
||||
Already known entities (use these exact name and type values if the same entity appears):
|
||||
- "NexusAI" (project)
|
||||
- "Alice" (person)
|
||||
|
||||
--- CONVERSATION ---
|
||||
User: ...
|
||||
Assistant: ...
|
||||
--- END CONVERSATION ---
|
||||
<|im_end|>
|
||||
<|im_start|>assistant
|
||||
```
|
||||
|
||||
## Expected JSON Output
|
||||
|
||||
```json
|
||||
{
|
||||
"entities": [
|
||||
{ "name": "Alice", "type": "person", "notes": "Software engineer working on NexusAI." },
|
||||
{ "name": "NexusAI", "type": "project", "notes": "A modular AI assistant with persistent memory." }
|
||||
],
|
||||
"relationships": [
|
||||
{
|
||||
"from": "Alice", "fromType": "person",
|
||||
"to": "NexusAI", "toType": "project",
|
||||
"label": "works_on",
|
||||
"notes": "Alice is the primary developer."
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Relationship labels use **snake_case verbs** (e.g. `works_on`, `manages`, `uses`,
|
||||
`knows`, `located_in`, `part_of`, `created_by`).
|
||||
|
||||
## JSON Parsing
|
||||
|
||||
The raw model response is matched with `/\{[\s\S]*\}/` before parsing — this
|
||||
tolerates any preamble or trailing prose the model emits alongside the JSON.
|
||||
If the match fails or `JSON.parse` throws, the function logs a warning and
|
||||
returns without writing anything.
|
||||
|
||||
## Entity Processing
|
||||
|
||||
For each entity in `parsed.entities`:
|
||||
|
||||
1. Validate `name` (must not be in `IGNORED_NAMES`) and `type` (must be in `ENTITY_TYPES`)
|
||||
2. Call `upsertEntity(name, type, notes)`:
|
||||
- **Insert**: creates new row with `mention_count = 1`, `source = 'extraction'`
|
||||
- **Conflict** on `(name, type)`: increments `mention_count`, updates `last_seen_at`, preserves existing `notes` if new extraction returns null
|
||||
3. Add to `entityMap` keyed by `"${name}::${type}"` — used for relationship resolution below
|
||||
4. Call `linkEntityToEpisode(entity.id, episodeId)` — writes to `entity_episodes` join table
|
||||
5. Fire-and-forget: embed as `"${name} (${type}): ${notes}"` → store to Qdrant `entities` collection with `{ name, type, notes, projectId }` in payload
|
||||
|
||||
**Valid entity types:** `person`, `place`, `project`, `technology`, `concept`, `organization`
|
||||
|
||||
**Stoplist (ignored names):** `good morning`, `good night`, `hello`, `goodbye`, `thanks`, `thank you`
|
||||
|
||||
## Relationship Processing
|
||||
|
||||
After all entities are saved, relationships are processed:
|
||||
|
||||
1. For each entry in `parsed.relationships`, look up both endpoints in `entityMap` using `"${from}::${fromType}"` and `"${to}::${toType}"` as keys
|
||||
2. If either endpoint is missing (filtered out, invalid type, or not in this extraction), the relationship is silently skipped
|
||||
3. Call `upsertRelationship(fromId, toId, label, notes)`:
|
||||
- **Insert**: creates new row with `mention_count = 1`
|
||||
- **Conflict** on `(from_id, to_id, label)`: increments `mention_count`, preserves existing `notes` if new is null
|
||||
|
||||
Relationships are unidirectional in storage. Bidirectionality is handled at
|
||||
query time by the graph traversal layer.
|
||||
|
||||
## Project Scoping
|
||||
|
||||
`projectId` is threaded through from the episode creation call. It is stored
|
||||
in the Qdrant entity payload, which enables project-scoped entity search in
|
||||
orchestration. SQLite entities and relationships are global — scoping only
|
||||
applies at the Qdrant retrieval layer.
|
||||
|
||||
## Error Behaviour
|
||||
|
||||
All steps after the initial model call are wrapped in a single outer try/catch.
|
||||
If Ollama is unreachable, returns a non-200 status, or the JSON cannot be
|
||||
parsed, the function logs at `warn` level and returns. There is no retry logic.
|
||||
Individual entity embedding failures are caught per-entity and logged at `warn`
|
||||
level without affecting other entities in the same batch.
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
**Package:** `@nexusai/inference-service`
|
||||
**Location:** `packages/inference-service`
|
||||
**Deployed on:** Main PC
|
||||
**Deployed on:** Main PC (192.168.0.79)
|
||||
**Port:** 3001
|
||||
|
||||
## Purpose
|
||||
@@ -15,7 +15,7 @@ to switch inference backends without changes to the rest of the system.
|
||||
## Dependencies
|
||||
|
||||
- `express` — HTTP API
|
||||
- `ollama` — Ollama client (used by the Ollama provider)
|
||||
- `ollama` — Ollama client (used by the Ollama provider, kept as fallback)
|
||||
- `dotenv` — environment variable loading
|
||||
- `@nexusai/shared` — shared utilities
|
||||
|
||||
@@ -24,102 +24,127 @@ to switch inference backends without changes to the rest of the system.
|
||||
| Variable | Required | Default | Description |
|
||||
|---|---|---|---|
|
||||
| PORT | No | 3001 | Port to listen on |
|
||||
| INFERENCE_PROVIDER | No | ollama | Active inference provider (ollama, llamacpp) |
|
||||
| INFERENCE_URL | No | http://localhost:11434 | URL of the inference runtime |
|
||||
| DEFAULT_MODEL | No | llama3.2 | Default model name passed to the provider |
|
||||
| INFERENCE_PROVIDER | No | llamacpp | Active provider (`ollama` or `llamacpp`) |
|
||||
| INFERENCE_URL | No | http://localhost:8080 | URL of the inference runtime |
|
||||
| DEFAULT_MODEL | No | local-model | Default model name passed to the provider |
|
||||
|
||||
> `INFERENCE_URL` points to `llama-server` directly (port 8080), not to this
|
||||
> service. The orchestration service uses `INFERENCE_SERVICE_URL` to reach
|
||||
> this service on port 3001.
|
||||
|
||||
## Provider Architecture
|
||||
|
||||
The inference service uses a provider pattern to abstract the underlying
|
||||
LLM runtime. The active provider is selected at startup via `INFERENCE_PROVIDER`
|
||||
and loaded from `src/providers/`. Both providers expose identical function
|
||||
signatures, so the rest of the service is unaware of which backend is active.
|
||||
The active provider is selected at startup via `INFERENCE_PROVIDER` and
|
||||
loaded from `src/providers/`. Both providers expose identical function
|
||||
signatures.
|
||||
|
||||
### Supported Providers
|
||||
|
||||
| Provider | Value | Runtime |
|
||||
|---|---|---|
|
||||
| Ollama | `ollama` | Ollama via the `ollama` npm package |
|
||||
| llama.cpp | `llamacpp` | llama.cpp server (OpenAI-compatible API) |
|
||||
| llama.cpp | `llamacpp` | llama.cpp server (OpenAI-compatible API) — **current default** |
|
||||
| Ollama | `ollama` | Ollama via the `ollama` npm package — available as fallback |
|
||||
|
||||
Switching providers requires only a `.env` change — no code modifications needed.
|
||||
Switching providers requires only a `.env` change — no code modifications:
|
||||
```
|
||||
INFERENCE_PROVIDER=llamacpp
|
||||
INFERENCE_URL=http://localhost:8080
|
||||
```
|
||||
|
||||
The provider loader throws immediately on an unknown value, preventing silent
|
||||
misconfiguration.
|
||||
|
||||
> **LM Studio compatibility note:** LM Studio exposes an OpenAI-compatible
|
||||
> `/v1/chat/completions` endpoint with the same request shape as llama.cpp.
|
||||
> A future `lmstudio.js` provider would be nearly identical to `llamacpp.js` —
|
||||
> only the `BASE_URL` would differ. No architectural changes required.
|
||||
|
||||
## Internal Structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── providers/
|
||||
│ ├── ollama.js # Ollama provider — uses ollama npm package
|
||||
│ └── llamacpp.js # llama.cpp provider — uses OpenAI-compatible REST API
|
||||
│ ├── ollama.js # Ollama provider
|
||||
│ └── llamacpp.js # llama.cpp provider (OpenAI-compatible REST)
|
||||
├── routes/
|
||||
│ └── inference.js # /complete and /complete/stream route handlers
|
||||
├── infer.js # Provider loader — selects and re-exports active provider
|
||||
└── index.js # Express app + route definitions
|
||||
```
|
||||
|
||||
## Endpoints
|
||||
|
||||
### Health
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| GET | /health | Service health check — reports active provider and model |
|
||||
|
||||
### Inference
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| POST | /complete | Standard completion — returns full response when done |
|
||||
| POST | /complete/stream | Streaming completion via Server-Sent Events |
|
||||
|
||||
---
|
||||
|
||||
**POST /complete**
|
||||
|
||||
Request body:
|
||||
```json
|
||||
{
|
||||
"prompt": "What is the capital of France?",
|
||||
"model": "companion:latest",
|
||||
"temperature": 0.7,
|
||||
"maxTokens": 1024
|
||||
}
|
||||
```
|
||||
|
||||
`model` is optional — falls back to `DEFAULT_MODEL` if omitted.
|
||||
`maxTokens` is optional — defaults to 1024.
|
||||
`temperature` is optional — defaults to 0.7.
|
||||
## llama.cpp Provider
|
||||
|
||||
Response:
|
||||
```json
|
||||
{
|
||||
"text": "The capital of France is Paris.",
|
||||
"model": "companion:latest",
|
||||
"done": true,
|
||||
"evalCount": 8,
|
||||
"promptEvalCount": 41
|
||||
}
|
||||
Uses the OpenAI-compatible REST API exposed by `llama-server`.
|
||||
|
||||
### Starting llama-server
|
||||
|
||||
`llama-server` must be started manually on the main PC before the inference
|
||||
service can handle requests:
|
||||
|
||||
```powershell
|
||||
.\llama-gpu\llama-server.exe `
|
||||
-m .\models\gemma-4-26B-A4B-Claude-Distill-APEX-I-Mini.gguf `
|
||||
-ngl 99 `
|
||||
--reasoning off `
|
||||
--host 0.0.0.0 `
|
||||
--port 8080 `
|
||||
-c 64000
|
||||
```
|
||||
|
||||
| Field | Description |
|
||||
| Flag | Description |
|
||||
|---|---|
|
||||
| `text` | The model's response |
|
||||
| `model` | Model name as reported by the provider |
|
||||
| `done` | Whether generation completed normally |
|
||||
| `evalCount` | Number of tokens generated |
|
||||
| `promptEvalCount` | Number of tokens in the prompt |
|
||||
| `-ngl 99` | Offload as many layers as possible to GPU |
|
||||
| `--reasoning off` | Disables thinking delay on Gemma 4 models |
|
||||
| `--host 0.0.0.0` | Allows LAN connections |
|
||||
| `-c 64000` | Context window size in tokens |
|
||||
|
||||
---
|
||||
> `-c 64000` is intentionally large. Because NexusAI's memory architecture
|
||||
> handles context injection, lowering it to 6–8K is often sufficient if VRAM pressure builds.
|
||||
|
||||
**POST /complete/stream**
|
||||
### Model Naming
|
||||
|
||||
Same request body as `/complete` (`maxTokens` not applicable for streaming).
|
||||
The model name in requests must match the name reported by `llama-server`,
|
||||
including the `.gguf` extension. Query `/v1/models` to see the exact name:
|
||||
|
||||
Response is a stream of Server-Sent Events. Each event contains a partial
|
||||
response chunk as JSON. The stream closes with a final `data: [DONE]` event.
|
||||
data: {"model":"companion:latest","response":"The","done":false}
|
||||
data: {"model":"companion:latest","response":" capital","done":false}
|
||||
data: {"model":"companion:latest","response":" of France is Paris.","done":false}
|
||||
```powershell
|
||||
Invoke-RestMethod -Uri "http://192.168.0.79:8080/v1/models"
|
||||
```
|
||||
|
||||
Set `DEFAULT_MODEL` in `.env` to the exact reported name.
|
||||
|
||||
### Inference Parameters
|
||||
|
||||
All parameters are resolved in `resolveOptions()` — falling back to
|
||||
`INFERENCE_DEFAULTS` from `@nexusai/shared` if not provided in the request.
|
||||
In normal usage, orchestration reads these from `settings.json` and forwards
|
||||
them on every request.
|
||||
|
||||
| NexusAI option | API field | Default | Description |
|
||||
|---|---|---|---|
|
||||
| `temperature` | `temperature` | 0.7 | Response randomness (0 = deterministic) |
|
||||
| `maxTokens` | `max_tokens` | 1024 | Max tokens to generate |
|
||||
| `topP` | `top_p` | 0.9 | Nucleus sampling probability mass |
|
||||
| `topK` | `top_k` | 40 | Top-K token candidates per step |
|
||||
| `repeatPenalty` | `repeat_penalty` | 1.1 | Penalty for recently used tokens |
|
||||
| `seed` | `seed` | null | null = random; integer for reproducible output |
|
||||
|
||||
## Streaming Response Format
|
||||
|
||||
The llama.cpp provider yields chunks in this shape:
|
||||
```js
|
||||
{ response: "token text", done: false }
|
||||
// final chunk:
|
||||
{ response: '', done: true, model: "model-name.gguf", tokenCount: 42 }
|
||||
```
|
||||
|
||||
The inference route re-emits as SSE:
|
||||
```
|
||||
data: {"response":"token text"}
|
||||
data: {"done":true,"model":"model-name.gguf","tokenCount":42}
|
||||
data: [DONE]
|
||||
```
|
||||
|
||||
Clients should read the `response` field from each chunk and accumulate
|
||||
them to build the full response string.
|
||||
`model` and `tokenCount` are captured from the llama.cpp `finish_reason: stop`
|
||||
chunk and emitted on the done event.
|
||||
|
||||
For all HTTP endpoints, see `api-routes.md`.
|
||||
213
docs/services/knowledge-graph.md
Normal file
213
docs/services/knowledge-graph.md
Normal file
@@ -0,0 +1,213 @@
|
||||
# Knowledge Graph
|
||||
|
||||
**Location:** `packages/memory-service/src/graph/index.js`
|
||||
**Schema additions:** `entity_episodes` table; new columns on `entities` and `relationships`
|
||||
**Exposed via:** `GET /graph/neighborhood/:entityId`, `POST /graph/neighbors`
|
||||
**Consumed by:** Orchestration service context assembly
|
||||
|
||||
## Purpose
|
||||
|
||||
The knowledge graph transforms NexusAI from "remembers conversations" to
|
||||
"understands relationships between things." Rather than injecting a flat
|
||||
list of entity facts into every prompt, orchestration now retrieves a
|
||||
1-hop subgraph of connected entities and their relationships, giving the
|
||||
model structured, linked knowledge about people, projects, technologies,
|
||||
and concepts that have appeared across conversations.
|
||||
|
||||
## Schema
|
||||
|
||||
### `entity_episodes` (join table)
|
||||
|
||||
Tracks which episodes contributed to each entity's knowledge. Defined in
|
||||
`schema.js` — exists on all installs.
|
||||
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS entity_episodes (
|
||||
entity_id INTEGER NOT NULL REFERENCES entities(id) ON DELETE CASCADE,
|
||||
episode_id INTEGER NOT NULL REFERENCES episodes(id) ON DELETE CASCADE,
|
||||
PRIMARY KEY (entity_id, episode_id)
|
||||
);
|
||||
```
|
||||
|
||||
Both FKs cascade on delete — removing an entity or episode automatically
|
||||
cleans up its join rows.
|
||||
|
||||
### New columns on `entities`
|
||||
|
||||
Added via migration in `db/index.js`:
|
||||
|
||||
| Column | Type | Default | Description |
|
||||
|---|---|---|---|
|
||||
| `mention_count` | INTEGER | 1 | How many times this entity has been extracted across conversations |
|
||||
| `confidence` | REAL | 1.0 | Reserved for future confidence scoring |
|
||||
| `source` | TEXT | `'extraction'` | `'extraction'` (auto) or `'manual'` |
|
||||
| `last_seen_at` | INTEGER | NULL | Unix timestamp of most recent extraction hit |
|
||||
|
||||
### New columns on `relationships`
|
||||
|
||||
| Column | Type | Default | Description |
|
||||
|---|---|---|---|
|
||||
| `mention_count` | INTEGER | 1 | How many times this edge has been extracted |
|
||||
| `notes` | TEXT | NULL | Relationship context sentence from extraction |
|
||||
|
||||
## Entity Promotion Model
|
||||
|
||||
Entities are not created equal — some are mentioned once in passing, others
|
||||
recur across many conversations. `mention_count` is the signal:
|
||||
|
||||
- Every time `upsertEntity` is called for an existing `(name, type)` pair, `mention_count` is incremented and `last_seen_at` is updated.
|
||||
- `ENTITIES.PROMOTION_THRESHOLD` (default: **3**) is the `mention_count` at which an entity is considered "well-established" — referenced in the codebase for future filtering and scoring logic.
|
||||
- Currently `mention_count` is stored and incremented but not yet used to gate retrieval. It provides the foundation for future features such as orphan cleanup (entities never re-extracted) and confidence-weighted graph traversal.
|
||||
|
||||
The same pattern applies to relationships — `mention_count` rises each time
|
||||
the same `(from_id, to_id, label)` triple is extracted.
|
||||
|
||||
## Graph Traversal
|
||||
|
||||
`src/graph/index.js` exports two functions built on SQLite's `WITH RECURSIVE`
|
||||
CTE support. No external graph database is needed.
|
||||
|
||||
### `getNeighborhood(entityId, depth)`
|
||||
|
||||
Traverses the graph from a single entity, following edges in **both directions**,
|
||||
up to `depth` hops. Returns `{ nodes: [...entities], edges: [...relationships] }`.
|
||||
|
||||
Default depth: `ENTITIES.GRAPH_HOP_DEPTH` (1). Maximum enforced at HTTP layer: 3.
|
||||
|
||||
**SQLite query:**
|
||||
|
||||
```sql
|
||||
WITH RECURSIVE traverse(entity_id, depth) AS (
|
||||
SELECT ?, 0
|
||||
UNION
|
||||
SELECT
|
||||
CASE WHEN r.from_id = t.entity_id THEN r.to_id ELSE r.from_id END,
|
||||
t.depth + 1
|
||||
FROM relationships r
|
||||
JOIN traverse t ON (r.from_id = t.entity_id OR r.to_id = t.entity_id)
|
||||
WHERE t.depth < ?
|
||||
)
|
||||
SELECT DISTINCT entity_id FROM traverse
|
||||
```
|
||||
|
||||
`UNION` (not `UNION ALL`) discards duplicate `(entity_id, depth)` rows, and the
|
||||
`t.depth < ?` bound stops the recursion, so cycles cannot loop forever. The final `SELECT DISTINCT` collapses nodes that were reached at more than one depth.
|
||||
|
||||
After collecting node IDs, two follow-up queries fetch:
|
||||
- All entity rows for those IDs
|
||||
- All relationship rows where both `from_id` and `to_id` are in the node set
|
||||
|
||||
This ensures edges between neighbors are included even if they aren't on the
|
||||
traversal path from the seed.
|
||||
|
||||
### `getEntityNeighbors(entityIds[])`
|
||||
|
||||
Bulk 1-hop version designed for orchestration. Given multiple seed entity IDs
|
||||
(the results of Qdrant semantic search), returns the combined 1-hop subgraph.
|
||||
|
||||
1. Finds all neighbor IDs via one query using `IN (...)` on both `from_id` and `to_id`
|
||||
2. Deduplicates seeds + neighbors using a JavaScript `Set`
|
||||
3. Fetches all entity rows and all relationship rows within the combined node set
|
||||
|
||||
This is intentionally simpler than the recursive version — orchestration always
|
||||
uses depth=1, and the bulk query avoids N separate CTE calls.
|
||||
|
||||
## Graph-Aware Context Assembly
|
||||
|
||||
Orchestration's `assembleContext` (in `src/chat/index.js`) integrates the
|
||||
graph at step 7 of the chat pipeline:
|
||||
|
||||
1. Qdrant entity search returns up to `ORCHESTRATION.ENTITIES_LIMIT` results, each including `r.id` (the SQLite entity ID) alongside the Qdrant payload
|
||||
2. `graph.getNeighbors(entityIds)` is called with those IDs → `POST /graph/neighbors` on memory-service
|
||||
3. The returned `{ nodes, edges }` is passed to `formatGraphContext()`
|
||||
4. On failure, falls back to using the Qdrant payload data directly as flat nodes with no edges
|
||||
|
||||
### Prompt Format
|
||||
|
||||
`formatGraphContext(nodes, edges)` in `chat/index.js` formats the subgraph as:
|
||||
|
||||
```
|
||||
Here is what you know about entities relevant to this conversation and their connections:
|
||||
- Alice (person): software engineer working on NexusAI
|
||||
→ works_on NexusAI (project)
|
||||
→ knows Bob (person)
|
||||
- NexusAI (project): AI assistant framework
|
||||
- Bob (person): Alice's colleague
|
||||
```
|
||||
|
||||
- One line per node: `- {name} ({type}): {notes}`
|
||||
- Outbound edges indented below: ` → {label} {target_name} ({target_type})`
|
||||
- Nodes with only inbound edges (pulled in as neighbors) appear without connection lines
|
||||
- Only outbound edges are shown — each relationship appears once, from the `from_id` side
|
||||
|
||||
## Project Scoping
|
||||
|
||||
The knowledge graph respects project boundaries at the **entry point**, not
|
||||
during traversal:
|
||||
|
||||
- Qdrant entity search is filtered by `projectId` — only entities tagged with this project are returned as seeds
|
||||
- Graph traversal in SQLite is unfiltered — neighbors can be from any project or no project
|
||||
- This is intentional: the graph entry is project-scoped, but traversal follows the global relationship graph to discover connected knowledge
|
||||
|
||||
Entities are tagged with `projectId` in the Qdrant payload at extraction time.
|
||||
Entities extracted from non-project sessions have `projectId: null` and only
|
||||
appear in unfiltered global searches.
|
||||
|
||||
## API Reference
|
||||
|
||||
### `GET /graph/neighborhood/:entityId`
|
||||
|
||||
Returns the neighborhood of a single entity.
|
||||
|
||||
**Query params:**
|
||||
|
||||
| Param | Default | Max | Description |
|
||||
|---|---|---|---|
|
||||
| `depth` | `ENTITIES.GRAPH_HOP_DEPTH` (1) | 3 | Traversal depth |
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"entity": { "id": 5, "name": "Alice", "type": "person", "notes": "...", "mention_count": 4 },
|
||||
"neighborhood": {
|
||||
"nodes": [
|
||||
{ "id": 5, "name": "Alice", "type": "person", "notes": "..." },
|
||||
{ "id": 8, "name": "NexusAI", "type": "project", "notes": "..." }
|
||||
],
|
||||
"edges": [
|
||||
{ "id": 2, "from_id": 5, "to_id": 8, "label": "works_on", "notes": "...", "mention_count": 3 }
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Returns 404 if the entity does not exist.
|
||||
|
||||
### `POST /graph/neighbors`
|
||||
|
||||
Bulk 1-hop neighborhood for a set of entity IDs. Used internally by
|
||||
orchestration — not intended for direct client use.
|
||||
|
||||
**Request body:**
|
||||
```json
|
||||
{ "entityIds": [5, 8, 12] }
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"nodes": [ ...entity objects... ],
|
||||
"edges": [ ...relationship objects... ]
|
||||
}
|
||||
```
|
||||
|
||||
Returns 400 if `entityIds` is missing or empty.
|
||||
|
||||
## Constants (`packages/shared/src/config/constants.js`)
|
||||
|
||||
| Constant | Value | Description |
|
||||
|---|---|---|
|
||||
| `ENTITIES.PROMOTION_THRESHOLD` | 3 | `mention_count` at which an entity is considered well-established |
|
||||
| `ENTITIES.GRAPH_HOP_DEPTH` | 1 | Default traversal depth for neighborhood queries |
|
||||
| `ORCHESTRATION.ENTITIES_LIMIT` | 5 | Max entity seeds returned from Qdrant search |
|
||||
| `ORCHESTRATION.ENTITIES_THRESHOLD` | 0.55 | Minimum similarity score for entity Qdrant search |
|
||||
@@ -9,8 +9,8 @@
|
||||
|
||||
Responsible for all reading and writing of long-term memory. Acts as the
|
||||
sole interface to both SQLite and Qdrant — no other service accesses these
|
||||
stores directly. On episode creation, automatically calls the embedding
|
||||
service to generate and store a vector in Qdrant.
|
||||
stores directly. On episode creation, automatically triggers entity and
|
||||
relationship extraction and embeds results into Qdrant.
|
||||
|
||||
## Dependencies
|
||||
|
||||
@@ -28,32 +28,66 @@ service to generate and store a vector in Qdrant.
|
||||
| SQLITE_PATH | Yes | — | Path to SQLite database file |
|
||||
| QDRANT_URL | No | http://localhost:6333 | Qdrant instance URL |
|
||||
| EMBEDDING_SERVICE_URL | No | http://localhost:3003 | Embedding service URL |
|
||||
| EXTRACTION_URL | No | http://localhost:11434 | Ollama URL for entity extraction |
|
||||
| EXTRACTION_MODEL | No | qwen2.5:3b | Ollama model used for entity extraction |
|
||||
|
||||
## Internal Structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── db/
|
||||
│ ├── index.js # SQLite connection + initialization
|
||||
│ └── schema.js # Table definitions, indexes, FTS5, triggers
|
||||
│ ├── index.js # SQLite connection + initialization + migrations
|
||||
│ ├── schema.js # Table definitions, indexes, FTS5, triggers
|
||||
│ ├── projects.js # Project CRUD functions
|
||||
│ └── summaries.js # Summary CRUD functions
|
||||
├── episodic/
|
||||
│ └── index.js # Session + episode CRUD, FTS search, embedding write path
|
||||
├── semantic/
|
||||
│ └── index.js # Qdrant collection management, upsert, search, delete
|
||||
├── entities/
|
||||
│ └── index.js # Entity + relationship CRUD
|
||||
└── index.js # Express app + route definitions
|
||||
│ ├── index.js # Entity + relationship CRUD (upsert, mention tracking)
|
||||
│ └── extraction.js # Automatic entity + relationship extraction via qwen2.5:3b
|
||||
├── graph/
|
||||
│ └── index.js # Knowledge graph traversal (neighborhood queries, recursive CTE)
|
||||
└── index.js # Express app + all route definitions
|
||||
```
|
||||
|
||||
## SQLite Schema
|
||||
|
||||
Five core tables:
|
||||
Seven core tables:
|
||||
|
||||
- **sessions** — top-level conversation containers, identified by an `external_id`
|
||||
- **sessions** — top-level conversation containers. Fields: `external_id`, `name`, `project_id`, `metadata`
|
||||
- **episodes** — individual exchanges (user message + AI response) tied to a session
|
||||
- **entities** — named things the system learns about (people, places, concepts)
|
||||
- **relationships** — directional labeled links between entities
|
||||
- **entities** — named things the system learns about (people, places, concepts, etc.). Fields include `mention_count`, `confidence`, `source`, `last_seen_at`
|
||||
- **relationships** — directional labeled links between entities (`from_id`, `to_id`, `label`). Fields include `mention_count`, `notes`
|
||||
- **entity_episodes** — join table linking entities to the episodes where they were extracted. Used for provenance and orphan cleanup
|
||||
- **summaries** — condensed episode groups for efficient context retrieval
|
||||
- **projects** — named groupings of sessions with `name`, `description`, `colour`, `icon`, `isolated`, `notes`, `system_prompt`
|
||||
|
||||
### Migrations
|
||||
|
||||
Schema changes that cannot use `CREATE TABLE IF NOT EXISTS` are applied as
|
||||
idempotent migrations in `db/index.js` at startup:
|
||||
|
||||
```js
|
||||
try { db.exec(`ALTER TABLE sessions ADD COLUMN name TEXT`); } catch {}
|
||||
try { db.exec(`ALTER TABLE sessions ADD COLUMN project_id INTEGER REFERENCES projects(id)`); } catch {}
|
||||
try { db.exec(`CREATE INDEX IF NOT EXISTS idx_sessions_project ON sessions(project_id)`); } catch {}
|
||||
try { db.exec(`ALTER TABLE projects ADD COLUMN isolated INTEGER NOT NULL DEFAULT 0`); } catch {}
|
||||
try { db.exec(`ALTER TABLE projects ADD COLUMN notes TEXT`); } catch {}
|
||||
try { db.exec(`ALTER TABLE projects ADD COLUMN system_prompt TEXT`); } catch {}
|
||||
// Knowledge graph columns:
|
||||
try { db.exec(`ALTER TABLE entities ADD COLUMN mention_count INTEGER NOT NULL DEFAULT 1`) } catch {}
|
||||
try { db.exec(`ALTER TABLE entities ADD COLUMN confidence REAL NOT NULL DEFAULT 1.0`) } catch {}
|
||||
try { db.exec(`ALTER TABLE entities ADD COLUMN source TEXT NOT NULL DEFAULT 'extraction'`) } catch {}
|
||||
try { db.exec(`ALTER TABLE entities ADD COLUMN last_seen_at INTEGER`) } catch {}
|
||||
try { db.exec(`ALTER TABLE relationships ADD COLUMN mention_count INTEGER NOT NULL DEFAULT 1`) } catch {}
|
||||
try { db.exec(`ALTER TABLE relationships ADD COLUMN notes TEXT`) } catch {}
|
||||
```
|
||||
|
||||
`entity_episodes` is defined in `schema.js` itself (not a migration) since it is a new table.
|
||||
|
||||
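New migrations are always appended. Do not edit the `CREATE TABLE` definition of an existing table in `schema.js`: `CREATE TABLE IF NOT EXISTS` is a no-op on installs where the table already exists, and because `ALTER TABLE ... ADD COLUMN` has no `IF NOT EXISTS` form, each migration is wrapped in `try/catch` so it stays idempotent.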
|
||||
|
||||
### FTS5 Full-Text Search
|
||||
|
||||
@@ -65,11 +99,22 @@ keep the FTS index automatically in sync with the episodes table.
|
||||
|
||||
- `journal_mode = WAL` — non-blocking reads during writes
|
||||
- `foreign_keys = ON` — enforces referential integrity and cascade deletes
|
||||
- PRAGMAs are set via `db.pragma()` separately from `db.exec()`
|
||||
- PRAGMAs set via `db.pragma()`, not `db.exec()`
|
||||
|
||||
### Dynamic Updates
|
||||
|
||||
Both `updateSession` and `updateProject` build their `SET` clause dynamically
|
||||
from only the fields passed — prevents partial updates from overwriting fields
|
||||
that weren't touched.
|
||||
|
||||
`updateProject` allowlist:
|
||||
```js
|
||||
const allowed = ['name', 'description', 'colour', 'icon', 'isolated', 'notes', 'system_prompt'];
|
||||
```
|
||||
|
||||
## Qdrant / Semantic Layer
|
||||
|
||||
Three collections are initialized on service startup (created if they don't already exist):
|
||||
Three Qdrant collections are initialized on service startup via `semantic.initCollections()`:
|
||||
|
||||
| Collection | Purpose |
|
||||
|---|---|
|
||||
@@ -77,158 +122,79 @@ Three collections are initialized on service startup (created if they don't alre
|
||||
| `entities` | Embeddings for named entities |
|
||||
| `summaries` | Embeddings for condensed episode summaries |
|
||||
|
||||
All collections use **768-dimension vectors** with **Cosine similarity**, matching the
|
||||
output of the `nomic-embed-text` embedding model via Ollama.
|
||||
All collections use **768-dimension vectors** with **Cosine similarity**,
|
||||
matching `nomic-embed-text` via Ollama. Vector size and distance metric are
|
||||
defined in `@nexusai/shared` — not hardcoded here.
|
||||
|
||||
Vector dimension and distance metric are defined in `@nexusai/shared` constants
|
||||
(`QDRANT.VECTOR_SIZE`, `QDRANT.DISTANCE_METRIC`) — not hardcoded in this service.
|
||||
`initCollections()` iterates `Object.values(COLLECTIONS)` and creates any
|
||||
collection that doesn't already exist at startup — all three collections are
|
||||
guaranteed to exist before any requests are handled.
|
||||
|
||||
### Semantic Layer Operations
|
||||
|
||||
Each collection exposes three operations via helper functions in `src/semantic/index.js`:
|
||||
|
||||
- **Upsert** — stores a vector with a payload containing the SQLite row ID, enabling
|
||||
lookups back to the full content after a vector search
|
||||
- **Search** — returns the top-k most similar vectors, with optional Qdrant filter
|
||||
- **Delete** — removes a vector point by ID
|
||||
|
||||
The `wait: true` flag is used on all write operations so the caller receives confirmation
|
||||
only after Qdrant has committed the change.
|
||||
Each collection exposes upsert, search (with optional Qdrant filter), and
|
||||
delete operations. The `wait: true` flag is used on all writes.
|
||||
|
||||
## Embedding Write Path
|
||||
|
||||
When a new episode is created, the memory service automatically generates and stores
|
||||
a vector embedding in Qdrant via the embedding service:
|
||||
When a new episode is created:
|
||||
|
||||
1. Episode is saved to SQLite synchronously — the response is returned immediately
|
||||
2. Both sides of the exchange are combined into a single text:
|
||||
```
|
||||
User: {userMessage}
|
||||
Assistant: {aiResponse}
|
||||
```
|
||||
3. This text is sent to the embedding service (`POST /embed`)
|
||||
4. The returned vector is upserted into the `episodes` Qdrant collection with a
|
||||
payload of `{ sessionId, createdAt }` for filtering and lookups
|
||||
1. Episode saved to SQLite synchronously — response returned immediately
|
||||
2. User message + AI response combined: `User: ...\nAssistant: ...`
|
||||
3. Text sent to embedding service (`POST /embed`)
|
||||
4. Vector upserted into `episodes` Qdrant collection with payload `{ sessionId, createdAt }`
|
||||
|
||||
The embedding step is **fire-and-forget** — it runs asynchronously after the SQLite
|
||||
insert succeeds. If embedding fails, the episode is still saved and searchable via
|
||||
FTS. The error is logged but does not affect the API response.
|
||||
This step is **fire-and-forget** — if embedding fails, the episode is still
|
||||
saved and searchable via FTS. The error is logged but not surfaced.
|
||||
|
||||
### Hybrid Retrieval Pattern
|
||||
|
||||
Qdrant and SQLite work as a pair — neither operates in isolation:
|
||||
|
||||
1. Query is embedded and searched in Qdrant → returns IDs + similarity scores
|
||||
2. IDs are used to fetch full content from SQLite
|
||||
3. Results are ranked and assembled into a context package
|
||||
> The Qdrant payload stores `sessionId` (the internal integer ID). See
|
||||
> `memory-isolation.md` for how project-level filtering works.
|
||||
|
||||
## Entity Layer
|
||||
|
||||
Entities and relationships are stored in SQLite with two key constraints:
|
||||
Entities and relationships use upsert semantics with composite unique
|
||||
constraints to prevent duplicates:
|
||||
|
||||
- `UNIQUE(name, type)` on entities — ensures no duplicates; upsert updates existing records
|
||||
- `UNIQUE(from_id, to_id, label)` on relationships — prevents duplicate edges
|
||||
- `ON DELETE CASCADE` on both `from_id` and `to_id` — deleting an entity automatically
|
||||
removes all relationships where it appears on either end
|
||||
- `UNIQUE(name, type)` on entities — conflict increments `mention_count` and updates `last_seen_at`
|
||||
- `UNIQUE(from_id, to_id, label)` on relationships — conflict increments `mention_count` and preserves existing `notes`
|
||||
- `ON DELETE CASCADE` on relationship foreign keys
|
||||
|
||||
## Endpoints
|
||||
After each episode is saved, `extraction.js` automatically extracts named
|
||||
entities **and relationships** from the conversation using `qwen2.5:3b` on
|
||||
Ollama — fire-and-forget. Each saved entity is also linked to the episode
|
||||
via the `entity_episodes` join table.
|
||||
|
||||
### Health
|
||||
> For full details on the extraction pipeline and JSON format, see `entity-extraction.md`.
|
||||
> For the knowledge graph traversal layer, see `knowledge-graph.md`.
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| GET | /health | Service health check |
|
||||
## Knowledge Graph Layer
|
||||
|
||||
### Sessions
|
||||
`src/graph/index.js` provides SQLite-based graph traversal over the entities
|
||||
and relationships tables. Two functions are exposed via HTTP:
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| POST | /sessions | Create a new session |
|
||||
| GET | /sessions/:id | Get session by internal ID |
|
||||
| GET | /sessions/by-external/:externalId | Get session by external ID |
|
||||
| DELETE | /sessions/:id | Delete session (cascades to episodes + summaries) |
|
||||
- **`getNeighborhood(entityId, depth)`** — recursive CTE traversal, bidirectional, returns `{ nodes, edges }`
|
||||
- **`getEntityNeighbors(entityIds[])`** — bulk 1-hop traversal for orchestration context assembly
|
||||
|
||||
**POST /sessions body:**
|
||||
```json
|
||||
{
|
||||
"externalId": "unique-session-id",
|
||||
"metadata": {}
|
||||
}
|
||||
> For design rationale, traversal queries, and integration with orchestration, see `knowledge-graph.md`.
|
||||
|
||||
## Summaries Layer
|
||||
|
||||
Session summaries are generated by `orchestration-service/src/services/summarization.js`
|
||||
after each episode write and stored here via `POST /summaries`. The memory
|
||||
service is responsible only for CRUD — generation logic lives in orchestration.
|
||||
|
||||
> For full details on trigger conditions, prompt format, cumulative updates,
|
||||
> and ChatML token stripping, see `summarization.md`.
|
||||
|
||||
## Project Delete Behaviour
|
||||
|
||||
Deleting a project runs as a transaction — it first nulls out `project_id`
|
||||
on all assigned sessions, then deletes the project. This avoids a foreign
|
||||
key constraint failure since `sessions.project_id` has no `ON DELETE` rule:
|
||||
|
||||
```js
|
||||
const doDelete = db.transaction(() => {
|
||||
db.prepare(`UPDATE sessions SET project_id = NULL WHERE project_id = ?`).run(id);
|
||||
db.prepare(`DELETE FROM projects WHERE id = ?`).run(id);
|
||||
});
|
||||
```
|
||||
|
||||
### Episodes
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| POST | /episodes | Create episode + auto-embed into Qdrant |
|
||||
| GET | /episodes/search?q=&limit= | Full-text search across episodes |
|
||||
| GET | /episodes/:id | Get episode by ID |
|
||||
| GET | /sessions/:id/episodes?limit=&offset= | Get paginated episodes for a session |
|
||||
| DELETE | /episodes/:id | Delete an episode |
|
||||
|
||||
**POST /episodes body:**
|
||||
```json
|
||||
{
|
||||
"sessionId": 1,
|
||||
"userMessage": "Hello",
|
||||
"aiResponse": "Hi there!",
|
||||
"tokenCount": 10,
|
||||
"metadata": {}
|
||||
}
|
||||
```
|
||||
|
||||
> Note: `/episodes/search` must be defined before `/episodes/:id` in Express to prevent
|
||||
> the word `search` being captured as an ID parameter.
|
||||
|
||||
### Entities
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| POST | /entities | Upsert an entity (creates or updates by name + type) |
|
||||
| GET | /entities/by-type/:type | Get all entities of a given type |
|
||||
| GET | /entities/:id | Get entity by internal ID |
|
||||
| DELETE | /entities/:id | Delete entity (cascades to relationships) |
|
||||
|
||||
**POST /entities body:**
|
||||
```json
|
||||
{
|
||||
"name": "NexusAI",
|
||||
"type": "project",
|
||||
"notes": "My AI memory project",
|
||||
"metadata": {}
|
||||
}
|
||||
```
|
||||
|
||||
> Note: `/entities/by-type/:type` must be defined before `/entities/:id` in Express to
|
||||
> prevent `by-type` being captured as an ID parameter.
|
||||
|
||||
### Relationships
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| POST | /relationships | Upsert a relationship between two entities |
|
||||
| GET | /entities/:id/relationships | Get all relationships originating from an entity |
|
||||
| DELETE | /relationships | Delete a specific relationship |
|
||||
|
||||
**POST /relationships body:**
|
||||
```json
|
||||
{
|
||||
"fromId": 1,
|
||||
"toId": 2,
|
||||
"label": "uses",
|
||||
"metadata": {}
|
||||
}
|
||||
```
|
||||
|
||||
**DELETE /relationships body:**
|
||||
```json
|
||||
{
|
||||
"fromId": 1,
|
||||
"toId": 2,
|
||||
"label": "uses"
|
||||
}
|
||||
```
|
||||
|
||||
> Relationships are identified by the composite key `(fromId, toId, label)`. Delete uses
|
||||
> the request body rather than URL params as this three-part key is awkward to express
|
||||
> cleanly in a path.
|
||||
For all HTTP endpoints, see `api-routes.md`.
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
# Orchestration Service
|
||||
|
||||
**Package:** `@nexusai/orchestration-service`
|
||||
**Location:** `packages/orchestration-service`
|
||||
**Deployed on:** Mini PC 2 (192.168.0.205)
|
||||
**Port:** 4000
|
||||
|
||||
## Purpose
|
||||
|
||||
The main entry point for all clients. Assembles context packages from
|
||||
memory, routes prompts to inference, and writes new episodes back to
|
||||
memory after each interaction.
|
||||
|
||||
## Dependencies
|
||||
|
||||
- `express` — HTTP API
|
||||
- `node-fetch` — inter-service HTTP communication
|
||||
- `dotenv` — environment variable loading
|
||||
- `@nexusai/shared` — shared utilities
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable | Required | Default | Description |
|
||||
|---|---|---|---|
|
||||
| PORT | No | 4000 | Port to listen on |
|
||||
| MEMORY_SERVICE_URL | No | http://localhost:3002 | Memory service URL |
|
||||
| EMBEDDING_SERVICE_URL | No | http://localhost:3003 | Embedding service URL |
|
||||
| INFERENCE_SERVICE_URL | No | http://localhost:3001 | Inference service URL |
|
||||
|
||||
## Endpoints
|
||||
|
||||
| Method | Path | Description |
|
||||
|---|---|---|
|
||||
| GET | /health | Service health check |
|
||||
|
||||
> Further endpoints will be documented as the service is built out.
|
||||
226
docs/services/orchestration-service.md
Normal file
226
docs/services/orchestration-service.md
Normal file
@@ -0,0 +1,226 @@
|
||||
# Orchestration Service
|
||||
|
||||
**Package:** `@nexusai/orchestration-service`
|
||||
**Location:** `packages/orchestration-service`
|
||||
**Deployed on:** Mini PC 2 (192.168.0.205)
|
||||
**Port:** 4000
|
||||
|
||||
## Purpose
|
||||
|
||||
The main entry point for all clients. Assembles context packages from
|
||||
memory, routes prompts to inference, and writes new episodes back to
|
||||
memory after each interaction. Clients never talk directly to the memory
|
||||
or inference services — all traffic flows through orchestration.
|
||||
|
||||
## Dependencies
|
||||
|
||||
- `express` — HTTP API
|
||||
- `cors` — cross-origin resource sharing middleware
|
||||
- `dotenv` — environment variable loading
|
||||
- `@nexusai/shared` — shared utilities
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable | Required | Default | Description |
|
||||
|---|---|---|---|
|
||||
| PORT | No | 4000 | Port to listen on |
|
||||
| MEMORY_SERVICE_URL | No | http://localhost:3002 | Memory service URL |
|
||||
| EMBEDDING_SERVICE_URL | No | http://localhost:3003 | Embedding service URL |
|
||||
| INFERENCE_SERVICE_URL | No | http://localhost:3001 | Inference service URL |
|
||||
| LLAMA_SERVER_URL | No | http://localhost:8080 | Direct llama-server URL for /models/props |
|
||||
| QDRANT_URL | No | http://localhost:6333 | Qdrant URL for semantic search |
|
||||
| CORS_ORIGIN | No | http://localhost:5173 | Allowed origin for CORS requests |
|
||||
| EXTRACTION_URL | No | http://localhost:11434 | Ollama URL for summarisation |
|
||||
| EXTRACTION_MODEL | No | qwen2.5:3b | Ollama model used for summarisation |
|
||||
|
||||
## Internal Structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── services/
|
||||
│ ├── memory.js # HTTP client for memory service
|
||||
│ ├── inference.js # HTTP client for inference service
|
||||
│ ├── embedding.js # HTTP client for embedding service
|
||||
│ ├── qdrant.js # HTTP client for Qdrant (direct vector search)
|
||||
│ ├── graph.js # HTTP client for memory-service graph endpoints
|
||||
│ └── summarization.js # Session summarisation — triggers after each episode
|
||||
├── chat/
|
||||
│ └── index.js # Core pipeline — context assembly, graph expansion, auto-naming
|
||||
├── config/
|
||||
│ └── settings.js # Settings load/save — reads/writes data/settings.json
|
||||
├── routes/
|
||||
│ ├── chat.js # POST /chat and POST /chat/stream
|
||||
│ ├── sessions.js # Session CRUD proxy
|
||||
│ ├── projects.js # Project CRUD proxy
|
||||
│ ├── episodes.js # Episode list and delete proxy
|
||||
│ ├── summaries.js # GET /summaries/session/:id and /summaries/project/:id
|
||||
│ ├── settings.js # GET /settings and PATCH /settings
|
||||
│ ├── health.js # GET /health/services — pings all four services
|
||||
│ └── models.js # GET /models and GET /models/props
|
||||
└── index.js # Express app entry point
|
||||
```
|
||||
|
||||
The `services/` layer wraps all downstream HTTP calls in named functions.
|
||||
Any URL or endpoint change therefore only needs to be made in one place.
|
||||
|
||||
## Settings
|
||||
|
||||
Settings are persisted to `data/settings.json` and loaded on every request
|
||||
via `appSettings.load()` — changes apply immediately without a service restart.
|
||||
|
||||
| Setting | Default | Description |
|
||||
|---|---|---|
|
||||
| `recentEpisodeLimit` | 5 | Recent episodes injected into prompt |
|
||||
| `semanticLimit` | 5 | Semantic search results injected into prompt |
|
||||
| `scoreThreshold` | 0.5 | Minimum similarity score for Qdrant semantic results |
|
||||
| `semanticWeight` | 1.0 | RRF weight for Qdrant semantic results |
|
||||
| `keywordWeight` | 0 | RRF weight for FTS5 keyword results (`0` = disabled) |
|
||||
| `modelsFolderPath` | `/mnt/nexus-models` | Path to folder containing .gguf files |
|
||||
| `temperature` | 0.7 | Inference temperature |
|
||||
| `repeatPenalty` | 1.1 | Repeat token penalty |
|
||||
| `topP` | 0.9 | Nucleus sampling probability mass |
|
||||
| `topK` | 40 | Top-K token candidates per step |
|
||||
| `systemPrompt` | *(ORCHESTRATION.SYSTEM_PROMPT)* | Global system prompt. `null` reverts to hardcoded constant. |
|
||||
|
||||
## Chat Pipeline
|
||||
|
||||
Both `POST /chat` and `POST /chat/stream` share the same steps. The only
|
||||
difference is how the inference response is delivered to the client.
|
||||
|
||||
### Steps
|
||||
|
||||
1. **Session resolution** — look up session by `externalId`. Auto-create if
|
||||
not found.
|
||||
|
||||
2. **Project context resolution** — if the session has a `project_id`, fetch
|
||||
the project and all its session IDs. Used to scope semantic search. The
|
||||
project's `system_prompt` is also read at this step if set.
|
||||
|
||||
3. **System prompt resolution** — three-tier hierarchy:
|
||||
- `project.system_prompt` — highest priority
|
||||
- `settings.systemPrompt` — global setting from `settings.json`
|
||||
- `ORCHESTRATION.SYSTEM_PROMPT` — hardcoded constant (last resort)
|
||||
|
||||
4. **Recent episode retrieval** — fetch most recent episodes (`recentEpisodeLimit`).
|
||||
|
||||
5. **Fused episode retrieval** — runs semantic (Qdrant) and keyword (FTS5)
|
||||
search in parallel, then merges results via Reciprocal Rank Fusion (RRF).
|
||||
Both paths are filtered against `recentIds` before fusion. FTS is scoped
|
||||
to the current session or all project sessions. If `keywordWeight` is `0`,
|
||||
the FTS call is skipped entirely. Non-critical — failures fall back to
|
||||
whichever strategy succeeded.
|
||||
|
||||
6. **Entity search** — query `entities` Qdrant collection filtered by
|
||||
`projectId`. Returns entity IDs alongside Qdrant payload data (the Qdrant
|
||||
point ID equals the SQLite entity ID). Non-critical.
|
||||
|
||||
7. **Graph neighborhood expansion** — call `POST /graph/neighbors` on
|
||||
memory-service with the entity IDs from step 6. Returns a 1-hop subgraph
|
||||
`{ nodes, edges }` — entity objects plus the relationships connecting them.
|
||||
If no entities were found or the graph call fails, falls back to flat entity
|
||||
list (no edges). Non-critical.
|
||||
|
||||
8. **Prompt assembly** — combine system prompt, graph context, fused episodes,
|
||||
recent episodes, and user message.
|
||||
|
||||
9. **Inference** — send to inference service. `/chat` awaits full response;
|
||||
`/chat/stream` pipes SSE chunks to the client.
|
||||
|
||||
10. **Episode write** — write exchange back to memory with `projectId`.
|
||||
|
||||
11. **Summarisation trigger** — `triggerSummary(session, allEpisodes)` called
|
||||
fire-and-forget. See `summarization.md` for full details.
|
||||
|
||||
12. **Auto-naming** — on first message with no session name, fires a secondary
|
||||
inference call (max 20 tokens, temperature 0.3) to generate a session name.
|
||||
|
||||
### Prompt Structure
|
||||
|
||||
```
|
||||
[Resolved system prompt]
|
||||
|
||||
Here is what you know about entities relevant to this conversation and their connections:
|
||||
- {name} ({type}): {notes}
|
||||
→ {label} {neighbor_name} ({neighbor_type})
|
||||
---
|
||||
Here are some relevant memories from earlier conversations:
|
||||
User: {past user message}
|
||||
Assistant: {past ai response}
|
||||
---
|
||||
Here are some relevant memories from your past conversations:
|
||||
User: {past user message}
|
||||
Assistant: {past ai response}
|
||||
--- End of recent memories ---
|
||||
|
||||
User: {current message}
|
||||
Assistant:
|
||||
```
|
||||
|
||||
The entity block renders the full graph neighborhood — seed entities matched
|
||||
by Qdrant search plus any neighbors pulled in by 1-hop traversal. Each entity
|
||||
shows its `notes` and any outbound relationships with their targets. Neighbor
|
||||
nodes that have no outbound edges within the subgraph appear without connection
|
||||
lines.
|
||||
|
||||
## Summarisation
|
||||
|
||||
After each episode write, `triggerSummary` is called fire-and-forget. It
|
||||
checks token thresholds and episode counts before generating, then stores
|
||||
the result in the memory service.
|
||||
|
||||
> For full details on trigger conditions, prompt format, cumulative updates,
|
||||
> ChatML token stripping, and episode range tracking, see `summarization.md`.
|
||||
|
||||
## SSE Stream Format
|
||||
|
||||
Inference service → orchestration:
|
||||
```
|
||||
data: {"response":"Hello","done":false}
|
||||
data: {"done":true,"model":"gemma-4-26B...gguf","tokenCount":42}
|
||||
data: [DONE]
|
||||
```
|
||||
|
||||
Orchestration → client:
|
||||
```
|
||||
data: {"text":"Hello"}
|
||||
data: {"done":true,"model":"gemma-4-26B...gguf","tokenCount":42}
|
||||
```
|
||||
|
||||
The `[DONE]` sentinel is consumed internally and not forwarded.
|
||||
|
||||
## Models Route
|
||||
|
||||
`GET /models` scans `.gguf` files live from `modelsFolderPath` and merges
|
||||
with `models.json` for metadata. Returns file size in GB.
|
||||
|
||||
`GET /models/props` fetches directly from llama-server. Returns
|
||||
`{ contextWindow, modelAlias }`. Returns `503` if unreachable.
|
||||
|
||||
## Sessions Route Behaviour
|
||||
|
||||
`PATCH /sessions/:sessionId` accepts `name`, `projectId`, or both.
|
||||
Rejects only when neither is provided — allows `useChat` to write project
|
||||
assignment separately from rename operations.
|
||||
|
||||
## Caddy Configuration
|
||||
|
||||
Each route prefix needs a handle block in the Caddyfile on Mini PC 2.
|
||||
**Any new top-level route must be added here AND in `vite.config.js`.**
|
||||
|
||||
```
|
||||
handle /chat* { reverse_proxy localhost:4000 }
|
||||
handle /sessions* { reverse_proxy localhost:4000 }
|
||||
handle /models* { reverse_proxy localhost:4000 }
|
||||
handle /projects* { reverse_proxy localhost:4000 }
|
||||
handle /episodes* { reverse_proxy localhost:4000 }
|
||||
handle /settings* { reverse_proxy localhost:4000 }
|
||||
handle /summaries* { reverse_proxy localhost:4000 }
|
||||
handle /health* { reverse_proxy localhost:4000 }
|
||||
```
|
||||
|
||||
After updating: `caddy reload --config /path/to/Caddyfile`
|
||||
|
||||
> Note: `/graph` routes are on the memory-service (port 3002) and are called
|
||||
> internally by orchestration — they do not need a Caddy entry.
|
||||
|
||||
For all HTTP endpoints, see `api-routes.md`.
|
||||
153
docs/services/retrieval-fusion.md
Normal file
153
docs/services/retrieval-fusion.md
Normal file
@@ -0,0 +1,153 @@
|
||||
# Retrieval Fusion
|
||||
|
||||
**Implementation:** `packages/orchestration-service/src/chat/index.js`
|
||||
**FTS scoping:** `packages/memory-service/src/episodic/index.js`, `src/index.js`
|
||||
**Settings:** `semanticWeight`, `keywordWeight` via `PATCH /settings`
|
||||
|
||||
## Purpose
|
||||
|
||||
Rather than relying solely on Qdrant vector similarity (which finds semantically
|
||||
related content but misses exact keyword matches) or FTS5 keyword search alone
|
||||
(which finds exact matches but not paraphrases), Reciprocal Rank Fusion (RRF)
|
||||
merges the ranked results from both strategies into a single better-ranked list.
|
||||
|
||||
Episodes that rank highly in **both** lists score highest. An episode that is
|
||||
the top semantic match but irrelevant by keyword, or vice versa, scores lower
|
||||
than one that satisfies both.
|
||||
|
||||
## How RRF Works
|
||||
|
||||
For each episode `d`, its fused score is:
|
||||
|
||||
```
|
||||
RRF(d) = w_semantic / (k + rank_semantic(d))
|
||||
+ w_keyword / (k + rank_keyword(d))
|
||||
```
|
||||
|
||||
- `rank_i(d)` — 1-based position of episode `d` in strategy `i`'s result list, where `i` is `semantic` or `keyword` (an episode absent from a list contributes 0 for that term)
|
||||
- `k = 60` — smoothing constant (standard; not exposed in settings)
|
||||
- `w_semantic`, `w_keyword` — user-tunable weights (the defaults for both come from the `RETRIEVAL` constants)
|
||||
|
||||
Setting a weight to `0` removes that strategy's contribution entirely. Setting
|
||||
`keywordWeight` to `0` also short-circuits the FTS network call.
|
||||
|
||||
## Architecture
|
||||
|
||||
Fusion lives in orchestration — the service already coordinates multiple data
|
||||
sources, and fusion is a retrieval strategy, not a storage concern.
|
||||
|
||||
```
|
||||
getFusedEpisodes()
|
||||
├── getSemanticEpisodes() — Qdrant embed+search → fetch full rows by ID
|
||||
│ (existing path, unchanged)
|
||||
└── getFTSResults() — memory-service /episodes/search → full rows directly
|
||||
(skipped entirely if keywordWeight == 0)
|
||||
↓
|
||||
fuseEpisodeResults() — pure RRF, no I/O
|
||||
↓
|
||||
fusedEpisodes[] — top semanticLimit episodes by RRF score
|
||||
```
|
||||
|
||||
### Data Shape Consistency
|
||||
|
||||
Both sides must enter fusion as `Episode[]` — full SQLite row objects with
|
||||
the same shape — and both must be filtered against `recentIds` first:
|
||||
|
||||
- **Semantic path**: `recentIds` filter applied before `getEpisodeById` fetch (existing behaviour)
|
||||
- **FTS path**: full rows returned directly; `recentIds` filter applied in `getFusedEpisodes` after receiving them
|
||||
|
||||
FTS requests `semanticLimit * 2` results to provide headroom for the
|
||||
`recentIds` filter without under-serving the fusion.
|
||||
|
||||
## FTS Session Scoping
|
||||
|
||||
Without scoping, FTS5 searches across all episodes in the database. For
|
||||
context assembly, results must be constrained to the current session or
|
||||
project session pool — the same scope used for Qdrant semantic search.
|
||||
|
||||
`searchEpisodes(query, limit, sessionIds)` in memory-service accepts an
|
||||
optional `sessionIds` array. When provided, the SQL becomes:
|
||||
|
||||
```sql
|
||||
SELECT e.* FROM episodes e
|
||||
JOIN episodes_fts fts ON e.id = fts.rowid
|
||||
WHERE episodes_fts MATCH ?
|
||||
AND e.session_id IN (?, ?, ...)
|
||||
ORDER BY rank
|
||||
LIMIT ?
|
||||
```
|
||||
|
||||
The HTTP endpoint `GET /episodes/search` accepts `sessionIds` as a
|
||||
comma-separated query param: `?q=hello&sessionIds=1,2,3`.
|
||||
|
||||
In orchestration, `ftsSessionIds` is set to:
|
||||
- `projectSessionIds` (all sessions in the project) — if the session belongs to a project
|
||||
- `[session.id]` — otherwise (single session only)
|
||||
|
||||
This mirrors the Qdrant scoping logic exactly.
|
||||
|
||||
## `fuseEpisodeResults` — Implementation Detail
|
||||
|
||||
```js
|
||||
function fuseEpisodeResults(semanticEps, keywordEps, { semanticWeight, keywordWeight, limit }) {
|
||||
const k = RETRIEVAL.RRF_K; // 60
|
||||
const scores = new Map(); // episode.id → { episode, score }
|
||||
|
||||
// Score semantic results (already filtered against recentIds)
|
||||
semanticEps.forEach((ep, i) => {
|
||||
scores.set(ep.id, { episode: ep, score: semanticWeight / (k + i + 1) });
|
||||
});
|
||||
|
||||
// Score + merge keyword results (already filtered against recentIds)
|
||||
keywordEps.forEach((ep, i) => {
|
||||
const contrib = keywordWeight / (k + i + 1);
|
||||
if (scores.has(ep.id)) {
|
||||
scores.get(ep.id).score += contrib; // appears in both — sum scores
|
||||
} else if (contrib > 0) {
|
||||
scores.set(ep.id, { episode: ep, score: contrib }); // FTS-only episode
|
||||
}
|
||||
// contrib == 0 (keywordWeight: 0) → episode not added (guard prevents score-0 bleed-through)
|
||||
});
|
||||
|
||||
return [...scores.values()]
|
||||
.sort((a, b) => b.score - a.score)
|
||||
.slice(0, limit)
|
||||
.map(({ episode }) => episode);
|
||||
}
|
||||
```
|
||||
|
||||
The `else if (contrib > 0)` guard prevents FTS-only episodes from entering
|
||||
the result set with a score of 0 when `keywordWeight` is 0 — verified by
|
||||
the test suite.
|
||||
|
||||
## Settings
|
||||
|
||||
| Setting | Default | Range | Description |
|
||||
|---|---|---|---|
|
||||
| `semanticWeight` | 1.0 | 0–5 | Weight applied to Qdrant semantic results |
|
||||
| `keywordWeight` | 0 | 0–5 | Weight applied to FTS5 keyword results. `0` = disabled |
|
||||
|
||||
Both are readable via `GET /settings` and writable via `PATCH /settings`
|
||||
without a service restart. Changes take effect on the next chat request.
|
||||
|
||||
**To enable keyword search:**
|
||||
```bash
|
||||
curl -X PATCH http://localhost:4000/settings \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"keywordWeight": 1.0}'
|
||||
```
|
||||
|
||||
**To favour keyword matches over semantic:**
|
||||
```bash
|
||||
curl -X PATCH http://localhost:4000/settings \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"semanticWeight": 0.5, "keywordWeight": 2.0}'
|
||||
```
|
||||
|
||||
## Constants (`packages/shared/src/config/constants.js`)
|
||||
|
||||
| Constant | Value | Description |
|
||||
|---|---|---|
|
||||
| `RETRIEVAL.RRF_K` | 60 | RRF smoothing constant — not exposed in settings |
|
||||
| `RETRIEVAL.SEMANTIC_WEIGHT` | 1.0 | Default semantic weight |
|
||||
| `RETRIEVAL.KEYWORD_WEIGHT` | 0 | Default keyword weight (off) |
|
||||
@@ -24,13 +24,40 @@ const DB = getEnv('SQLITE_PATH'); // required — throws if missing
|
||||
|
||||
---
|
||||
|
||||
### `parseRow(row)`
|
||||
|
||||
Parses a SQLite row object, deserialising any JSON-encoded `metadata` fields
|
||||
into plain objects. Returns `null` if the row is `null` or `undefined`.
|
||||
|
||||
```js
|
||||
const { parseRow } = require('@nexusai/shared');
|
||||
const session = parseRow(db.prepare('SELECT * FROM sessions WHERE id = ?').get(id));
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### `formatEpisodeText(userMessage, aiResponse)`
|
||||
|
||||
Combines a user message and AI response into the canonical text format used
|
||||
for embedding:
|
||||
|
||||
```
|
||||
User: {userMessage}
|
||||
Assistant: {aiResponse}
|
||||
```
|
||||
|
||||
Used by the memory service's embedding write path to ensure consistent
|
||||
vector representations across all episodes.
|
||||
|
||||
---
|
||||
|
||||
### Constants
|
||||
|
||||
Tuneable values and shared identifiers are centralised in `constants.js`
|
||||
rather than hardcoded across services. Import the relevant group by name.
|
||||
|
||||
```js
|
||||
const { QDRANT, COLLECTIONS, EPISODIC } = require('@nexusai/shared');
|
||||
const { QDRANT, COLLECTIONS, EPISODIC, LLAMACPP } = require('@nexusai/shared');
|
||||
```
|
||||
|
||||
#### `QDRANT`
|
||||
@@ -40,15 +67,14 @@ embedding model and Qdrant collection setup.
|
||||
|
||||
| Key | Value | Description |
|
||||
|---|---|---|
|
||||
| `DEFAULT_URL` | `http://localhost:6333` | Fallback Qdrant URL if `QDRANT_URL` env var is not set |
|
||||
| `DEFAULT_URL` | `http://localhost:6333` | Fallback Qdrant URL |
|
||||
| `VECTOR_SIZE` | `768` | Output dimensions of `nomic-embed-text` |
|
||||
| `DISTANCE_METRIC` | `'Cosine'` | Similarity metric used for all collections |
|
||||
| `DEFAULT_LIMIT` | `10` | Default top-k for vector searches |
|
||||
|
||||
#### `COLLECTIONS`
|
||||
|
||||
Canonical Qdrant collection names. Used by both the semantic layer and
|
||||
any service that constructs Qdrant queries directly.
|
||||
Canonical Qdrant collection names.
|
||||
|
||||
| Key | Value |
|
||||
|---|---|
|
||||
@@ -64,4 +90,121 @@ Default pagination and result limits for SQLite episode queries.
|
||||
|---|---|---|
|
||||
| `DEFAULT_RECENT_LIMIT` | `10` | Default number of recent episodes to retrieve |
|
||||
| `DEFAULT_PAGE_SIZE` | `20` | Default episodes per page for paginated queries |
|
||||
| `DEFAULT_SEARCH_LIMIT` | `10` | Default number of FTS search results to return |
|
||||
| `DEFAULT_SEARCH_LIMIT` | `10` | Default number of FTS search results to return |
|
||||
| `DEFAULT_OFFSET` | `0` | Default pagination offset |
|
||||
| `DEFAULT_SESSIONS_LIMIT` | `20` | Default number of sessions to return |
|
||||
|
||||
#### `SERVICES`
|
||||
|
||||
Default URLs for inter-service communication. Used as fallback values
|
||||
when the corresponding environment variable is not set.
|
||||
|
||||
| Key | Value | Description |
|
||||
|---|---|---|
|
||||
| `EMBEDDING_URL` | `http://localhost:3003` | Fallback embedding service URL |
|
||||
| `MEMORY_URL` | `http://localhost:3002` | Fallback memory service URL |
|
||||
| `INFERENCE_URL` | `http://localhost:3001` | Fallback inference service URL |
|
||||
|
||||
#### `PORTS`
|
||||
|
||||
Default port numbers for each service.
|
||||
|
||||
| Key | Value |
|
||||
|---|---|
|
||||
| `INFERENCE` | `'3001'` |
|
||||
| `MEMORY` | `'3002'` |
|
||||
| `EMBEDDING` | `'3003'` |
|
||||
| `ORCHESTRATION` | `'4000'` |
|
||||
|
||||
#### `OLLAMA`
|
||||
|
||||
Ollama runtime defaults — used by the Ollama inference provider.
|
||||
|
||||
| Key | Value | Description |
|
||||
|---|---|---|
|
||||
| `DEFAULT_URL` | `http://localhost:11434` | Fallback Ollama URL |
|
||||
| `EMBED_MODEL` | `'nomic-embed-text'` | Default embedding model |
|
||||
| `OLLAMA_MODEL` | `'companion:latest'` | Default chat model |
|
||||
|
||||
#### `LLAMACPP`
|
||||
|
||||
llama.cpp runtime defaults — used by the llama.cpp inference provider.
|
||||
|
||||
| Key | Value | Description |
|
||||
|---|---|---|
|
||||
| `DEFAULT_URL` | `http://localhost:8080` | Fallback llama-server URL |
|
||||
| `DEFAULT_MODEL` | `'local-model'` | Fallback model name (override via `DEFAULT_MODEL` env var) |
|
||||
|
||||
> Always set `DEFAULT_MODEL` in the inference service `.env` to the exact model
|
||||
> name reported by `llama-server` (including `.gguf` extension). The shared
|
||||
> constant is a last-resort fallback only.
|
||||
|
||||
#### `INFERENCE_DEFAULTS`
|
||||
|
||||
Default inference parameters applied when not specified in a request.
|
||||
These are used as fallbacks in `resolveOptions()` in both providers.
|
||||
Orchestration reads live values from `settings.json` and forwards them
|
||||
on every request — these constants are the fallback layer only.
|
||||
|
||||
| Key | Value | Description |
|
||||
|---|---|---|
|
||||
| `TEMPERATURE` | `0.7` | Controls randomness (0 = deterministic, 1 = creative) |
|
||||
| `MAX_TOKENS` | `1024` | Maximum tokens to generate |
|
||||
| `TOP_P` | `0.9` | Nucleus sampling probability mass |
|
||||
| `TOP_K` | `40` | Top-K candidates at each step |
|
||||
| `REPEAT_PENALTY` | `1.1` | Penalty for recently used tokens |
|
||||
| `SEED` | `null` | null = random; set integer for reproducible outputs |
|
||||
|
||||
#### `ORCHESTRATION`
|
||||
|
||||
Orchestration pipeline defaults. Used as fallback values in
|
||||
`config/settings.js` when `settings.json` doesn't contain a key.
|
||||
|
||||
| Key | Value | Description |
|
||||
|---|---|---|
|
||||
| `RECENT_EPISODE_LIMIT` | `5` | Recent episodes to inject into prompt |
|
||||
| `SEMANTIC_LIMIT` | `5` | Semantic search results to inject into prompt |
|
||||
| `SCORE_THRESHOLD` | `0.75` | Minimum similarity score for semantic results |
|
||||
| `ENTITIES_LIMIT` | `5` | Max entity search results to inject into prompt |
|
||||
| `ENTITIES_THRESHOLD` | `0.55` | Minimum similarity score for entity results |
|
||||
| `TEMPERATURE` | `0.7` | Default inference temperature |
|
||||
| `CORS_ORIGIN` | `'http://localhost:5173'` | Fallback allowed CORS origin |
|
||||
| `SYSTEM_PROMPT` | *(see below)* | Default system prompt |
|
||||
|
||||
> `ENTITIES_THRESHOLD` is set to `0.55` — lower than `SCORE_THRESHOLD` because
|
||||
> entity notes generated by a 3B model tend to embed with lower cosine similarity
|
||||
> than full episode text. Tune upward if irrelevant entities appear in context.
|
||||
|
||||
> `repeatPenalty`, `topP`, and `topK` defaults are sourced from
|
||||
> `INFERENCE_DEFAULTS` in `config/settings.js` rather than `ORCHESTRATION`,
|
||||
> since those constants already define the canonical values.
|
||||
|
||||
Default system prompt:
|
||||
> "You are a helpful, context-aware AI assistant. You have access to memories
|
||||
> of past conversations with the user. Use them to provide consistent,
|
||||
> personalised responses."
|
||||
|
||||
#### `SUMMARIES`
|
||||
|
||||
Controls the automatic session summarisation system in `orchestration-service/src/services/summarization.js`.
|
||||
|
||||
| Key | Value | Description |
|
||||
|---|---|---|
|
||||
| `THRESHOLD_TOKENS` | `200` | Minimum total session tokens before summarisation is considered |
|
||||
| `MAX_SUMMARY_TOKENS` | `800` | If the existing summary exceeds this length (measured in characters, not tokens), create a new row instead of updating |
|
||||
| `MIN_EPISODES_SINCE` | `5` | Minimum new episodes since last summary before re-summarising |
|
||||
|
||||
These can be overridden per-deployment via environment variables in the
|
||||
orchestration service `.env`:
|
||||
|
||||
```
|
||||
SUMMARY_THRESHOLD_TOKENS=200
|
||||
SUMMARY_MAX_TOKENS=800
|
||||
SUMMARY_MIN_EPISODES=5
|
||||
```
|
||||
|
||||
#### `SQLITE`
|
||||
|
||||
| Key | Value | Description |
|
||||
|---|---|---|
|
||||
| `DEFAULT_PATH` | `'./data/nexusai.db'` | Fallback SQLite database path |
|
||||
201
docs/services/summarization.md
Normal file
201
docs/services/summarization.md
Normal file
@@ -0,0 +1,201 @@
|
||||
# Summarization
|
||||
|
||||
Session summarization generates rolling plain-text summaries of conversation
|
||||
history, giving the model a condensed view of past context without consuming
|
||||
the full context window with raw episodes.
|
||||
|
||||
**Location:** `packages/orchestration-service/src/services/summarization.js`
|
||||
**Triggered by:** `chat/index.js` after every episode write (fire-and-forget)
|
||||
**Model:** `qwen2.5:3b` via Ollama on Mini PC 1 (192.168.0.81)
|
||||
|
||||
---
|
||||
|
||||
## Trigger Conditions
|
||||
|
||||
`triggerSummary(session, allEpisodes)` calls `maybeSummarize` fire-and-forget.
|
||||
`maybeSummarize` proceeds only when both conditions are met:
|
||||
|
||||
1. Total session token count exceeds `SUMMARIES.THRESHOLD_TOKENS` (default 200)
|
||||
2. At least `SUMMARIES.MIN_EPISODES_SINCE` (default 5) new episodes have
|
||||
accumulated since the last summary
|
||||
|
||||
The token threshold is intentionally low — it ensures summaries start
|
||||
generating early in a session's life rather than only after very long
|
||||
conversations.
|
||||
|
||||
---
|
||||
|
||||
## Summary Rows and Cumulative Updates
|
||||
|
||||
Each session can have multiple summary rows in the `summaries` table.
|
||||
The update strategy depends on the size of the most recent summary:
|
||||
|
||||
| Condition | Action |
|
||||
|---|---|
|
||||
| No existing summary | Generate fresh summary from all episodes |
|
||||
| Latest summary under `MAX_SUMMARY_TOKENS` | Update: summarise new episodes with existing summary as context |
|
||||
| Latest summary over `MAX_SUMMARY_TOKENS` | Create new row: treat as fresh summarisation |
|
||||
|
||||
This produces a chain of summary rows over time. Each row's `episode_range`
|
||||
covers only the episodes summarised in that specific pass (e.g. `259-263`),
|
||||
not all episodes in the session.
|
||||
|
||||
---
|
||||
|
||||
## Ollama Request
|
||||
|
||||
```js
|
||||
{
|
||||
model: EXTRACTION_MODEL, // qwen2.5:3b (set via EXTRACTION_MODEL env var)
|
||||
prompt: buildSummaryPrompt(episodesToSummarize, existingSummary),
|
||||
stream: false,
|
||||
// No format: 'json' — free-text output required for summaries
|
||||
options: {
|
||||
temperature: 0.2,
|
||||
num_predict: 500,
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
`temperature: 0.2` is slightly higher than extraction (0.1) — summaries
|
||||
benefit from some fluency. `num_predict: 500` gives room for 5 thorough
|
||||
sentences without risk of runoff.
|
||||
|
||||
---
|
||||
|
||||
## Prompt Format
|
||||
|
||||
ChatML format — native to qwen2.5:
|
||||
|
||||
```
|
||||
<|im_start|>user
|
||||
Summarize the conversation below in 3-5 sentences.
|
||||
Write in third person. Do not quote directly — paraphrase only.
|
||||
Do not include greetings, sign-offs, or filler. Output only the summary text.
|
||||
|
||||
Conversation:
|
||||
{context}
|
||||
<|im_end|>
|
||||
<|im_start|>assistant
|
||||
```
|
||||
|
||||
For cumulative updates, the instruction and context change:
|
||||
|
||||
```
|
||||
<|im_start|>user
|
||||
Update the summary below to incorporate the new exchanges.
|
||||
Write 3-5 sentences in third person. Do not quote directly — paraphrase only.
|
||||
Do not include greetings, sign-offs, or filler. Output only the updated summary text.
|
||||
|
||||
Previous summary:
|
||||
{existingSummary}
|
||||
|
||||
New exchanges:
|
||||
{context}
|
||||
<|im_end|>
|
||||
<|im_start|>assistant
|
||||
```
|
||||
|
||||
### Input truncation
|
||||
|
||||
Episode context is truncated to `MAX_CHARS = 3000` characters, keeping the
|
||||
most recent exchanges (sliced from the end). This keeps Qwen focused and
|
||||
prevents the prompt from exceeding its effective context window.
|
||||
|
||||
---
|
||||
|
||||
## ChatML Token Stripping
|
||||
|
||||
Qwen occasionally echoes ChatML tokens back into its response. The raw output
|
||||
is cleaned before saving:
|
||||
|
||||
```js
|
||||
const raw = data.response?.trim() ?? '';
|
||||
const content = raw
|
||||
.replace(/<\|im_start\|>.*?<\|im_end\|>/gs, '')
|
||||
.replace(/<\|im_start\|>|<\|im_end\|>|<\|im_sep\|>/g, '')
|
||||
.trim();
|
||||
return content;
|
||||
```
|
||||
|
||||
Without this, leaked tokens get stored in the summary and then injected
|
||||
back into the next summarisation prompt — causing the model to append a new
|
||||
summary after the old one rather than replacing it.
|
||||
|
||||
---
|
||||
|
||||
## Episode Range Tracking
|
||||
|
||||
Each summary row stores `episode_range` as `"firstId-lastId"` covering only
|
||||
the episodes summarised in that pass:
|
||||
|
||||
```js
|
||||
const summarizedIds = episodesToSummarize.map(ep => ep.id).sort((a,b) => a - b);
|
||||
const episodeRange = `${summarizedIds.at(0)}-${summarizedIds.at(-1)}`;
|
||||
```
|
||||
|
||||
This makes SummaryView cards meaningful — "Episodes 259-263" tells you
|
||||
exactly which exchanges that summary covers, rather than always showing
|
||||
the full session range.
|
||||
|
||||
---
|
||||
|
||||
## Summary Storage
|
||||
|
||||
Summaries are written directly to the memory service from orchestration:
|
||||
|
||||
```js
|
||||
// Create new row
|
||||
await fetch(`${MEMORY_URL}/summaries`, {
|
||||
method: 'POST',
|
||||
body: JSON.stringify({ sessionId: session.id, content, tokenCount, episodeRange }),
|
||||
});
|
||||
|
||||
// Update existing row
|
||||
await fetch(`${MEMORY_URL}/summaries/${latest.id}`, {
|
||||
method: 'PATCH',
|
||||
body: JSON.stringify({ content, tokenCount, episodeRange }),
|
||||
});
|
||||
```
|
||||
|
||||
`session.id` here is the internal SQLite integer ID — not the external UUID.
|
||||
It is available directly on the `session` object passed from `chat/index.js`.
|
||||
|
||||
---
|
||||
|
||||
## Client-Side Indicator
|
||||
|
||||
The chat client shows a "Summarising…" spinner in the `ChatWindow` header
|
||||
and on the InfoPanel's Session Memory button while summarisation may be
|
||||
in progress.
|
||||
|
||||
Since summarisation is fire-and-forget with no completion signal back to
|
||||
the client, the indicator is timer-based: it activates when the stream
|
||||
finishes and clears after 8 seconds.
|
||||
|
||||
```js
|
||||
// In App.jsx, watching the streaming state from useChat:
|
||||
useEffect(() => {
|
||||
if (prevStreaming.current && !streaming) {
|
||||
setSummarising(true);
|
||||
const t = setTimeout(() => setSummarising(false), 8000);
|
||||
return () => clearTimeout(t);
|
||||
}
|
||||
prevStreaming.current = streaming;
|
||||
}, [streaming]);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Environment Variables
|
||||
|
||||
Set in `packages/orchestration-service/src/.env`:
|
||||
|
||||
| Variable | Default | Description |
|
||||
|---|---|---|
|
||||
| `EXTRACTION_URL` | `http://localhost:11434` | Ollama instance URL |
|
||||
| `EXTRACTION_MODEL` | `qwen2.5:3b` | Model for summarisation |
|
||||
| `MEMORY_SERVICE_URL` | `http://localhost:3002` | Memory service URL |
|
||||
| `SUMMARY_THRESHOLD_TOKENS` | `200` | Token threshold before summarisation triggers |
|
||||
| `SUMMARY_MAX_TOKENS` | `800` | Max summary length before a new row is created |
|
||||
| `SUMMARY_MIN_EPISODES` | `5` | Min new episodes since last summary before re-summarising |
|
||||
2692
package-lock.json
generated
2692
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
12
packages/chat-client/index.html
Normal file
12
packages/chat-client/index.html
Normal file
@@ -0,0 +1,12 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>NexusAI</title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="root"></div>
|
||||
<script type="module" src="/src/main.jsx"></script>
|
||||
</body>
|
||||
</html>
|
||||
20
packages/chat-client/package.json
Normal file
20
packages/chat-client/package.json
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"name": "@nexusai/chat-client",
|
||||
"version": "1.0.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "vite",
|
||||
"build": "vite build",
|
||||
"preview": "vite preview"
|
||||
},
|
||||
"dependencies": {
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0",
|
||||
"react-markdown": "^10.1.0",
|
||||
"uuid": "^13.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@vitejs/plugin-react": "^4.2.0",
|
||||
"vite": "^5.0.0"
|
||||
}
|
||||
}
|
||||
233
packages/chat-client/src/App.jsx
Normal file
233
packages/chat-client/src/App.jsx
Normal file
@@ -0,0 +1,233 @@
|
||||
import React, { useState, useEffect } from 'react';
|
||||
import ChatWindow from './components/ChatWindow';
|
||||
import InfoPanel from './components/InfoPanel';
|
||||
import Sidebar from './components/Sidebar';
|
||||
import HomeView from './components/HomeView';
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
import { getModelProps } from './api/orchestration';
|
||||
|
||||
/*** View Panels*** */
|
||||
import AllChatsView from './components/AllChatsView';
|
||||
import AllProjectsView from './components/AllProjectsView';
|
||||
import SettingsView from './components/SettingsView';
|
||||
import ProjectView from './components/ProjectView';
|
||||
import MemoryView from './components/MemoryView';
|
||||
import SummaryView from './components/SummaryView';
|
||||
|
||||
/**** useHooks **** */
|
||||
import { useSession } from './hooks/useSession';
|
||||
import { useChat } from './hooks/useChat';
|
||||
import { useModels } from './hooks/useModels';
|
||||
import { useProjects } from './hooks/useProjects';
|
||||
|
||||
// Views where back nav makes sense, and where they go back to
|
||||
// Fallback "back" destinations, keyed by the view being left.
// Only consulted when the navigation history stack is empty; views that are
// not listed fall back to 'home' at the call site.
// Frozen so no component can accidentally mutate the shared table.
const BACK_MAP = Object.freeze({
  'chat': 'home',
  'all-chats': 'home',
  'all-projects': 'home',
  'settings': 'home',
  'project': 'all-projects',
  'memory': 'settings',
  'summaries': 'chat',
});
|
||||
|
||||
export default function App() {
|
||||
const [leftOpen, setLeftOpen] = useState(false); // collapsed on home
|
||||
const [rightOpen, setRightOpen] = useState(false);
|
||||
const { models, selectedModel, setSelectedModel } = useModels();
|
||||
const [view, setView] = useState('home');
|
||||
const [viewHistory, setViewHistory] = useState([]);
|
||||
const [activeProject, setActiveProject] = useState(null);
|
||||
const { projects, refreshProjects } = useProjects();
|
||||
|
||||
// Lifted model props — available to header + SettingsView
|
||||
const [modelProps, setModelProps] = useState(null);
|
||||
useEffect(() => {
|
||||
getModelProps().then(setModelProps).catch(() => {});
|
||||
}, []);
|
||||
|
||||
const {
|
||||
sessions,
|
||||
setSessions,
|
||||
activeSession,
|
||||
messages,
|
||||
loadingHistory,
|
||||
selectSession,
|
||||
createSession,
|
||||
refreshSessions,
|
||||
appendMessage,
|
||||
updateLastMessage,
|
||||
} = useSession();
|
||||
|
||||
const {
|
||||
sendMessage,
|
||||
cancelStream,
|
||||
streaming,
|
||||
lastTokenCount,
|
||||
lastModel,
|
||||
summarising,
|
||||
} = useChat({ activeSession, appendMessage, updateLastMessage, refreshSessions });
|
||||
|
||||
function navigate(nextView) {
|
||||
setViewHistory(prev => [...prev, view]);
|
||||
setView(nextView);
|
||||
// Expand sidebar when leaving home
|
||||
if (view === 'home') setLeftOpen(true);
|
||||
}
|
||||
|
||||
function goBack() {
|
||||
if (viewHistory.length > 0) {
|
||||
const prev = viewHistory[viewHistory.length - 1];
|
||||
setViewHistory(h => h.slice(0, -1));
|
||||
setView(prev);
|
||||
if (prev === 'home') setLeftOpen(false);
|
||||
} else {
|
||||
// Fallback to BACK_MAP
|
||||
const dest = BACK_MAP[view] ?? 'home';
|
||||
setView(dest);
|
||||
if (dest === 'home') setLeftOpen(false);
|
||||
}
|
||||
}
|
||||
|
||||
function handleSendMessage(text) {
|
||||
sendMessage(text, selectedModel, activeSession?.project_id ?? null);
|
||||
}
|
||||
|
||||
function handleSessionsChange(deletedSession) {
|
||||
if (deletedSession?.external_id === activeSession?.external_id) {
|
||||
selectSession(null);
|
||||
}
|
||||
refreshSessions();
|
||||
}
|
||||
|
||||
// Home: create session, navigate to chat, then send after a tick
|
||||
function handleHomeSend(text) {
|
||||
const newSession = createSession(); // ← capture the returned session
|
||||
setViewHistory(prev => [...prev, 'home']);
|
||||
setView('chat');
|
||||
setLeftOpen(true);
|
||||
sendMessage(text, selectedModel, null, newSession); // ← pass directly, no setTimeout needed
|
||||
}
|
||||
|
||||
function handleNewProjectChat(text) {
|
||||
const newSession = {
|
||||
external_id: uuidv4(),
|
||||
metadata: null,
|
||||
isNew: true,
|
||||
project_id: activeProject?.id ?? null,
|
||||
};
|
||||
setSessions(prev => [newSession, ...prev]);
|
||||
selectSession(newSession);
|
||||
setViewHistory(prev => [...prev, view]);
|
||||
setView('chat');
|
||||
setLeftOpen(true);
|
||||
sendMessage(text, selectedModel, activeProject?.id ?? null, newSession); // ← direct, no timeout
|
||||
}
|
||||
|
||||
const canGoBack = view !== 'home';
|
||||
|
||||
return (
|
||||
<div style={{ display: 'flex', height: '100vh', overflow: 'hidden' }}>
|
||||
<Sidebar
|
||||
sessions={sessions}
|
||||
activeSession={activeSession}
|
||||
onSelectSession={session => { selectSession(session); navigate('chat'); }}
|
||||
onNewChat={() => { createSession(); navigate('chat'); }}
|
||||
onNewProject={() => navigate('all-projects')}
|
||||
isOpen={leftOpen}
|
||||
onToggle={() => setLeftOpen(o => !o)}
|
||||
onSessionsChange={handleSessionsChange}
|
||||
onNavigate={navigate}
|
||||
projects={projects}
|
||||
onProjectsChange={refreshProjects}
|
||||
onSelectProject={setActiveProject}
|
||||
/>
|
||||
|
||||
{view === 'home' && (
|
||||
<HomeView
|
||||
onSendMessage={handleHomeSend}
|
||||
loadedModel={modelProps?.modelAlias ?? null}
|
||||
/>
|
||||
)}
|
||||
|
||||
{view === 'chat' && (
|
||||
<ChatWindow
|
||||
messages={messages}
|
||||
loadingHistory={loadingHistory}
|
||||
streaming={streaming}
|
||||
activeSession={activeSession}
|
||||
onSendMessage={handleSendMessage}
|
||||
onCancel={cancelStream}
|
||||
onTogglePanel={() => setRightOpen(o => !o)}
|
||||
onBack={goBack}
|
||||
canGoBack={canGoBack}
|
||||
loadedModel={modelProps?.modelAlias ?? null}
|
||||
summarising={summarising}
|
||||
/>
|
||||
)}
|
||||
|
||||
{view === 'all-chats' && (
|
||||
<AllChatsView
|
||||
onBack={goBack}
|
||||
onSelectSession={session => { selectSession(session); navigate('chat'); }}
|
||||
projects={projects}
|
||||
/>
|
||||
)}
|
||||
|
||||
{view === 'all-projects' && (
|
||||
<AllProjectsView
|
||||
onBack={goBack}
|
||||
onProjectsChange={refreshProjects}
|
||||
onSelectProject={setActiveProject}
|
||||
onNavigate={navigate}
|
||||
/>
|
||||
)}
|
||||
|
||||
{view === 'settings' && (
|
||||
<SettingsView
|
||||
onNavigate={navigate}
|
||||
onBack={goBack}
|
||||
modelProps={modelProps}
|
||||
/>
|
||||
)}
|
||||
|
||||
{view === 'project' && activeProject && (
|
||||
<ProjectView
|
||||
project={activeProject}
|
||||
onNavigate={navigate}
|
||||
onBack={goBack}
|
||||
onSelectSession={selectSession}
|
||||
onNewProjectChat={handleNewProjectChat}
|
||||
onProjectsChange={refreshProjects} // ← add
|
||||
/>
|
||||
)}
|
||||
|
||||
{view === 'memory' && (
|
||||
<MemoryView
|
||||
onNavigate={navigate}
|
||||
onBack={goBack}
|
||||
/>
|
||||
)}
|
||||
|
||||
{view === 'summaries' && (
|
||||
<SummaryView
|
||||
activeSession={activeSession}
|
||||
onBack={goBack}
|
||||
/>
|
||||
)}
|
||||
|
||||
<InfoPanel
|
||||
isOpen={rightOpen}
|
||||
onToggle={() => setRightOpen(o => !o)}
|
||||
activeSession={activeSession}
|
||||
models={models}
|
||||
selectedModel={selectedModel}
|
||||
onModelChange={setSelectedModel}
|
||||
lastModel={lastModel}
|
||||
lastTokenCount={lastTokenCount}
|
||||
summarising={summarising}
|
||||
onViewSummary={() => navigate('summaries')}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
226
packages/chat-client/src/api/orchestration.js
Normal file
226
packages/chat-client/src/api/orchestration.js
Normal file
@@ -0,0 +1,226 @@
|
||||
import { API_DEFAULTS } from "../config/constants";
|
||||
|
||||
|
||||
const BASE_URL = import.meta.env.VITE_ORCHESTRATION_URL ?? '';
|
||||
|
||||
// ── Sessions ────────────────────────────────────────────────
|
||||
|
||||
/**
 * Fetch one page of sessions from the orchestration service.
 *
 * @param {number} [limit] - Page size; defaults to API_DEFAULTS.SESSIONS_LIMIT.
 * @param {number} [offset] - Number of rows to skip; defaults to API_DEFAULTS.OFFSET.
 * @param {*} [projectId=null] - When truthy, sent as the `projectId` query parameter.
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK.
 */
export async function fetchSessions(limit = API_DEFAULTS.SESSIONS_LIMIT, offset = API_DEFAULTS.OFFSET, projectId = null) {
  // Resolved against the page origin so an empty BASE_URL still yields a valid URL.
  const endpoint = new URL(`${BASE_URL}/sessions`, window.location.origin);
  const { searchParams } = endpoint;
  searchParams.set('limit', limit);
  searchParams.set('offset', offset);
  if (projectId) {
    searchParams.set('projectId', projectId);
  }

  const response = await fetch(endpoint.toString());
  if (response.ok) {
    return response.json();
  }
  throw new Error(`Failed to fetch sessions: ${response.status}`);
}
|
||||
|
||||
/**
 * Fetch a page of message history for one session.
 *
 * @param {string} sessionId - Session identifier placed in the URL path.
 * @param {number} [limit] - Page size; defaults to API_DEFAULTS.HISTORY_LIMIT.
 * @param {number} [offset] - Number of rows to skip; defaults to API_DEFAULTS.OFFSET.
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK.
 */
export async function fetchSessionHistory(sessionId, limit = API_DEFAULTS.HISTORY_LIMIT, offset = API_DEFAULTS.OFFSET) {
  // Escape the path segment and let URLSearchParams build the query, so an id
  // containing '/', '?' or '#' cannot change which endpoint is requested.
  const query = new URLSearchParams({ limit: String(limit), offset: String(offset) });
  const res = await fetch(`${BASE_URL}/sessions/${encodeURIComponent(sessionId)}/history?${query}`);
  if (!res.ok) throw new Error(`Failed to fetch history: ${res.status}`);
  return res.json();
}
|
||||
|
||||
// ── Chat ────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Send a chat message and wait for the complete (non-streamed) reply.
 *
 * @param {string} sessionId - Session the message belongs to.
 * @param {string} message - Message text.
 * @param {string} model - Model identifier forwarded to the service.
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK.
 */
export async function sendMessage(sessionId, message, model) {
  const request = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ sessionId, message, model }),
  };
  const response = await fetch(`${BASE_URL}/chat`, request);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`Chat request failed: ${response.status}`);
}
|
||||
|
||||
/**
 * Send a chat message and stream the reply as server-sent events.
 *
 * The response body is read incrementally and split into events on blank
 * lines. For every event, the `data: ` lines are joined and parsed as JSON,
 * then dispatched:
 *   - `text`  → onChunk(text)
 *   - `done`  → onDone({ model, tokenCount })
 *   - `error` → onError(Error)
 *
 * @param {string} sessionId - Session the message belongs to.
 * @param {string} message - Message text.
 * @param {string} model - Requested model; also the fallback for onDone's `model`.
 * @param {{ onChunk: Function, onDone: Function, onError: Function }} handlers
 * @returns {() => void} Call to abort the in-flight request. Aborting does not
 *   trigger onError.
 */
export function streamMessage(sessionId, message, model, { onChunk, onDone, onError }) {
  const controller = new AbortController();

  // Parse one SSE event block and dispatch it to the callbacks.
  const handleEvent = (eventText) => {
    const raw = eventText
      .split('\n')
      .filter(line => line.startsWith('data: '))
      .map(line => line.slice(6))
      .join('\n')
      .trim();

    // Comment/keep-alive events carry no data; '[DONE]' is a sentinel, not JSON.
    if (raw === '' || raw === '[DONE]') return;

    let data;
    try {
      data = JSON.parse(raw);
    } catch (err) {
      console.error('[chat-client] Failed to parse SSE event:', raw, err);
      return;
    }

    // Kept separate from the parse step so a throwing callback is not reported
    // as a malformed event. Still best-effort: one bad callback must not kill
    // the rest of the stream.
    try {
      if (data.text) onChunk(data.text);
      if (data.done) onDone({ model: data.model ?? model, tokenCount: data.tokenCount ?? 0 });
      if (data.error) onError(new Error(data.error));
    } catch (err) {
      console.error('[chat-client] SSE callback threw:', err);
    }
  };

  (async () => {
    try {
      const res = await fetch(`${BASE_URL}/chat/stream`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ sessionId, message, model }),
        signal: controller.signal,
      });

      if (!res.ok) throw new Error(`Stream request failed: ${res.status}`);

      const reader = res.body.getReader();
      const decoder = new TextDecoder();
      let buffer = '';

      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        // Normalise CRLF so events delimited by "\r\n\r\n" are split as well.
        // A "\r\n" cut across two chunks is repaired on the next append because
        // the incomplete tail always stays in `buffer`.
        buffer = (buffer + decoder.decode(value, { stream: true })).replace(/\r\n/g, '\n');

        const events = buffer.split('\n\n');
        buffer = events.pop() ?? ''; // last piece may be an incomplete event

        for (const event of events) handleEvent(event);
      }

      // Flush the decoder and deliver a final event that was not terminated by
      // a blank line (otherwise the closing `done` event could be lost).
      buffer = (buffer + decoder.decode()).replace(/\r\n/g, '\n');
      if (buffer.trim() !== '') handleEvent(buffer);
    } catch (err) {
      if (err.name !== 'AbortError') onError(err);
    }
  })();

  return () => controller.abort();
}
|
||||
/**
 * Fetch the list of models exposed by the orchestration service.
 *
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK.
 */
export async function fetchModels() {
  const res = await fetch(`${BASE_URL}/models`);
  // Message previously read "Failted"; corrected so logs are searchable.
  if (!res.ok) throw new Error(`Failed to fetch models: ${res.status}`);
  return res.json();
}
|
||||
|
||||
/**
 * Patch a session's name and/or project assignment.
 *
 * @param {string} sessionId - Session identifier placed in the URL path.
 * @param {{ name?: *, projectId?: * }} [changes] - Fields to send; a field left
 *   `undefined` is omitted from the JSON body by JSON.stringify.
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK.
 */
export async function updateSession(sessionId, { name, projectId } = {}) {
  const request = {
    method: 'PATCH',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ name, projectId }),
  };
  const response = await fetch(`${BASE_URL}/sessions/${sessionId}`, request);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`Failed to update session: ${response.status}`);
}
|
||||
|
||||
/**
 * Rename a session. Thin wrapper over `updateSession` that sends only `name`.
 *
 * @param {string} sessionId - Session identifier.
 * @param {string} name - New session name.
 * @returns {Promise<*>} Whatever `updateSession` resolves to.
 */
export async function renameSession(sessionId, name) {
  const changes = { name };
  return updateSession(sessionId, changes);
}
|
||||
|
||||
/**
 * Delete a session.
 *
 * @param {string} sessionId - Session identifier placed in the URL path.
 * @returns {Promise<void>} Resolves with no value on success.
 * @throws {Error} When the response status is not OK.
 */
export async function deleteSession(sessionId) {
  const response = await fetch(`${BASE_URL}/sessions/${sessionId}`, { method: 'DELETE' });
  if (response.ok) {
    return;
  }
  throw new Error(`Failed to delete session: ${response.status}`);
}
|
||||
|
||||
/**
 * Fetch all projects.
 *
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK.
 */
export async function fetchProjects() {
  const response = await fetch(`${BASE_URL}/projects`);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`Failed to fetch projects: ${response.status}`);
}
|
||||
|
||||
/**
 * Create a project.
 *
 * @param {{ name: *, description: *, colour: *, icon: *, isolated: * }} project
 *   `isolated` is sent as the integer 1 when truthy and 0 otherwise.
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK.
 */
export async function createProject({ name, description, colour, icon, isolated }) {
  const isolatedFlag = isolated ? 1 : 0;
  const payload = { name, description, colour, icon, isolated: isolatedFlag };

  const response = await fetch(`${BASE_URL}/projects`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  });
  if (response.ok) {
    return response.json();
  }
  throw new Error(`Failed to create project: ${response.status}`);
}
|
||||
|
||||
/**
 * Patch a project with the given fields.
 *
 * @param {*} id - Project identifier placed in the URL path.
 * @param {object} [fields] - Fields to update. The caller's object is copied,
 *   not mutated. When `isolated` is present it is sent as 1 (truthy) or 0.
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK.
 */
export async function updateProject(id, fields = {}) {
  const payload = { ...fields };
  const { isolated } = payload;
  if (isolated !== undefined) {
    // Boolean → integer flag before sending.
    payload.isolated = isolated ? 1 : 0;
  }

  const request = {
    method: 'PATCH',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  };
  const response = await fetch(`${BASE_URL}/projects/${id}`, request);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`Failed to update project: ${response.status}`);
}
|
||||
|
||||
/**
 * Delete a project.
 *
 * @param {*} id - Project identifier placed in the URL path.
 * @returns {Promise<void>} Resolves with no value on success.
 * @throws {Error} When the response status is not OK.
 */
export async function deleteProject(id) {
  const response = await fetch(`${BASE_URL}/projects/${id}`, { method: 'DELETE' });
  if (response.ok) {
    return;
  }
  throw new Error(`Failed to delete project: ${response.status}`);
}
|
||||
|
||||
/**
 * Assign a session to a project by patching only `projectId`.
 *
 * @param {string} sessionId - Session identifier placed in the URL path.
 * @param {*} projectId - Value sent as `projectId` in the JSON body.
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK.
 */
export async function updateSessionProject(sessionId, projectId) {
  const request = {
    method: 'PATCH',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ projectId }),
  };
  const response = await fetch(`${BASE_URL}/sessions/${sessionId}`, request);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`Failed to update session project: ${response.status}`);
}
|
||||
|
||||
/**
 * Fetch a page of episodes, optionally filtered by session and search text.
 *
 * @param {object} [options]
 * @param {number} [options.limit] - Page size; defaults to API_DEFAULTS.EPISODE_LIMIT.
 * @param {number} [options.offset] - Number of rows to skip; defaults to API_DEFAULTS.OFFSET.
 * @param {*} [options.sessionId] - When truthy, sent as the `sessionId` query parameter.
 * @param {string} [options.q] - When truthy, sent as the `q` query parameter.
 * @returns {Promise<*>} The parsed JSON response body ({ episodes, total }).
 * @throws {Error} When the response status is not OK.
 */
export async function getEpisodes({ limit = API_DEFAULTS.EPISODE_LIMIT, offset = API_DEFAULTS.OFFSET, sessionId, q } = {}) {
  // Resolved against the page origin so an empty BASE_URL still yields a valid URL.
  const endpoint = new URL(`${BASE_URL}/episodes`, window.location.origin);
  const { searchParams } = endpoint;
  searchParams.set('limit', limit);
  searchParams.set('offset', offset);
  if (sessionId) {
    searchParams.set('sessionId', sessionId);
  }
  if (q) {
    searchParams.set('q', q);
  }

  const response = await fetch(endpoint.toString());
  if (response.ok) {
    return response.json();
  }
  throw new Error(`Failed to fetch episodes: ${response.status}`);
}
|
||||
|
||||
/**
 * Delete a single episode.
 *
 * @param {*} id - Episode identifier placed in the URL path.
 * @returns {Promise<void>} Resolves with no value on success.
 * @throws {Error} When the response status is not OK.
 */
export async function deleteEpisode(id) {
  const response = await fetch(`${BASE_URL}/episodes/${id}`, { method: 'DELETE' });
  if (response.ok) {
    return;
  }
  throw new Error(`Failed to delete episode: ${response.status}`);
}
|
||||
|
||||
/**
 * Fetch the current settings.
 *
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK.
 */
export async function getSettings() {
  const response = await fetch(`${BASE_URL}/settings`);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`Failed to fetch settings: ${response.status}`);
}
|
||||
|
||||
/**
 * Patch settings with the given updates.
 *
 * @param {*} updates - Value serialised as the JSON request body.
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK.
 */
export async function updateSettings(updates) {
  const request = {
    method: 'PATCH',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(updates),
  };
  const response = await fetch(`${BASE_URL}/settings`, request);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`Failed to update settings: ${response.status}`);
}
|
||||
|
||||
/**
 * Fetch the health report from the `/health/services` endpoint.
 *
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK.
 */
export async function getServiceHealth() {
  const response = await fetch(`${BASE_URL}/health/services`);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`Failed to fetch health: ${response.status}`);
}
|
||||
|
||||
/**
 * Fetch model properties from the `/models/props` endpoint.
 *
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK; the message carries the
 *   HTTP status, like every other helper in this module.
 */
export async function getModelProps() {
  const res = await fetch(`${BASE_URL}/models/props`);
  if (!res.ok) throw new Error(`Failed to fetch model props: ${res.status}`);
  return res.json();
}
|
||||
|
||||
/**
 * Fetch the summaries stored for one session.
 *
 * @param {*} sessionId - Session identifier placed in the URL path.
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK.
 */
export async function fetchSessionSummaries(sessionId) {
  const response = await fetch(`${BASE_URL}/summaries/session/${sessionId}`);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`Failed to fetch summaries: ${response.status}`);
}
|
||||
|
||||
/**
 * Ask the service to generate a summary for a project.
 *
 * @param {*} projectId - Project identifier placed in the URL path.
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK.
 */
export async function generateProjectSummary(projectId) {
  const endpoint = `${BASE_URL}/summaries/project/${projectId}/generate`;
  const response = await fetch(endpoint, { method: 'POST' });
  if (response.ok) {
    return response.json();
  }
  throw new Error(`Failed to generate project summary: ${response.status}`);
}
|
||||
|
||||
/**
 * Fetch the overview summary of a project.
 *
 * @param {*} projectId - Project identifier placed in the URL path.
 * @returns {Promise<*>} The parsed JSON response body; per the original
 *   comment this is `null` when no overview exists yet.
 * @throws {Error} When the response status is not OK.
 */
export async function fetchProjectOverviewSummary(projectId) {
  const endpoint = `${BASE_URL}/summaries/project/${projectId}/overview`;
  const response = await fetch(endpoint);
  if (response.ok) {
    return response.json(); // null if none exists yet
  }
  throw new Error(`Failed to fetch project overview: ${response.status}`);
}
|
||||
274
packages/chat-client/src/components/AllChatsView.jsx
Normal file
274
packages/chat-client/src/components/AllChatsView.jsx
Normal file
@@ -0,0 +1,274 @@
|
||||
import React, { useState, useEffect } from 'react';
|
||||
import { fetchSessions, deleteSession } from '../api/orchestration';
|
||||
import { CLIENT_DEFAULTS } from '../config/constants';
|
||||
|
||||
|
||||
const PAGE_SIZE = CLIENT_DEFAULTS.PAGE_SIZE;
|
||||
|
||||
export default function AllChatsView({ onSelectSession, onBack, projects }) {
|
||||
const [sessions, setSessions] = useState([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [page, setPage] = useState(0);
|
||||
const [total, setTotal] = useState(0);
|
||||
const [selected, setSelected] = useState(new Set());
|
||||
const [confirmOpen, setConfirmOpen] = useState(false);
|
||||
const [deleting, setDeleting] = useState(false);
|
||||
|
||||
useEffect(() => {
|
||||
loadPage(page);
|
||||
}, [page]);
|
||||
|
||||
async function loadPage(p) {
|
||||
setLoading(true);
|
||||
setSelected(new Set());
|
||||
try {
|
||||
const data = await fetchSessions(PAGE_SIZE, p * PAGE_SIZE);
|
||||
setSessions(data);
|
||||
setTotal(data.length === PAGE_SIZE ? (p + 2) * PAGE_SIZE : p * PAGE_SIZE + data.length);
|
||||
} catch (err) {
|
||||
console.error('[AllChatsView] Failed to load sessions:', err.message);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
}
|
||||
|
||||
function toggleSelect(id) {
|
||||
setSelected(prev => {
|
||||
const next = new Set(prev);
|
||||
next.has(id) ? next.delete(id) : next.add(id);
|
||||
return next;
|
||||
});
|
||||
}
|
||||
|
||||
function toggleSelectAll() {
|
||||
if (selected.size === sessions.length) {
|
||||
setSelected(new Set());
|
||||
} else {
|
||||
setSelected(new Set(sessions.map(s => s.external_id)));
|
||||
}
|
||||
}
|
||||
|
||||
async function handleBulkDelete() {
|
||||
setDeleting(true);
|
||||
try {
|
||||
await Promise.all([...selected].map(id => deleteSession(id)));
|
||||
setConfirmOpen(false);
|
||||
await loadPage(page);
|
||||
} catch (err) {
|
||||
console.error('[AllChatsView] Bulk delete failed:', err.message);
|
||||
} finally {
|
||||
setDeleting(false);
|
||||
}
|
||||
}
|
||||
|
||||
function formatTimestamp(ts) {
|
||||
if (!ts) return '—';
|
||||
const date = new Date(ts * 1000);
|
||||
const now = new Date();
|
||||
const diffMs = now - date;
|
||||
const diffMins = Math.floor(diffMs / 60000);
|
||||
const diffHours = Math.floor(diffMs / 3600000);
|
||||
const diffDays = Math.floor(diffMs / 86400000);
|
||||
|
||||
if (diffMins < 1) return 'Just now';
|
||||
if (diffMins < 60) return `${diffMins}m ago`;
|
||||
if (diffHours < 24) return `${diffHours}h ago`;
|
||||
if (diffDays === 1) return 'Yesterday';
|
||||
return date.toLocaleDateString([], { month: 'short', day: 'numeric', year: 'numeric' });
|
||||
}
|
||||
|
||||
function getProject(projectId) {
|
||||
if (!projectId || !projects) return null;
|
||||
return projects.find(p => p.id === projectId) ?? null;
|
||||
}
|
||||
|
||||
const totalPages = Math.ceil(total / PAGE_SIZE);
|
||||
const allSelected = sessions.length > 0 && selected.size === sessions.length;
|
||||
|
||||
return (
|
||||
<div className="flex-col flex-1 overflow-hidden" style={{ background: 'var(--bg-base)' }}>
|
||||
|
||||
{/* Header */}
|
||||
<div className="panel-header" style={{ padding: '0 8px 0 8px', justifyContent: 'space-between' }}>
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: '4px' }}>
|
||||
<button className="btn-icon" onClick={onBack} title="Back" style={{ fontSize: '16px', padding: '4px 8px' }}>←</button>
|
||||
<span className="text-base" style={{ fontWeight: 500, color: 'var(--text-secondary)' }}>All Chats</span>
|
||||
</div>
|
||||
{selected.size > 0 && (
|
||||
<button
|
||||
onClick={() => setConfirmOpen(true)}
|
||||
className="btn-reset text-xs"
|
||||
style={{
|
||||
padding: '4px 10px',
|
||||
borderRadius: 'var(--radius-md)',
|
||||
background: '#c0392b22',
|
||||
color: '#ff6b6b',
|
||||
border: '1px solid #c0392b55',
|
||||
}}
|
||||
>
|
||||
Delete {selected.size} selected
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Table */}
|
||||
<div className="flex-1 scroll-y" style={{ padding: '16px 24px' }}>
|
||||
{loading ? (
|
||||
<div className="text-base text-muted" style={{ padding: '40px', textAlign: 'center' }}>
|
||||
Loading...
|
||||
</div>
|
||||
) : (
|
||||
<table style={{ width: '100%', borderCollapse: 'collapse' }}>
|
||||
<thead>
|
||||
<tr style={{ borderBottom: '1px solid var(--border)' }}>
|
||||
<th style={{ width: '36px', padding: '8px 0' }}>
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={allSelected}
|
||||
onChange={toggleSelectAll}
|
||||
style={{ cursor: 'pointer', accentColor: 'var(--accent-hover)' }}
|
||||
/>
|
||||
</th>
|
||||
<th className="label-upper" style={{ textAlign: 'left', padding: '8px 12px' }}>Name</th>
|
||||
<th className="label-upper" style={{ textAlign: 'left', padding: '8px 12px', width: '130px' }}>Project</th>
|
||||
<th className="label-upper" style={{ textAlign: 'right', padding: '8px 0', width: '110px' }}>Last Active</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{sessions.map(session => {
|
||||
const isSelected = selected.has(session.external_id);
|
||||
const project = getProject(session.project_id);
|
||||
return (
|
||||
<tr
|
||||
key={session.external_id}
|
||||
style={{
|
||||
borderBottom: '1px solid var(--border)',
|
||||
background: isSelected ? 'var(--bg-elevated)' : 'transparent',
|
||||
transition: 'background 0.1s',
|
||||
}}
|
||||
onMouseEnter={e => { if (!isSelected) e.currentTarget.style.background = 'var(--bg-surface)'; }}
|
||||
onMouseLeave={e => { if (!isSelected) e.currentTarget.style.background = 'transparent'; }}
|
||||
>
|
||||
<td style={{ padding: '10px 0', width: '36px' }}>
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={isSelected}
|
||||
onChange={() => toggleSelect(session.external_id)}
|
||||
style={{ cursor: 'pointer', accentColor: 'var(--accent-hover)' }}
|
||||
/>
|
||||
</td>
|
||||
<td style={{ padding: '10px 12px' }}>
|
||||
<button
|
||||
className="btn-reset text-base"
|
||||
onClick={() => onSelectSession(session)}
|
||||
style={{ color: 'var(--text-primary)', textAlign: 'left' }}
|
||||
>
|
||||
{session.name || session.external_id}
|
||||
</button>
|
||||
</td>
|
||||
<td style={{ padding: '10px 12px' }}>
|
||||
{project ? (
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: '6px' }}>
|
||||
<div style={{
|
||||
width: '6px', height: '6px', borderRadius: '50%', flexShrink: 0,
|
||||
background: project.colour ?? 'var(--accent)',
|
||||
}} />
|
||||
<span className="text-xs text-muted truncate" style={{ maxWidth: '90px' }}>
|
||||
{project.name}
|
||||
</span>
|
||||
</div>
|
||||
) : (
|
||||
<span className="text-xs text-muted">—</span>
|
||||
)}
|
||||
</td>
|
||||
<td className="text-xs text-muted" style={{ textAlign: 'right', padding: '10px 0' }}>
|
||||
{formatTimestamp(session.updated_at)}
|
||||
</td>
|
||||
</tr>
|
||||
);
|
||||
})}
|
||||
|
||||
{sessions.length === 0 && (
|
||||
<tr>
|
||||
<td colSpan={4} className="text-base text-muted"
|
||||
style={{ textAlign: 'center', padding: '40px' }}>
|
||||
No conversations yet
|
||||
</td>
|
||||
</tr>
|
||||
)}
|
||||
</tbody>
|
||||
</table>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Pagination */}
|
||||
{totalPages > 1 && (
|
||||
<div className="flex items-center" style={{
|
||||
borderTop: '1px solid var(--border)',
|
||||
padding: '10px 24px',
|
||||
gap: '12px',
|
||||
flexShrink: 0,
|
||||
justifyContent: 'flex-end',
|
||||
}}>
|
||||
<span className="text-xs text-muted">
|
||||
Page {page + 1} of {totalPages}
|
||||
</span>
|
||||
<button
|
||||
className="btn-icon"
|
||||
onClick={() => setPage(p => p - 1)}
|
||||
disabled={page === 0}
|
||||
style={{ fontSize: '14px' }}
|
||||
>‹</button>
|
||||
<button
|
||||
className="btn-icon"
|
||||
onClick={() => setPage(p => p + 1)}
|
||||
disabled={(page + 1) * PAGE_SIZE >= total}
|
||||
style={{ fontSize: '14px' }}
|
||||
>›</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Bulk delete confirmation dialog */}
|
||||
{confirmOpen && (
|
||||
<div onClick={() => setConfirmOpen(false)} style={{
|
||||
position: 'fixed', inset: 0,
|
||||
background: 'rgba(0,0,0,0.5)',
|
||||
display: 'flex', alignItems: 'center', justifyContent: 'center',
|
||||
zIndex: 100,
|
||||
}}>
|
||||
<div onClick={e => e.stopPropagation()} style={{
|
||||
background: 'var(--bg-surface)',
|
||||
border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-lg)',
|
||||
padding: '24px', width: '360px',
|
||||
display: 'flex', flexDirection: 'column', gap: '16px',
|
||||
}}>
|
||||
<h2 style={{ fontSize: '15px', fontWeight: 600, color: 'var(--text-primary)' }}>
|
||||
Delete {selected.size} conversation{selected.size !== 1 ? 's' : ''}?
|
||||
</h2>
|
||||
<p className="text-sm text-secondary">
|
||||
This will permanently remove all selected conversations and their messages. This cannot be undone.
|
||||
</p>
|
||||
<div className="flex" style={{ gap: '8px', justifyContent: 'flex-end' }}>
|
||||
<button
|
||||
className="btn-reset text-base text-muted"
|
||||
onClick={() => setConfirmOpen(false)}
|
||||
style={{ padding: '8px 14px', borderRadius: 'var(--radius-md)' }}
|
||||
>Cancel</button>
|
||||
<button
|
||||
className="btn-reset text-base"
|
||||
onClick={handleBulkDelete}
|
||||
disabled={deleting}
|
||||
style={{
|
||||
padding: '8px 16px', borderRadius: 'var(--radius-md)',
|
||||
background: deleting ? 'var(--bg-elevated)' : '#c0392b',
|
||||
color: deleting ? 'var(--text-muted)' : 'white',
|
||||
}}
|
||||
>{deleting ? 'Deleting...' : 'Delete'}</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
166
packages/chat-client/src/components/AllProjectsView.jsx
Normal file
166
packages/chat-client/src/components/AllProjectsView.jsx
Normal file
@@ -0,0 +1,166 @@
|
||||
import React, { useState, useEffect } from 'react';
|
||||
import ProjectModal from './ProjectModal';
|
||||
import { fetchProjects, createProject, updateProject, deleteProject } from '../api/orchestration';
|
||||
|
||||
|
||||
export default function AllProjectsView({ onProjectsChange, onBack, onSelectProject, onNavigate }) {
|
||||
const [projects, setProjects] = useState([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [modal, setModal] = useState(null); // { mode, project? }
|
||||
|
||||
useEffect(() => { load(); }, []);
|
||||
|
||||
async function load() {
|
||||
setLoading(true);
|
||||
try {
|
||||
setProjects(await fetchProjects());
|
||||
} catch (err) {
|
||||
console.error('[AllProjectsView] Failed to load:', err.message);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
}
|
||||
|
||||
async function handleSave({ name, description, colour, icon }) {
|
||||
try {
|
||||
if (modal.mode === 'create') {
|
||||
await createProject({ name, description, colour, icon });
|
||||
} else {
|
||||
await updateProject(modal.project.id, { name, description, colour, icon });
|
||||
}
|
||||
await load();
|
||||
onProjectsChange?.(); // add this
|
||||
} catch (err) {
|
||||
console.error('[AllProjectsView] Save failed:', err.message);
|
||||
}
|
||||
}
|
||||
|
||||
async function handleDelete(id) {
|
||||
try {
|
||||
await deleteProject(id);
|
||||
await load();
|
||||
onProjectsChange?.(); // add this
|
||||
} catch (err) {
|
||||
console.error('[AllProjectsView] Delete failed:', err.message);
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="flex-col flex-1 overflow-hidden" style={{ background: 'var(--bg-base)' }}>
|
||||
|
||||
{/* Header */}
|
||||
<div className="panel-header" style={{ padding: '0 8px 0 8px', justifyContent: 'space-between' }}>
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: '4px' }}>
|
||||
<button className="btn-icon" onClick={onBack} title="Back" style={{ fontSize: '16px', padding: '4px 8px' }}>←</button>
|
||||
<span className="text-base" style={{ fontWeight: 500, color: 'var(--text-secondary)' }}>All Projects</span>
|
||||
</div>
|
||||
<button
|
||||
className="btn-primary"
|
||||
onClick={() => setModal({ mode: 'create' })}
|
||||
style={{ padding: '5px 12px', fontSize: '12px' }}
|
||||
>
|
||||
+ New Project
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Tile grid */}
|
||||
<div className="flex-1 scroll-y" style={{ padding: '24px' }}>
|
||||
{loading ? (
|
||||
<div className="text-base text-muted" style={{ textAlign: 'center', padding: '40px' }}>
|
||||
Loading...
|
||||
</div>
|
||||
) : (
|
||||
<div style={{
|
||||
display: 'grid',
|
||||
gridTemplateColumns: 'repeat(auto-fill, minmax(180px, 1fr))',
|
||||
gap: '16px',
|
||||
}}>
|
||||
{projects.map(project => (
|
||||
<ProjectTile
|
||||
key={project.id}
|
||||
project={project}
|
||||
onSelect={() => { onSelectProject(project); onNavigate('project'); }}
|
||||
onEdit={() => setModal({ mode: 'edit', project })}
|
||||
onDelete={() => setModal({ mode: 'confirm-delete', project })}
|
||||
/>
|
||||
))}
|
||||
|
||||
{projects.length === 0 && (
|
||||
<div className="text-base text-muted" style={{
|
||||
gridColumn: '1 / -1', textAlign: 'center', padding: '60px 0',
|
||||
}}>
|
||||
No projects yet — create one to get started
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{modal && (
|
||||
<ProjectModal
|
||||
project={modal.project}
|
||||
mode={modal.mode}
|
||||
onSave={handleSave}
|
||||
onDelete={handleDelete}
|
||||
onClose={() => setModal(null)}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function ProjectTile({ project, onSelect, onEdit, onDelete }) {
|
||||
const [hovered, setHovered] = useState(false);
|
||||
|
||||
return (
|
||||
<div
|
||||
onClick={onSelect}
|
||||
onMouseEnter={() => setHovered(true)}
|
||||
onMouseLeave={() => setHovered(false)}
|
||||
style={{
|
||||
background: 'var(--bg-surface)',
|
||||
border: `1px solid ${hovered ? 'var(--accent)' : 'var(--border)'}`,
|
||||
borderRadius: 'var(--radius-lg)',
|
||||
padding: '16px',
|
||||
display: 'flex', flexDirection: 'column', gap: '8px',
|
||||
transition: 'border-color 0.15s',
|
||||
position: 'relative',
|
||||
minHeight: '100px',
|
||||
cursor: 'pointer',
|
||||
}}
|
||||
>
|
||||
{/* Colour accent bar */}
|
||||
<div style={{
|
||||
position: 'absolute', top: 0, left: 0, right: 0,
|
||||
height: '3px',
|
||||
background: project.colour ?? 'var(--accent)',
|
||||
borderRadius: 'var(--radius-lg) var(--radius-lg) 0 0',
|
||||
}} />
|
||||
|
||||
<span className="text-base truncate" style={{
|
||||
fontWeight: 500, color: 'var(--text-primary)', marginTop: '4px',
|
||||
}}>
|
||||
{project.name}
|
||||
</span>
|
||||
|
||||
{project.description && (
|
||||
<span className="text-xs text-muted" style={{
|
||||
display: '-webkit-box', WebkitLineClamp: 2,
|
||||
WebkitBoxOrient: 'vertical', overflow: 'hidden',
|
||||
}}>
|
||||
{project.description}
|
||||
</span>
|
||||
)}
|
||||
|
||||
{/* Action buttons — appear on hover */}
|
||||
{hovered && (
|
||||
<div className="flex" style={{ gap: '4px', marginTop: 'auto', justifyContent: 'flex-end' }}>
|
||||
<button className="btn-icon" onClick={e => { e.stopPropagation(); onEdit(); }}
|
||||
title="Edit" style={{ fontSize: '12px' }}>✎</button>
|
||||
<button className="btn-icon" onClick={e => { e.stopPropagation(); onDelete(); }}
|
||||
title="Delete" style={{ fontSize: '12px', color: '#ff6b6b' }}>✕</button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
206
packages/chat-client/src/components/ChatWindow.jsx
Normal file
206
packages/chat-client/src/components/ChatWindow.jsx
Normal file
@@ -0,0 +1,206 @@
|
||||
import React, { useEffect, useRef } from 'react';
|
||||
import MessageBubble from './MessageBubble';
|
||||
|
||||
export default function ChatWindow({
|
||||
messages,
|
||||
loadingHistory,
|
||||
streaming,
|
||||
onSendMessage,
|
||||
onCancel,
|
||||
activeSession,
|
||||
onTogglePanel,
|
||||
onBack,
|
||||
canGoBack,
|
||||
loadedModel,
|
||||
summarising,
|
||||
}) {
|
||||
const bottomRef = useRef(null);
|
||||
const inputRef = useRef(null);
|
||||
const [input, setInput] = React.useState('');
|
||||
|
||||
useEffect(() => {
|
||||
bottomRef.current?.scrollIntoView({ behavior: 'smooth' });
|
||||
}, [messages]);
|
||||
|
||||
function handleSend() {
|
||||
const text = input.trim();
|
||||
if (!text || streaming) return;
|
||||
setInput('');
|
||||
onSendMessage(text);
|
||||
}
|
||||
|
||||
function handleKeyDown(e) {
|
||||
if (e.key === 'Enter' && !e.shiftKey) {
|
||||
e.preventDefault();
|
||||
handleSend();
|
||||
}
|
||||
}
|
||||
|
||||
// Trim .gguf for display
|
||||
const modelLabel = loadedModel ? loadedModel.replace('.gguf', '') : null;
|
||||
|
||||
return (
|
||||
<div className="flex-col flex-1 overflow-hidden" style={{ background: 'var(--bg-base)' }}>
|
||||
|
||||
{/* Header */}
|
||||
<div className="panel-header" style={{ padding: '0 12px 0 8px', justifyContent: 'space-between' }}>
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: '4px', minWidth: 0 }}>
|
||||
{/* Back button */}
|
||||
{canGoBack && (
|
||||
<button
|
||||
className="btn-icon"
|
||||
onClick={onBack}
|
||||
title="Go back"
|
||||
style={{ flexShrink: 0, fontSize: '16px', padding: '4px 8px' }}
|
||||
>←</button>
|
||||
)}
|
||||
{/* Session name */}
|
||||
<span className="text-base text-secondary truncate">
|
||||
{activeSession ? (activeSession.name || activeSession.external_id) : 'New chat'}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: '8px', flexShrink: 0 }}>
|
||||
{/* Loaded model pill */}
|
||||
{modelLabel && (
|
||||
<span style={{
|
||||
fontSize: '11px',
|
||||
color: 'var(--text-muted)',
|
||||
background: 'var(--bg-elevated)',
|
||||
border: '1px solid var(--border)',
|
||||
borderRadius: '999px',
|
||||
padding: '2px 10px',
|
||||
maxWidth: '200px',
|
||||
overflow: 'hidden',
|
||||
textOverflow: 'ellipsis',
|
||||
whiteSpace: 'nowrap',
|
||||
}}>
|
||||
{modelLabel}
|
||||
</span>
|
||||
)}
|
||||
{!modelLabel && (
|
||||
<span style={{
|
||||
fontSize: '11px',
|
||||
color: 'var(--text-muted)',
|
||||
fontStyle: 'italic',
|
||||
}}>
|
||||
No model loaded
|
||||
</span>
|
||||
)}
|
||||
{summarising && (
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: '6px' }}>
|
||||
<div style={{
|
||||
width: '10px', height: '10px', borderRadius: '50%',
|
||||
border: '2px solid var(--accent)',
|
||||
borderTopColor: 'transparent',
|
||||
animation: 'spin 0.7s linear infinite',
|
||||
flexShrink: 0,
|
||||
}} />
|
||||
<span style={{ fontSize: '11px', color: 'var(--text-muted)', whiteSpace: 'nowrap' }}>
|
||||
Summarising…
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
<button className="btn-icon" onClick={onTogglePanel} title="Session info">⊹</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Message thread */}
|
||||
<div className="flex-1 scroll-y" style={{ padding: '20px 0' }}>
|
||||
{!activeSession && (
|
||||
<div className="flex-col items-center justify-center" style={{
|
||||
height: '100%',
|
||||
color: 'var(--text-muted)',
|
||||
gap: '12px',
|
||||
}}>
|
||||
<div style={{ fontSize: '32px', opacity: 0.4 }}>✦</div>
|
||||
<p className="text-base">Start typing to begin</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{loadingHistory && (
|
||||
<div className="flex justify-center text-muted" style={{ padding: '40px', fontSize: '13px' }}>
|
||||
Loading history...
|
||||
</div>
|
||||
)}
|
||||
|
||||
{!loadingHistory && messages.map(msg => (
|
||||
<MessageBubble key={msg.id} message={msg} />
|
||||
))}
|
||||
|
||||
<div ref={bottomRef} />
|
||||
</div>
|
||||
|
||||
{/* Input bar */}
|
||||
<div style={{
|
||||
borderTop: '1px solid var(--border)',
|
||||
padding: '12px 16px',
|
||||
background: 'var(--bg-surface)',
|
||||
flexShrink: 0,
|
||||
}}>
|
||||
<div className="flex items-end" style={{
|
||||
gap: '10px',
|
||||
background: 'var(--bg-elevated)',
|
||||
border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-lg)',
|
||||
padding: '8px 12px',
|
||||
}}>
|
||||
<textarea
|
||||
ref={inputRef}
|
||||
value={input}
|
||||
onChange={e => setInput(e.target.value)}
|
||||
onKeyDown={handleKeyDown}
|
||||
placeholder="Message NexusAI..."
|
||||
rows={1}
|
||||
style={{
|
||||
flex: 1,
|
||||
background: 'transparent',
|
||||
border: 'none',
|
||||
outline: 'none',
|
||||
color: 'var(--text-primary)',
|
||||
fontSize: '14px',
|
||||
lineHeight: '1.6',
|
||||
resize: 'none',
|
||||
fontFamily: 'inherit',
|
||||
maxHeight: '120px',
|
||||
overflowY: 'auto',
|
||||
}}
|
||||
onInput={e => {
|
||||
e.target.style.height = 'auto';
|
||||
e.target.style.height = `${e.target.scrollHeight}px`;
|
||||
}}
|
||||
/>
|
||||
|
||||
{streaming ? (
|
||||
<button onClick={onCancel} className="btn-reset" style={{
|
||||
background: 'var(--text-muted)',
|
||||
borderRadius: 'var(--radius-md)',
|
||||
width: '32px',
|
||||
height: '32px',
|
||||
flexShrink: 0,
|
||||
color: 'white',
|
||||
fontSize: '12px',
|
||||
}}>■</button>
|
||||
) : (
|
||||
<button
|
||||
onClick={handleSend}
|
||||
disabled={!input.trim()}
|
||||
className="btn-primary"
|
||||
style={{
|
||||
width: '32px',
|
||||
height: '32px',
|
||||
flexShrink: 0,
|
||||
fontSize: '16px',
|
||||
border: '1px solid var(--border)',
|
||||
}}
|
||||
>↑</button>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<p className="text-xs text-muted" style={{ textAlign: 'center', marginTop: '8px' }}>
|
||||
Enter to send · Shift+Enter for new line
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
149
packages/chat-client/src/components/HomeView.jsx
Normal file
149
packages/chat-client/src/components/HomeView.jsx
Normal file
@@ -0,0 +1,149 @@
|
||||
import React, { useState } from 'react';
|
||||
|
||||
/**
 * Pick the greeting word for a time of day, based on the local hour.
 *
 * @param {Date} [now=new Date()] - Moment to greet for; defaults to the current time.
 * @returns {string} 'Morning' before 12:00, 'Afternoon' before 18:00, otherwise 'Evening'.
 */
function getGreeting(now = new Date()) {
  const hour = now.getHours();
  if (hour < 12) return 'Morning';
  if (hour < 18) return 'Afternoon';
  return 'Evening';
}
|
||||
|
||||
// Starter prompts shown as pill buttons under the HomeView input.
// Each entry becomes { label, icon }; clicking a pill copies `label` into the input.
const QUICK_ACTIONS = [
  ['Summarise something', '◈'],
  ['Help me write', '✦'],
  ['Explain a concept', '◎'],
  ['Debug my code', '</>'],
].map(([label, icon]) => ({ label, icon }));
|
||||
|
||||
export default function HomeView({ onSendMessage, loadedModel }) {
|
||||
const [input, setInput] = useState('');
|
||||
|
||||
function handleSend() {
|
||||
const text = input.trim();
|
||||
if (!text) return;
|
||||
setInput('');
|
||||
onSendMessage(text);
|
||||
}
|
||||
|
||||
function handleKeyDown(e) {
|
||||
if (e.key === 'Enter' && !e.shiftKey) {
|
||||
e.preventDefault();
|
||||
handleSend();
|
||||
}
|
||||
}
|
||||
|
||||
const modelLabel = loadedModel ? loadedModel.replace('.gguf', '') : null;
|
||||
|
||||
return (
|
||||
<div className="flex-col flex-1 overflow-hidden" style={{
|
||||
background: 'var(--bg-base)',
|
||||
alignItems: 'center',
|
||||
justifyContent: 'center',
|
||||
gap: '32px',
|
||||
}}>
|
||||
|
||||
{/* Greeting */}
|
||||
<div style={{ textAlign: 'center' }}>
|
||||
<h1 style={{
|
||||
fontSize: '32px',
|
||||
fontWeight: 600,
|
||||
color: 'var(--text-primary)',
|
||||
letterSpacing: '-0.5px',
|
||||
marginBottom: '8px',
|
||||
}}>
|
||||
{getGreeting()}, Tim
|
||||
</h1>
|
||||
<p className="text-sm text-muted">
|
||||
{modelLabel ? `Running ${modelLabel}` : 'No model loaded'}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{/* Input */}
|
||||
<div style={{ width: '100%', maxWidth: '580px', padding: '0 24px' }}>
|
||||
<div style={{
|
||||
background: 'var(--bg-elevated)',
|
||||
border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-lg)',
|
||||
padding: '12px 14px',
|
||||
}}>
|
||||
<textarea
|
||||
value={input}
|
||||
onChange={e => setInput(e.target.value)}
|
||||
onKeyDown={handleKeyDown}
|
||||
placeholder="How can I help you today?"
|
||||
rows={1}
|
||||
autoFocus
|
||||
style={{
|
||||
width: '100%',
|
||||
background: 'transparent',
|
||||
border: 'none',
|
||||
outline: 'none',
|
||||
color: 'var(--text-primary)',
|
||||
fontSize: '14px',
|
||||
lineHeight: '1.6',
|
||||
resize: 'none',
|
||||
fontFamily: 'inherit',
|
||||
maxHeight: '120px',
|
||||
overflowY: 'auto',
|
||||
}}
|
||||
onInput={e => {
|
||||
e.target.style.height = 'auto';
|
||||
e.target.style.height = `${e.target.scrollHeight}px`;
|
||||
}}
|
||||
/>
|
||||
<div style={{ display: 'flex', justifyContent: 'flex-end', marginTop: '8px' }}>
|
||||
<button
|
||||
onClick={handleSend}
|
||||
disabled={!input.trim()}
|
||||
className="btn-primary"
|
||||
style={{
|
||||
width: '32px', height: '32px',
|
||||
fontSize: '16px',
|
||||
border: '1px solid var(--border)',
|
||||
}}
|
||||
>↑</button>
|
||||
</div>
|
||||
</div>
|
||||
<p className="text-xs text-muted" style={{ textAlign: 'center', marginTop: '8px' }}>
|
||||
Enter to send · Shift+Enter for new line
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{/* Quick action pills — populate input, don't auto-send */}
|
||||
<div style={{
|
||||
display: 'flex', gap: '8px',
|
||||
flexWrap: 'wrap', justifyContent: 'center',
|
||||
padding: '0 24px',
|
||||
}}>
|
||||
{QUICK_ACTIONS.map(({ label, icon }) => (
|
||||
<button
|
||||
key={label}
|
||||
onClick={() => setInput(label)}
|
||||
style={{
|
||||
display: 'flex', alignItems: 'center', gap: '6px',
|
||||
padding: '7px 14px',
|
||||
background: 'var(--bg-surface)',
|
||||
border: '1px solid var(--border)',
|
||||
borderRadius: '999px',
|
||||
color: 'var(--text-secondary)',
|
||||
fontSize: '13px',
|
||||
cursor: 'pointer',
|
||||
transition: 'border-color 0.15s, color 0.15s',
|
||||
}}
|
||||
onMouseEnter={e => {
|
||||
e.currentTarget.style.borderColor = 'var(--accent)';
|
||||
e.currentTarget.style.color = 'var(--text-primary)';
|
||||
}}
|
||||
onMouseLeave={e => {
|
||||
e.currentTarget.style.borderColor = 'var(--border)';
|
||||
e.currentTarget.style.color = 'var(--text-secondary)';
|
||||
}}
|
||||
>
|
||||
<span style={{ fontSize: '11px', opacity: 0.7 }}>{icon}</span>
|
||||
{label}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
|
||||
</div>
|
||||
);
|
||||
}
|
||||
172
packages/chat-client/src/components/InfoPanel.jsx
Normal file
172
packages/chat-client/src/components/InfoPanel.jsx
Normal file
@@ -0,0 +1,172 @@
|
||||
import React from 'react';
|
||||
|
||||
export default function InfoPanel({
|
||||
isOpen,
|
||||
onToggle,
|
||||
activeSession,
|
||||
lastModel,
|
||||
lastTokenCount,
|
||||
selectedModel,
|
||||
onModelChange,
|
||||
models,
|
||||
summarising,
|
||||
onViewSummary,
|
||||
}) {
|
||||
return (
|
||||
<div className="flex-col" style={{
|
||||
position: 'fixed',
|
||||
top: 0,
|
||||
right: 0,
|
||||
height: '100vh',
|
||||
width: 'var(--panel-width)',
|
||||
background: 'var(--bg-surface)',
|
||||
borderLeft: '1px solid var(--border)',
|
||||
transform: isOpen ? 'translateX(0)' : 'translateX(100%)',
|
||||
transition: 'transform 0.2s ease',
|
||||
zIndex: 20,
|
||||
}}>
|
||||
|
||||
{/* Header */}
|
||||
<div className="panel-header" style={{
|
||||
justifyContent: isOpen ? 'space-between' : 'center',
|
||||
padding: isOpen ? '0 16px 0 12px' : '0',
|
||||
}}>
|
||||
<button className="btn-icon" onClick={onToggle}>{isOpen ? '▶' : '◀'}</button>
|
||||
{isOpen && <span className="text-base" style={{ fontWeight: 500, color: 'var(--text-secondary)' }}>Session Info</span>}
|
||||
</div>
|
||||
|
||||
{isOpen && (
|
||||
<div className="flex-1 scroll-y" style={{ padding: '16px' }}>
|
||||
|
||||
{/* Model selector */}
|
||||
<Section title="Model">
|
||||
<select
|
||||
value={selectedModel}
|
||||
onChange={e => onModelChange(e.target.value)}
|
||||
style={{
|
||||
width: '100%',
|
||||
padding: '8px 10px',
|
||||
background: 'var(--bg-elevated)',
|
||||
border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-md)',
|
||||
color: 'var(--text-primary)',
|
||||
fontSize: '13px',
|
||||
cursor: 'pointer',
|
||||
outline: 'none',
|
||||
}}
|
||||
>
|
||||
{models.map(m => (
|
||||
<option key={m.value} value={m.value}>{m.label}</option>
|
||||
))}
|
||||
</select>
|
||||
</Section>
|
||||
|
||||
{/* Session details */}
|
||||
<Section title="Session">
|
||||
{activeSession ? (
|
||||
<div className="flex-col" style={{ gap: '8px' }}>
|
||||
<InfoRow label="ID" value={activeSession.external_id} mono truncate />
|
||||
<InfoRow label="Status" value={activeSession.isNew ? 'Unsaved' : 'Active'} accent={activeSession.isNew} />
|
||||
</div>
|
||||
) : (
|
||||
<p className="text-sm text-muted">No session selected</p>
|
||||
)}
|
||||
</Section>
|
||||
|
||||
{/* Last response stats */}
|
||||
<Section title="Last Response">
|
||||
{lastModel ? (
|
||||
<div className="flex-col" style={{ gap: '8px' }}>
|
||||
<InfoRow label="Model" value={lastModel} />
|
||||
<InfoRow label="Tokens" value={lastTokenCount > 0 ? lastTokenCount.toLocaleString() : '—'} />
|
||||
</div>
|
||||
) : (
|
||||
<p className="text-sm text-muted">No response yet</p>
|
||||
)}
|
||||
</Section>
|
||||
|
||||
{/* Session Memory button */}
|
||||
{activeSession && !activeSession.isNew && (
|
||||
<button
|
||||
onClick={onViewSummary}
|
||||
className="btn-reset text-sm"
|
||||
style={{
|
||||
marginTop: '8px', width: '100%', padding: '7px 10px',
|
||||
borderRadius: 'var(--radius-md)',
|
||||
background: 'var(--bg-elevated)',
|
||||
border: '1px solid var(--border)',
|
||||
color: 'var(--text-secondary)',
|
||||
display: 'flex', alignItems: 'center', gap: '8px',
|
||||
}}
|
||||
onMouseEnter={e => e.currentTarget.style.borderColor = 'var(--accent-hover)'}
|
||||
onMouseLeave={e => e.currentTarget.style.borderColor = 'var(--border)'}
|
||||
>
|
||||
<span>◈</span>
|
||||
<span>Session Memory</span>
|
||||
{summarising && (
|
||||
<div style={{
|
||||
marginLeft: 'auto',
|
||||
width: '8px', height: '8px', borderRadius: '50%',
|
||||
border: '2px solid var(--accent-hover)',
|
||||
borderTopColor: 'transparent',
|
||||
animation: 'spin 0.7s linear infinite',
|
||||
flexShrink: 0,
|
||||
}} />
|
||||
)}
|
||||
</button>
|
||||
)}
|
||||
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function Section({ title, children }) {
|
||||
return (
|
||||
<div style={{ marginBottom: '24px' }}>
|
||||
<p className="label-upper" style={{ marginBottom: '10px' }}>{title}</p>
|
||||
{children}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function InfoRow({ label, value, mono, truncate, accent }) {
|
||||
return (
|
||||
<div className="flex items-center" style={{ justifyContent: 'space-between', gap: '8px' }}>
|
||||
<span className="text-sm text-muted flex-shrink">{label}</span>
|
||||
<span style={{
|
||||
fontSize: '12px',
|
||||
color: accent ? 'var(--accent)' : 'var(--text-secondary)',
|
||||
fontFamily: mono ? 'monospace' : 'inherit',
|
||||
overflow: truncate ? 'hidden' : 'visible',
|
||||
textOverflow: truncate ? 'ellipsis' : 'clip',
|
||||
whiteSpace: truncate ? 'nowrap' : 'normal',
|
||||
maxWidth: truncate ? '130px' : 'auto',
|
||||
textAlign: 'right',
|
||||
}}>
|
||||
{value}
|
||||
</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function IconHint({ title, children }) {
|
||||
return (
|
||||
<div title={title} style={{
|
||||
width: '32px',
|
||||
height: '32px',
|
||||
borderRadius: 'var(--radius-md)',
|
||||
background: 'var(--bg-elevated)',
|
||||
border: '1px solid var(--border)',
|
||||
display: 'flex',
|
||||
alignItems: 'center',
|
||||
justifyContent: 'center',
|
||||
fontSize: '12px',
|
||||
color: 'var(--text-muted)',
|
||||
cursor: 'default',
|
||||
}}>
|
||||
{children}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
194
packages/chat-client/src/components/MemoryView.jsx
Normal file
194
packages/chat-client/src/components/MemoryView.jsx
Normal file
@@ -0,0 +1,194 @@
|
||||
import React, { useState, useEffect, useCallback } from 'react';
|
||||
import { getEpisodes, deleteEpisode } from '../api/orchestration';
|
||||
import ReactMarkdown from 'react-markdown';
|
||||
|
||||
// Number of episodes requested per page; also the step used when paging the offset.
const PAGE_SIZE = 20;
|
||||
|
||||
export default function MemoryView({ onNavigate, onBack }) {
|
||||
const [episodes, setEpisodes] = useState([]);
|
||||
const [total, setTotal] = useState(0);
|
||||
const [offset, setOffset] = useState(0);
|
||||
const [search, setSearch] = useState('');
|
||||
const [query, setQuery] = useState(''); // committed search term
|
||||
const [expanded, setExpanded] = useState(null); // episode id
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [error, setError] = useState(null);
|
||||
|
||||
const load = useCallback(async () => {
|
||||
setLoading(true);
|
||||
setError(null);
|
||||
try {
|
||||
const data = await getEpisodes({ limit: PAGE_SIZE, offset, q: query || undefined });
|
||||
setEpisodes(data.episodes);
|
||||
setTotal(data.total);
|
||||
} catch (err) {
|
||||
setError(err.message);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
}, [offset, query]);
|
||||
|
||||
useEffect(() => { load(); }, [load]);
|
||||
|
||||
function handleSearch(e) {
|
||||
e.preventDefault();
|
||||
setOffset(0); // reset to page 1 on new search
|
||||
setQuery(search);
|
||||
}
|
||||
|
||||
async function handleDelete(id) {
|
||||
if (!confirm('Delete this memory? This cannot be undone.')) return;
|
||||
await deleteEpisode(id);
|
||||
load();
|
||||
}
|
||||
|
||||
|
||||
|
||||
const totalPages = Math.ceil(total / PAGE_SIZE);
|
||||
const currentPage = Math.floor(offset / PAGE_SIZE) + 1;
|
||||
|
||||
return (
|
||||
<div style={{ display: 'flex', flexDirection: 'column', flex: 1, overflow: 'hidden', background: 'var(--bg-base)' }}>
|
||||
|
||||
{/* Header */}
|
||||
<div className="panel-header" style={{ padding: '0 24px', gap: 12 }}>
|
||||
<button className="btn-icon" onClick={onBack} title="Back">
|
||||
←
|
||||
</button>
|
||||
<span className="text-base" style={{ fontWeight: 500 }}>Memory Viewer</span>
|
||||
<span className="text-sm text-muted" style={{ marginLeft: 'auto' }}>
|
||||
{total} episode{total !== 1 ? 's' : ''}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Search bar */}
|
||||
<form onSubmit={handleSearch} style={{ padding: '12px 24px', borderBottom: '1px solid var(--border)' }}>
|
||||
<div style={{ display: 'flex', gap: 8 }}>
|
||||
<input
|
||||
className="text-sm"
|
||||
value={search}
|
||||
onChange={e => setSearch(e.target.value)}
|
||||
placeholder="Search memories…"
|
||||
style={{
|
||||
flex: 1, padding: '8px 12px',
|
||||
background: 'var(--bg-surface)', border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius)', color: 'var(--text-primary)',
|
||||
}}
|
||||
/>
|
||||
<button type="submit" className="btn-primary" style={{ padding: '8px 16px' }}>
|
||||
Search
|
||||
</button>
|
||||
{query && (
|
||||
<button type="button" className="btn-icon" onClick={() => { setSearch(''); setQuery(''); setOffset(0); }}>
|
||||
✕
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
</form>
|
||||
|
||||
{/* Episode list */}
|
||||
<div className="scroll-y flex-1" style={{ padding: '16px 24px' }}>
|
||||
{loading && <p className="text-sm text-muted">Loading…</p>}
|
||||
{error && <p className="text-sm" style={{ color: 'var(--error, #e05)' }}>{error}</p>}
|
||||
{!loading && episodes.length === 0 && (
|
||||
<p className="text-sm text-muted">No memories found.</p>
|
||||
)}
|
||||
|
||||
{episodes.map(ep => (
|
||||
<EpisodeCard
|
||||
key={ep.id}
|
||||
episode={ep}
|
||||
expanded={expanded === ep.id}
|
||||
onToggle={() => setExpanded(expanded === ep.id ? null : ep.id)}
|
||||
onDelete={() => handleDelete(ep.id)}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{/* Pagination */}
|
||||
{totalPages > 1 && (
|
||||
<div style={{
|
||||
display: 'flex', alignItems: 'center', justifyContent: 'center',
|
||||
gap: 12, padding: '12px', borderTop: '1px solid var(--border)',
|
||||
}}>
|
||||
<button className="btn-icon" disabled={offset === 0}
|
||||
onClick={() => setOffset(o => Math.max(0, o - PAGE_SIZE))}>←</button>
|
||||
<span className="text-sm text-muted">{currentPage} / {totalPages}</span>
|
||||
<button className="btn-icon" disabled={currentPage >= totalPages}
|
||||
onClick={() => setOffset(o => o + PAGE_SIZE)}>→</button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Remove the most common Markdown markers so text can be shown as a plain,
 * single-style preview. This is a lightweight regex pass, not a Markdown parser.
 *
 * @param {string|null|undefined} text - Markdown source; null/undefined yield ''.
 * @returns {string} Text with list markers, bold/italic asterisks, inline-code
 *   backticks and heading hashes removed, trimmed of surrounding whitespace.
 */
function stripMarkdown(text) {
  if (text == null) return '';

  return String(text)
    // List markers go first: otherwise the italic pass below pairs a leading
    // "* " bullet with a later "*" on the same line and mangles the item.
    .replace(/^\s*[-*+]\s+/gm, '')       // list markers
    .replace(/\*\*(.*?)\*\*/g, '$1')     // bold
    .replace(/\*(.*?)\*/g, '$1')         // italic
    .replace(/`([^`]+)`/g, '$1')         // inline code
    .replace(/^#{1,6}\s+/gm, '')         // headings
    .trim();
}
|
||||
|
||||
function EpisodeCard({ episode, expanded, onToggle, onDelete }) {
|
||||
const date = new Date(episode.created_at * 1000).toLocaleString();
|
||||
const preview = stripMarkdown(episode.user_message).slice(0, 80) +
|
||||
(episode.user_message.length > 80 ? '…' : '');
|
||||
|
||||
return (
|
||||
<div style={{
|
||||
background: 'var(--bg-surface)', border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-lg)', marginBottom: 8, overflow: 'hidden',
|
||||
}}>
|
||||
{/* Card header — always visible */}
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: 8, padding: '10px 14px', cursor: 'pointer' }}
|
||||
onClick={onToggle}>
|
||||
<span style={{ flex: 1, fontSize: 13, color: 'var(--text-primary)' }}>{preview}</span>
|
||||
<span className="text-sm text-muted">{date}</span>
|
||||
<span className="text-muted" style={{ fontSize: 11 }}>#{episode.id}</span>
|
||||
<button className="btn-icon" style={{ color: 'var(--error, #e05)', fontSize: 14 }}
|
||||
onClick={e => { e.stopPropagation(); onDelete(); }} title="Delete">🗑</button>
|
||||
<span className="text-muted" style={{ fontSize: 11 }}>{expanded ? '▲' : '▼'}</span>
|
||||
</div>
|
||||
|
||||
{/* Expanded content */}
|
||||
{expanded && (
|
||||
<div style={{ padding: '0 14px 14px', borderTop: '1px solid var(--border)' }}>
|
||||
<MessageBlock label="You" content={episode.user_message} color="var(--accent)" />
|
||||
<MessageBlock label="NexusAI" content={episode.ai_response} color="var(--text-secondary)" />
|
||||
{episode.token_count > 0 && (
|
||||
<p className="text-sm text-muted" style={{ marginTop: 8 }}>
|
||||
Tokens: {episode.token_count}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function MessageBlock({ label, content, color }) {
|
||||
const isAI = label === 'NexusAI';
|
||||
return (
|
||||
<div style={{ marginTop: 12 }}>
|
||||
<p style={{ fontSize: 11, fontWeight: 600, color, marginBottom: 4, textTransform: 'uppercase', letterSpacing: '0.05em' }}>
|
||||
{label}
|
||||
</p>
|
||||
<ReactMarkdown
|
||||
components={{
|
||||
p: ({children}) => <p style={{ margin: '0 0 8px', lineHeight: 1.6, fontSize: 13 }}>{children}</p>,
|
||||
ul: ({children}) => <ul style={{ margin: '0 0 8px', paddingLeft: '20px' }}>{children}</ul>,
|
||||
ol: ({children}) => <ol style={{ margin: '0 0 8px', paddingLeft: '20px' }}>{children}</ol>,
|
||||
li: ({children}) => <li style={{ marginBottom: '2px', fontSize: 13 }}>{children}</li>,
|
||||
code: ({inline, children}) => inline
|
||||
? <code style={{ background: 'var(--bg-elevated)', padding: '1px 5px', borderRadius: 'var(--radius-sm)', fontSize: 12, fontFamily: 'monospace' }}>{children}</code>
|
||||
: <pre style={{ background: 'var(--bg-elevated)', padding: '10px 12px', borderRadius: 'var(--radius-md)', overflowX: 'auto', fontSize: 12, fontFamily: 'monospace' }}><code>{children}</code></pre>,
|
||||
strong: ({children}) => <strong style={{ fontWeight: 600, color: 'var(--text-primary)' }}>{children}</strong>,
|
||||
}}
|
||||
>{content}</ReactMarkdown>
|
||||
|
||||
</div>
|
||||
);
|
||||
}
|
||||
70
packages/chat-client/src/components/MessageBubble.jsx
Normal file
70
packages/chat-client/src/components/MessageBubble.jsx
Normal file
@@ -0,0 +1,70 @@
|
||||
import React from 'react';
|
||||
import ReactMarkdown from 'react-markdown';
|
||||
|
||||
export default function MessageBubble({ message }) {
|
||||
const isUser = message.role === 'user';
|
||||
|
||||
return (
|
||||
<div className="flex" style={{
|
||||
justifyContent: isUser ? 'flex-end' : 'flex-start',
|
||||
marginBottom: '12px',
|
||||
padding: '0 16px',
|
||||
}}>
|
||||
{!isUser && (
|
||||
<div className="flex items-center justify-center flex-shrink" style={{
|
||||
width: '28px',
|
||||
height: '28px',
|
||||
borderRadius: '50%',
|
||||
background: 'var(--accent)',
|
||||
fontSize: '12px',
|
||||
fontWeight: 600,
|
||||
marginRight: '8px',
|
||||
alignSelf: 'flex-end',
|
||||
}}>N</div>
|
||||
)}
|
||||
|
||||
<div style={{
|
||||
maxWidth: '70%',
|
||||
padding: '14px 14px',
|
||||
borderRadius: isUser ? '18px 4px 4px 18px' : '4px 18px 18px 4px',
|
||||
background: isUser ? 'var(--bubble-user)' : 'var(--bubble-ai)',
|
||||
color: 'var(--text-primary)',
|
||||
fontSize: '18px',
|
||||
lineHeight: '1.8',
|
||||
border: isUser ? 'none' : '2px solid var(--border)',
|
||||
wordBreak: 'break-word',
|
||||
}}>
|
||||
<ReactMarkdown
|
||||
components={{
|
||||
// Tighten up default spacing so it fits the bubble style
|
||||
p: ({ children }) => <p style={{ margin: '0 0 8px', lineHeight: 1.6 }}>{children}</p>,
|
||||
ul: ({ children }) => <ul style={{ margin: '0 0 8px', paddingLeft: '20px' }}>{children}</ul>,
|
||||
ol: ({ children }) => <ol style={{ margin: '0 0 8px', paddingLeft: '20px' }}>{children}</ol>,
|
||||
li: ({ children }) => <li style={{ marginBottom: '2px' }}>{children}</li>,
|
||||
code: ({ inline, children }) => inline
|
||||
? <code style={{ background: 'var(--bg-elevated)', padding: '1px 5px', borderRadius: 'var(--radius-sm)', fontSize: '12px', fontFamily: 'monospace' }}>{children}</code>
|
||||
: <pre style={{ background: 'var(--bg-elevated)', padding: '10px 12px', borderRadius: 'var(--radius-md)', overflowX: 'auto', fontSize: '12px', fontFamily: 'monospace' }}><code>{children}</code></pre>,
|
||||
strong: ({ children }) => <strong style={{ fontWeight: 600, color: 'var(--text-primary)' }}>{children}</strong>,
|
||||
}}
|
||||
>{message.text}</ReactMarkdown>
|
||||
|
||||
{message.streaming && (
|
||||
<span style={{
|
||||
display: 'inline-block',
|
||||
width: '8px',
|
||||
height: '14px',
|
||||
background: 'var(--text-secondary)',
|
||||
marginLeft: '2px',
|
||||
borderRadius: 'var(--radius-sm)',
|
||||
animation: 'blink 1s step-end infinite',
|
||||
}} />
|
||||
)}
|
||||
{message.error && (
|
||||
<div className="text-xs" style={{ marginTop: '6px', color: 'var(--warning)' }}>
|
||||
⚠ Failed to complete response
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
178
packages/chat-client/src/components/ProjectModal.jsx
Normal file
178
packages/chat-client/src/components/ProjectModal.jsx
Normal file
@@ -0,0 +1,178 @@
|
||||
import React, { useState, useEffect, useRef } from 'react';
|
||||
|
||||
const COLOURS = ['#3d3a79', '#2d6a4f', '#7b2d8b', '#c0392b', '#d4800a', '#1a6b8a'];
|
||||
|
||||
export default function ProjectModal({ project, mode, onSave, onDelete, onClose }) {
|
||||
const [name, setName] = useState(project?.name ?? '');
|
||||
const [description, setDescription] = useState(project?.description ?? '');
|
||||
const [colour, setColour] = useState(project?.colour ?? COLOURS[0]);
|
||||
const [systemPrompt, setSystemPrompt] = useState(project?.system_prompt ?? '');
|
||||
const inputRef = useRef(null);
|
||||
|
||||
useEffect(() => {
|
||||
if (mode !== 'confirm-delete') inputRef.current?.focus();
|
||||
}, [mode]);
|
||||
|
||||
function handleSubmit() {
|
||||
const trimmed = name.trim();
|
||||
if (!trimmed) return;
|
||||
onSave({
|
||||
name: trimmed,
|
||||
description: description.trim() || null,
|
||||
colour,
|
||||
icon: null,
|
||||
isolated: 1,
|
||||
system_prompt: systemPrompt.trim() || null,
|
||||
});
|
||||
onClose();
|
||||
}
|
||||
|
||||
function handleKeyDown(e) {
|
||||
if (e.key === 'Escape') onClose();
|
||||
// Don't submit on Enter — textarea fields make Enter ambiguous
|
||||
}
|
||||
|
||||
return (
|
||||
<div onClick={onClose} style={{
|
||||
position: 'fixed', inset: 0,
|
||||
background: 'rgba(0,0,0,0.5)',
|
||||
display: 'flex', alignItems: 'center', justifyContent: 'center',
|
||||
zIndex: 100,
|
||||
}}>
|
||||
<div onClick={e => e.stopPropagation()} onKeyDown={handleKeyDown} style={{
|
||||
background: 'var(--bg-surface)',
|
||||
border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-lg)',
|
||||
padding: '24px', width: '420px',
|
||||
maxHeight: '90vh', overflowY: 'auto',
|
||||
display: 'flex', flexDirection: 'column', gap: '16px',
|
||||
}}>
|
||||
{mode === 'confirm-delete' ? (
|
||||
<>
|
||||
<h2 style={{ fontSize: '15px', fontWeight: 600, color: 'var(--text-primary)' }}>
|
||||
Delete project?
|
||||
</h2>
|
||||
<p className="text-sm text-secondary">
|
||||
Are you sure you want to delete{' '}
|
||||
<span style={{ color: 'var(--text-primary)', fontWeight: 500 }}>{project.name}</span>?
|
||||
Sessions in this project will not be deleted.
|
||||
</p>
|
||||
<div className="flex" style={{ gap: '8px', justifyContent: 'flex-end' }}>
|
||||
<button className="btn-reset text-base text-muted"
|
||||
onClick={onClose}
|
||||
style={{ padding: '8px 14px', borderRadius: 'var(--radius-md)' }}>
|
||||
Cancel
|
||||
</button>
|
||||
<button className="btn-reset text-base"
|
||||
onClick={() => { onDelete(project.id); onClose(); }}
|
||||
style={{ padding: '8px 16px', borderRadius: 'var(--radius-md)', background: '#c0392b', color: 'white' }}>
|
||||
Delete
|
||||
</button>
|
||||
</div>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<h2 style={{ fontSize: '15px', fontWeight: 600, color: 'var(--text-primary)' }}>
|
||||
{mode === 'create' ? 'New Project' : 'Edit Project'}
|
||||
</h2>
|
||||
|
||||
{/* Name */}
|
||||
<div className="flex-col" style={{ gap: '6px' }}>
|
||||
<label className="label-upper">Name</label>
|
||||
<input
|
||||
ref={inputRef}
|
||||
value={name}
|
||||
onChange={e => setName(e.target.value)}
|
||||
placeholder="Project name..."
|
||||
style={{
|
||||
background: 'var(--bg-elevated)', border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-md)', padding: '8px 12px',
|
||||
color: 'var(--text-primary)', fontSize: '14px', outline: 'none', width: '100%',
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Description */}
|
||||
<div className="flex-col" style={{ gap: '6px' }}>
|
||||
<label className="label-upper">Description <span style={{ opacity: 0.5 }}>(optional)</span></label>
|
||||
<textarea
|
||||
value={description}
|
||||
onChange={e => setDescription(e.target.value)}
|
||||
placeholder="What's this project about..."
|
||||
rows={2}
|
||||
style={{
|
||||
background: 'var(--bg-elevated)', border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-md)', padding: '8px 12px',
|
||||
color: 'var(--text-primary)', fontSize: '14px', outline: 'none',
|
||||
width: '100%', resize: 'none', fontFamily: 'inherit',
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Colour picker */}
|
||||
<div className="flex-col" style={{ gap: '6px' }}>
|
||||
<label className="label-upper">Colour</label>
|
||||
<div className="flex" style={{ gap: '8px' }}>
|
||||
{COLOURS.map(c => (
|
||||
<button
|
||||
key={c}
|
||||
onClick={() => setColour(c)}
|
||||
className="btn-reset"
|
||||
style={{
|
||||
width: '24px', height: '24px',
|
||||
borderRadius: '50%',
|
||||
background: c,
|
||||
border: colour === c ? '2px solid var(--text-primary)' : '2px solid transparent',
|
||||
outline: colour === c ? '2px solid var(--accent-hover)' : 'none',
|
||||
outlineOffset: '2px',
|
||||
}}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* System Prompt */}
|
||||
<div className="flex-col" style={{ gap: '6px' }}>
|
||||
<label className="label-upper">
|
||||
System Prompt <span style={{ opacity: 0.5 }}>(optional)</span>
|
||||
</label>
|
||||
<p className="text-xs text-muted" style={{ marginTop: '-2px' }}>
|
||||
Overrides the global system prompt for conversations in this project.
|
||||
Leave blank to use the global default.
|
||||
</p>
|
||||
<textarea
|
||||
value={systemPrompt}
|
||||
onChange={e => setSystemPrompt(e.target.value)}
|
||||
placeholder="You are a helpful assistant specialised in..."
|
||||
rows={4}
|
||||
style={{
|
||||
background: 'var(--bg-elevated)', border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-md)', padding: '8px 12px',
|
||||
color: 'var(--text-primary)', fontSize: '13px', outline: 'none',
|
||||
width: '100%', resize: 'vertical', fontFamily: 'inherit',
|
||||
lineHeight: '1.6',
|
||||
}}
|
||||
onFocus={e => e.target.style.borderColor = 'var(--accent)'}
|
||||
onBlur={e => e.target.style.borderColor = 'var(--border)'}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div className="flex" style={{ gap: '8px', justifyContent: 'flex-end' }}>
|
||||
<button className="btn-reset text-base text-muted"
|
||||
onClick={onClose}
|
||||
style={{ padding: '8px 14px', borderRadius: 'var(--radius-md)' }}>
|
||||
Cancel
|
||||
</button>
|
||||
<button className="btn-primary"
|
||||
onClick={handleSubmit}
|
||||
disabled={!name.trim()}
|
||||
style={{ padding: '8px 16px' }}>
|
||||
{mode === 'create' ? 'Create' : 'Save'}
|
||||
</button>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
440
packages/chat-client/src/components/ProjectView.jsx
Normal file
440
packages/chat-client/src/components/ProjectView.jsx
Normal file
@@ -0,0 +1,440 @@
|
||||
import React, { useState, useEffect } from 'react';
|
||||
import { fetchSessions, updateProject, deleteProject, generateProjectSummary, fetchProjectOverviewSummary } from '../api/orchestration';
|
||||
import ProjectModal from './ProjectModal';
|
||||
|
||||
export default function ProjectView({ project, onNavigate, onBack, onSelectSession, onNewProjectChat, onProjectsChange }) {
|
||||
const [sessions, setSessions] = useState([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [input, setInput] = useState('');
|
||||
const [menuOpen, setMenuOpen] = useState(false);
|
||||
const [modal, setModal] = useState(null);
|
||||
const [overview, setOverview] = useState(null);
|
||||
const [overviewLoading, setOverviewLoading] = useState(true);
|
||||
const [generating, setGenerating] = useState(false);
|
||||
const [generateError, setGenerateError] = useState(null);
|
||||
|
||||
useEffect(() => { load(); }, [project.id]);
|
||||
|
||||
useEffect(() => {
|
||||
async function loadOverview() {
|
||||
setOverviewLoading(true);
|
||||
try {
|
||||
setOverview(await fetchProjectOverviewSummary(project.id));
|
||||
} catch (err) {
|
||||
console.error('[ProjectView] Failed to load overview:', err.message);
|
||||
} finally {
|
||||
setOverviewLoading(false);
|
||||
}
|
||||
}
|
||||
loadOverview();
|
||||
}, [project.id]);
|
||||
|
||||
async function load() {
|
||||
setLoading(true);
|
||||
try {
|
||||
setSessions(await fetchSessions(50, 0, project.id));
|
||||
} catch (err) {
|
||||
console.error('[ProjectView] Failed to load sessions:', err.message);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
}
|
||||
|
||||
function handleSend() {
|
||||
const text = input.trim();
|
||||
if (!text) return;
|
||||
setInput('');
|
||||
onNewProjectChat(text);
|
||||
}
|
||||
|
||||
function handleKeyDown(e) {
|
||||
if (e.key === 'Enter' && !e.shiftKey) {
|
||||
e.preventDefault();
|
||||
handleSend();
|
||||
}
|
||||
}
|
||||
|
||||
async function handleSave({ name, description, colour, icon, isolated, system_prompt }) {
|
||||
try {
|
||||
await updateProject(project.id, { name, description, colour, icon, isolated, system_prompt });
|
||||
onProjectsChange?.();
|
||||
setModal(null);
|
||||
} catch (err) {
|
||||
console.error('[ProjectView] Update failed:', err.message);
|
||||
}
|
||||
}
|
||||
|
||||
async function handleDelete() {
|
||||
try {
|
||||
await deleteProject(project.id);
|
||||
onProjectsChange?.();
|
||||
onBack();
|
||||
} catch (err) {
|
||||
console.error('[ProjectView] Delete failed:', err.message);
|
||||
}
|
||||
}
|
||||
|
||||
function formatTimestamp(ts) {
|
||||
if (!ts) return '—';
|
||||
const date = new Date(ts * 1000);
|
||||
const now = new Date();
|
||||
const diffMs = now - date;
|
||||
const diffMins = Math.floor(diffMs / 60000);
|
||||
const diffHours = Math.floor(diffMs / 3600000);
|
||||
const diffDays = Math.floor(diffMs / 86400000);
|
||||
if (diffMins < 1) return 'Just now';
|
||||
if (diffMins < 60) return `${diffMins}m ago`;
|
||||
if (diffHours < 24) return `${diffHours}h ago`;
|
||||
if (diffDays === 1) return 'Yesterday';
|
||||
return date.toLocaleDateString([], { month: 'short', day: 'numeric', year: 'numeric' });
|
||||
}
|
||||
|
||||
async function handleGenerateSummary() {
|
||||
setGenerating(true);
|
||||
setGenerateError(null);
|
||||
try {
|
||||
setOverview(await generateProjectSummary(project.id));
|
||||
} catch (err) {
|
||||
// 422 means no session summaries exist yet — surface a friendly message
|
||||
setGenerateError(
|
||||
err.message.includes('422')
|
||||
? 'No conversations found in this project yet.'
|
||||
: 'Failed to generate summary. Please try again.'
|
||||
);
|
||||
} finally {
|
||||
setGenerating(false);
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="flex-col flex-1 overflow-hidden" style={{ background: 'var(--bg-base)' }}>
|
||||
|
||||
{/* Colour accent bar */}
|
||||
<div style={{ height: '3px', flexShrink: 0, background: project.colour ?? 'var(--accent)' }} />
|
||||
|
||||
{/* Header */}
|
||||
<div className="panel-header" style={{ padding: '0 24px', justifyContent: 'space-between' }}>
|
||||
<button
|
||||
className="btn-reset text-xs text-muted"
|
||||
onClick={onBack}
|
||||
style={{ display: 'flex', alignItems: 'center', gap: '4px' }}
|
||||
onMouseEnter={e => e.currentTarget.style.color = 'var(--text-secondary)'}
|
||||
onMouseLeave={e => e.currentTarget.style.color = 'var(--text-muted)'}
|
||||
>
|
||||
← All Projects
|
||||
</button>
|
||||
|
||||
<div style={{ position: 'relative' }}>
|
||||
<button
|
||||
className="btn-icon"
|
||||
onClick={() => setMenuOpen(o => !o)}
|
||||
title="Project options"
|
||||
style={{ fontSize: '18px', letterSpacing: '1px' }}
|
||||
>⋮</button>
|
||||
|
||||
{menuOpen && (
|
||||
<>
|
||||
<div style={{ position: 'fixed', inset: 0, zIndex: 40 }} onClick={() => setMenuOpen(false)} />
|
||||
<div style={{
|
||||
position: 'absolute', top: '100%', right: 0,
|
||||
background: 'var(--bg-elevated)',
|
||||
border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-md)',
|
||||
padding: '4px', zIndex: 50, minWidth: '150px',
|
||||
}}>
|
||||
<MenuButton onClick={() => { setMenuOpen(false); setModal({ mode: 'edit' }); }}>
|
||||
✎ Edit details
|
||||
</MenuButton>
|
||||
<MenuButton danger onClick={() => { setMenuOpen(false); setModal({ mode: 'confirm-delete' }); }}>
|
||||
✕ Delete project
|
||||
</MenuButton>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Scrollable content */}
|
||||
<div className="flex-1 scroll-y" style={{ padding: '32px 24px' }}>
|
||||
|
||||
{/* Project title + description */}
|
||||
<div style={{ marginBottom: '32px' }}>
|
||||
<h1 style={{ fontSize: '22px', fontWeight: 600, color: 'var(--text-primary)', marginBottom: '8px' }}>
|
||||
{project.name}
|
||||
</h1>
|
||||
{project.description && (
|
||||
<p className="text-sm" style={{ color: 'var(--text-secondary)', maxWidth: '560px', lineHeight: 1.6 }}>
|
||||
{project.description}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* ── Conversations ── */}
|
||||
<div style={{ marginBottom: '40px' }}>
|
||||
<p className="label-upper" style={{ marginBottom: '12px' }}>Conversations</p>
|
||||
|
||||
{loading ? (
|
||||
<div className="text-sm text-muted">Loading...</div>
|
||||
|
||||
) : sessions.length === 0 ? (
|
||||
<div style={{ display: 'flex', flexDirection: 'column', alignItems: 'center', gap: '16px', padding: '32px 0' }}>
|
||||
<p className="text-sm text-muted">No conversations yet — start one below</p>
|
||||
<ChatInput
|
||||
value={input}
|
||||
onChange={setInput}
|
||||
onSend={handleSend}
|
||||
placeholder={`Start a conversation in ${project.name}…`}
|
||||
autoFocus
|
||||
/>
|
||||
</div>
|
||||
|
||||
) : (
|
||||
<>
|
||||
<div style={{ display: 'flex', flexDirection: 'column', marginBottom: '16px' }}>
|
||||
{sessions.map((session, i) => (
|
||||
<button
|
||||
key={session.external_id}
|
||||
className="btn-reset"
|
||||
onClick={() => { onSelectSession(session); onNavigate('chat'); }}
|
||||
style={{
|
||||
padding: '12px 16px',
|
||||
display: 'flex', alignItems: 'center', justifyContent: 'space-between',
|
||||
borderBottom: i < sessions.length - 1 ? '1px solid var(--border)' : 'none',
|
||||
borderRadius: i === 0
|
||||
? 'var(--radius-md) var(--radius-md) 0 0'
|
||||
: i === sessions.length - 1
|
||||
? '0 0 var(--radius-md) var(--radius-md)'
|
||||
: '0',
|
||||
background: 'var(--bg-surface)',
|
||||
textAlign: 'left',
|
||||
}}
|
||||
onMouseEnter={e => e.currentTarget.style.background = 'var(--bg-elevated)'}
|
||||
onMouseLeave={e => e.currentTarget.style.background = 'var(--bg-surface)'}
|
||||
>
|
||||
<span className="text-base" style={{ color: 'var(--text-primary)' }}>
|
||||
{session.name || session.external_id}
|
||||
</span>
|
||||
<span className="text-xs text-muted" style={{ flexShrink: 0, marginLeft: '16px' }}>
|
||||
{formatTimestamp(session.updated_at)}
|
||||
</span>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
|
||||
<ChatInput
|
||||
value={input}
|
||||
onChange={setInput}
|
||||
onSend={handleSend}
|
||||
placeholder={`New conversation in ${project.name}…`}
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* ── Project Memory ── */}
|
||||
<div style={{ marginBottom: '40px' }}>
|
||||
<div style={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', marginBottom: '12px' }}>
|
||||
<p className="label-upper">Project Memory</p>
|
||||
<button
|
||||
className="btn-primary"
|
||||
style={{ padding: '5px 12px', fontSize: '12px', display: 'flex', alignItems: 'center', gap: '6px' }}
|
||||
onClick={handleGenerateSummary}
|
||||
disabled={generating}
|
||||
>
|
||||
{generating
|
||||
? <><span className="spinner" />Generating…</>
|
||||
: overview ? 'Regenerate' : 'Generate Summary'
|
||||
}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div style={{
|
||||
background: 'var(--bg-surface)',
|
||||
border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-lg)',
|
||||
padding: '20px',
|
||||
}}>
|
||||
{overviewLoading ? (
|
||||
<p className="text-sm text-muted">Loading…</p>
|
||||
|
||||
) : generateError ? (
|
||||
<p className="text-sm" style={{ color: 'var(--text-muted)', fontStyle: 'italic' }}>
|
||||
{generateError}
|
||||
</p>
|
||||
|
||||
) : overview ? (
|
||||
<>
|
||||
<p className="text-sm" style={{ color: 'var(--text-secondary)', lineHeight: 1.7, whiteSpace: 'pre-wrap' }}>
|
||||
{overview.content}
|
||||
</p>
|
||||
<p className="text-xs text-muted" style={{ marginTop: '12px' }}>
|
||||
Last generated {formatTimestamp(overview.created_at)}
|
||||
</p>
|
||||
</>
|
||||
|
||||
) : (
|
||||
// No overview exists yet — explain what this section is for
|
||||
<div style={{ display: 'flex', flexDirection: 'column', gap: '10px' }}>
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: '10px' }}>
|
||||
<span style={{ fontSize: '20px', opacity: 0.4 }}>◈</span>
|
||||
<span className="text-sm" style={{ fontWeight: 500, color: 'var(--text-primary)' }}>
|
||||
No project summary yet
|
||||
</span>
|
||||
</div>
|
||||
<p className="text-sm text-muted" style={{ lineHeight: 1.6, maxWidth: '520px' }}>
|
||||
Generate a summary to create a concise overview of this project's goals,
|
||||
progress, and key decisions — built from your session summaries.
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* ── Notes ── */}
|
||||
<NotesSection projectId={project.id} initialNotes={project.notes ?? ''} />
|
||||
|
||||
</div>
|
||||
|
||||
{/* Modal */}
|
||||
{modal && (
|
||||
<ProjectModal
|
||||
project={project}
|
||||
mode={modal.mode}
|
||||
onSave={handleSave}
|
||||
onDelete={handleDelete}
|
||||
onClose={() => setModal(null)}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// ── Sub-components ─────────────────────────────────────────
|
||||
|
||||
function ChatInput({ value, onChange, onSend, placeholder, autoFocus }) {
|
||||
function handleKeyDown(e) {
|
||||
if (e.key === 'Enter' && !e.shiftKey) {
|
||||
e.preventDefault();
|
||||
onSend();
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div style={{ width: '100%', maxWidth: '520px' }}>
|
||||
<div style={{
|
||||
background: 'var(--bg-elevated)',
|
||||
border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-lg)',
|
||||
padding: '12px 14px',
|
||||
}}>
|
||||
<textarea
|
||||
value={value}
|
||||
onChange={e => onChange(e.target.value)}
|
||||
onKeyDown={handleKeyDown}
|
||||
placeholder={placeholder}
|
||||
rows={1}
|
||||
autoFocus={autoFocus}
|
||||
style={{
|
||||
width: '100%', background: 'transparent',
|
||||
border: 'none', outline: 'none',
|
||||
color: 'var(--text-primary)', fontSize: '14px',
|
||||
lineHeight: '1.6', resize: 'none', fontFamily: 'inherit',
|
||||
maxHeight: '120px', overflowY: 'auto',
|
||||
}}
|
||||
onInput={e => {
|
||||
e.target.style.height = 'auto';
|
||||
e.target.style.height = `${e.target.scrollHeight}px`;
|
||||
}}
|
||||
/>
|
||||
<div style={{ display: 'flex', justifyContent: 'flex-end', marginTop: '8px' }}>
|
||||
<button
|
||||
onClick={onSend}
|
||||
disabled={!value.trim()}
|
||||
className="btn-primary"
|
||||
style={{ width: '32px', height: '32px', fontSize: '16px', border: '1px solid var(--border)' }}
|
||||
>↑</button>
|
||||
</div>
|
||||
</div>
|
||||
<p className="text-xs text-muted" style={{ textAlign: 'center', marginTop: '8px' }}>
|
||||
Enter to send · Shift+Enter for new line
|
||||
</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function NotesSection({ projectId, initialNotes }) {
|
||||
const [notes, setNotes] = useState(initialNotes);
|
||||
const [savedNotes, setSavedNotes] = useState(initialNotes);
|
||||
const [saving, setSaving] = useState(false);
|
||||
|
||||
const isDirty = notes !== savedNotes;
|
||||
|
||||
async function handleSave() {
|
||||
setSaving(true);
|
||||
try {
|
||||
await updateProject(projectId, { notes });
|
||||
setSavedNotes(notes);
|
||||
} catch (err) {
|
||||
console.error('[NotesSection] Save failed:', err.message);
|
||||
} finally {
|
||||
setSaving(false);
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div style={{ marginBottom: '40px' }}>
|
||||
<div style={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', marginBottom: '12px' }}>
|
||||
<p className="label-upper">Project Notes</p>
|
||||
{isDirty && (
|
||||
<button
|
||||
className="btn-primary"
|
||||
style={{ padding: '5px 12px', fontSize: '12px' }}
|
||||
disabled={saving}
|
||||
onClick={handleSave}
|
||||
>
|
||||
{saving ? 'Saving…' : 'Save'}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
<textarea
|
||||
value={notes}
|
||||
onChange={e => setNotes(e.target.value)}
|
||||
placeholder="Add notes about this project — references, goals, context, anything useful…"
|
||||
rows={6}
|
||||
style={{
|
||||
width: '100%',
|
||||
background: 'var(--bg-surface)',
|
||||
border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-lg)',
|
||||
padding: '14px 16px',
|
||||
color: 'var(--text-primary)',
|
||||
fontSize: '13px', lineHeight: '1.6',
|
||||
resize: 'vertical', fontFamily: 'inherit',
|
||||
outline: 'none', boxSizing: 'border-box',
|
||||
}}
|
||||
onFocus={e => e.target.style.borderColor = 'var(--accent)'}
|
||||
onBlur={e => e.target.style.borderColor = 'var(--border)'}
|
||||
/>
|
||||
{!isDirty && notes && (
|
||||
<p className="text-xs text-muted" style={{ marginTop: '6px' }}>Saved</p>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function MenuButton({ children, onClick, danger }) {
|
||||
return (
|
||||
<button
|
||||
className="btn-reset text-sm"
|
||||
onClick={onClick}
|
||||
style={{
|
||||
width: '100%', padding: '8px 12px',
|
||||
borderRadius: 'var(--radius-sm)',
|
||||
justifyContent: 'flex-start',
|
||||
color: danger ? '#ff6b6b' : 'var(--text-primary)',
|
||||
}}
|
||||
onMouseEnter={e => e.currentTarget.style.background = 'var(--bg-surface)'}
|
||||
onMouseLeave={e => e.currentTarget.style.background = 'transparent'}
|
||||
>{children}</button>
|
||||
);
|
||||
}
|
||||
128
packages/chat-client/src/components/SessionModal.jsx
Normal file
128
packages/chat-client/src/components/SessionModal.jsx
Normal file
@@ -0,0 +1,128 @@
|
||||
import React, { useState, useEffect, useRef } from 'react';
|
||||
import { updateSession } from '../api/orchestration';
|
||||
|
||||
export default function SessionModal({ session, mode = 'settings', onRename, onDelete, onClose, projects = [] }) {
|
||||
const [name, setName] = useState(session?.name || '');
|
||||
const [projectId, setProjectId] = useState(session?.project_id ?? '');
|
||||
const inputRef = useRef(null);
|
||||
|
||||
useEffect(() => {
|
||||
if (mode === 'settings') {
|
||||
inputRef.current?.focus();
|
||||
inputRef.current?.select();
|
||||
}
|
||||
}, [mode]);
|
||||
|
||||
function handleSubmit() {
|
||||
const trimmed = name.trim();
|
||||
if (!trimmed) return;
|
||||
onRename(session, trimmed, projectId || null);
|
||||
onClose();
|
||||
}
|
||||
|
||||
function handleKeyDown(e) {
|
||||
if (e.key === 'Enter' && mode === 'settings') handleSubmit();
|
||||
if (e.key === 'Escape') onClose();
|
||||
}
|
||||
|
||||
if (!session) return null;
|
||||
|
||||
return (
|
||||
<div onClick={onClose} style={{
|
||||
position: 'fixed', inset: 0,
|
||||
background: 'rgba(0,0,0,0.5)',
|
||||
display: 'flex', alignItems: 'center', justifyContent: 'center',
|
||||
zIndex: 100,
|
||||
}}>
|
||||
<div onClick={e => e.stopPropagation()} onKeyDown={handleKeyDown} style={{
|
||||
background: 'var(--bg-surface)',
|
||||
border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-lg)',
|
||||
padding: '24px', width: '360px',
|
||||
display: 'flex', flexDirection: 'column', gap: '16px',
|
||||
}}>
|
||||
{mode === 'settings' ? (
|
||||
<>
|
||||
<h2 style={{ fontSize: '15px', fontWeight: 600, color: 'var(--text-primary)' }}>
|
||||
Session Settings
|
||||
</h2>
|
||||
|
||||
{/* Name */}
|
||||
<div className="flex-col" style={{ gap: '6px' }}>
|
||||
<label className="label-upper">Name</label>
|
||||
<input
|
||||
ref={inputRef}
|
||||
value={name}
|
||||
onChange={e => setName(e.target.value)}
|
||||
placeholder="Enter session name..."
|
||||
style={{
|
||||
background: 'var(--bg-elevated)', border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-md)', padding: '8px 12px',
|
||||
color: 'var(--text-primary)', fontSize: '14px', outline: 'none', width: '100%',
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
|
||||
{/* Project assignment */}
|
||||
<div className="flex-col" style={{ gap: '6px' }}>
|
||||
<label className="label-upper">Project <span style={{ opacity: 0.5 }}>(optional)</span></label>
|
||||
<select
|
||||
value={projectId}
|
||||
onChange={e => setProjectId(e.target.value)}
|
||||
style={{
|
||||
width: '100%', padding: '8px 10px',
|
||||
background: 'var(--bg-elevated)', border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-md)', color: 'var(--text-primary)',
|
||||
fontSize: '13px', cursor: 'pointer', outline: 'none',
|
||||
}}
|
||||
>
|
||||
<option value=''>No project</option>
|
||||
{projects.map(p => (
|
||||
<option key={p.id} value={p.id}>{p.name}</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div className="flex" style={{ gap: '8px', justifyContent: 'flex-end' }}>
|
||||
<button className="btn-reset text-base text-muted"
|
||||
onClick={onClose}
|
||||
style={{ padding: '8px 14px', borderRadius: 'var(--radius-md)' }}>
|
||||
Cancel
|
||||
</button>
|
||||
<button className="btn-primary" onClick={handleSubmit}
|
||||
disabled={!name.trim()}
|
||||
style={{ padding: '8px 16px' }}>
|
||||
Save
|
||||
</button>
|
||||
</div>
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<h2 style={{ fontSize: '15px', fontWeight: 600, color: 'var(--text-primary)' }}>
|
||||
Delete Session
|
||||
</h2>
|
||||
<p className="text-sm text-secondary">
|
||||
Are you sure you want to delete{' '}
|
||||
<span style={{ color: 'var(--text-primary)', fontWeight: 500 }}>
|
||||
{session.name || session.external_id}
|
||||
</span>
|
||||
? This will permanently remove all messages in this conversation.
|
||||
</p>
|
||||
<div className="flex" style={{ gap: '8px', justifyContent: 'flex-end' }}>
|
||||
<button className="btn-reset text-base text-muted"
|
||||
onClick={onClose}
|
||||
style={{ padding: '8px 14px', borderRadius: 'var(--radius-md)' }}>
|
||||
Cancel
|
||||
</button>
|
||||
<button className="btn-reset text-base"
|
||||
onClick={() => { onDelete(session); onClose(); }}
|
||||
style={{ padding: '8px 16px', borderRadius: 'var(--radius-md)', background: '#c0392b', color: 'white' }}>
|
||||
Delete
|
||||
</button>
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
502
packages/chat-client/src/components/SettingsView.jsx
Normal file
502
packages/chat-client/src/components/SettingsView.jsx
Normal file
@@ -0,0 +1,502 @@
|
||||
import React, { useState, useEffect, useCallback } from 'react';
|
||||
import { useSettings } from '../hooks/useSettings';
|
||||
import { useModels } from '../hooks/useModels';
|
||||
import { getServiceHealth } from '../api/orchestration';
|
||||
|
||||
|
||||
export default function SettingsView({ onNavigate, onBack, modelProps }) {
|
||||
const { settings, saveSetting, saving } = useSettings();
|
||||
|
||||
return (
|
||||
<div style={{ display: 'flex', flexDirection: 'column', flex: 1, overflow: 'hidden', background: 'var(--bg-base)' }}>
|
||||
<div className="panel-header" style={{ padding: '0 8px 0 8px' }}>
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: '4px' }}>
|
||||
<button className="btn-icon" onClick={onBack} title="Back" style={{ fontSize: '16px', padding: '4px 8px' }}>←</button>
|
||||
<span className="text-base" style={{ fontWeight: 500, color: 'var(--text-secondary)' }}>Settings</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="flex-1 scroll-y" style={{ padding: '24px' }}>
|
||||
|
||||
<SettingsSection title="Memory">
|
||||
<SettingsRow
|
||||
label="Memory Viewer"
|
||||
description="Browse, search, and delete stored episodes"
|
||||
action={<button className="btn-primary" style={{ padding: '6px 14px', fontSize: '13px' }}
|
||||
onClick={() => onNavigate('memory')}>Open →</button>}
|
||||
/>
|
||||
<NumberSetting
|
||||
label="Recent Episode Limit"
|
||||
description="Recent episodes injected into each prompt"
|
||||
value={settings?.recentEpisodeLimit}
|
||||
min={1} max={20}
|
||||
onSave={val => saveSetting('recentEpisodeLimit', val)}
|
||||
saving={saving}
|
||||
/>
|
||||
<NumberSetting
|
||||
label="Semantic Search Limit"
|
||||
description="Max episodes retrieved via vector search per query"
|
||||
value={settings?.semanticLimit}
|
||||
min={1} max={20}
|
||||
onSave={val => saveSetting('semanticLimit', val)}
|
||||
saving={saving}
|
||||
/>
|
||||
<NumberSetting
|
||||
label="Score Threshold"
|
||||
description="Minimum similarity score for semantic results (0–1)"
|
||||
value={settings?.scoreThreshold}
|
||||
min={0} max={1} step={0.05}
|
||||
onSave={val => saveSetting('scoreThreshold', val)}
|
||||
saving={saving}
|
||||
/>
|
||||
</SettingsSection>
|
||||
|
||||
<SettingsSection title="Models">
|
||||
<SettingsSectionErrorBoundary>
|
||||
<ModelsSection settings={settings} saveSetting={saveSetting} saving={saving} modelProps={modelProps} />
|
||||
</SettingsSectionErrorBoundary>
|
||||
</SettingsSection>
|
||||
|
||||
{/* Global system prompt */}
|
||||
<SettingsSection title="Behaviour">
|
||||
<SystemPromptSetting settings={settings} saveSetting={saveSetting} saving={saving} />
|
||||
</SettingsSection>
|
||||
|
||||
<SettingsSection title="About">
|
||||
<SettingsRow
|
||||
label="Service Health"
|
||||
description="Ping all four services"
|
||||
action={<ServiceHealth />}
|
||||
/>
|
||||
<SettingsRow
|
||||
label="Version"
|
||||
description="NexusAI"
|
||||
action={<span className="text-sm text-muted">v0.1.0</span>}
|
||||
/>
|
||||
</SettingsSection>
|
||||
|
||||
<SettingsSection title="Appearance">
|
||||
<SettingsRow label="Theme" description="UI colour scheme" action={<ComingSoon />} />
|
||||
</SettingsSection>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// ── Error boundary ───────────────────────────────────────────
|
||||
|
||||
class SettingsSectionErrorBoundary extends React.Component {
|
||||
constructor(props) {
|
||||
super(props);
|
||||
this.state = { error: null };
|
||||
}
|
||||
static getDerivedStateFromError(error) {
|
||||
return { error };
|
||||
}
|
||||
render() {
|
||||
if (this.state.error) {
|
||||
return (
|
||||
<SettingsRow
|
||||
label="Models unavailable"
|
||||
description={this.state.error.message ?? 'Failed to load model settings'}
|
||||
action={
|
||||
<button className="btn-primary" style={{ padding: '5px 10px', fontSize: '12px' }}
|
||||
onClick={() => this.setState({ error: null })}>
|
||||
Retry
|
||||
</button>
|
||||
}
|
||||
/>
|
||||
);
|
||||
}
|
||||
return this.props.children;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Layout components ────────────────────────────────────────
|
||||
|
||||
function SettingsSection({ title, children }) {
|
||||
return (
|
||||
<div style={{ marginBottom: '32px' }}>
|
||||
<p className="label-upper" style={{ marginBottom: '12px', color: 'var(--text-secondary)' }}>
|
||||
{title}
|
||||
</p>
|
||||
<div style={{
|
||||
background: 'var(--bg-surface)',
|
||||
border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-lg)',
|
||||
overflow: 'hidden',
|
||||
}}>
|
||||
{children}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function SettingsRow({ label, description, action }) {
|
||||
return (
|
||||
<div style={{
|
||||
display: 'flex', alignItems: 'flex-start', justifyContent: 'space-between',
|
||||
padding: '14px 16px',
|
||||
borderBottom: '1px solid var(--border)',
|
||||
}}>
|
||||
<div style={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
|
||||
<span className="text-sm" style={{ color: 'var(--text-primary)', fontWeight: 500 }}>{label}</span>
|
||||
{description && <span className="text-xs text-muted">{description}</span>}
|
||||
</div>
|
||||
<div style={{ flexShrink: 0, marginLeft: 16 }}>
|
||||
{action}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function NumberSetting({ label, description, value, min, max, step = 1, onSave, saving }) {
|
||||
const [local, setLocal] = useState(value ?? '');
|
||||
const isDirty = local !== '' && Number(local) !== value;
|
||||
|
||||
useEffect(() => {
|
||||
if (value !== undefined) setLocal(value);
|
||||
}, [value]);
|
||||
|
||||
return (
|
||||
<SettingsRow
|
||||
label={label}
|
||||
description={description}
|
||||
action={
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: 6 }}>
|
||||
<input
|
||||
type="number"
|
||||
value={local}
|
||||
min={min} max={max} step={step}
|
||||
onChange={e => setLocal(e.target.value)}
|
||||
style={{
|
||||
width: '64px', padding: '5px 8px', textAlign: 'center',
|
||||
background: 'var(--bg-elevated)', border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-md)', color: 'var(--text-primary)',
|
||||
fontSize: '13px', outline: 'none',
|
||||
}}
|
||||
/>
|
||||
{isDirty && (
|
||||
<button
|
||||
className="btn-primary"
|
||||
style={{ padding: '5px 10px', fontSize: '12px' }}
|
||||
disabled={saving}
|
||||
onClick={() => onSave(Number(local))}
|
||||
>
|
||||
Save
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
}
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
||||
function ComingSoon() {
|
||||
return <span className="text-xs text-muted" style={{ fontStyle: 'italic' }}>Coming soon</span>;
|
||||
}
|
||||
|
||||
// ── System prompt setting ────────────────────────────────────
|
||||
|
||||
function SystemPromptSetting({ settings, saveSetting, saving }) {
|
||||
const [local, setLocal] = useState(settings?.systemPrompt ?? '');
|
||||
const [savedPrompt, setSavedPrompt] = useState(settings?.systemPrompt ?? '');
|
||||
|
||||
useEffect(() => {
|
||||
if (settings?.systemPrompt !== undefined) {
|
||||
setLocal(settings.systemPrompt ?? '');
|
||||
setSavedPrompt(settings.systemPrompt ?? '');
|
||||
}
|
||||
}, [settings?.systemPrompt]);
|
||||
|
||||
const isDirty = local !== savedPrompt;
|
||||
|
||||
async function handleSave() {
|
||||
await saveSetting('systemPrompt', local.trim() || null);
|
||||
setSavedPrompt(local);
|
||||
}
|
||||
|
||||
return (
|
||||
<div style={{ padding: '14px 16px', borderBottom: '1px solid var(--border)' }}>
|
||||
<div style={{ display: 'flex', alignItems: 'flex-start', justifyContent: 'space-between', marginBottom: '8px' }}>
|
||||
<div style={{ display: 'flex', flexDirection: 'column', gap: 2 }}>
|
||||
<span className="text-sm" style={{ color: 'var(--text-primary)', fontWeight: 500 }}>
|
||||
System Prompt
|
||||
</span>
|
||||
<span className="text-xs text-muted">
|
||||
Default instruction given to the model on every request. Projects can override this.
|
||||
</span>
|
||||
</div>
|
||||
{isDirty && (
|
||||
<button
|
||||
className="btn-primary"
|
||||
style={{ padding: '5px 12px', fontSize: '12px', flexShrink: 0, marginLeft: '16px' }}
|
||||
disabled={saving}
|
||||
onClick={handleSave}
|
||||
>
|
||||
{saving ? 'Saving…' : 'Save'}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
<textarea
|
||||
value={local}
|
||||
onChange={e => setLocal(e.target.value)}
|
||||
rows={5}
|
||||
style={{
|
||||
width: '100%',
|
||||
background: 'var(--bg-elevated)',
|
||||
border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-md)',
|
||||
padding: '10px 12px',
|
||||
color: 'var(--text-primary)',
|
||||
fontSize: '13px', lineHeight: '1.6',
|
||||
resize: 'vertical', fontFamily: 'inherit',
|
||||
outline: 'none', boxSizing: 'border-box',
|
||||
}}
|
||||
onFocus={e => e.target.style.borderColor = 'var(--accent)'}
|
||||
onBlur={e => e.target.style.borderColor = 'var(--border)'}
|
||||
/>
|
||||
{!isDirty && local && (
|
||||
<p className="text-xs text-muted" style={{ marginTop: '6px' }}>Saved</p>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// ── Service health ───────────────────────────────────────────
|
||||
|
||||
function ServiceHealth() {
|
||||
const [services, setServices] = useState(null);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [lastChecked, setLastChecked] = useState(null);
|
||||
|
||||
const check = useCallback(async () => {
|
||||
setLoading(true);
|
||||
try {
|
||||
setServices(await getServiceHealth());
|
||||
setLastChecked(new Date());
|
||||
} catch (err) {
|
||||
console.error('[ServiceHealth]', err.message);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
}, []);
|
||||
|
||||
return (
|
||||
<div style={{ display: 'flex', flexDirection: 'column', gap: 8 }}>
|
||||
<div style={{ display: 'flex', alignItems: 'center', gap: 8 }}>
|
||||
<button
|
||||
className="btn-primary"
|
||||
style={{ padding: '5px 12px', fontSize: '12px' }}
|
||||
disabled={loading}
|
||||
onClick={check}
|
||||
>
|
||||
{loading ? 'Checking…' : 'Check Now'}
|
||||
</button>
|
||||
{lastChecked && (
|
||||
<span className="text-xs text-muted">
|
||||
{lastChecked.toLocaleTimeString()}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{services && (
|
||||
<div style={{
|
||||
display: 'flex', flexDirection: 'column',
|
||||
border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-md)',
|
||||
overflow: 'hidden', marginTop: 4,
|
||||
}}>
|
||||
{services.map((svc, i) => (
|
||||
<div key={svc.key} style={{
|
||||
display: 'flex', alignItems: 'center', gap: 10,
|
||||
padding: '8px 12px',
|
||||
borderBottom: i < services.length - 1 ? '1px solid var(--border)' : 'none',
|
||||
background: 'var(--bg-elevated)',
|
||||
}}>
|
||||
<div style={{
|
||||
width: 8, height: 8, borderRadius: '50%', flexShrink: 0,
|
||||
background: svc.status === 'healthy' ? '#2ecc71' : '#e74c3c',
|
||||
}} />
|
||||
<span className="text-sm" style={{ minWidth: 90, color: 'var(--text-primary)' }}>
|
||||
{svc.label}
|
||||
</span>
|
||||
<span className="text-xs text-muted" style={{ flex: 1 }}>
|
||||
{svc.key === 'inference' && svc.detail?.model
|
||||
? svc.detail.model
|
||||
: svc.status === 'unreachable' ? 'Unreachable' : ''}
|
||||
</span>
|
||||
<span className="text-xs text-muted" style={{ flexShrink: 0 }}>
|
||||
{svc.latency}ms
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// ── Models section ───────────────────────────────────────────
|
||||
|
||||
function ModelsSection({ settings, saveSetting, saving, modelProps }) {
|
||||
const { models, selectedModel, setSelectedModel } = useModels();
|
||||
const [selectedInfo, setSelectedInfo] = useState(null);
|
||||
|
||||
useEffect(() => {
|
||||
const m = models.find(m => m.value === selectedModel);
|
||||
setSelectedInfo(m ?? null);
|
||||
}, [selectedModel, models]);
|
||||
|
||||
return (
|
||||
<>
|
||||
<SettingsRow
|
||||
label="Models Folder"
|
||||
description="Path to folder containing .gguf files"
|
||||
action={<ModelsFolderSetting settings={settings} saveSetting={saveSetting} saving={saving} />}
|
||||
/>
|
||||
<NumberSetting
|
||||
label="Temperature"
|
||||
description="Response randomness — lower is more focused, higher is more creative (0–2)"
|
||||
value={settings?.temperature}
|
||||
min={0} max={2} step={0.05}
|
||||
onSave={val => saveSetting('temperature', val)}
|
||||
saving={saving}
|
||||
/>
|
||||
<NumberSetting
|
||||
label="Repeat Penalty"
|
||||
description="Penalises repeated tokens — higher reduces repetition (1–2)"
|
||||
value={settings?.repeatPenalty}
|
||||
min={1} max={2} step={0.05}
|
||||
onSave={val => saveSetting('repeatPenalty', val)}
|
||||
saving={saving}
|
||||
/>
|
||||
<NumberSetting
|
||||
label="Top-P"
|
||||
description="Nucleus sampling — limits token pool by cumulative probability (0–1)"
|
||||
value={settings?.topP}
|
||||
min={0} max={1} step={0.05}
|
||||
onSave={val => saveSetting('topP', val)}
|
||||
saving={saving}
|
||||
/>
|
||||
<NumberSetting
|
||||
label="Top-K"
|
||||
description="Limits token pool to K most likely tokens per step (1–100)"
|
||||
value={settings?.topK}
|
||||
min={1} max={100} step={1}
|
||||
onSave={val => saveSetting('topK', val)}
|
||||
saving={saving}
|
||||
/>
|
||||
<SettingsRow
|
||||
label="Active Model"
|
||||
description="Model used for inference"
|
||||
action={
|
||||
<select
|
||||
value={selectedModel}
|
||||
onChange={e => setSelectedModel(e.target.value)}
|
||||
style={{
|
||||
padding: '6px 10px', fontSize: '13px',
|
||||
background: 'var(--bg-elevated)', border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-md)', color: 'var(--text-primary)',
|
||||
cursor: 'pointer', outline: 'none', maxWidth: '220px',
|
||||
}}
|
||||
>
|
||||
{models.map(m => (
|
||||
<option key={m.value} value={m.value}>{m.label}</option>
|
||||
))}
|
||||
</select>
|
||||
}
|
||||
/>
|
||||
|
||||
{selectedInfo && (
|
||||
<div style={{
|
||||
margin: '0', padding: '14px 16px',
|
||||
borderTop: '1px solid var(--border)',
|
||||
background: 'var(--bg-elevated)',
|
||||
display: 'flex', flexDirection: 'column', gap: 8,
|
||||
}}>
|
||||
<p className="label-upper" style={{ color: 'var(--text-muted)' }}>Model Info</p>
|
||||
<div style={{ display: 'flex', flexDirection: 'column', gap: 6 }}>
|
||||
<InfoLine label="File" value={selectedInfo.value} mono />
|
||||
<InfoLine label="Size" value={selectedInfo.size ?? '—'} />
|
||||
{selectedInfo.description && (
|
||||
<InfoLine label="Description" value={selectedInfo.description} />
|
||||
)}
|
||||
<InfoLine
|
||||
label="Context"
|
||||
value={modelProps?.contextWindow
|
||||
? `${modelProps.contextWindow.toLocaleString()} tokens`
|
||||
: '—'}
|
||||
/>
|
||||
<InfoLine
|
||||
label="Loaded"
|
||||
value={modelProps?.modelAlias ?? '—'}
|
||||
mono
|
||||
/>
|
||||
</div>
|
||||
<p className="text-xs text-muted" style={{ marginTop: 4, fontStyle: 'italic' }}>
|
||||
Model loading and parameter configuration coming soon
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
function InfoLine({ label, value, mono }) {
|
||||
return (
|
||||
<div style={{ display: 'flex', gap: 8, alignItems: 'baseline' }}>
|
||||
<span className="text-xs text-muted" style={{ minWidth: 72, flexShrink: 0 }}>{label}</span>
|
||||
<span style={{
|
||||
fontSize: 12, color: 'var(--text-secondary)',
|
||||
fontFamily: mono ? 'monospace' : 'inherit',
|
||||
wordBreak: 'break-all',
|
||||
}}>{value}</span>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function ModelsFolderSetting({ settings, saveSetting, saving }) {
|
||||
const [local, setLocal] = useState('');
|
||||
const [error, setError] = useState(null);
|
||||
|
||||
useEffect(() => {
|
||||
if (settings?.modelsFolderPath) setLocal(settings.modelsFolderPath);
|
||||
}, [settings?.modelsFolderPath]);
|
||||
|
||||
const isDirty = local !== '' && local !== settings?.modelsFolderPath;
|
||||
|
||||
async function handleSave() {
|
||||
setError(null);
|
||||
try {
|
||||
await saveSetting('modelsFolderPath', local);
|
||||
} catch (err) {
|
||||
setError('Path not accessible');
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div style={{ display: 'flex', flexDirection: 'column', gap: 4, alignItems: 'flex-end' }}>
|
||||
<div style={{ display: 'flex', gap: 6, alignItems: 'center' }}>
|
||||
<input
|
||||
value={local}
|
||||
onChange={e => { setLocal(e.target.value); setError(null); }}
|
||||
style={{
|
||||
width: '220px', padding: '5px 8px', fontSize: '12px',
|
||||
fontFamily: 'monospace',
|
||||
background: 'var(--bg-elevated)', border: `1px solid ${error ? '#e74c3c' : 'var(--border)'}`,
|
||||
borderRadius: 'var(--radius-md)', color: 'var(--text-primary)', outline: 'none',
|
||||
}}
|
||||
/>
|
||||
{isDirty && (
|
||||
<button className="btn-primary" style={{ padding: '5px 10px', fontSize: '12px' }}
|
||||
disabled={saving} onClick={handleSave}>
|
||||
Save
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
{error && <span className="text-xs" style={{ color: '#e74c3c' }}>{error}</span>}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
424
packages/chat-client/src/components/Sidebar.jsx
Normal file
424
packages/chat-client/src/components/Sidebar.jsx
Normal file
@@ -0,0 +1,424 @@
|
||||
import React, { useState } from 'react';
|
||||
import SessionModal from './SessionModal';
|
||||
import { useContextMenu } from '../hooks/useContextMenu';
|
||||
import { renameSession, deleteSession, updateSession } from '../api/orchestration';
|
||||
|
||||
|
||||
export default function Sidebar({
|
||||
sessions,
|
||||
activeSession,
|
||||
onSelectSession,
|
||||
onNewChat,
|
||||
onNewProject,
|
||||
isOpen,
|
||||
onToggle,
|
||||
onSessionsChange,
|
||||
onNavigate,
|
||||
projects,
|
||||
onProjectsChange,
|
||||
onSelectProject
|
||||
}) {
|
||||
const [chatsOpen, setChatsOpen] = useState(true);
|
||||
const [projectsOpen, setProjectsOpen] = useState(true);
|
||||
const [modalSession, setModalSession] = useState(null);
|
||||
const [modalMode, setModalMode] = useState('settings');
|
||||
const [hoveredId, setHoveredId] = useState(null);
|
||||
const { menu, open: openMenu, close: closeMenu } = useContextMenu();
|
||||
|
||||
// ── Handlers ────────────────────────────────────────────
|
||||
|
||||
async function handleRename(session, name, projectId) {
|
||||
try {
|
||||
await updateSession(session.external_id, { name, projectId });
|
||||
onSessionsChange();
|
||||
} catch (err) {
|
||||
console.error('[Sidebar] Rename failed:', err.message);
|
||||
}
|
||||
}
|
||||
|
||||
async function handleDelete(session) {
|
||||
try {
|
||||
await deleteSession(session.external_id);
|
||||
onSessionsChange(session);
|
||||
} catch (err) {
|
||||
console.error('[Sidebar] Delete failed:', err.message);
|
||||
}
|
||||
}
|
||||
|
||||
// ── Collapsed rail ───────────────────────────────────────
|
||||
|
||||
if (!isOpen) {
|
||||
return (
|
||||
<div className="flex-col" style={{
|
||||
width: '48px',
|
||||
flexShrink: 0,
|
||||
background: 'var(--bg-surface)',
|
||||
borderRight: '1px solid var(--border)',
|
||||
alignItems: 'center',
|
||||
paddingTop: '8px',
|
||||
paddingBottom: '8px',
|
||||
gap: '4px',
|
||||
}}>
|
||||
{/* Expand toggle */}
|
||||
<button className="btn-icon" onClick={onToggle} title="Expand sidebar"
|
||||
style={{ marginBottom: '4px' }}>▶</button>
|
||||
|
||||
<div style={{ width: '32px', height: '1px', background: 'var(--border)', margin: '4px 0' }} />
|
||||
|
||||
{/* New Chat */}
|
||||
<button className="btn-icon" onClick={onNewChat} title="New Chat"
|
||||
style={{ fontSize: '18px', color: 'var(--text-secondary)' }}>+</button>
|
||||
|
||||
{/* New Project */}
|
||||
<button className="btn-icon" onClick={onNewProject} title="View Projects"
|
||||
style={{ fontSize: '14px', color: 'var(--text-secondary)' }}>⊞</button>
|
||||
|
||||
{/* All Chats */}
|
||||
<button className="btn-icon" onClick={() => onNavigate('all-chats')} title="All Chats"
|
||||
style={{ fontSize: '14px', color: 'var(--text-secondary)' }}>☰</button>
|
||||
|
||||
{/* Spacer */}
|
||||
<div style={{ flex: 1 }} />
|
||||
|
||||
{/* Settings */}
|
||||
<button className="btn-icon" onClick={() => onNavigate('settings')} title="Settings"
|
||||
style={{ fontSize: '14px', color: 'var(--text-secondary)' }}>⚙</button>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// ── Expanded sidebar ─────────────────────────────────────
|
||||
|
||||
const recentSessions = sessions.slice(0, 10);
|
||||
|
||||
// Group recent sessions by project
|
||||
const grouped = {};
|
||||
const unassigned = [];
|
||||
for (const session of recentSessions) {
|
||||
if (session.project_id) {
|
||||
if (!grouped[session.project_id]) grouped[session.project_id] = [];
|
||||
grouped[session.project_id].push(session);
|
||||
} else {
|
||||
unassigned.push(session);
|
||||
}
|
||||
}
|
||||
|
||||
const sessionRowProps = (session) => ({
|
||||
session,
|
||||
isActive: activeSession?.external_id === session.external_id,
|
||||
isHovered: hoveredId === session.external_id,
|
||||
onHover: setHoveredId,
|
||||
onSelect: () => { onSelectSession(session); onNavigate('chat'); },
|
||||
onRename: () => { setModalMode('settings'); setModalSession(session); },
|
||||
onDelete: () => { setModalMode('confirm-delete'); setModalSession(session); },
|
||||
onContextMenu: e => !session.isNew && openMenu(e, session),
|
||||
});
|
||||
|
||||
return (
|
||||
<>
|
||||
<div className="flex-col" style={{
|
||||
width: 'var(--sidebar-width)',
|
||||
flexShrink: 0,
|
||||
background: 'var(--bg-surface)',
|
||||
borderRight: '1px solid var(--border)',
|
||||
overflow: 'hidden',
|
||||
}}>
|
||||
|
||||
{/* Header */}
|
||||
<div className="panel-header" style={{ justifyContent: 'space-between', padding: '0 12px 0 16px' }}>
|
||||
<span className="text-base" style={{ fontWeight: 1000, color: 'var(--text-secondary)' }}>NexusAI</span>
|
||||
<button className="btn-icon" onClick={onToggle}>◀</button>
|
||||
</div>
|
||||
|
||||
{/* Action buttons */}
|
||||
<div style={{ padding: '10px 10px 6px', display: 'flex', flexDirection: 'column', gap: '6px', flexShrink: 0 }}>
|
||||
<button className="btn-primary" onClick={onNewChat} style={{
|
||||
width: '100%', padding: '7px 12px',
|
||||
display: 'flex', alignItems: 'center', gap: '8px',
|
||||
}}>
|
||||
<span style={{ fontSize: '16px', lineHeight: 1 }}>+</span>
|
||||
<span>New Chat</span>
|
||||
</button>
|
||||
<button className="btn-primary" onClick={onNewProject} style={{
|
||||
width: '100%', padding: '7px 12px',
|
||||
display: 'flex', alignItems: 'center', gap: '8px',
|
||||
}}>
|
||||
<span style={{ fontSize: '14px', lineHeight: 1 }}>⊞</span>
|
||||
<span>View Projects</span>
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div style={{ height: '1px', background: 'var(--border)', flexShrink: 0, margin: '2px 0' }} />
|
||||
|
||||
{/* Scrollable content */}
|
||||
<div className="flex-1 scroll-y">
|
||||
|
||||
{/* ── Projects section ── */}
|
||||
<SectionHeader
|
||||
label="Projects"
|
||||
isOpen={projectsOpen}
|
||||
onToggle={() => setProjectsOpen(o => !o)}
|
||||
/>
|
||||
{projectsOpen && (
|
||||
<div style={{ padding: '4px 10px 8px' }}>
|
||||
{!projects?.length ? (
|
||||
<div style={{
|
||||
padding: '10px',
|
||||
borderRadius: 'var(--radius-md)',
|
||||
border: '1px dashed var(--border)',
|
||||
color: 'var(--text-sb-hdr)',
|
||||
fontSize: '13px',
|
||||
textAlign: 'center',
|
||||
}}>
|
||||
No projects yet
|
||||
</div>
|
||||
) : (
|
||||
<div style={{ display: 'flex', flexWrap: 'wrap', gap: '6px' }}>
|
||||
{projects.slice(0, 6).map(project => (
|
||||
<button
|
||||
key={project.id}
|
||||
onClick={() => { onSelectProject(project); onNavigate('project'); }}
|
||||
className="btn-reset text-xs"
|
||||
style={{
|
||||
padding: '4px 8px',
|
||||
borderRadius: 'var(--radius-sm)',
|
||||
background: 'var(--bg-elevated)',
|
||||
border: `1px solid ${project.colour ?? 'var(--border)'}`,
|
||||
color: 'var(--text-secondary)',
|
||||
maxWidth: '100%',
|
||||
}}
|
||||
title={project.description ?? project.name}
|
||||
>
|
||||
<span className="truncate" style={{ display: 'block', maxWidth: '140px' }}>
|
||||
{project.name}
|
||||
</span>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div style={{ height: '1px', background: 'var(--border)', margin: '2px 0' }} />
|
||||
|
||||
{/* ── Recent Chats section ── */}
|
||||
<SectionHeader
|
||||
label="Recent Chats"
|
||||
isOpen={chatsOpen}
|
||||
onToggle={() => setChatsOpen(o => !o)}
|
||||
/>
|
||||
|
||||
{chatsOpen && (
|
||||
<>
|
||||
{recentSessions.length === 0 && (
|
||||
<div className="text-xs text-muted" style={{ padding: '12px 16px', textAlign: 'center' }}>
|
||||
No conversations yet
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Project groups */}
|
||||
{Object.entries(grouped).map(([projectId, projectSessions]) => {
|
||||
const project = projects?.find(p => p.id === Number(projectId));
|
||||
return (
|
||||
<div key={projectId}>
|
||||
{/* Project group label */}
|
||||
<div style={{
|
||||
display: 'flex', alignItems: 'center', gap: '6px',
|
||||
padding: '6px 16px 2px',
|
||||
}}>
|
||||
<span className=" text-muted truncate"
|
||||
style={{
|
||||
fontSize: '12px',
|
||||
textTransform: 'uppercase',
|
||||
fontWeight: '500',
|
||||
textAlign: 'center',
|
||||
borderRadius: 'var(--radius-md)',
|
||||
border: `1px solid ${project.colour ?? 'var(--border)'}`,
|
||||
padding: '2px 2px',
|
||||
width: '100%'
|
||||
}}>
|
||||
{project?.name ?? 'Project'}
|
||||
</span>
|
||||
</div>
|
||||
{projectSessions.map(session => (
|
||||
<SessionRow key={session.external_id} {...sessionRowProps(session)} />
|
||||
))}
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
|
||||
{/* Unassigned sessions */}
|
||||
{unassigned.length > 0 && (
|
||||
<>
|
||||
{Object.keys(grouped).length > 0 && (
|
||||
<div style={{ padding: '6px 16px 2px' }}>
|
||||
<span className=" text-muted " style={{fontSize: '12px', textTransform: 'uppercase', fontWeight: '500', textAlign: 'center',}}>Other</span>
|
||||
</div>
|
||||
)}
|
||||
{unassigned.map(session => (
|
||||
<SessionRow key={session.external_id} {...sessionRowProps(session)} />
|
||||
))}
|
||||
</>
|
||||
)}
|
||||
|
||||
{sessions.length > 0 && (
|
||||
<button
|
||||
onClick={() => onNavigate('all-chats')}
|
||||
className="btn-reset text-xs text-muted"
|
||||
style={{ width: '100%', padding: '6px', borderRadius: 'var(--radius-sm)' }}
|
||||
onMouseEnter={e => e.currentTarget.style.color = 'var(--text-secondary)'}
|
||||
onMouseLeave={e => e.currentTarget.style.color = 'var(--text-muted)'}
|
||||
>
|
||||
All Chats →
|
||||
</button>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Settings — pinned to bottom */}
|
||||
<div style={{ borderTop: '1px solid var(--border)', padding: '8px 10px', flexShrink: 0 }}>
|
||||
<button
|
||||
onClick={() => onNavigate('settings')}
|
||||
className="btn-reset text-base"
|
||||
style={{
|
||||
width: '100%', padding: '8px 12px',
|
||||
borderRadius: 'var(--radius-md)',
|
||||
display: 'flex', alignItems: 'center', gap: '8px',
|
||||
color: 'var(--text-secondary)',
|
||||
}}
|
||||
onMouseEnter={e => e.currentTarget.style.background = 'var(--bg-elevated)'}
|
||||
onMouseLeave={e => e.currentTarget.style.background = 'transparent'}
|
||||
>
|
||||
<span style={{ fontSize: '14px' }}>⚙</span>
|
||||
<span>Settings</span>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Context menu */}
|
||||
{menu && (
|
||||
<div
|
||||
onClick={e => e.stopPropagation()}
|
||||
style={{
|
||||
position: 'fixed', top: menu.y, left: menu.x,
|
||||
background: 'var(--bg-elevated)', border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-md)', padding: '4px', zIndex: 50, minWidth: '140px',
|
||||
}}
|
||||
>
|
||||
<ContextMenuItem
|
||||
onClick={() => { setModalMode('settings'); setModalSession(menu.session); closeMenu(); }}
|
||||
>✎ Rename</ContextMenuItem>
|
||||
<ContextMenuItem
|
||||
onClick={() => { setModalMode('confirm-delete'); setModalSession(menu.session); closeMenu(); }}
|
||||
danger
|
||||
>✕ Delete</ContextMenuItem>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Session modal */}
|
||||
{modalSession && (
|
||||
<SessionModal
|
||||
session={modalSession}
|
||||
mode={modalMode}
|
||||
onRename={handleRename}
|
||||
onDelete={handleDelete}
|
||||
onClose={() => setModalSession(null)}
|
||||
projects={projects}
|
||||
/>
|
||||
)}
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
// ── Sub-components ───────────────────────────────────────────
|
||||
|
||||
function SectionHeader({ label, isOpen, onToggle }) {
|
||||
return (
|
||||
<button
|
||||
onClick={onToggle}
|
||||
className="btn-reset label-upper"
|
||||
style={{
|
||||
width: '100%', padding: '8px 16px',
|
||||
display: 'flex', alignItems: 'center', justifyContent: 'center',
|
||||
color: 'var(--text-sb-hdr)',
|
||||
|
||||
}}
|
||||
|
||||
>
|
||||
<span>{label}</span>
|
||||
<span style={{ fontSize: '13px' }}>{isOpen ? '▾' : '▸'}</span>
|
||||
</button>
|
||||
);
|
||||
}
|
||||
|
||||
function SessionRow({ session, isActive, isHovered, onHover, onSelect, onRename, onDelete, onContextMenu }) {
|
||||
return (
|
||||
<div
|
||||
onMouseEnter={() => onHover(session.external_id)}
|
||||
onMouseLeave={() => onHover(null)}
|
||||
onContextMenu={onContextMenu}
|
||||
style={{
|
||||
position: 'relative', display: 'flex', alignItems: 'stretch',
|
||||
background: isActive || isHovered ? 'var(--bg-elevated)' : 'transparent',
|
||||
borderLeft: isActive ? '2px solid var(--accent)' : '2px solid transparent',
|
||||
transition: 'background 0.1s',
|
||||
overflow: 'hidden',
|
||||
width: '100%',
|
||||
boxSizing: 'border-box',
|
||||
}}
|
||||
>
|
||||
<button
|
||||
onClick={onSelect}
|
||||
className="btn-reset"
|
||||
style={{
|
||||
flex: 1, padding: '8px 16px',
|
||||
paddingRight: isHovered && !session.isNew ? '4px' : '16px',
|
||||
textAlign: 'left',
|
||||
minWidth: 0,
|
||||
overflow: 'hidden',
|
||||
}}
|
||||
>
|
||||
<span className="text-base truncate" style={{
|
||||
display: 'block',
|
||||
color: isActive ? 'var(--text-primary)' : 'var(--text-secondary)',
|
||||
fontWeight: isActive ? 500 : 400,
|
||||
}}>
|
||||
{session.isNew ? 'New conversation' : (session.name || session.external_id)}
|
||||
</span>
|
||||
{session.isNew && (
|
||||
<span className="text-xs text-accent" style={{ fontStyle: 'italic' }}>Unsaved</span>
|
||||
)}
|
||||
</button>
|
||||
|
||||
<div
|
||||
style={{
|
||||
display: 'flex', alignItems: 'center',
|
||||
gap: '2px',
|
||||
paddingRight: isHovered && !session.isNew ? '8px' : '0px',
|
||||
flexShrink: 0,
|
||||
width: isHovered && !session.isNew ? '44px' : '0px',
|
||||
overflow: 'hidden',
|
||||
transition: 'width 0.1s ease',
|
||||
}}
|
||||
>
|
||||
<button className="btn-icon" title="Rename" onClick={onRename}
|
||||
style={{ padding: '2px 4px', fontSize: '12px' }}>✎</button>
|
||||
<button className="btn-icon" title="Delete" onClick={onDelete}
|
||||
style={{ padding: '2px 4px', fontSize: '12px', color: '#ff6b6b' }}>✕</button>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function ContextMenuItem({ children, onClick, danger }) {
|
||||
return (
|
||||
<button
|
||||
className="btn-reset text-base"
|
||||
onClick={onClick}
|
||||
style={{ width: '100%', padding: '8px 12px', borderRadius: 'var(--radius-sm)', justifyContent: 'flex-start', color: danger ? '#ff6b6b' : 'var(--text-primary)' }}
|
||||
onMouseEnter={e => e.currentTarget.style.background = 'var(--bg-surface)'}
|
||||
onMouseLeave={e => e.currentTarget.style.background = 'transparent'}
|
||||
>{children}</button>
|
||||
);
|
||||
}
|
||||
124
packages/chat-client/src/components/SummaryView.jsx
Normal file
124
packages/chat-client/src/components/SummaryView.jsx
Normal file
@@ -0,0 +1,124 @@
|
||||
import React, { useState, useEffect } from 'react';
|
||||
import { fetchSessionSummaries } from '../api/orchestration';
|
||||
import ReactMarkdown from 'react-markdown';
|
||||
|
||||
export default function SummaryView({ activeSession, onBack }) {
|
||||
const [summaries, setSummaries] = useState([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [error, setError] = useState(null);
|
||||
const [expanded, setExpanded] = useState(null);
|
||||
|
||||
useEffect(() => {
|
||||
if (!activeSession || activeSession.isNew) {
|
||||
setLoading(false);
|
||||
return;
|
||||
}
|
||||
setLoading(true);
|
||||
fetchSessionSummaries(activeSession.external_id)
|
||||
.then(data => setSummaries(Array.isArray(data) ? data : []))
|
||||
.catch(err => setError(err.message))
|
||||
.finally(() => setLoading(false));
|
||||
}, [activeSession]);
|
||||
|
||||
function formatTimestamp(ts) {
|
||||
if (!ts) return '—';
|
||||
return new Date(ts * 1000).toLocaleString([], {
|
||||
month: 'short', day: 'numeric',
|
||||
hour: '2-digit', minute: '2-digit',
|
||||
});
|
||||
}
|
||||
|
||||
return (
|
||||
<div style={{ display: 'flex', flexDirection: 'column', flex: 1, overflow: 'hidden', background: 'var(--bg-base)' }}>
|
||||
|
||||
{/* Header */}
|
||||
<div className="panel-header" style={{ padding: '0 24px', gap: 12 }}>
|
||||
<button className="btn-icon" onClick={onBack}>←</button>
|
||||
<span className="text-base" style={{ fontWeight: 500 }}>Session Memory</span>
|
||||
<span className="text-sm text-muted" style={{ marginLeft: 'auto' }}>
|
||||
{summaries.length} summar{summaries.length !== 1 ? 'ies' : 'y'}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Session name pill */}
|
||||
{activeSession && (
|
||||
<div style={{ padding: '8px 24px 0' }}>
|
||||
<span className="text-xs text-muted" style={{
|
||||
background: 'var(--bg-elevated)',
|
||||
border: '1px solid var(--border)',
|
||||
borderRadius: '999px',
|
||||
padding: '3px 10px',
|
||||
}}>
|
||||
{activeSession.name || activeSession.external_id}
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Content */}
|
||||
<div className="scroll-y flex-1" style={{ padding: '16px 24px' }}>
|
||||
{loading && <p className="text-sm text-muted">Loading…</p>}
|
||||
{error && <p className="text-sm" style={{ color: 'var(--error, #e05)' }}>{error}</p>}
|
||||
|
||||
{!loading && !activeSession && (
|
||||
<p className="text-sm text-muted">No active session.</p>
|
||||
)}
|
||||
|
||||
{!loading && activeSession && summaries.length === 0 && (
|
||||
<div style={{
|
||||
display: 'flex', flexDirection: 'column', alignItems: 'center',
|
||||
gap: '12px', padding: '48px 0', color: 'var(--text-muted)',
|
||||
}}>
|
||||
<span style={{ fontSize: '28px', opacity: 0.3 }}>◈</span>
|
||||
<p className="text-sm">No summaries yet for this session.</p>
|
||||
<p className="text-xs text-muted" style={{ maxWidth: '280px', textAlign: 'center', lineHeight: 1.6 }}>
|
||||
Summaries generate automatically once a session accumulates enough conversation.
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{summaries.map(summary => (
|
||||
<div key={summary.id} style={{
|
||||
background: 'var(--bg-surface)',
|
||||
border: '1px solid var(--border)',
|
||||
borderRadius: 'var(--radius-lg)',
|
||||
marginBottom: '10px', overflow: 'hidden',
|
||||
}}>
|
||||
{/* Card header */}
|
||||
<div
|
||||
onClick={() => setExpanded(expanded === summary.id ? null : summary.id)}
|
||||
style={{ display: 'flex', alignItems: 'center', gap: '10px', padding: '10px 14px', cursor: 'pointer' }}
|
||||
>
|
||||
<span style={{ flex: 1, fontSize: 13, color: 'var(--text-primary)' }}>
|
||||
Episodes {summary.episode_range}
|
||||
</span>
|
||||
<span className="text-xs text-muted">{formatTimestamp(summary.created_at)}</span>
|
||||
<span className="text-muted" style={{ fontSize: 11 }}>
|
||||
{expanded === summary.id ? '▲' : '▼'}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{/* Expanded content */}
|
||||
{expanded === summary.id && (
|
||||
<div style={{ padding: '0 14px 14px', borderTop: '1px solid var(--border)' }}>
|
||||
<ReactMarkdown components={{
|
||||
p: ({ children }) => (
|
||||
<p style={{ margin: '8px 0', lineHeight: 1.7, fontSize: 13, color: 'var(--text-secondary)' }}>
|
||||
{children}
|
||||
</p>
|
||||
),
|
||||
}}>
|
||||
{summary.content}
|
||||
</ReactMarkdown>
|
||||
{summary.token_count > 0 && (
|
||||
<p className="text-xs text-muted" style={{ marginTop: 8 }}>
|
||||
{summary.token_count.toLocaleString()} tokens covered
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
19
packages/chat-client/src/config/constants.js
Normal file
19
packages/chat-client/src/config/constants.js
Normal file
@@ -0,0 +1,19 @@
|
||||
/**
 * Static list of selectable models. Each entry pairs the model identifier
 * sent to the backend (`value`) with the name shown to the user (`label`).
 */
export const FALLBACK_MODELS = [
  { value: 'companion:latest', label: 'Companion' },
  { value: 'mistral-nemo:latest', label: 'Mistral Nemo' },
  { value: 'coder:latest', label: 'Coder' },
  { value: 'qwen2.5-coder:14b', label: 'Qwen 2.5 Coder 14B' },
];

/** Identifier of the model selected by default: the first fallback entry. */
export const DEFAULT_MODEL = FALLBACK_MODELS[0].value;

/**
 * Numeric defaults for API calls.
 * NOTE(review): the names suggest request paging (limit/offset); the call
 * sites are not visible from this file — confirm before changing values.
 */
export const API_DEFAULTS = {
  SESSIONS_LIMIT: 20,
  HISTORY_LIMIT: 50,
  OFFSET: 0,
  EPISODE_LIMIT: 50,
};

/** Numeric defaults used on the client side only. */
export const CLIENT_DEFAULTS = {
  PAGE_SIZE: 20,
};
|
||||
113
packages/chat-client/src/hooks/useChat.js
Normal file
113
packages/chat-client/src/hooks/useChat.js
Normal file
@@ -0,0 +1,113 @@
|
||||
import React, { useEffect, useState, useCallback, useRef } from 'react';
|
||||
import { streamMessage, updateSession } from '../api/orchestration';
|
||||
|
||||
/**
 * Chat hook: sends a user message for a session and streams the assistant
 * reply into the message thread owned by the caller.
 *
 * @param {object} deps
 * @param {object|null} deps.activeSession - Session used when `sendMessage`
 *   is not given one explicitly; its `external_id` is passed to the API.
 * @param {Function} deps.appendMessage - Appends a message object to the thread.
 * @param {Function} deps.updateLastMessage - Applies an updater `(msg) => msg`
 *   to the last message in the thread.
 * @param {Function} deps.refreshSessions - Reloads the session list.
 * @returns {{
 *   sendMessage: Function, cancelStream: Function, streaming: boolean,
 *   error: (string|null), lastTokenCount: number, lastModel: (string|null),
 *   summarising: boolean
 * }}
 */
export function useChat({ activeSession, appendMessage, updateLastMessage, refreshSessions }) {
  const [streaming, setStreaming] = useState(false);
  const [error, setError] = useState(null);
  const [lastTokenCount, setLastTokenCount] = useState(0);
  const [lastModel, setLastModel] = useState(null);
  const [summarising, setSummarising] = useState(false);

  // Holds the value returned by streamMessage; cancelStream invokes it.
  const cancelRef = useRef(null);
  // Value of `streaming` seen by the previous effect run.
  const prevStreaming = useRef(false);

  useEffect(() => {
    const justFinished = prevStreaming.current && !streaming;
    // Always record the current value, including on the finishing transition.
    prevStreaming.current = streaming;
    if (!justFinished) return undefined;

    // Stream just finished — show the summarising indicator for 8 seconds.
    setSummarising(true);
    const timer = setTimeout(() => setSummarising(false), 8000);

    return () => {
      // A new stream started (or the hook unmounted) before the timer fired:
      // cancel it AND clear the flag, otherwise the indicator would stay on
      // for the whole duration of the next stream.
      clearTimeout(timer);
      setSummarising(false);
    };
  }, [streaming]);

  /**
   * Sends `text` and streams the reply into the thread.
   *
   * @param {string} text - User message; ignored when blank or not a string.
   * @param {string} model - Model identifier forwarded to streamMessage.
   * @param {*} [projectId=null] - When truthy, assigned to the session once
   *   the stream completes.
   * @param {object|null} [session=null] - Overrides `activeSession`.
   */
  const sendMessage = useCallback(async (text, model, projectId = null, session = null) => {
    const targetSession = session ?? activeSession;
    if (!targetSession || typeof text !== 'string' || !text.trim() || streaming) return;

    setError(null);

    // 1. Append user bubble immediately
    appendMessage({
      id: `user-${Date.now()}`,
      role: 'user',
      text,
    });

    // 2. Append empty assistant bubble — will be filled by stream
    appendMessage({
      id: `assistant-${Date.now()}`,
      role: 'assistant',
      text: '',
      streaming: true,
    });

    setStreaming(true);

    // Shared failure path: used by the stream's onError callback and by the
    // catch below, so the UI can never be left stuck in the streaming state.
    const fail = (err) => {
      updateLastMessage(msg => ({
        ...msg,
        text: msg.text || 'Something went wrong.',
        streaming: false,
        error: true,
      }));
      setError(err?.message ?? String(err));
      setStreaming(false);
      cancelRef.current = null;
    };

    // 3. Open stream
    try {
      cancelRef.current = streamMessage(
        targetSession.external_id,
        text,
        model,
        {
          onChunk: (token) => {
            updateLastMessage(msg => ({
              ...msg,
              text: msg.text + token,
            }));
          },

          onDone: ({ model: resolvedModel, tokenCount } = {}) => {
            // Mark bubble as complete
            updateLastMessage(msg => ({ ...msg, streaming: false }));
            setLastTokenCount(tokenCount);
            setLastModel(resolvedModel);
            setStreaming(false);
            cancelRef.current = null;

            // Refresh session list so new sessions appear in sidebar
            refreshSessions();

            // Delayed refresh
            // NOTE(review): presumably picks up data the backend writes
            // shortly after the stream ends — confirm.
            setTimeout(() => refreshSessions(), 3000);

            // Assign project after first message if one was set
            if (projectId) {
              updateSession(targetSession.external_id, { projectId })
                .catch(err => console.warn('[useChat] Failed to assign project:', err.message));
            }
          },

          onError: fail,
        }
      );
    } catch (err) {
      // streamMessage threw before the stream was opened.
      fail(err);
    }
  }, [activeSession, streaming, appendMessage, updateLastMessage, refreshSessions]);

  /** Aborts the in-flight stream, if any, and marks the last bubble as complete. */
  const cancelStream = useCallback(() => {
    if (cancelRef.current) {
      cancelRef.current();
      cancelRef.current = null;
      updateLastMessage(msg => ({ ...msg, streaming: false }));
      setStreaming(false);
    }
  }, [updateLastMessage]);

  return {
    sendMessage,
    cancelStream,
    streaming,
    error,
    lastTokenCount,
    lastModel,
    summarising,
  };
}
|
||||
22
packages/chat-client/src/hooks/useContextMenu.js
Normal file
22
packages/chat-client/src/hooks/useContextMenu.js
Normal file
@@ -0,0 +1,22 @@
|
||||
import { useState, useEffect, useCallback } from 'react';
|
||||
|
||||
/**
 * State for a context menu anchored at the pointer position.
 *
 * @returns {{ menu: (object|null), open: Function, close: Function }}
 *   `menu` is null while hidden, otherwise `{ x, y, session }`;
 *   `open(event, session)` shows the menu at the event's client coordinates
 *   and suppresses the event's default action; `close()` hides it.
 */
export function useContextMenu() {
  // null while hidden; { x, y, session } while shown
  const [menu, setMenu] = useState(null);

  const close = useCallback(() => {
    setMenu(null);
  }, []);

  const open = useCallback((e, session) => {
    e.preventDefault();
    const { clientX: x, clientY: y } = e;
    setMenu({ x, y, session });
  }, []);

  // While the menu is shown, any click on the window dismisses it.
  useEffect(() => {
    if (!menu) {
      return undefined;
    }

    const dismiss = () => close();
    window.addEventListener('click', dismiss);

    return () => {
      window.removeEventListener('click', dismiss);
    };
  }, [menu, close]);

  return { menu, open, close };
}
|
||||
24
packages/chat-client/src/hooks/useModels.js
Normal file
24
packages/chat-client/src/hooks/useModels.js
Normal file
@@ -0,0 +1,24 @@
|
||||
// hooks/useModels.js
|
||||
import { useState, useEffect } from 'react';
|
||||
import { fetchModels } from '../api/orchestration';
|
||||
import { FALLBACK_MODELS, DEFAULT_MODEL } from '../config/constants';
|
||||
|
||||
/**
 * Loads the selectable model list once on mount.
 *
 * Starts from FALLBACK_MODELS / DEFAULT_MODEL and replaces them with the
 * result of fetchModels() when that call yields a non-empty array. On
 * failure, or on an empty or malformed response, the static fallback list
 * stays in place so the model picker is never left empty.
 *
 * @returns {{ models: Array, selectedModel: string, setSelectedModel: Function, loading: boolean }}
 */
export function useModels() {
  const [models, setModels] = useState(FALLBACK_MODELS);
  const [selectedModel, setSelectedModel] = useState(DEFAULT_MODEL);
  const [loading, setLoading] = useState(true);

  useEffect(() => {
    // Set by the cleanup so a late response does not touch state after unmount.
    let cancelled = false;

    fetchModels()
      .then(data => {
        if (cancelled) return;
        if (!Array.isArray(data) || data.length === 0) {
          console.warn('[useModels] Falling back to static list:', 'empty or invalid model list');
          return;
        }
        setModels(data);
        setSelectedModel(data[0]?.value ?? DEFAULT_MODEL);
      })
      .catch(err => {
        console.warn('[useModels] Falling back to static list:', err.message);
      })
      .finally(() => {
        if (!cancelled) setLoading(false);
      });

    return () => {
      cancelled = true;
    };
  }, []);

  return { models, selectedModel, setSelectedModel, loading };
}
|
||||
19
packages/chat-client/src/hooks/useProjects.js
Normal file
19
packages/chat-client/src/hooks/useProjects.js
Normal file
@@ -0,0 +1,19 @@
|
||||
import { useState, useEffect, useCallback } from 'react';
|
||||
import { fetchProjects } from '../api/orchestration';
|
||||
|
||||
|
||||
/**
 * Holds the project list and exposes a way to reload it.
 *
 * @returns {{ projects: Array, refreshProjects: Function }}
 *   `projects` starts empty; `refreshProjects()` fetches the list and stores
 *   it, logging a warning (and keeping the previous list) on failure.
 */
export function useProjects() {
  const [projects, setProjects] = useState([]);

  const refreshProjects = useCallback(async () => {
    try {
      const loaded = await fetchProjects();
      setProjects(loaded);
    } catch (err) {
      console.warn('[useProjects] Failed to load projects:', err.message);
    }
  }, []);

  // Initial load on mount.
  useEffect(() => {
    refreshProjects();
  }, [refreshProjects]);

  return { projects, refreshProjects };
}
|
||||
98
packages/chat-client/src/hooks/useSession.js
Normal file
98
packages/chat-client/src/hooks/useSession.js
Normal file
@@ -0,0 +1,98 @@
|
||||
import { useState, useEffect, useCallback, useRef } from 'react';
import { v4 as uuidv4 } from 'uuid';
import { fetchSessions, fetchSessionHistory } from '../api/orchestration';
|
||||
|
||||
/**
 * Converts history episodes into chat messages.
 * History comes back newest-first, so the list is reversed for display and
 * each episode expands into a user message followed by the assistant reply.
 */
function episodesToMessages(episodes) {
  return [...episodes].reverse().flatMap(ep => [
    { id: `${ep.id}-user`, role: 'user', text: ep.user_message },
    { id: `${ep.id}-ai`, role: 'assistant', text: ep.ai_response },
  ]);
}

/**
 * Session hook: owns the session list, the active session and its message
 * thread.
 *
 * @returns {{
 *   sessions: Array, setSessions: Function, activeSession: (object|null),
 *   messages: Array, loadingHistory: boolean, error: (string|null),
 *   selectSession: Function, createSession: Function,
 *   refreshSessions: Function, appendMessage: Function,
 *   updateLastMessage: Function
 * }}
 */
export function useSession() {
  const [sessions, setSessions] = useState([]);
  const [activeSession, setActiveSession] = useState(null);
  const [messages, setMessages] = useState([]);
  const [loadingHistory, setLoadingHistory] = useState(false);
  const [error, setError] = useState(null);

  // Bumped on every session switch/creation. A history response is applied
  // only if its token is still the current one, so a slow response for a
  // previously selected session cannot overwrite the thread now on screen.
  const historyRequest = useRef(0);

  // Called by useChat after a message completes — keeps session list fresh
  const refreshSessions = useCallback(async () => {
    try {
      const data = await fetchSessions();
      setSessions(data);
    } catch {
      // non-critical — sidebar just won't update
    }
  }, []);

  // Load session list on mount
  useEffect(() => {
    refreshSessions();
  }, [refreshSessions]);

  // Switch to an existing session and load its history
  const selectSession = useCallback(async (session) => {
    const requestId = ++historyRequest.current;

    setActiveSession(session);
    setMessages([]);
    setError(null); // do not carry a previous session's load error over

    if (!session || session.isNew) {
      // Nothing to fetch; also ends the spinner of a load this call superseded.
      setLoadingHistory(false);
      return;
    }

    setLoadingHistory(true);

    try {
      const data = await fetchSessionHistory(session.external_id);
      if (historyRequest.current !== requestId) return; // superseded
      setMessages(episodesToMessages(data.episodes));
    } catch (err) {
      if (historyRequest.current === requestId) setError(err.message);
    } finally {
      // Only the latest request may turn the spinner off.
      if (historyRequest.current === requestId) setLoadingHistory(false);
    }
  }, []);

  // Create a new session with a generated UUID — no backend call needed yet,
  // orchestration auto-creates the session on the first message
  const createSession = useCallback(() => {
    const newSession = {
      external_id: uuidv4(),
      metadata: null,
      isNew: true,
    };

    // Invalidate any history load still in flight for the previous session.
    historyRequest.current += 1;
    setLoadingHistory(false);

    setSessions(prev => [newSession, ...prev]);
    setActiveSession(newSession);
    setMessages([]);
    return newSession;
  }, []);

  // Append a message to the current thread (used by useChat)
  const appendMessage = useCallback((message) => {
    setMessages(prev => [...prev, message]);
  }, []);

  // Update the last message in the thread (used by useChat during streaming)
  const updateLastMessage = useCallback((updater) => {
    setMessages(prev => {
      // Empty thread: nothing to update. Without this guard the updater would
      // receive undefined and the result would be written to index -1.
      if (prev.length === 0) return prev;
      const updated = [...prev];
      updated[updated.length - 1] = updater(updated[updated.length - 1]);
      return updated;
    });
  }, []);

  return {
    sessions,
    setSessions,
    activeSession,
    messages,
    loadingHistory,
    error,
    selectSession,
    createSession,
    refreshSessions,
    appendMessage,
    updateLastMessage,
  };
}
|
||||
25
packages/chat-client/src/hooks/useSettings.js
Normal file
25
packages/chat-client/src/hooks/useSettings.js
Normal file
@@ -0,0 +1,25 @@
|
||||
import { useState, useEffect } from 'react';
|
||||
import { getSettings, updateSettings } from '../api/orchestration';
|
||||
|
||||
/**
 * Loads settings once on mount and exposes a single-key save.
 *
 * @returns {{ settings: (object|null), saveSetting: Function, saving: boolean }}
 *   `settings` is null until the initial load resolves; `saveSetting(key, value)`
 *   sends `{ [key]: value }` and stores whatever the update call returns;
 *   `saving` is true while a save is in flight.
 */
export function useSettings() {
  const [settings, setSettings] = useState(null);
  const [saving, setSaving] = useState(false);

  // Initial load; failures are only logged.
  useEffect(() => {
    getSettings()
      .then((loaded) => setSettings(loaded))
      .catch((err) => console.error(err));
  }, []);

  const saveSetting = async (key, value) => {
    setSaving(true);
    try {
      const patch = { [key]: value };
      const updated = await updateSettings(patch);
      setSettings(updated);
    } catch (err) {
      console.error('[useSettings] Save failed:', err.message);
    } finally {
      setSaving(false);
    }
  };

  return { settings, saveSetting, saving };
}
|
||||
127
packages/chat-client/src/index.css
Normal file
127
packages/chat-client/src/index.css
Normal file
@@ -0,0 +1,127 @@
|
||||
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
|
||||
:root {
|
||||
--bg-base: #9c9a9a;
|
||||
--bg-surface: #000000;
|
||||
--bg-elevated: #111111;
|
||||
--border: #989899;
|
||||
--accent: #333335;
|
||||
--accent-hover: #574fd6;
|
||||
--text-primary: #e8e8f0;
|
||||
--text-secondary: #8b8fa8;
|
||||
--text-muted: #ababaf;
|
||||
--text-sb-hdr: #ffffff;
|
||||
--bubble-user: #020202;
|
||||
--bubble-ai: #303033;
|
||||
--warning: #ec5353;
|
||||
--sidebar-width: 180px;
|
||||
--panel-width: 200px;
|
||||
--header-height: 40px;
|
||||
--radius-sm: 6px;
|
||||
--radius-md: 8px;
|
||||
--radius-lg: 12px;
|
||||
}
|
||||
|
||||
html, body, #root {
|
||||
height: 100%;
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
|
||||
background: var(--bg-base);
|
||||
color: var(--text-primary);
|
||||
font-size: 15px;
|
||||
}
|
||||
|
||||
@keyframes blink {
|
||||
0%, 100% { opacity: 1; }
|
||||
50% { opacity: 0; }
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
to { transform: rotate(360deg); }
|
||||
}
|
||||
|
||||
/* ── Layout ─────────────────────────────────────────── */
|
||||
|
||||
.flex { display: flex; }
|
||||
.flex-col { display: flex; flex-direction: column; }
|
||||
.flex-1 { flex: 1; }
|
||||
.flex-shrink { flex-shrink: 0; }
|
||||
.items-center { align-items: center; }
|
||||
.justify-center { justify-content: center; }
|
||||
.justify-between { justify-content: space-between; }
|
||||
.overflow-hidden { overflow: hidden; }
|
||||
.scroll-y { overflow-y: auto; overflow-x: hidden; }
|
||||
|
||||
/* ── Panel header — shared by all three sidebars ────── */
|
||||
|
||||
.panel-header {
|
||||
height: var(--header-height);
|
||||
display: flex;
|
||||
align-items: center;
|
||||
border-bottom: 1px solid var(--border);
|
||||
flex-shrink: 0;
|
||||
background: var(--bg-surface);
|
||||
}
|
||||
|
||||
/* ── Button resets ──────────────────────────────────── */
|
||||
|
||||
.btn-reset {
|
||||
background: none;
|
||||
border: none;
|
||||
cursor: pointer;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: flex-start;
|
||||
min-width: 0;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.btn-icon {
|
||||
background: none;
|
||||
border: none;
|
||||
cursor: pointer;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
padding: 6px;
|
||||
border-radius: var(--radius-sm);
|
||||
color: var(--text-muted);
|
||||
font-size: 16px;
|
||||
line-height: 1;
|
||||
}
|
||||
|
||||
.btn-icon:hover { background: var(--bg-elevated); }
|
||||
|
||||
.btn-primary {
|
||||
background: var(--accent);
|
||||
border: none;
|
||||
border-radius: var(--radius-md);
|
||||
color: white;
|
||||
cursor: pointer;
|
||||
font-size: 13px;
|
||||
font-weight: 500;
|
||||
transition: background 0.15s;
|
||||
}
|
||||
|
||||
.btn-primary:hover { background: var(--accent-hover); }
|
||||
.btn-primary:disabled { background: var(--bg-elevated); color: var(--text-muted); cursor: default; }
|
||||
|
||||
/* ── Typography helpers ─────────────────────────────── */
|
||||
|
||||
.text-xs { font-size: 11px; }
|
||||
.text-sm { font-size: 12px; }
|
||||
.text-base { font-size: 13px; }
|
||||
.text-muted { color: var(--text-muted); }
|
||||
.text-secondary { color: var(--text-secondary); }
|
||||
.text-accent { color: var(--accent); }
|
||||
.label-upper { font-size: 13px; font-weight: 750; color: var(--text-muted); text-transform: uppercase; letter-spacing: 0.08em; }
|
||||
.truncate { overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
|
||||
|
||||
.spinner {
|
||||
width: 12px;
|
||||
height: 12px;
|
||||
border: 2px solid var(--border);
|
||||
border-top-color: var(--text-muted);
|
||||
border-radius: 50%;
|
||||
animation: spin 0.7s linear infinite;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
10
packages/chat-client/src/main.jsx
Normal file
10
packages/chat-client/src/main.jsx
Normal file
@@ -0,0 +1,10 @@
|
||||
import React from 'react';
|
||||
import ReactDOM from 'react-dom/client';
|
||||
import App from './App';
|
||||
import './index.css';
|
||||
|
||||
ReactDOM.createRoot(document.getElementById('root')).render(
|
||||
<React.StrictMode>
|
||||
<App />
|
||||
</React.StrictMode>
|
||||
);
|
||||
22
packages/chat-client/vite.config.js
Normal file
22
packages/chat-client/vite.config.js
Normal file
@@ -0,0 +1,22 @@
|
||||
import { defineConfig } from 'vite';
|
||||
import react from '@vitejs/plugin-react';
|
||||
|
||||
// Backend origin the dev server forwards API calls to. Set the
// VITE_PROXY_TARGET environment variable to point at another host; when it is
// unset or empty the original hard-coded address is used.
const PROXY_TARGET = process.env.VITE_PROXY_TARGET || 'http://192.168.0.205:4000';

// Path prefixes that are forwarded to PROXY_TARGET.
const PROXIED_PATHS = [
  '/chat',
  '/sessions',
  '/models',
  '/projects',
  '/episodes',
  '/settings',
  '/health',
  '/summaries',
];

export default defineConfig({
  plugins: [react()],
  build: {
    outDir: 'dist',
  },
  server: {
    port: 5173,
    // Same mapping as before: every listed prefix goes to the one backend.
    proxy: Object.fromEntries(PROXIED_PATHS.map((path) => [path, PROXY_TARGET])),
  },
});
|
||||
64
packages/embedding-service/CLAUDE.md
Normal file
64
packages/embedding-service/CLAUDE.md
Normal file
@@ -0,0 +1,64 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
See the root [CLAUDE.md](../../CLAUDE.md) for overall architecture, service roles, and deployment layout.
|
||||
|
||||
## Running This Service
|
||||
|
||||
```bash
|
||||
npm run embedding # From repo root
|
||||
npm -w packages/embedding-service run dev # With --watch
|
||||
```
|
||||
|
||||
Default port: **3003**. Requires Ollama to be reachable at `OLLAMA_URL`.
|
||||
|
||||
## Single-File Service
|
||||
|
||||
The entire service is `src/index.js` — no subdirectory structure. All routes, the Ollama helper, and startup are in one file.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|---|---|---|
|
||||
| `PORT` | `3003` | Port to listen on |
|
||||
| `OLLAMA_URL` | `http://localhost:11434` | Ollama instance URL |
|
||||
| `EMBEDDING_MODEL` | `nomic-embed-text` | Model passed to Ollama `/api/embed` |
|
||||
|
||||
Note: the environment variable is named `EMBEDDING_MODEL`, not `EMBED_MODEL`. `EMBED_MODEL` is only the name of the internal constant that holds its value.
|
||||
|
||||
## Ollama API Details
|
||||
|
||||
Uses Ollama's `/api/embed` endpoint (not `/api/embeddings`). Request shape:
|
||||
|
||||
```json
|
||||
{ "model": "nomic-embed-text", "input": "text to embed" }
|
||||
```
|
||||
|
||||
Ollama returns `{ "embeddings": [[...]] }` — an array of arrays even for a single input. The helper takes `data.embeddings[0]` to return the single vector.
|
||||
|
||||
The `ollama` npm package is listed as a dependency but is **not used** — all calls are raw `fetch`. Do not refactor to use the package without checking the API shape matches.
|
||||
|
||||
## Batch Endpoint
|
||||
|
||||
`POST /embed/batch` embeds items **sequentially** in a for-loop, not in parallel. The comment explains this: Ollama doesn't parallelise embedding calls, so parallel requests would queue internally anyway. Do not change to `Promise.all` without verifying Ollama behaviour.
|
||||
|
||||
## Error Responses
|
||||
|
||||
| Condition | Status | Notes |
|
||||
|---|---|---|
|
||||
| Missing/empty `text` | 400 | |
|
||||
| Ollama call fails | 502 | Upstream failure — correct status |
|
||||
| Empty `texts` array | 400 | |
|
||||
|
||||
## Known Issue
|
||||
|
||||
The 400 error message for `/embed` reads `"text is required and must be empty"` — the word "not" is missing. Should read `"must not be empty"`.
|
||||
|
||||
## API Endpoints
|
||||
|
||||
| Method | Path | Notes |
|
||||
|---|---|---|
|
||||
| GET | `/health` | Static response — does not verify Ollama is reachable |
|
||||
| POST | `/embed` | Body: `{ text: string }`. Returns `{ embedding, model, dimensions }` |
|
||||
| POST | `/embed/batch` | Body: `{ texts: string[] }`. Returns `{ embeddings, model, dimensions, count }` |
|
||||
@@ -9,7 +9,6 @@
|
||||
"dependencies": {
|
||||
"@nexusai/shared": "^1.0.0",
|
||||
"dotenv": "^17.4.0",
|
||||
"express": "^5.2.1",
|
||||
"ollama": "^0.6.3"
|
||||
"express": "^5.2.1"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,23 +1,21 @@
|
||||
require ('dotenv').config();
|
||||
const express = require('express');
|
||||
const {getEnv} = require('@nexusai/shared');
|
||||
const {getEnv, OLLAMA, PORTS, logger} = require('@nexusai/shared');
|
||||
|
||||
const app = express();
|
||||
app.use(express.json());
|
||||
app.use(express.json({ limit: '1mb' })); // limit request body to 1mb to prevent abuse - embedding requests should be small
|
||||
|
||||
const PORT = getEnv('PORT', '3003'); // Default to 3003 if PORT is not set
|
||||
const OLLAMA_URL = getEnv('OLLAMA_URL', 'http://localhost:11434'); // URL for Ollama API
|
||||
const EMBED_MODEL = getEnv('EMBEDDING_MODEL', 'nomic-embed-text'); // Ollama model for embeddings
|
||||
|
||||
console.log('OLLAMA_URL:', OLLAMA_URL);
|
||||
console.log('EMBED_MODEL:', EMBED_MODEL);
|
||||
const PORT = getEnv('PORT', PORTS.EMBEDDING);
|
||||
const OLLAMA_URL = getEnv('OLLAMA_URL', OLLAMA.DEFAULT_URL);
|
||||
const EMBED_MODEL = getEnv('EMBEDDING_MODEL', OLLAMA.EMBED_MODEL);
|
||||
|
||||
//OLLAMA embedding helper function
|
||||
async function embedText(text) {
|
||||
const res = await fetch(`${OLLAMA_URL}/api/embed`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ model: EMBED_MODEL, input: text })
|
||||
body: JSON.stringify({ model: EMBED_MODEL, input: text }),
|
||||
signal: AbortSignal.timeout(30_000),
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
@@ -40,7 +38,7 @@ app.get('/health', (req,res) => {
|
||||
app.post('/embed', async (req, res) => {
|
||||
const { text } = req.body;
|
||||
if (!text || typeof text !== 'string' || text.trim() === '') {
|
||||
return res.status(400).json({ error: 'text is required and must be empty' });
|
||||
return res.status(400).json({ error: 'text is required and must not be empty' });
|
||||
}
|
||||
|
||||
try {
|
||||
@@ -63,7 +61,10 @@ app.post('/embed/batch', async (req, res) => {
|
||||
}
|
||||
|
||||
try {
|
||||
//sequential embedding for now, Ollama doesn't natively parallize embeddings
|
||||
const invalid = texts.findIndex(t => !t || typeof t !== 'string' || t.trim() === '');
|
||||
if (invalid !== -1)
|
||||
return res.status(400).json({ error: `texts[${invalid}] is empty or not a string` });
|
||||
|
||||
const embeddings = [];
|
||||
for (const text of texts) {
|
||||
embeddings.push(await embedText(text.trim()));
|
||||
@@ -81,5 +82,5 @@ app.post('/embed/batch', async (req, res) => {
|
||||
|
||||
/******* Start Server ********/
|
||||
app.listen(PORT, () => {
|
||||
console.log(`Embedding Service listening on port ${PORT}`);
|
||||
logger.info(`Embedding Service listening on port ${PORT}`);
|
||||
});
|
||||
75
packages/inference-service/CLAUDE.md
Normal file
75
packages/inference-service/CLAUDE.md
Normal file
@@ -0,0 +1,75 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
See the root [CLAUDE.md](../../CLAUDE.md) for overall architecture, service roles, and deployment layout.
|
||||
|
||||
## Running This Service
|
||||
|
||||
```bash
|
||||
npm run inference # From repo root
|
||||
npm -w packages/inference-service run dev # With --watch
|
||||
```
|
||||
|
||||
Default port: **3001**. Set `INFERENCE_PROVIDER` to select the backend.
|
||||
|
||||
## Provider Pattern
|
||||
|
||||
`src/infer.js` reads `INFERENCE_PROVIDER` at startup and loads one of two providers:
|
||||
|
||||
| `INFERENCE_PROVIDER` | Module | Backend |
|
||||
|---|---|---|
|
||||
| `ollama` (default) | `src/providers/ollama.js` | Ollama npm client → `/api/generate` |
|
||||
| `llamacpp` | `src/providers/llamacpp.js` | Raw fetch → `/v1/chat/completions` (OpenAI-compatible) |
|
||||
|
||||
An unknown provider throws immediately at startup — fail-fast, not at request time.
|
||||
|
||||
Both providers export the same interface: `complete(prompt, options)` and `completeStream(prompt, options)`.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|---|---|---|
|
||||
| `PORT` | `3001` | Port to listen on |
|
||||
| `INFERENCE_PROVIDER` | `ollama` | `ollama` or `llamacpp` |
|
||||
| `INFERENCE_URL` | `http://localhost:11434` (Ollama) / `http://localhost:8080` (llama.cpp) | Backend URL |
|
||||
| `DEFAULT_MODEL` | Provider-specific | Model name passed to backend |
|
||||
|
||||
`INFERENCE_URL` defaults differ per provider — Ollama uses the Ollama default URL, llama.cpp uses the llama-server default.
|
||||
|
||||
## Options Resolution
|
||||
|
||||
Both providers use `resolveOptions(options)` to merge caller-supplied options with `INFERENCE_DEFAULTS` from shared constants. Any option not supplied by the caller falls back to the constant.
|
||||
|
||||
## Streaming Chunk Format
|
||||
|
||||
The two providers yield differently shaped chunks — the route in `src/routes/inference.js` normalises them:
|
||||
|
||||
**Ollama** yields raw Ollama generate chunks: `{ response, done, model, eval_count, prompt_eval_count, ... }`
|
||||
|
||||
**llama.cpp** yields:
|
||||
- Per-token: `{ response: delta, done: false }`
|
||||
- Final: `{ response: '', done: true, model, tokenCount }` — `tokenCount` is `completion_tokens + prompt_tokens`, taken from the usage chunk
|
||||
|
||||
The route checks `chunk.response` to stream text and `chunk.done` to capture metadata. For Ollama streaming, **token count is not captured** — the done chunk from Ollama contains `eval_count`/`prompt_eval_count` but the route only reads `chunk.tokenCount` (a llama.cpp field). Ollama streaming calls always report `tokenCount: 0` to the client.
|
||||
|
||||
## Known Issue: `maxTokens` Missing from Streaming Route
|
||||
|
||||
`POST /complete` correctly destructures `maxTokens` from the request body and passes it through. `POST /complete/stream` does **not** — it omits `maxTokens` from its destructuring, so streaming completions always use `INFERENCE_DEFAULTS.MAX_TOKENS` regardless of what the caller sends. This means `/chat/stream` has a different effective token ceiling than `/chat`.
|
||||
|
||||
## SSE Format (route → caller)
|
||||
|
||||
```
|
||||
data: {"response":"Hello"} ← per token
|
||||
data: {"response":" world"}
|
||||
data: {"done":true,"model":"...","tokenCount":42} ← final metadata
|
||||
data: [DONE] ← sentinel
|
||||
```
|
||||
|
||||
## API Endpoints
|
||||
|
||||
| Method | Path | Notes |
|
||||
|---|---|---|
|
||||
| GET | `/health` | Returns `{ service, status, provider, model }` |
|
||||
| POST | `/complete` | Body: `{ prompt, model?, temperature?, maxTokens?, topP?, topK?, repeatPenalty? }` |
|
||||
| POST | `/complete/stream` | Same body as `/complete` except `maxTokens` is silently ignored |
|
||||
@@ -1,20 +1,22 @@
|
||||
require ('dotenv').config();
|
||||
const express = require('express');
|
||||
const {getEnv} = require('@nexusai/shared');
|
||||
const {getEnv, PORTS, OLLAMA, logger} = require('@nexusai/shared');
|
||||
const inferenceRouter = require('./routes/inference');
|
||||
|
||||
const app = express();
|
||||
app.use(express.json());
|
||||
app.use(express.json({ limit: '8mb' })); // prompts include full context window
|
||||
|
||||
const PORT = getEnv('PORT', '3001'); // Default to 3001 if PORT is not set
|
||||
const PORT = getEnv('PORT', PORTS.INFERENCE);
|
||||
const PROVIDER = getEnv('INFERENCE_PROVIDER', 'ollama');
|
||||
const MODEL = getEnv('DEFAULT_MODEL', OLLAMA.OLLAMA_MODEL)
|
||||
|
||||
// Health check endpoint
|
||||
app.get('/health', (req, res) => {
|
||||
res.json({
|
||||
service: 'Inference Service',
|
||||
status: 'healthy',
|
||||
provider: getEnv('INFERENCE_PROVIDER', 'ollama'),
|
||||
model: getEnv('DEFAULT_MODEL', 'llama3.2')
|
||||
provider: PROVIDER,
|
||||
model: MODEL
|
||||
});
|
||||
});
|
||||
|
||||
@@ -22,5 +24,5 @@ app.use('/', inferenceRouter);
|
||||
|
||||
// Start the server
|
||||
app.listen(PORT, () => {
|
||||
console.log(`Inference Service is running on port ${PORT}`);
|
||||
logger.info(`Inference Service is running on port ${PORT}`);
|
||||
});
|
||||
@@ -1,63 +1,97 @@
|
||||
const { getEnv } = require('@nexusai/shared');
|
||||
const { getEnv, LLAMACPP, INFERENCE_DEFAULTS, logger } = require("@nexusai/shared");
|
||||
|
||||
const BASE_URL = getEnv('INFERENCE_URL', 'http://localhost:8080');
|
||||
const DEFAULT_MODEL = getEnv('DEFAULT_MODEL', 'local-model');
|
||||
const BASE_URL = getEnv("INFERENCE_URL", LLAMACPP.DEFAULT_URL);
|
||||
const DEFAULT_MODEL = getEnv("DEFAULT_MODEL", LLAMACPP.DEFAULT_MODEL);
|
||||
|
||||
function buildPayload(prompt, options, stream = false){
|
||||
return {
|
||||
model: options.model || DEFAULT_MODEL,
|
||||
messages: [{ role: 'user', content: prompt }],
|
||||
temperature: options.temperature ?? 0.7,
|
||||
max_tokens: options.num_predict ?? 1024,
|
||||
stream,
|
||||
};
|
||||
function resolveOptions(options) {
|
||||
return {
|
||||
temperature: options.temperature ?? INFERENCE_DEFAULTS.TEMPERATURE,
|
||||
maxTokens: options.maxTokens ?? INFERENCE_DEFAULTS.MAX_TOKENS,
|
||||
topP: options.topP ?? INFERENCE_DEFAULTS.TOP_P,
|
||||
topK: options.topK ?? INFERENCE_DEFAULTS.TOP_K,
|
||||
repeatPenalty: options.repeatPenalty ?? INFERENCE_DEFAULTS.REPEAT_PENALTY,
|
||||
seed: options.seed ?? INFERENCE_DEFAULTS.SEED,
|
||||
};
|
||||
}
|
||||
|
||||
async function complete(prompt, options = {} ) {
|
||||
const res = await fetch(`${BASE_URL}/v1/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(buildPayload(prompt, options, false))
|
||||
})
|
||||
function buildPayload(prompt, options, stream = false) {
|
||||
const opts = resolveOptions(options);
|
||||
|
||||
if (!res.ok) throw new Error(`llama.cpp error: ${res.status} ${res.statusText}`);
|
||||
|
||||
const data = await res.json();
|
||||
const choice = data.choices[0];
|
||||
|
||||
return {
|
||||
text: choice.message.content,
|
||||
model: data.model,
|
||||
done: choice.finish_reason === 'stop',
|
||||
evalCount: data.usage?.completion_tokens,
|
||||
promptEvalCount: data.usage?.prompt_tokens,
|
||||
}
|
||||
return {
|
||||
model: options.model || DEFAULT_MODEL,
|
||||
messages: [{ role: "user", content: prompt }],
|
||||
temperature: opts.temperature,
|
||||
max_tokens: opts.maxTokens,
|
||||
top_p: opts.topP,
|
||||
top_k: opts.topK,
|
||||
repeat_penalty: opts.repeatPenalty,
|
||||
stream,
|
||||
stream_options: stream ? { include_usage: true } : undefined,
|
||||
...(opts.seed !== null && { seed: opts.seed }),
|
||||
};
|
||||
}
|
||||
|
||||
async function complete(prompt, options = {}) {
|
||||
const res = await fetch(`${BASE_URL}/v1/chat/completions`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify(buildPayload(prompt, options, false)),
|
||||
});
|
||||
|
||||
if (!res.ok)
|
||||
throw new Error(`llama.cpp error: ${res.status} ${res.statusText}`);
|
||||
|
||||
const data = await res.json();
|
||||
const choice = data.choices[0];
|
||||
|
||||
return {
|
||||
text: choice.message.content,
|
||||
model: data.model,
|
||||
done: choice.finish_reason === "stop",
|
||||
evalCount: data.usage?.completion_tokens,
|
||||
promptEvalCount: data.usage?.prompt_tokens,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Stream a chat completion from the llama.cpp server.
 *
 * The server answers with server-sent-event style lines of the form
 * `data: {json}`, terminated by `data: [DONE]`. Each line carrying a
 * `choices[0].delta.content` value is surfaced as `{ response, done: false }`.
 * After the stream ends, exactly one `{ response: '', done: true, model,
 * tokenCount }` item is yielded, where `tokenCount` is
 * `completion_tokens + prompt_tokens` from the last `usage` object seen
 * (0 when none arrived).
 *
 * @param {string} prompt - Prompt handed to `buildPayload`.
 * @param {object} [options={}] - Generation options handed to `buildPayload`.
 * @yields {{response: string, done: boolean, model?: string, tokenCount?: number}}
 * @throws {Error} When the HTTP status is not OK, or a `data:` line is not valid JSON.
 */
async function* completeStream(prompt, options = {}) {
  const res = await fetch(`${BASE_URL}/v1/chat/completions`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(buildPayload(prompt, options, true)),
  });

  if (!res.ok) {
    throw new Error(`llama.cpp error: ${res.status} ${res.statusText}`);
  }

  let finalModel = DEFAULT_MODEL;
  let finalTokenCount = 0;

  // Interprets one complete line: records model/usage metadata and returns
  // the text delta carried by the line, if any.
  const consumeLine = (rawLine) => {
    const line = rawLine.trimEnd(); // tolerate CRLF line endings
    if (!line.startsWith("data: ") || line === "data: [DONE]") return undefined;

    const json = JSON.parse(line.slice(6)); // drop the "data: " prefix

    if (json.choices?.[0]?.finish_reason === "stop") {
      finalModel = json.model ?? finalModel;
    }

    // usage arrives in a separate final chunk with empty choices array
    if (json.usage) {
      finalTokenCount =
        (json.usage.completion_tokens ?? 0) + (json.usage.prompt_tokens ?? 0);
    }

    return json.choices?.[0]?.delta?.content;
  };

  // Network chunks do not align with line boundaries: a JSON line (or a
  // multi-byte character) can be split across two chunks. Decode in streaming
  // mode and keep the unterminated tail until the rest of the line arrives.
  const decoder = new TextDecoder("utf-8");
  let pending = "";

  for await (const chunk of res.body) {
    pending +=
      typeof chunk === "string" ? chunk : decoder.decode(chunk, { stream: true });

    const lines = pending.split("\n");
    pending = lines.pop(); // last element is the (possibly empty) partial line

    for (const line of lines) {
      const delta = consumeLine(line);
      if (delta) yield { response: delta, done: false };
    }
  }

  // Flush whatever is left when the stream ends without a trailing newline.
  pending += decoder.decode();
  if (pending) {
    const delta = consumeLine(pending);
    if (delta) yield { response: delta, done: false };
  }

  logger.info('[llamacpp] finalTokenCount:', finalTokenCount);

  yield { response: '', done: true, model: finalModel, tokenCount: finalTokenCount };
}
|
||||
|
||||
module.exports = { complete, completeStream };
|
||||
module.exports = { complete, completeStream };
|
||||
|
||||
@@ -1,17 +1,33 @@
|
||||
const { Ollama } = require('ollama');
|
||||
const { getEnv } = require('@nexusai/shared');
|
||||
const { getEnv, OLLAMA, INFERENCE_DEFAULTS } = require('@nexusai/shared');
|
||||
|
||||
const client = new Ollama({ host: getEnv('INFERENCE_URL', 'http://localhost:11434') });
|
||||
const DEFAULT_MODEL = getEnv('DEFAULT_MODEL', 'companion:latest');
|
||||
const client = new Ollama({ host: getEnv('INFERENCE_URL', OLLAMA.DEFAULT_URL) });
|
||||
const DEFAULT_MODEL = getEnv('DEFAULT_MODEL', OLLAMA.OLLAMA_MODEL);
|
||||
|
||||
/**
 * Fill in every sampling option the caller left unset with the shared
 * inference defaults.
 *
 * Only `null`/`undefined` fall back to a default, so explicit falsy values
 * such as `temperature: 0` or `topK: 0` are preserved.
 *
 * @param {object} [options={}] - Caller-supplied overrides; may be omitted or null.
 * @returns {{temperature: *, maxTokens: *, topP: *, topK: *, repeatPenalty: *, seed: *}}
 */
function resolveOptions(options = {}) {
  // A default parameter only covers `undefined`; also tolerate an explicit null.
  const given = options ?? {};

  return {
    temperature: given.temperature ?? INFERENCE_DEFAULTS.TEMPERATURE,
    maxTokens: given.maxTokens ?? INFERENCE_DEFAULTS.MAX_TOKENS,
    topP: given.topP ?? INFERENCE_DEFAULTS.TOP_P,
    topK: given.topK ?? INFERENCE_DEFAULTS.TOP_K,
    repeatPenalty: given.repeatPenalty ?? INFERENCE_DEFAULTS.REPEAT_PENALTY,
    seed: given.seed ?? INFERENCE_DEFAULTS.SEED,
  };
}
|
||||
|
||||
async function complete(prompt, options = {} ) {
|
||||
const opts = resolveOptions(options);
|
||||
const response = await client.generate({
|
||||
model: options.model || DEFAULT_MODEL,
|
||||
prompt,
|
||||
stream: false,
|
||||
options: {
|
||||
temperature: options.temperature ?? 0.7,
|
||||
num_predict: options.maxTokens ?? 1024,
|
||||
temperature: opts.temperature,
|
||||
num_predict: opts.maxTokens,
|
||||
top_p: opts.topP,
|
||||
top_k: opts.topK,
|
||||
repeat_penalty: opts.repeatPenalty,
|
||||
...(opts.seed !== null && { seed: opts.seed }),
|
||||
}
|
||||
});
|
||||
|
||||
@@ -25,17 +41,32 @@ async function complete(prompt, options = {} ) {
|
||||
}
|
||||
|
||||
/**
 * Stream a completion from Ollama.
 *
 * Intermediate chunks from `client.generate` are yielded unchanged. The
 * chunk flagged `done` is replaced by a normalized terminal item
 * `{ response: '', done: true, model, tokenCount }`, where `tokenCount` is
 * `eval_count + prompt_eval_count` (each treated as 0 when absent).
 *
 * @param {string} prompt - Prompt text sent to the model.
 * @param {object} [options={}] - Optional `model` plus the sampling options
 *   understood by `resolveOptions`.
 * @yields {object} Raw Ollama chunks, then one normalized terminal item.
 */
async function* completeStream(prompt, options = {}) {
  const opts = resolveOptions(options);

  const stream = await client.generate({
    model: options.model || DEFAULT_MODEL,
    prompt,
    stream: true,
    options: {
      temperature: opts.temperature,
      num_predict: opts.maxTokens,
      top_p: opts.topP,
      top_k: opts.topK,
      repeat_penalty: opts.repeatPenalty,
      // Only forward a seed when one is actually set (neither null nor undefined).
      ...(opts.seed != null && { seed: opts.seed }),
    },
  });

  for await (const chunk of stream) {
    if (chunk.done) {
      yield {
        response: '',
        done: true,
        model: chunk.model,
        tokenCount: (chunk.eval_count ?? 0) + (chunk.prompt_eval_count ?? 0),
      };
    } else {
      yield chunk;
    }
  }
}
|
||||
|
||||
|
||||
@@ -1,45 +1,59 @@
|
||||
const { Router } = require('express');
|
||||
const { complete, completeStream } = require('../infer');
|
||||
const { logger } = require('@nexusai/shared');
|
||||
|
||||
const router = Router();
|
||||
|
||||
// Standard completion endpoint - returns full response when done
|
||||
/**
 * POST /complete — run a non-streamed completion and return the full result.
 *
 * Body: `prompt` (required) plus optional `model`, `temperature`,
 * `maxTokens`, `topP`, `topK`, `repeatPenalty`, which are forwarded to
 * `complete`. Responds 400 when `prompt` is missing/empty and 500 with
 * `{ error, detail }` when inference throws.
 */
router.post('/complete', async (req, res) => {
  // `req.body` is undefined when no body parser ran; treat that as "no prompt".
  const { prompt, model, temperature, maxTokens, topP, topK, repeatPenalty } = req.body ?? {};

  if (!prompt) {
    return res.status(400).json({ error: 'prompt is required'});
  }

  try {
    const result = await complete(prompt, { model, temperature, maxTokens, topP, topK, repeatPenalty });
    res.json(result);
  } catch (error) {
    logger.error('[Inference] Completion error:', error.message);
    res.status(500).json({ error: 'Inference failed', detail: error.message });
  }
});
|
||||
|
||||
// Streaming completion endpoint - sends partial responses as they arrive
|
||||
router.post('/complete/stream', async (req, res) => {
|
||||
const { prompt, model, temperature } = req.body;
|
||||
const { prompt, model, temperature, maxTokens, topP, topK, repeatPenalty } = req.body;
|
||||
|
||||
if (!prompt) {
|
||||
return res.status(400).json({error: 'prompt is required'});
|
||||
}
|
||||
if (!prompt) return res.status(400).json({ error: 'prompt is required' });
|
||||
|
||||
res.setHeader('Content-Type', 'text/event-stream');
|
||||
res.setHeader('Cache-Control', 'no-cache');
|
||||
res.setHeader('Connection', 'keep-alive');
|
||||
|
||||
try {
|
||||
for await (const chunk of completeStream(prompt, {model, temperature})) {
|
||||
res.write(`data: ${JSON.stringify(chunk)}\n\n`);
|
||||
let lastModel = model;
|
||||
let tokenCount = 0;
|
||||
|
||||
for await (const chunk of completeStream(prompt, { model, temperature, maxTokens,topP, topK, repeatPenalty })) {
|
||||
if (chunk.response) {
|
||||
res.write(`data: ${JSON.stringify({ response: chunk.response })}\n\n`);
|
||||
}
|
||||
if (chunk.done) {
|
||||
// capture final metadata from the done signal
|
||||
lastModel = chunk.model ?? lastModel;
|
||||
tokenCount = chunk.tokenCount ?? tokenCount;
|
||||
logger.info('[inference router] tokenCount from chunk:', chunk.tokenCount, '→', tokenCount);
|
||||
}
|
||||
}
|
||||
|
||||
// Send a single done event with metadata after stream closes
|
||||
res.write(`data: ${JSON.stringify({ done: true, model: lastModel, tokenCount })}\n\n`);
|
||||
res.write('data: [DONE]\n\n');
|
||||
} catch (error) {
|
||||
console.error('[Inference] Streaming error:', error.message);
|
||||
res.write(`data: ${JSON.stringify({ error: error.message })}\n\n`);
|
||||
|
||||
} catch (err) {
|
||||
logger.error('[Inference] Streaming error:', err.message);
|
||||
res.write(`data: ${JSON.stringify({ error: err.message })}\n\n`);
|
||||
} finally {
|
||||
res.end();
|
||||
}
|
||||
|
||||
114
packages/memory-service/CLAUDE.md
Normal file
114
packages/memory-service/CLAUDE.md
Normal file
@@ -0,0 +1,114 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
See the root [CLAUDE.md](../../CLAUDE.md) for overall architecture, service roles, and the dual-store memory model.
|
||||
|
||||
## Running This Service
|
||||
|
||||
```bash
|
||||
npm run memory # From repo root (node src/index.js)
|
||||
npm -w packages/memory-service run dev # With --watch
|
||||
```
|
||||
|
||||
Default port: **3002**. Requires Qdrant and the embedding-service to be reachable on startup.
|
||||
|
||||
## SQLite Schema
|
||||
|
||||
`src/db/schema.js` is the source of truth for the data model. Key schema facts:
|
||||
|
||||
- `sessions` and `episodes` are linked by FK with cascade delete — deleting a session removes all its episodes automatically.
|
||||
- `episodes_fts` is an FTS5 virtual table that mirrors `user_message` and `ai_response`. It is kept in sync via SQL triggers on INSERT/UPDATE/DELETE. On service startup, the FTS index is fully rebuilt from live episode data.
|
||||
- Several columns (`sessions.name`, `sessions.project_id`, `entities.mention_count`, etc.) were added as migrations using `ALTER TABLE` wrapped in individual try-catch blocks. Failures are silently swallowed — if a column already exists, the alter fails and the service continues. The `idx_summaries_project` index is defined twice (benign duplicate).
|
||||
- `summaries` rows with `session_id IS NULL` and a `project_id` represent project-level overviews, not session summaries. This distinction is how `GET /projects/:id/overview` works.
|
||||
- `entity_episodes` is a join table linking entities to the episodes where they were first extracted. Used for provenance tracking and future orphan cleanup. Defined in `schema.js` (not a migration), so it exists on all installs.
|
||||
|
||||
**New columns on `entities` (added via migration):**
|
||||
- `mention_count INTEGER NOT NULL DEFAULT 1` — incremented every time this entity is re-extracted
|
||||
- `confidence REAL NOT NULL DEFAULT 1.0` — reserved for future confidence scoring
|
||||
- `source TEXT NOT NULL DEFAULT 'extraction'` — `'extraction'` or `'manual'`
|
||||
- `last_seen_at INTEGER` — Unix timestamp of most recent extraction hit
|
||||
|
||||
**New columns on `relationships` (added via migration):**
|
||||
- `mention_count INTEGER NOT NULL DEFAULT 1` — incremented every time this edge is re-extracted
|
||||
- `notes TEXT` — relationship context sentence from extraction
|
||||
|
||||
## Async Pipeline: Episode Creation
|
||||
|
||||
`POST /episodes` returns a 201 as soon as the SQLite insert succeeds. Two background tasks run after without blocking the response:
|
||||
|
||||
1. **Embedding** — Fetches a vector from embedding-service, stores to Qdrant with `{sessionId, createdAt}` as payload metadata.
|
||||
2. **Entity + relationship extraction** — Sends the episode text to Ollama (`qwen2.5:3b`, temp 0.1, 1500 tokens) and upserts any recognized entities and relationships to both SQLite and Qdrant. Also links each entity to the episode via `entity_episodes`.
|
||||
|
||||
Both tasks catch and log errors silently. An episode can exist in SQLite with no corresponding Qdrant point if either step fails.
|
||||
|
||||
## Entity Extraction Details
|
||||
|
||||
`src/entities/extraction.js`:
|
||||
|
||||
- Fetches the last 20 known entities from SQLite before prompting the model, so the prompt can ask for name/type consistency with existing entries.
|
||||
- Recognized entity types: `person`, `place`, `project`, `technology`, `concept`, `organization` — anything else is discarded.
|
||||
- Ignores a hardcoded list of low-value names (`hello`, `thanks`, `good morning`, etc.).
|
||||
- Extracts JSON using a regex (`{...}`) applied to raw model output, so surrounding prose doesn't break parsing.
|
||||
- The model is asked to return both entities and relationships in a single JSON response: `{ "entities": [...], "relationships": [...] }`.
|
||||
- Entity upsert uses `ON CONFLICT(name, type) DO UPDATE` — preserves existing `notes` if the new extraction returns null, increments `mention_count`, updates `last_seen_at`.
|
||||
- Relationship upsert uses `ON CONFLICT(from_id, to_id, label) DO UPDATE` — increments `mention_count`, preserves existing `notes` if new is null.
|
||||
- Relationships are resolved by looking up both endpoints in the `entityMap` built during entity processing — if either entity wasn't saved (filtered out or invalid type), the relationship is silently dropped.
|
||||
- After upsert, embeds each entity as `"${name} (${type}): ${notes}"` and stores to Qdrant with `projectId` in the payload for project-scoped filtering.
|
||||
|
||||
> For full details see `docs/services/entity-extraction.md` and `docs/services/knowledge-graph.md`.
|
||||
|
||||
## Knowledge Graph
|
||||
|
||||
`src/graph/index.js` provides two SQLite traversal functions:
|
||||
|
||||
- **`getNeighborhood(entityId, depth)`** — Single-entity recursive CTE traversal. Bidirectional (follows edges in both directions). Returns `{ nodes: [...entities], edges: [...relationships] }`. Depth defaults to `ENTITIES.GRAPH_HOP_DEPTH` (1), max enforced to 3 at the HTTP layer.
|
||||
|
||||
- **`getEntityNeighbors(entityIds[])`** — Bulk 1-hop version for orchestration. Given a set of seed entity IDs, returns their immediate neighbors plus all edges within the combined node set.
|
||||
|
||||
The recursive CTE uses `UNION` (not `UNION ALL`) to eliminate cycles and duplicate visits automatically.
|
||||
|
||||
> For full design rationale and usage see `docs/services/knowledge-graph.md`.
|
||||
|
||||
## Summarization Strategy
|
||||
|
||||
`src/summarization/project.js`:
|
||||
|
||||
- Preferred path: generate a project overview from existing **session-level summaries** (higher-level abstraction, shorter input).
|
||||
- Fallback path: if no session summaries exist, summarize raw episodes directly (up to `SUMMARIES.MAX_PROJECT_EPISODE_LIMIT`).
|
||||
- Both paths truncate input at `SUMMARIES.MAX_SUMMARY_CHARS` (8,000 chars) by slicing from the end (most recent content wins).
|
||||
- Strips ChatML tokens from the Ollama response (`<|im_start|>`, `<|im_end|>`).
|
||||
- Uses temp 0.2 and `num_predict 1200`.
|
||||
|
||||
## Qdrant Client
|
||||
|
||||
`src/semantic/index.js` creates the Qdrant client lazily on first use and reuses it. All three collections (`episodes`, `entities`, `summaries`) are created at startup if missing. There is no connection health check — if Qdrant is unreachable, semantic operations throw at call time.
|
||||
|
||||
## API Endpoints Quick Reference
|
||||
|
||||
| Method | Path | Notes |
|
||||
|---|---|---|
|
||||
| GET | `/health` | Static response, no dependency checks |
|
||||
| GET/POST | `/sessions` | POST requires `externalId`; duplicate → 409 |
|
||||
| GET/PATCH | `/sessions/by-external/:externalId` | PATCH accepts `name`, `projectId` |
|
||||
| DELETE | `/sessions/by-external/:externalId` | Cascades to episodes, summaries, relationships |
|
||||
| GET/POST | `/episodes` | POST triggers async embedding + entity/relationship extraction |
|
||||
| GET | `/episodes/search` | FTS5 search; route must precede `/:id` |
|
||||
| GET | `/sessions/:id/episodes` | Paginated, ordered `created_at DESC` |
|
||||
| DELETE | `/episodes/:id` | Removes from SQLite + async Qdrant delete |
|
||||
| POST | `/entities` | Upsert by `(name, type)`; increments `mention_count` on conflict |
|
||||
| GET | `/entities/by-type/:type` | All entities of given type |
|
||||
| GET/DELETE | `/entities/:id` | |
|
||||
| POST | `/relationships` | Upsert by `(fromId, toId, label)`; increments `mention_count` on conflict. Body: `fromId`, `toId`, `label`, `notes` (optional) |
|
||||
| GET | `/entities/:id/relationships` | Outbound only |
|
||||
| DELETE | `/relationships` | Body: `fromId`, `toId`, `label` |
|
||||
| GET | `/graph/neighborhood/:entityId` | Single-entity neighborhood; `?depth=` (default 1, max 3) |
|
||||
| POST | `/graph/neighbors` | Bulk 1-hop neighborhood; body: `{ entityIds: [...] }` |
|
||||
| GET/POST | `/projects` | POST requires non-empty `name` |
|
||||
| GET/PATCH/DELETE | `/projects/:id` | |
|
||||
| POST | `/projects/:id/summarize` | On-demand overview generation; 422 if no data |
|
||||
| GET | `/projects/:id/overview` | Returns null (not 404) if no overview exists |
|
||||
| GET | `/projects/:id/summaries` | All summaries for project |
|
||||
| POST | `/summaries` | Requires `content` + at least one of `sessionId`/`projectId` |
|
||||
| GET | `/sessions/:id/summaries` | |
|
||||
| PATCH/DELETE | `/summaries/:id` | |
|
||||
@@ -1,12 +1,12 @@
|
||||
const Database = require('better-sqlite3');
|
||||
const schema = require('./schema');
|
||||
const {getEnv } = require('@nexusai/shared');
|
||||
const {getEnv, SQLITE, logger } = require('@nexusai/shared');
|
||||
|
||||
let db; // Declare db variable in a scope accessible to all functions
|
||||
|
||||
function getDB() {
|
||||
if (!db) {
|
||||
const path = getEnv('SQLITE_PATH', './data/nexusai.db');
|
||||
const path = getEnv('SQLITE_PATH', SQLITE.DEFAULT_PATH);
|
||||
db = new Database(path);
|
||||
|
||||
db.pragma('journal_mode = WAL');
|
||||
@@ -14,11 +14,60 @@ function getDB() {
|
||||
|
||||
db.exec(schema);
|
||||
|
||||
try{
|
||||
db.exec(`ALTER TABLE sessions ADD COLUMN name TEXT`)
|
||||
} catch {}
|
||||
|
||||
try {
|
||||
db.exec(`ALTER TABLE sessions ADD COLUMN project_id INTEGER REFERENCES projects(id)`);
|
||||
} catch {}
|
||||
|
||||
try {
|
||||
db.exec(`CREATE INDEX IF NOT EXISTS idx_sessions_project ON sessions(project_id)`);
|
||||
} catch {}
|
||||
|
||||
try {
|
||||
db.exec(`ALTER TABLE projects ADD COLUMN isolated INTEGER NOT NULL DEFAULT 0`);
|
||||
} catch {}
|
||||
|
||||
try {
|
||||
db.exec(`ALTER TABLE projects ADD COLUMN notes TEXT`); // ← add this
|
||||
} catch {}
|
||||
|
||||
try {
|
||||
db.exec(`ALTER TABLE projects ADD COLUMN system_prompt TEXT`);
|
||||
} catch {}
|
||||
|
||||
try {
|
||||
db.exec(`ALTER TABLE summaries ADD COLUMN project_id INTEGER REFERENCES projects(id) ON DELETE CASCADE`);
|
||||
} catch {}
|
||||
|
||||
try {
|
||||
db.exec(`ALTER TABLE summaries ADD COLUMN token_count INTEGER`);
|
||||
} catch {}
|
||||
|
||||
try {
|
||||
db.exec(`CREATE INDEX IF NOT EXISTS idx_summaries_project ON summaries(project_id)`);
|
||||
} catch {}
|
||||
|
||||
try {
|
||||
db.exec(`CREATE INDEX IF NOT EXISTS idx_summaries_session ON summaries(session_id)`);
|
||||
} catch {}
|
||||
|
||||
try { db.exec(`ALTER TABLE entities ADD COLUMN mention_count INTEGER NOT NULL DEFAULT 1`) } catch {}
|
||||
try { db.exec(`ALTER TABLE entities ADD COLUMN confidence REAL NOT NULL DEFAULT 1.0`) } catch {}
|
||||
try { db.exec(`ALTER TABLE entities ADD COLUMN source TEXT NOT NULL DEFAULT 'extraction'`) } catch {}
|
||||
try { db.exec(`ALTER TABLE entities ADD COLUMN last_seen_at INTEGER`) } catch {}
|
||||
|
||||
try { db.exec(`ALTER TABLE relationships ADD COLUMN mention_count INTEGER NOT NULL DEFAULT 1`) } catch {}
|
||||
try { db.exec(`ALTER TABLE relationships ADD COLUMN notes TEXT`) } catch {}
|
||||
|
||||
|
||||
// Sync FTS index with any existing episodes data
|
||||
db.exec(`INSERT OR REPLACE INTO episodes_fts(rowid, user_message, ai_response)
|
||||
SELECT id, user_message, ai_response FROM episodes`);
|
||||
|
||||
console.log(`Connected to SQLite database at ${path}`);
|
||||
logger.info(`Connected to SQLite database at ${path}`);
|
||||
}
|
||||
return db;
|
||||
}
|
||||
|
||||
52
packages/memory-service/src/db/projects.js
Normal file
52
packages/memory-service/src/db/projects.js
Normal file
@@ -0,0 +1,52 @@
|
||||
const { getDB } = require('./index');
|
||||
const { parseRow } = require('@nexusai/shared');
|
||||
|
||||
/**
 * Insert a new project row and return it as read back by `getProject`.
 *
 * Missing `description`, `colour` and `icon` are stored as NULL; a missing
 * `isolated` flag is stored as 0.
 *
 * @param {object} project
 * @param {string} project.name
 * @param {string} [project.description]
 * @param {string} [project.colour]
 * @param {string} [project.icon]
 * @param {number|boolean} [project.isolated] - Booleans are stored as 0/1.
 * @returns {*} Whatever `getProject` returns for the new row id.
 */
function createProject({ name, description, colour, icon, isolated }) {
  const db = getDB();

  // better-sqlite3 refuses to bind JavaScript booleans, so map true/false to
  // 1/0 before binding; every other value passes through as before.
  const isolatedFlag = typeof isolated === 'boolean' ? Number(isolated) : (isolated ?? 0);

  const result = db.prepare(`
    INSERT INTO projects (name, description, colour, icon, isolated)
    VALUES (?, ?, ?, ?, ?)
  `).run(name, description ?? null, colour ?? null, icon ?? null, isolatedFlag);

  return getProject(result.lastInsertRowid);
}
|
||||
|
||||
/**
 * List every project, newest first (ordered by `created_at` descending).
 *
 * @returns {Array<*>} One `parseRow` result per project row.
 */
function getProjects() {
  const db = getDB();
  const rows = db.prepare(`SELECT * FROM projects ORDER BY created_at DESC`).all();
  return rows.map(parseRow);
}
|
||||
|
||||
/**
 * Fetch a single project by primary key.
 *
 * The row is handed to `parseRow` even when no project matches (the row is
 * then `undefined`), so the "not found" result is whatever `parseRow`
 * returns for `undefined`.
 * NOTE(review): sibling lookups in summaries.js return `null` explicitly for
 * a missing row — confirm `parseRow(undefined)` is safe and consistent.
 *
 * @param {number|bigint} id - Project id.
 * @returns {*} `parseRow` applied to the matching row.
 */
function getProject(id) {
  const db = getDB();
  const row = db.prepare(`SELECT * FROM projects WHERE id = ?`).get(id);
  return parseRow(row);
}
|
||||
|
||||
/**
 * Update the given columns of a project and return the refreshed row.
 *
 * Only keys in the allow-list below are written; unknown keys are ignored.
 * A key set to `undefined` is left untouched, while `null` clears the
 * column. When nothing remains to update, no UPDATE is issued.
 *
 * @param {number|bigint} id - Project id.
 * @param {object} [fields={}] - Partial project; may contain `name`,
 *   `description`, `colour`, `icon`, `isolated`, `notes`, `system_prompt`.
 * @returns {*} Whatever `getProject(id)` returns after the update.
 */
function updateProject(id, fields = {}) {
  const db = getDB();
  const allowed = ['name', 'description', 'colour', 'icon', 'isolated', 'notes', 'system_prompt'];
  const updates = [];
  const values = [];

  for (const key of allowed) {
    const value = fields[key];
    if (value === undefined) continue;

    // Column names come from the fixed allow-list above, never from the
    // caller, so interpolating them into the SQL text is safe.
    updates.push(`${key} = ?`);

    // better-sqlite3 cannot bind booleans; store them as 1/0.
    values.push(typeof value === 'boolean' ? Number(value) : value);
  }

  if (updates.length === 0) return getProject(id);

  values.push(id);
  db.prepare(`UPDATE projects SET ${updates.join(', ')} WHERE id = ?`).run(...values);
  return getProject(id);
}
|
||||
|
||||
/**
 * Delete a project, first detaching any sessions that reference it.
 *
 * Both statements run inside one transaction: sessions pointing at the
 * project get `project_id = NULL`, then the project row is removed.
 *
 * @param {number|bigint} id - Project id.
 * @returns {void}
 */
function deleteProject(id) {
  const db = getDB();

  const detachAndDelete = db.transaction(() => {
    db.prepare(`UPDATE sessions SET project_id = NULL WHERE project_id = ?`).run(id);
    db.prepare(`DELETE FROM projects WHERE id = ?`).run(id);
  });

  detachAndDelete();
}
|
||||
|
||||
module.exports = { createProject, getProjects, getProject, updateProject, deleteProject };
|
||||
@@ -38,10 +38,35 @@ const schema = `
|
||||
UNIQUE(from_id, to_id, label)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_relationships_from ON relationships(from_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_relationships_to ON relationships(to_id);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS entity_episodes (
|
||||
entity_id INTEGER NOT NULL REFERENCES entities(id) ON DELETE CASCADE,
|
||||
episode_id INTEGER NOT NULL REFERENCES episodes(id) ON DELETE CASCADE,
|
||||
PRIMARY KEY (entity_id, episode_id)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_entity_episodes_entity ON entity_episodes(entity_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_entity_episodes_episode ON entity_episodes(episode_id);
|
||||
|
||||
|
||||
|
||||
CREATE TABLE IF NOT EXISTS projects (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
name TEXT NOT NULL,
|
||||
description TEXT,
|
||||
colour TEXT,
|
||||
icon TEXT,
|
||||
created_at INTEGER NOT NULL DEFAULT (unixepoch())
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS summaries (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
session_id INTEGER REFERENCES sessions(id) ON DELETE CASCADE,
|
||||
project_id INTEGER REFERENCES projects(id) ON DELETE CASCADE,
|
||||
content TEXT NOT NULL,
|
||||
token_count INTEGER,
|
||||
episode_range TEXT,
|
||||
created_at INTEGER NOT NULL DEFAULT (unixepoch()),
|
||||
metadata TEXT
|
||||
@@ -53,8 +78,6 @@ const schema = `
|
||||
ON episodes(created_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_entities_type
|
||||
ON entities(type);
|
||||
CREATE INDEX IF NOT EXISTS idx_summaries_session
|
||||
ON summaries(session_id);
|
||||
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS episodes_fts
|
||||
USING fts5(user_message, ai_response, content=episodes, content_rowid=id);
|
||||
@@ -78,6 +101,8 @@ const schema = `
|
||||
INSERT INTO episodes_fts(rowid, user_message, ai_response)
|
||||
VALUES (new.id, new.user_message, new.ai_response);
|
||||
END;
|
||||
|
||||
|
||||
`;
|
||||
|
||||
module.exports = schema;
|
||||
76
packages/memory-service/src/db/summaries.js
Normal file
76
packages/memory-service/src/db/summaries.js
Normal file
@@ -0,0 +1,76 @@
|
||||
const { getDB } = require('./index');
|
||||
const { parseRow } = require('@nexusai/shared');
|
||||
|
||||
/**
 * Insert a summary row and return it as read back by `getSummary`.
 *
 * `metadata` is serialized with `JSON.stringify` when truthy and stored as
 * NULL otherwise.
 *
 * @param {object} summary
 * @param {number|null} [summary.sessionId=null]
 * @param {number|null} [summary.projectId=null]
 * @param {string} summary.content
 * @param {number|null} [summary.tokenCount=null]
 * @param {string|null} [summary.episodeRange=null]
 * @param {object|null} [summary.metadata=null]
 * @returns {*} Whatever `getSummary` returns for the new row id.
 */
function createSummary({ sessionId = null, projectId = null, content, tokenCount = null, episodeRange = null, metadata = null }) {
  const db = getDB();
  const serializedMetadata = metadata ? JSON.stringify(metadata) : null;

  const result = db.prepare(`
    INSERT INTO summaries (session_id, project_id, content, token_count, episode_range, metadata)
    VALUES (?, ?, ?, ?, ?, ?)
  `).run(sessionId, projectId, content, tokenCount, episodeRange, serializedMetadata);

  return getSummary(result.lastInsertRowid);
}
|
||||
|
||||
/**
 * Fetch a single summary by primary key.
 *
 * @param {number|bigint} id - Summary id.
 * @returns {*|null} The `parseRow` result for the row, or `null` when no row matches.
 */
function getSummary(id) {
  const db = getDB();
  const row = db.prepare(`SELECT * FROM summaries WHERE id = ?`).get(id);
  if (!row) return null;
  return parseRow(row);
}
|
||||
|
||||
/**
 * List all summaries attached to a session, oldest first.
 *
 * @param {number|bigint} sessionId - Session id matched against `session_id`.
 * @returns {Array<*>} One `parseRow` result per matching row.
 */
function getSummariesBySession(sessionId) {
  const db = getDB();
  const statement = db.prepare(`SELECT * FROM summaries WHERE session_id = ? ORDER BY created_at ASC`);
  return statement.all(sessionId).map(parseRow);
}
|
||||
|
||||
/**
 * List all summaries whose `project_id` matches the given project, oldest first.
 *
 * @param {number|bigint} projectId - Project id matched against `project_id`.
 * @returns {Array<*>} One `parseRow` result per matching row.
 */
function getSummariesByProject(projectId) {
  const db = getDB();
  const statement = db.prepare(`SELECT * FROM summaries WHERE project_id = ? ORDER BY created_at ASC`);
  return statement.all(projectId).map(parseRow);
}
|
||||
|
||||
/**
 * Update selected columns of a summary and return the refreshed row.
 *
 * A property left `undefined` is not touched. `metadata` is serialized with
 * `JSON.stringify`; passing `metadata: null` clears the column to SQL NULL
 * (matching how `createSummary` stores an absent value) rather than storing
 * the four-character string "null". When no property is supplied, no UPDATE
 * is issued.
 *
 * @param {number|bigint} id - Summary id.
 * @param {object} [patch={}]
 * @param {string} [patch.content]
 * @param {number|null} [patch.tokenCount]
 * @param {string|null} [patch.episodeRange]
 * @param {object|null} [patch.metadata]
 * @returns {*|null} Whatever `getSummary(id)` returns after the update.
 */
function updateSummary(id, { content, tokenCount, episodeRange, metadata } = {}) {
  const db = getDB();
  const fields = [];
  const values = [];

  if (content !== undefined) {
    fields.push('content = ?');
    values.push(content);
  }
  if (tokenCount !== undefined) {
    fields.push('token_count = ?');
    values.push(tokenCount);
  }
  if (episodeRange !== undefined) {
    fields.push('episode_range = ?');
    values.push(episodeRange);
  }
  if (metadata !== undefined) {
    fields.push('metadata = ?');
    // JSON.stringify(null) is the string "null"; bind a real NULL instead.
    values.push(metadata === null ? null : JSON.stringify(metadata));
  }

  if (!fields.length) return getSummary(id);

  values.push(id);
  db.prepare(`UPDATE summaries SET ${fields.join(', ')} WHERE id = ?`).run(...values);
  return getSummary(id);
}
|
||||
|
||||
/**
 * Delete a summary row by primary key. Nothing is returned, and a
 * non-matching id is not reported.
 *
 * @param {number|bigint} id - Summary id.
 * @returns {void}
 */
function deleteSummary(id) {
  const db = getDB();
  db.prepare(`DELETE FROM summaries WHERE id = ?`).run(id);
}
|
||||
|
||||
/**
 * Fetch the session-level summaries of every session that belongs to a
 * project, oldest first.
 *
 * The project is resolved by joining through `sessions.project_id` rather
 * than `summaries.project_id`, since session summaries don't store
 * project_id directly.
 *
 * @param {number|bigint} projectId - Project id matched against `sessions.project_id`.
 * @returns {Array<*>} One `parseRow` result per matching summary row.
 */
function getSessionSummariesForProject(projectId) {
  const db = getDB();
  const statement = db.prepare(`
    SELECT s.* FROM summaries s
    JOIN sessions sess ON sess.id = s.session_id
    WHERE sess.project_id = ? AND s.session_id IS NOT NULL
    ORDER BY s.created_at ASC
  `);
  return statement.all(projectId).map(parseRow);
}
|
||||
|
||||
/**
 * Fetch the most recent project-level overview summary.
 *
 * A project overview is a summary row with the given `project_id` and
 * `session_id IS NULL`; the NULL session is what distinguishes it from a
 * session summary.
 *
 * @param {number|bigint} projectId - Project id.
 * @returns {*|null} The `parseRow` result for the newest overview, or `null` when none exists.
 */
function getProjectOverviewSummary(projectId) {
  const db = getDB();
  const row = db.prepare(`
    SELECT * FROM summaries
    WHERE project_id = ? AND session_id IS NULL
    ORDER BY created_at DESC LIMIT 1
  `).get(projectId);

  if (!row) return null;
  return parseRow(row);
}
|
||||
|
||||
module.exports = { createSummary, getSummary, getSummariesBySession, getSummariesByProject, updateSummary, deleteSummary, getSessionSummariesForProject, getProjectOverviewSummary };
|
||||
172
packages/memory-service/src/entities/extraction.js
Normal file
172
packages/memory-service/src/entities/extraction.js
Normal file
@@ -0,0 +1,172 @@
|
||||
const semantic = require('../semantic')
|
||||
const { getEnv, SERVICES, formatEpisodeText, ENTITIES, logger } = require('@nexusai/shared');
|
||||
const { upsertEntity, upsertRelationship, linkEntityToEpisode } = require('./index');
|
||||
|
||||
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');
|
||||
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b'); // ChatML format — see buildExtractionPrompt
|
||||
const EMBEDDING_SERVICE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING_URL);
|
||||
|
||||
const ENTITY_TYPES = ENTITIES.TYPES;
|
||||
const IGNORED_NAMES = ['good morning', 'good night', 'hello', 'goodbye', 'thanks', 'thank you'];
|
||||
|
||||
// NOTE: This prompt uses ChatML format (<|im_start|> / <|im_end|> tags), which is
|
||||
// specific to qwen-family models. If EXTRACTION_MODEL is changed to a Llama-family
|
||||
// or other model, this format will need to change — most alternatives use either
|
||||
// plain text or [INST] / <<SYS>> tags. Silent degradation is likely if mismatched.
|
||||
/**
 * Build the ChatML-formatted prompt that asks the extraction model for
 * entities and relationships found in one user/assistant exchange.
 *
 * When `knownEntities` is non-empty, a block listing them as
 * `- "name" (type)` is inserted before the conversation so the model can
 * reuse existing name/type pairs. The prompt ends with an open
 * `<|im_start|>assistant` turn for the model to complete.
 *
 * @param {string} userMessage - The user's side of the exchange.
 * @param {string} aiResponse - The assistant's side of the exchange.
 * @param {Array<{name: string, type: string}>} [knownEntities=[]] - Existing entities to list.
 * @returns {string} The prompt lines joined with "\n".
 */
function buildExtractionPrompt(userMessage, aiResponse, knownEntities = []) {
  // Empty string when nothing is known; it still occupies one (blank) line
  // in the joined prompt below.
  const knownBlock = knownEntities.length > 0
    ? [
        'Already known entities (use these exact name and type values if the same entity appears):',
        ...knownEntities.map(e => `- "${e.name}" (${e.type})`),
        '',
      ].join('\n')
    : '';

  return [
    '<|im_start|>system',
    'You are a named entity and relationship extractor. You output only valid JSON.',
    '<|im_end|>',
    '<|im_start|>user',
    'Read the conversation below and extract all named entities and the relationships between them.',
    `Entity types: ${ENTITY_TYPES.join(', ')}`,
    'Use "character" for any fictional, game, or media characters (e.g. characters from anime, games, books, TV shows, movies)',
    'Use "person" only for real people',
    'For each entity provide:',
    '  "name": short proper noun only (max 4 words)',
    '  "type": one of the valid types',
    '  "notes": one specific sentence about this entity based on the conversation',
    'For relationships, use snake_case verb labels (e.g. works_on, manages, uses, knows, located_in, part_of, created_by).',
    'Only include relationships between entities you have listed above.',
    'Return this exact JSON structure:',
    '{ "entities": [{"name": "...", "type": "...", "notes": "..."}], "relationships": [{"from": "...", "fromType": "...", "to": "...", "toType": "...", "label": "...", "notes": "..."}] }',
    '',
    knownBlock,
    '--- CONVERSATION ---',
    `User: ${userMessage}`,
    `Assistant: ${aiResponse}`,
    '--- END CONVERSATION ---',
    '<|im_end|>',
    '<|im_start|>assistant',
  ].join('\n');
}
|
||||
|
||||
/**
 * Ask the embedding service for a vector describing an entity.
 *
 * The text sent is `"<name> (<type>): <notes>"`; when `notes` is null or
 * undefined the entity name is used in its place.
 *
 * @param {{name: string, type: string, notes?: (string|null)}} entity
 * @returns {Promise<Array>} The `embedding` array from the service response.
 * @throws {Error} When the service answers with a non-OK status, or the
 *   response body carries no `embedding` array.
 */
async function embedEntity(entity) {
  // Combine name, type and notes into a single descriptive string for embedding
  const text = `${entity.name} (${entity.type}): ${entity.notes ?? entity.name}`;

  const res = await fetch(`${EMBEDDING_SERVICE_URL}/embed`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ text }),
  });

  if (!res.ok) throw new Error(`Embedding service error: ${res.status}`);

  const data = await res.json();

  // Surface a malformed response here rather than passing `undefined` on to
  // the vector store as if it were a vector.
  if (!Array.isArray(data?.embedding)) {
    throw new Error('Embedding service returned no embedding vector');
  }

  return data.embedding;
}
|
||||
|
||||
// Runs the extraction model over one user/assistant exchange and persists the
// entities and relationships it reports.
//
//   userMessage, aiResponse - exchange text handed to buildExtractionPrompt
//   episodeId - when truthy, every saved entity is linked to this episode
//   projectId - stored in the semantic payload of each embedded entity (null if omitted)
//
// Resolves to undefined. Failures are logged as warnings and never rethrown:
// extraction is best-effort and the episode is already saved by the caller.
async function extractAndStoreEntities(userMessage, aiResponse, episodeId=null, projectId=null) {
  logger.info('[entities] Extraction triggered');
  try {
    // Fetch existing entities to guide the model toward consistent name/type pairs
    const db = require('../db').getDB();
    const knownEntities = db.prepare(`SELECT name, type FROM entities ORDER BY rowid DESC LIMIT 20`).all();
    const prompt = buildExtractionPrompt(userMessage, aiResponse, knownEntities);

    const res = await fetch(`${EXTRACTION_URL}/api/generate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: EXTRACTION_MODEL,
        prompt: prompt,
        stream: false,
        format: 'json',
        options: {
          temperature: ENTITIES.TEMPERATURE,
          num_predict: ENTITIES.NUM_PREDICT,
        },
      }),
      signal: AbortSignal.timeout(60_000),
    });

    if (!res.ok) throw new Error(`Ollama responded ${res.status}`);

    const data = await res.json();
    const raw = data.response?.trim() ?? '';

    // Take everything from the first "{" to the last "}" so stray text around
    // the JSON object does not break parsing.
    const jsonMatch = raw.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      logger.warn('[entities] No JSON object found in response');
      logger.debug('[entities] Raw response was:', raw);
      return;
    }

    let parsed;
    try {
      parsed = JSON.parse(jsonMatch[0]);
    } catch (err) {
      logger.warn('[entities] Failed to parse extraction response:', err.message);
      logger.debug('[entities] Raw response was:', raw);
      return;
    }

    const entities = Array.isArray(parsed.entities) ? parsed.entities : [];
    if (entities.length === 0) {
      logger.debug('[entities] No entities found in this exchange — skipping');
      return;
    }

    // Map of "name::type" → saved entity, used for relationship resolution below
    const entityMap = new Map();
    let saved = 0;

    for (const candidate of entities) {
      // Model output is untrusted: a null/non-object item or a non-string
      // name/type is skipped here, because destructuring it or calling
      // toLowerCase() on it would throw and abort every remaining entity.
      if (candidate === null || typeof candidate !== 'object') continue;
      const { name, type, notes } = candidate;
      if (typeof name !== 'string' || typeof type !== 'string') continue;
      if (!name || !type || !ENTITY_TYPES.includes(type)) continue;
      if (IGNORED_NAMES.includes(name.toLowerCase())) continue;

      const entity = upsertEntity(name, type, notes ?? null);
      entityMap.set(`${name}::${type}`, entity);
      logger.info('[entities] Upserted entity:', entity);

      if (episodeId) linkEntityToEpisode(entity.id, episodeId);

      // Embedding is fire-and-forget: a failure is logged and does not stop
      // the remaining entities from being saved.
      embedEntity(entity)
        .then(vector => semantic.upsertEntity(entity.id, vector, {
          name: entity.name,
          type: entity.type,
          notes: entity.notes,
          projectId: projectId ?? null,
        }))
        .catch(err => {
          logger.warn(`[entities] Failed to embed entity "${entity.name}":`, err.message);
        });

      saved++;
    }

    if (saved > 0) logger.info(`[entities] Extracted and stored ${saved} entities`);

    // Process extracted relationships — both entities must have been saved above
    const relationships = Array.isArray(parsed.relationships) ? parsed.relationships : [];
    let relSaved = 0;

    for (const candidate of relationships) {
      // Same guard as for entities: skip malformed items instead of throwing.
      if (candidate === null || typeof candidate !== 'object') continue;
      const { from, fromType, to, toType, label, notes } = candidate;
      if (!from || !fromType || !to || !toType || !label) continue;

      const fromEntity = entityMap.get(`${from}::${fromType}`);
      const toEntity = entityMap.get(`${to}::${toType}`);
      if (!fromEntity || !toEntity) continue;

      upsertRelationship(fromEntity.id, toEntity.id, label, notes ?? null);
      relSaved++;
    }

    if (relSaved > 0) logger.info(`[entities] Extracted and stored ${relSaved} relationships`);

  } catch (err) {
    // Non-critical — log and move on, episode is already saved
    logger.warn('[entities] Extraction failed:', err.message);
  }
}
|
||||
|
||||
module.exports = { extractAndStoreEntities };
|
||||
@@ -1,33 +1,39 @@
|
||||
const {getDB} = require('../db');
|
||||
const { parseRow } = require ('@nexusai/shared')
|
||||
|
||||
/******* Entities ********/
|
||||
|
||||
// Upsert an entity - insert it, or when (name, type) already exists refresh
// its metadata and timestamps and increment mention_count.
//
//   notes    - stored on insert; on conflict existing non-null notes are kept
//   metadata - object serialised to JSON (null when falsy); replaces the stored value
//   source   - written on insert only; the conflict branch does not touch it
//
// Returns the stored row as read back by getEntityByNameType.
function upsertEntity(name, type, notes = null, metadata = null, source = 'extraction') {
  const db = getDB();

  const stmt = db.prepare(`
    INSERT INTO entities (name, type, notes, metadata, source, last_seen_at)
    VALUES (?, ?, ?, ?, ?, unixepoch())
    ON CONFLICT(name, type) DO UPDATE SET
      -- First extraction wins: notes are never overwritten once set.
      -- Revisit during Memory Consolidation Lifecycle (Phase 2) — once entity
      -- quality scoring exists, a higher-confidence extraction should be able
      -- to replace stale notes rather than being silently dropped.
      notes = COALESCE(entities.notes, excluded.notes),
      metadata = excluded.metadata,
      mention_count = entities.mention_count + 1,
      last_seen_at = unixepoch(),
      updated_at = unixepoch()
  `);
  stmt.run(name, type, notes, metadata ? JSON.stringify(metadata) : null, source);
  return getEntityByNameType(name, type);
}
|
||||
|
||||
// Get an entity by its ID. Returns whatever parseRow produces for the row
// (or for undefined when no row matches).
function getEntity(id) {
  const row = getDB().prepare(`SELECT * FROM entities WHERE id = ?`).get(id);
  return parseRow(row);
}
|
||||
|
||||
// Get all entities of a given type, ordered by name, each passed through parseRow
function getEntitiesByType(type) {
  const rows = getDB()
    .prepare(`SELECT * FROM entities WHERE type = ? ORDER BY name`)
    .all(type);
  return rows.map(parseRow);
}
|
||||
|
||||
// Delete an entity by ID, cascades to delete relationships involving this entity
|
||||
@@ -39,15 +45,17 @@ function deleteEntity(id) {
|
||||
/********* Relationships *********/
|
||||
|
||||
// Upsert a relationship - insert it, or when (from_id, to_id, label) already
// exists increment mention_count and keep any notes that are already set.
//
//   notes    - stored on insert; on conflict only fills in missing notes
//   metadata - object serialised to JSON (null when falsy); insert only
//
// Returns the stored row as read back by getRelationship.
function upsertRelationship(fromId, toId, label, notes = null, metadata = null) {
  const db = getDB();
  const stmt = db.prepare(`
    INSERT INTO relationships (from_id, to_id, label, notes, metadata)
    VALUES (?, ?, ?, ?, ?)
    ON CONFLICT(from_id, to_id, label) DO UPDATE SET
      mention_count = relationships.mention_count + 1,
      -- First extraction wins for notes — same policy as entities.
      notes = COALESCE(relationships.notes, excluded.notes)
  `);

  stmt.run(fromId, toId, label, notes, metadata ? JSON.stringify(metadata) : null);
  return getRelationship(fromId, toId, label);
}
|
||||
|
||||
@@ -55,7 +63,7 @@ function upsertRelationship(fromId, toId, label, metadata = null){
|
||||
function getRelationship(fromId, toId, label) {
|
||||
const db = getDB();
|
||||
|
||||
return parseRelationship(
|
||||
return parseRow(
|
||||
db.prepare(`SELECT * FROM relationships WHERE from_id = ? AND to_id = ? AND label = ?`)
|
||||
.get(fromId, toId, label)
|
||||
);
|
||||
@@ -64,38 +72,28 @@ function getRelationship(fromId, toId, label) {
|
||||
// Retrieves an entity by its unique (name, type) combination; the row (or
// undefined when nothing matches) is passed through parseRow.
function getEntityByNameType(name, type) {
  const row = getDB()
    .prepare(`SELECT * FROM entities WHERE name = ? AND type = ?`)
    .get(name, type);
  return parseRow(row);
}
|
||||
|
||||
// Retrieve all relationships originating from a given entity (rows whose
// from_id matches), each passed through parseRow.
function getOutboundRelationships(entityId) {
  const rows = getDB()
    .prepare(`SELECT * FROM relationships WHERE from_id = ?`)
    .all(entityId);
  return rows.map(parseRow);
}
|
||||
|
||||
// Delete a specific relationship by (from_id, to_id, label). Returns nothing.
function deleteRelationship(fromId, toId, label) {
  const db = getDB();
  const stmt = db.prepare(`DELETE FROM relationships WHERE from_id = ? AND to_id = ? AND label = ?`);
  stmt.run(fromId, toId, label);
}
|
||||
|
||||
/*********** Parse Functions ***********/
|
||||
|
||||
// Converts a raw entities row into its API shape: returns null for a missing
// row, otherwise a copy whose metadata JSON string is parsed (null when the
// stored value is empty or absent).
function parseEntity(row) {
  if (!row) return null;
  const metadata = row.metadata ? JSON.parse(row.metadata) : null;
  return { ...row, metadata };
}
|
||||
|
||||
function parseRelationship(row) {
|
||||
if (!row) return null;
|
||||
return {
|
||||
...row,
|
||||
metadata: row.metadata ? JSON.parse(row.metadata) : null
|
||||
};
|
||||
// Records that an entity was mentioned in an episode. INSERT OR IGNORE makes
// a repeated (entity_id, episode_id) pair a no-op instead of an error.
function linkEntityToEpisode(entityId, episodeId) {
  const db = getDB();
  const stmt = db.prepare(`
    INSERT OR IGNORE INTO entity_episodes (entity_id, episode_id)
    VALUES (?, ?)
  `);
  stmt.run(entityId, episodeId);
}
|
||||
|
||||
module.exports = {
|
||||
@@ -104,8 +102,9 @@ module.exports = {
|
||||
getEntitiesByType,
|
||||
getEntityByNameType,
|
||||
deleteEntity,
|
||||
linkEntityToEpisode,
|
||||
upsertRelationship,
|
||||
getRelationship,
|
||||
getRelationshipsByEntity,
|
||||
getOutboundRelationships,
|
||||
deleteRelationship
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
const {getDB} = require('../db');
|
||||
const { EPISODIC, getEnv, SERVICES } = require('@nexusai/shared');
|
||||
const { EPISODIC, getEnv, SERVICES, parseRow, formatEpisodeText, SUMMARIES, logger } = require('@nexusai/shared');
|
||||
const semantic = require('../semantic');
|
||||
const { extractAndStoreEntities } = require('../entities/extraction')
|
||||
|
||||
// --Sessions --------------------------------------------------
|
||||
|
||||
@@ -20,14 +21,35 @@ function createSession(externalId, metadata = null) {
|
||||
// Retrieves a session by its internal ID; the row (or undefined when nothing
// matches) is passed through parseRow.
function getSession(id) {
  const db = getDB();
  const stmt = db.prepare(`SELECT * FROM sessions WHERE id = ?`);
  return parseRow(stmt.get(id));
}
|
||||
|
||||
|
||||
// Lists sessions, most recently updated first, one page at a time.
//
//   limit / offset - page size and starting row
//   projectId      - when truthy, only sessions of that project are returned
//
// Returns an array of rows passed through parseRow.
function getSessions(limit = EPISODIC.DEFAULT_PAGE_SIZE, offset = EPISODIC.DEFAULT_OFFSET, projectId = null) {
  const db = getDB();

  if (projectId) {
    return db.prepare(`
      SELECT * FROM sessions
      WHERE project_id = ?
      ORDER BY updated_at DESC
      LIMIT ? OFFSET ?
    `).all(projectId, limit, offset).map(parseRow);
  }

  return db.prepare(`
    SELECT * FROM sessions
    ORDER BY updated_at DESC
    LIMIT ? OFFSET ?
  `).all(limit, offset).map(parseRow);
}
|
||||
|
||||
// Retrieves a session by its external ID; the row (or undefined when nothing
// matches) is passed through parseRow.
function getSessionByExternalId(externalId) {
  const db = getDB();
  const stmt = db.prepare(`SELECT * FROM sessions WHERE external_id = ?`);
  return parseRow(stmt.get(externalId));
}
|
||||
|
||||
// Updates the updated_at timestamp of a session to the current time
|
||||
@@ -42,29 +64,60 @@ function deleteSession(id) {
|
||||
db.prepare(`DELETE FROM sessions WHERE id = ?`).run(id);
|
||||
}
|
||||
|
||||
// Updates a session's name and/or project. A field left undefined is not
// touched; passing null clears it. When at least one field is given,
// updated_at is refreshed as well. Returns the session as read by getSession.
function updateSession(id, { name, projectId } = {}) {
  const db = getDB();

  // Collect "column = ?" fragments and their values for the provided fields only
  const assignments = [];
  const params = [];

  if (name !== undefined) {
    assignments.push('name = ?');
    params.push(name ?? null);
  }
  if (projectId !== undefined) {
    assignments.push('project_id = ?');
    params.push(projectId ?? null);
  }

  // Nothing to change: skip the UPDATE entirely
  if (assignments.length === 0) return getSession(id);

  assignments.push('updated_at = unixepoch()');
  params.push(id);

  db.prepare(`UPDATE sessions SET ${assignments.join(', ')} WHERE id = ?`).run(...params);
  return getSession(id);
}
|
||||
|
||||
// Resolves a session by external ID and applies updateSession to it.
// Throws Error('Session not found') when no session has that external ID.
function updateSessionByExternalId(externalId, fields) {
  const existing = getSessionByExternalId(externalId);
  if (!existing) {
    throw new Error('Session not found');
  }
  return updateSession(existing.id, fields);
}
|
||||
|
||||
// Resolves a session by external ID and deletes it via deleteSession.
// Throws Error('Session not found') when no session has that external ID.
function deleteSessionByExternalId(externalId) {
  const existing = getSessionByExternalId(externalId);
  if (!existing) {
    throw new Error('Session not found');
  }
  deleteSession(existing.id);
}
|
||||
|
||||
|
||||
// --Episodes --------------------------------------------------
|
||||
// Creates a new episode linked to a session, with user message, AI response, optional token count, and metadata
|
||||
async function createEpisode(sessionId, userMessage, aiResponse, tokenCount = null, metadata = null) {
|
||||
async function createEpisode(sessionId, userMessage, aiResponse, tokenCount = null, projectId=null) {
|
||||
const db = getDB();
|
||||
|
||||
// Wrap insert + session touch in a transaction — both succeed or neither does
|
||||
const insert = db.transaction(() => {
|
||||
const stmt = db.prepare(`
|
||||
INSERT INTO episodes (session_id, user_message, ai_response, token_count, metadata)
|
||||
VALUES (?, ?, ?, ?, ?)
|
||||
INSERT INTO episodes (session_id, user_message, ai_response, token_count)
|
||||
VALUES (?, ?, ?, ?)
|
||||
`);
|
||||
const result = stmt.run(
|
||||
sessionId,
|
||||
userMessage,
|
||||
aiResponse,
|
||||
tokenCount,
|
||||
metadata ? JSON.stringify(metadata) : null
|
||||
);
|
||||
touchSession(sessionId);
|
||||
return getEpisode(result.lastInsertRowid);
|
||||
});
|
||||
|
||||
const episode= insert();
|
||||
const episode = insert();
|
||||
|
||||
//embed ascynchronously after SQLite completes, non-blocking. If embedding fail, the episode still saved.
|
||||
getEpisodeEmbedding(userMessage, aiResponse)
|
||||
@@ -72,7 +125,11 @@ async function createEpisode(sessionId, userMessage, aiResponse, tokenCount = nu
|
||||
sessionId: episode.session_id,
|
||||
createdAt: episode.created_at
|
||||
}))
|
||||
.catch(err => console.error(`Failed to embed episode ${episode.id}:`, err.message));
|
||||
.catch(err => logger.error(`Failed to embed episode ${episode.id}:`, err.message));
|
||||
|
||||
extractAndStoreEntities(userMessage, aiResponse, episode.id, projectId)
|
||||
.catch(err => logger.error(`Failed to extract entities for episode ${episode.id}:`, err.message));
|
||||
|
||||
|
||||
return episode;
|
||||
}
|
||||
@@ -81,11 +138,11 @@ async function createEpisode(sessionId, userMessage, aiResponse, tokenCount = nu
|
||||
// Retrieves an episode by its ID; the row (or undefined when nothing matches)
// is passed through parseRow.
function getEpisode(id) {
  const db = getDB();
  const stmt = db.prepare(`SELECT * FROM episodes WHERE id = ?`);
  return parseRow(stmt.get(id));
}
|
||||
|
||||
// Retrieves episodes for a given session, ordered by creation time descending, with pagination
|
||||
function getEpisodesBySession(sessionId, limit = EPISODIC.DEFAULT_PAGE_SIZE, offset = 0) {
|
||||
function getEpisodesBySession(sessionId, limit = EPISODIC.DEFAULT_PAGE_SIZE, offset = EPISODIC.DEFAULT_OFFSET) {
|
||||
const db = getDB();
|
||||
const stmt = db.prepare(`
|
||||
SELECT * FROM episodes
|
||||
@@ -93,34 +150,45 @@ function getEpisodesBySession(sessionId, limit = EPISODIC.DEFAULT_PAGE_SIZE, off
|
||||
ORDER BY created_at DESC
|
||||
LIMIT ? OFFSET ?
|
||||
`);
|
||||
return stmt.all(sessionId, limit, offset).map(parseEpisode);
|
||||
return stmt.all(sessionId, limit, offset).map(parseRow);
|
||||
}
|
||||
|
||||
// Retrieves the most recent episodes of one session, ordered by creation time
// descending, capped at `limit`. Each row is passed through parseRow.
function getRecentEpisodes(sessionId, limit = EPISODIC.DEFAULT_RECENT_LIMIT) {
  const db = getDB();
  const stmt = db.prepare(`
    SELECT * FROM episodes
    WHERE session_id = ?
    ORDER BY created_at DESC
    LIMIT ?
  `);
  return stmt.all(sessionId, limit).map(parseRow);
}
|
||||
|
||||
|
||||
// Searches episodes using FTS5 full-text search, ordered by relevance, with a limit.
//
//   query      - user text; wrapped in double quotes with embedded quotes
//                doubled, so it is handed to MATCH as one quoted string
//   limit      - maximum number of rows
//   sessionIds - optional array; when non-empty, only episodes of those
//                sessions are searched. null or [] searches every session.
//
// Returns an array of rows passed through parseRow.
function searchEpisodes(query, limit = EPISODIC.DEFAULT_SEARCH_LIMIT, sessionIds = null) {
  const db = getDB();
  const safeQuery = `"${query.replace(/"/g, '""')}"`;

  if (sessionIds && sessionIds.length > 0) {
    // One "?" per session id keeps the IN list fully parameterised
    const ph = sessionIds.map(() => '?').join(',');
    return db.prepare(`
      SELECT e.* FROM episodes e
      JOIN episodes_fts fts ON e.id = fts.rowid
      WHERE episodes_fts MATCH ?
      AND e.session_id IN (${ph})
      ORDER BY rank
      LIMIT ?
    `).all(safeQuery, ...sessionIds, limit).map(parseRow);
  }

  return db.prepare(`
    SELECT e.* FROM episodes e
    JOIN episodes_fts fts ON e.id = fts.rowid
    WHERE episodes_fts MATCH ?
    ORDER BY rank
    LIMIT ?
  `).all(safeQuery, limit).map(parseRow);
}
|
||||
|
||||
// Deletes an episode by its ID
|
||||
@@ -129,37 +197,18 @@ function deleteEpisode(id) {
|
||||
db.prepare(`DELETE FROM episodes WHERE id = ?`).run(id);
|
||||
}
|
||||
|
||||
// ─── Parsers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
// Parse JSON metadata back out on the way up — stored as string, returned as object.
// Returns null for a missing row; metadata is null when the stored value is empty.
function parseSession(row) {
  if (!row) return null;
  const metadata = row.metadata ? JSON.parse(row.metadata) : null;
  return { ...row, metadata };
}
|
||||
|
||||
// Parse JSON metadata back out on the way up — stored as string, returned as object.
// Returns null for a missing row; metadata is null when the stored value is empty.
function parseEpisode(row) {
  if (!row) return null;
  const metadata = row.metadata ? JSON.parse(row.metadata) : null;
  return { ...row, metadata };
}
|
||||
|
||||
/******** Embedding Helper ********/
|
||||
async function getEpisodeEmbedding(userMessage, aiResponse){
|
||||
const url = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING_URL);
|
||||
|
||||
//Combine user message and AI response for embedding
|
||||
const text = `User: ${userMessage}\nAssistant: ${aiResponse}`;
|
||||
const text = formatEpisodeText(userMessage, aiResponse);
|
||||
|
||||
const res = await fetch(`${url}/embed`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ text })
|
||||
body: JSON.stringify({ text }),
|
||||
signal: AbortSignal.timeout(30_000),
|
||||
})
|
||||
|
||||
if (!res.ok) {
|
||||
@@ -169,15 +218,31 @@ async function getEpisodeEmbedding(userMessage, aiResponse){
|
||||
return data.embedding;
|
||||
}
|
||||
|
||||
// Retrieves the episodes of every session that belongs to a project, oldest
// first, capped at `limit`. Each row is passed through parseRow.
function getEpisodesByProject(projectId, limit = SUMMARIES.MAX_PROJECT_EPISODE_LIMIT) {
  const db = getDB();
  const stmt = db.prepare(`
    SELECT e.* FROM episodes e
    JOIN sessions s ON s.id = e.session_id
    WHERE s.project_id = ?
    ORDER BY e.created_at ASC
    LIMIT ?
  `);
  return stmt.all(projectId, limit).map(parseRow);
}
|
||||
|
||||
module.exports = {
|
||||
createSession,
|
||||
getSession,
|
||||
getSessions,
|
||||
getSessionByExternalId,
|
||||
deleteSession,
|
||||
updateSession,
|
||||
updateSessionByExternalId,
|
||||
deleteSessionByExternalId,
|
||||
createEpisode,
|
||||
getEpisode,
|
||||
getEpisodesBySession,
|
||||
getRecentEpisodes,
|
||||
searchEpisodes,
|
||||
deleteEpisode
|
||||
deleteEpisode,
|
||||
getEpisodesByProject
|
||||
};
|
||||
77
packages/memory-service/src/graph/index.js
Normal file
77
packages/memory-service/src/graph/index.js
Normal file
@@ -0,0 +1,77 @@
|
||||
const { getDB } = require('../db');
|
||||
const { parseRow, ENTITIES } = require('@nexusai/shared');
|
||||
|
||||
// Single-entity neighborhood via recursive CTE — bidirectional, configurable depth.
//
//   entityId - seed entity ID
//   depth    - maximum number of hops to follow from the seed
//
// Returns { nodes, edges }: the entities reached (the seed included when it
// exists) and every relationship whose two endpoints are both in that set.
function getNeighborhood(entityId, depth = ENTITIES.GRAPH_HOP_DEPTH) {
  const db = getDB();

  // Each step follows relationships in either direction: the CASE picks the
  // endpoint opposite to the node already visited. "WHERE t.depth < ?" stops
  // the recursion, so cycles in the graph cannot loop forever.
  const reached = db.prepare(`
    WITH RECURSIVE traverse(entity_id, depth) AS (
      SELECT ?, 0
      UNION
      SELECT
        CASE WHEN r.from_id = t.entity_id THEN r.to_id ELSE r.from_id END,
        t.depth + 1
      FROM relationships r
      JOIN traverse t ON (r.from_id = t.entity_id OR r.to_id = t.entity_id)
      WHERE t.depth < ?
    )
    SELECT DISTINCT entity_id FROM traverse
  `).all(entityId, depth);

  const nodeIds = reached.map(row => row.entity_id);
  if (nodeIds.length === 0) return { nodes: [], edges: [] };

  // One "?" per id keeps both IN lists fully parameterised
  const ph = nodeIds.map(() => '?').join(',');

  const nodes = db
    .prepare(`SELECT * FROM entities WHERE id IN (${ph})`)
    .all(...nodeIds)
    .map(parseRow);

  // The placeholder list is used twice (from_id and to_id), so bind the ids twice
  const edges = db
    .prepare(`SELECT * FROM relationships WHERE from_id IN (${ph}) AND to_id IN (${ph})`)
    .all(...nodeIds, ...nodeIds)
    .map(parseRow);

  return { nodes, edges };
}
|
||||
|
||||
// Bulk 1-hop neighborhood for orchestration — seeds are entity IDs from Qdrant search.
//
// Returns { nodes, edges }: the seed entities plus every entity directly
// related to one of them, and all relationships whose two endpoints are both
// in that combined set. An empty seed list returns empty arrays without
// touching the database.
function getEntityNeighbors(entityIds) {
  if (!entityIds.length) return { nodes: [], edges: [] };
  const db = getDB();

  const ph = entityIds.map(() => '?').join(',');

  // entityIds appears three times — once for the CASE (finding the neighbor),
  // and once each for the FROM and TO sides of the WHERE clause
  const neighborRows = db.prepare(`
    SELECT DISTINCT
      CASE WHEN from_id IN (${ph}) THEN to_id ELSE from_id END AS entity_id
    FROM relationships
    WHERE from_id IN (${ph}) OR to_id IN (${ph})
  `).all(...entityIds, ...entityIds, ...entityIds);

  // Seeds first, then neighbors; the Set drops ids that appear in both
  const neighborIds = neighborRows.map(row => row.entity_id);
  const allIds = [...new Set([...entityIds, ...neighborIds])];
  const allPh = allIds.map(() => '?').join(',');

  const nodes = db
    .prepare(`SELECT * FROM entities WHERE id IN (${allPh})`)
    .all(...allIds)
    .map(parseRow);

  const edges = db
    .prepare(`SELECT * FROM relationships WHERE from_id IN (${allPh}) AND to_id IN (${allPh})`)
    .all(...allIds, ...allIds)
    .map(parseRow);

  return { nodes, edges };
}
|
||||
|
||||
// Returns episode IDs linked to any of the given entity IDs via entity_episodes.
// Each episode ID appears once; an empty input returns [] without a query.
function getEpisodeIdsByEntities(entityIds) {
  if (!entityIds.length) return [];
  const db = getDB();
  const ph = entityIds.map(() => '?').join(',');
  const rows = db
    .prepare(`SELECT DISTINCT episode_id FROM entity_episodes WHERE entity_id IN (${ph})`)
    .all(...entityIds);
  return rows.map(row => row.episode_id);
}
|
||||
|
||||
module.exports = { getNeighborhood, getEntityNeighbors, getEpisodeIdsByEntities };
|
||||
@@ -1,22 +1,27 @@
|
||||
require ('dotenv').config();
|
||||
const express = require('express');
|
||||
const {getEnv} = require('@nexusai/shared');
|
||||
const {getEnv, PORTS, EPISODIC, logger} = require('@nexusai/shared');
|
||||
const { getDB } = require('./db');
|
||||
const { createProject, getProjects, getProject, updateProject, deleteProject } = require('./db/projects');
|
||||
const { createSummary, getSummary, getSummariesBySession, getSummariesByProject, updateSummary, deleteSummary } = require('./db/summaries');
|
||||
const { generateAndStoreProjectSummary } = require('./summarization/project');
|
||||
const graph = require('./graph');
|
||||
|
||||
const episodic = require('./episodic');
|
||||
const semantic = require('./semantic');
|
||||
const entities = require('./entities');
|
||||
|
||||
const app = express();
// Accept JSON request bodies up to 2mb
app.use(express.json({ limit: '2mb' }));

// Listening port: the PORT environment variable, else the shared default
const PORT = getEnv('PORT', PORTS.MEMORY);

//initialize database on startup
const db = getDB();

// Collection setup runs in the background; a failure is logged and does not
// stop the service from starting.
semantic.initCollections()
  .then(() => logger.info(`QDrant collections ready`))
  .catch(err => logger.error(`QDrant initialization error:`, err.message));
|
||||
|
||||
// Health check endpoint
|
||||
app.get('/health', (req, res) => {
|
||||
@@ -28,24 +33,30 @@ app.get('/health', (req, res) => {
|
||||
/************************************ */
|
||||
|
||||
// Lists sessions, paginated via ?limit and ?offset. An optional ?projectId
// restricts the list to one project; a missing value or the literal string
// 'null' means no project filter.
app.get('/sessions', (req, res) => {
  const {
    limit = EPISODIC.DEFAULT_PAGE_SIZE,
    offset = EPISODIC.DEFAULT_OFFSET,
    projectId
  } = req.query;

  const parsedProjectId = projectId && projectId !== 'null' ? Number(projectId) : null;

  const sessions = episodic.getSessions(Number(limit), Number(offset), parsedProjectId);
  res.json(sessions);
});
|
||||
|
||||
// Creates a new session with an external ID and optional metadata.
// 400 when externalId is missing; any failure from createSession is reported
// as 409 with the underlying message in `detail`.
app.post('/sessions', (req, res) => {
  const { externalId, metadata } = req.body;
  if (!externalId) {
    return res.status(400).json({ error: 'externalId is required' });
  }
  try {
    const session = episodic.createSession(externalId, metadata);
    res.status(201).json(session);
  } catch (err) {
    res.status(409).json({ error: 'Session already exists', detail: err.message });
  }
});
|
||||
|
||||
// Retrieves a session by its external ID
|
||||
@@ -56,9 +67,27 @@ app.get('/sessions/by-external/:externalId', (req, res) => {
|
||||
});
|
||||
|
||||
|
||||
// Retrieves a session by its internal ID; 404 when it does not exist
app.get('/sessions/:id', (req, res) => {
  const session = episodic.getSession(req.params.id);
  if (!session) return res.status(404).json({ error: 'Session not found' });
  res.json(session);
});
|
||||
|
||||
// Updates a session's name and/or project, addressed by external ID.
// 404 when no session has that external ID; any other failure is a 500.
app.patch('/sessions/by-external/:externalId', (req, res) => {
  const { name, projectId } = req.body;
  try {
    const session = episodic.updateSessionByExternalId(req.params.externalId, { name, projectId });
    res.json(session);
  } catch (err) {
    // updateSessionByExternalId signals an unknown ID with this exact message
    if (err.message === 'Session not found') {
      return res.status(404).json({ error: 'Session not found' });
    }
    res.status(500).json({ error: 'Failed to update session', detail: err.message });
  }
});
|
||||
|
||||
// Deletes a session and all associated episodes, addressed by external ID.
// 204 on success, 404 when no session has that external ID, 500 otherwise.
app.delete('/sessions/by-external/:externalId', (req, res) => {
  try {
    episodic.deleteSessionByExternalId(req.params.externalId);
    res.status(204).send();
  } catch (err) {
    // deleteSessionByExternalId signals an unknown ID with this exact message
    if (err.message === 'Session not found') {
      return res.status(404).json({ error: 'Session not found' });
    }
    res.status(500).json({ error: 'Failed to delete session', detail: err.message });
  }
});
|
||||
|
||||
@@ -68,20 +97,46 @@ app.delete('/sessions/:id', (req, res) => {
|
||||
/************************************* */
|
||||
|
||||
// Stores one user/assistant exchange as an episode. projectId is forwarded to
// createEpisode, which hands it to entity extraction.
// 400 when a required field is missing, 201 with the episode on success.
app.post('/episodes', async (req, res) => {
  const { sessionId, userMessage, aiResponse, tokenCount, projectId } = req.body;
  if (!sessionId || !userMessage || !aiResponse) {
    return res.status(400).json({ error: 'sessionId, userMessage and aiResponse are required' });
  }

  try {
    const episode = await episodic.createEpisode(sessionId, userMessage, aiResponse, tokenCount, projectId);
    res.status(201).json(episode);
  } catch (err) {
    // Without this, a rejection inside an async handler can leave the request
    // without a response.
    res.status(500).json({ error: 'Failed to create episode', detail: err.message });
  }
});
|
||||
|
||||
// Lists episodes as { episodes, total }.
//   ?q         - full-text search; total is the number of matches returned
//   ?sessionId - one session's episodes; total counts that session only
//   otherwise  - all episodes, newest first; total counts every episode
// ?limit and ?offset page the two list modes (search uses limit only).
app.get('/episodes', (req, res) => {
  const { limit = 50, offset = 0, sessionId, q } = req.query;

  if (q) {
    const results = episodic.searchEpisodes(q, Number(limit));
    return res.json({ episodes: results, total: results.length });
  }

  const db = getDB();

  if (sessionId) {
    const id = Number(sessionId);
    const episodes = episodic.getEpisodesBySession(id, Number(limit), Number(offset));
    // Count within the same filter so clients can page through the session
    const total = db.prepare(`SELECT COUNT(*) as count FROM episodes WHERE session_id = ?`).get(id).count;
    return res.json({ episodes, total });
  }

  // Resolved once per request instead of once per row
  const { parseRow } = require('@nexusai/shared');
  const episodes = db.prepare(
    `SELECT * FROM episodes ORDER BY created_at DESC LIMIT ? OFFSET ?`
  ).all(Number(limit), Number(offset)).map(row => parseRow(row));

  const total = db.prepare(`SELECT COUNT(*) as count FROM episodes`).get().count;
  res.json({ episodes, total });
});
|
||||
|
||||
// Search MUST come before /:id — otherwise 'search' gets captured as an id.
// ?q is required. ?sessionIds is an optional comma-separated list of session
// IDs that narrows the search; entries that are not non-zero numbers are dropped.
app.get('/episodes/search', (req, res) => {
  const { q, limit = EPISODIC.DEFAULT_PAGE_SIZE, sessionIds } = req.query;
  if (!q) return res.status(400).json({ error: 'q (query) parameter is required' });

  // String() also covers a repeated ?sessionIds=..&sessionIds=.. parameter,
  // which arrives as an array and would not have a split method.
  const parsedSessionIds = sessionIds
    ? String(sessionIds).split(',').map(Number).filter(Boolean)
    : null;

  res.json(episodic.searchEpisodes(q, Number(limit), parsedSessionIds));
});
|
||||
|
||||
app.get('/episodes/:id', (req, res) => {
|
||||
@@ -92,7 +147,7 @@ app.get('/episodes/:id', (req, res) => {
|
||||
|
||||
// Get paginated episodes for a session
|
||||
app.get('/sessions/:id/episodes', (req, res) => {
|
||||
const { limit = 10, offset = 0 } = req.query;
|
||||
const { limit = EPISODIC.DEFAULT_PAGE_SIZE, offset = EPISODIC.DEFAULT_OFFSET } = req.query;
|
||||
const episodes = episodic.getEpisodesBySession(
|
||||
req.params.id,
|
||||
Number(limit),
|
||||
@@ -102,7 +157,12 @@ app.get('/sessions/:id/episodes', (req, res) => {
|
||||
});
|
||||
|
||||
// Deletes an episode from the episodic store and, in the background, from the
// semantic store. Responds 204 without waiting for the semantic delete.
app.delete('/episodes/:id', (req, res) => {
  const id = Number(req.params.id);
  episodic.deleteEpisode(id);

  semantic.deleteEpisode(id) // fire-and-forget
    .catch(err => logger.error(`[Memory] Qdrant delete failed for episode ${id}:`, err.message));

  res.status(204).send();
});
|
||||
|
||||
@@ -119,6 +179,11 @@ app.post('/entities', (req, res) => {
|
||||
res.status(201).json(entity);
|
||||
});
|
||||
|
||||
// Get all entities of a given type (empty array when none exist)
app.get('/entities/by-type/:type', (req, res) => {
  const { type } = req.params;
  res.json(entities.getEntitiesByType(type));
});
|
||||
|
||||
// Get an entity by ID
|
||||
app.get('/entities/:id', (req, res) => {
|
||||
const entity = entities.getEntity(req.params.id);
|
||||
@@ -126,10 +191,7 @@ app.get('/entities/:id', (req, res) => {
|
||||
res.json(entity);
|
||||
});
|
||||
|
||||
// Get all entities of a given type (empty array when none exist)
app.get('/entities/by-type/:type', (req, res) => {
  const { type } = req.params;
  res.json(entities.getEntitiesByType(type));
});
|
||||
|
||||
|
||||
// Delete an entity by ID
|
||||
app.delete('/entities/:id', (req, res) => {
|
||||
@@ -143,17 +205,17 @@ app.delete('/entities/:id', (req, res) => {
|
||||
|
||||
// Upsert a relationship between two entities. fromId, toId and label are
// required (400 otherwise); notes and metadata are optional.
app.post('/relationships', (req, res) => {
  const { fromId, toId, label, notes, metadata } = req.body;
  if (!fromId || !toId || !label) {
    return res.status(400).json({ error: 'fromId, toId and label are required' });
  }
  const relationship = entities.upsertRelationship(fromId, toId, label, notes, metadata);
  res.status(201).json(relationship);
});
|
||||
|
||||
// Get the outbound relationships of an entity (those it is the source of)
app.get('/entities/:id/relationships', (req, res) => {
  res.json(entities.getOutboundRelationships(req.params.id));
});
|
||||
|
||||
// Delete a specific relationship
|
||||
@@ -166,11 +228,149 @@ app.delete('/relationships', (req, res) => {
|
||||
res.status(204).send();
|
||||
})
|
||||
|
||||
/********************************* */
|
||||
/********** Graph Routes ********** */
|
||||
/********************************* */
|
||||
|
||||
// Single-entity neighborhood — depth defaults to ENTITIES.GRAPH_HOP_DEPTH
//
// Responds 404 when the entity does not exist and 400 when `?depth=` is
// present but is not a non-negative integer. A valid depth is capped at 3;
// when the parameter is absent (or empty) `undefined` is passed so the graph
// layer applies its own default.
app.get('/graph/neighborhood/:entityId', (req, res) => {
  const entity = entities.getEntity(req.params.entityId);
  if (!entity) return res.status(404).json({ error: 'Entity not found' });

  let depth;
  const rawDepth = req.query.depth;
  if (rawDepth !== undefined && rawDepth !== '') {
    // Number() turns junk ("abc", repeated params parsed as arrays) into NaN;
    // reject it here rather than handing NaN / negative / fractional depths
    // to the traversal.
    const requestedDepth = Number(rawDepth);
    if (!Number.isInteger(requestedDepth) || requestedDepth < 0) {
      return res.status(400).json({ error: 'depth must be a non-negative integer' });
    }
    depth = Math.min(requestedDepth, 3);
  }

  const neighborhood = graph.getNeighborhood(Number(req.params.entityId), depth);
  res.json({ entity, neighborhood });
});
|
||||
|
||||
// Bulk 1-hop neighborhood — body: { entityIds: [...] }
// Responds 400 unless `entityIds` is a non-empty array; each id is coerced
// with Number() before being handed to graph.getEntityNeighbors.
app.post('/graph/neighbors', (req, res) => {
  const { entityIds } = req.body;
  const hasIds = Array.isArray(entityIds) && entityIds.length !== 0;
  if (!hasIds) {
    return res.status(400).json({ error: 'entityIds array is required' });
  }

  const numericIds = entityIds.map((id) => Number(id));
  res.json(graph.getEntityNeighbors(numericIds));
});
|
||||
|
||||
// Look up episode IDs for a set of entities — body: { entityIds: [...] }
// Responds 400 unless `entityIds` is a non-empty array; otherwise returns
// { episodeIds } as produced by graph.getEpisodeIdsByEntities.
app.post('/episodes/by-entities', (req, res) => {
  const { entityIds } = req.body;
  const hasIds = Array.isArray(entityIds) && entityIds.length !== 0;
  if (!hasIds) {
    return res.status(400).json({ error: 'entityIds array is required' });
  }

  const numericIds = entityIds.map((id) => Number(id));
  const episodeIds = graph.getEpisodeIdsByEntities(numericIds);
  res.json({ episodeIds });
});
|
||||
|
||||
/*********************************** */
|
||||
/********** Project Routes ********** */
|
||||
/*********************************** */
|
||||
|
||||
// Create a project — body: { name, description?, colour?, icon? }
// Responds 400 when `name` is missing, blank, or not a string; 201 with the
// created project on success; 500 (with the error message as `detail`) when
// createProject throws.
app.post('/projects', (req, res) => {
  const { name, description, colour, icon } = req.body;

  // Guard on type first: a non-string name (number, object, ...) has no
  // .trim() and would otherwise throw before the try block is reached.
  const trimmedName = typeof name === 'string' ? name.trim() : '';
  if (!trimmedName) return res.status(400).json({ error: 'name is required' });

  try {
    res.status(201).json(createProject({ name: trimmedName, description, colour, icon }));
  } catch (err) {
    res.status(500).json({ error: 'Failed to create project', detail: err.message });
  }
});
|
||||
|
||||
// List all projects.
app.get('/projects', (req, res) => {
  const allProjects = getProjects();
  res.json(allProjects);
});

// Generate (or regenerate) a project overview summary on demand.
// 404 if the project is unknown, 201 with the stored summary on success.
app.post('/projects/:id/summarize', async (req, res) => {
  const projectId = Number(req.params.id);
  if (!getProject(projectId)) {
    return res.status(404).json({ error: 'Project not found' });
  }

  try {
    const summary = await generateAndStoreProjectSummary(projectId);
    res.status(201).json(summary);
  } catch (err) {
    // The "nothing to summarize" failure is recognised by its message text
    // and reported as 422; every other failure becomes a 500.
    const nothingToSummarize = err.message.includes('No session summaries or episodes');
    if (nothingToSummarize) {
      return res.status(422).json({ error: err.message });
    }
    res.status(500).json({ error: 'Failed to generate project summary', detail: err.message });
  }
});

// Get the current project overview summary.
app.get('/projects/:id/overview', async (req, res) => {
  const { getProjectOverviewSummary } = require('./db/summaries');
  const projectId = Number(req.params.id);
  const overview = getProjectOverviewSummary(projectId);
  // 200 with null is fine — frontend can handle "no overview yet" gracefully
  res.json(overview ?? null);
});

// Get summaries for a project (the id is passed through as received).
app.get('/projects/:id/summaries', (req, res) => {
  const { id } = req.params;
  res.json(getSummariesByProject(id));
});

// Get a single project, or 404 when it does not exist.
app.get('/projects/:id', (req, res) => {
  const found = getProject(req.params.id);
  if (!found) {
    return res.status(404).json({ error: 'Not found' });
  }
  res.json(found);
});

// Update a project with the request body; 404 when it does not exist.
app.patch('/projects/:id', (req, res) => {
  const { id } = req.params;
  if (!getProject(id)) {
    return res.status(404).json({ error: 'Not found' });
  }
  res.json(updateProject(id, req.body));
});

// Delete a project; 404 when it does not exist, 204 once removed.
app.delete('/projects/:id', (req, res) => {
  const { id } = req.params;
  if (!getProject(id)) {
    return res.status(404).json({ error: 'Not found' });
  }
  deleteProject(id);
  res.status(204).send();
});
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*********************************** */
|
||||
/********** Summary Routes ********** */
|
||||
/*********************************** */
|
||||
|
||||
// Create a summary (called by orchestration, fire-and-forget style)
// Requires `content` plus at least one of `sessionId` / `projectId`.
app.post('/summaries', (req, res) => {
  const { sessionId, projectId, content, tokenCount, episodeRange, metadata } = req.body;

  if (!content) {
    return res.status(400).json({ error: 'content is required' });
  }
  const hasOwner = Boolean(sessionId || projectId);
  if (!hasOwner) {
    return res.status(400).json({ error: 'sessionId or projectId is required' });
  }

  try {
    const created = createSummary({ sessionId, projectId, content, tokenCount, episodeRange, metadata });
    res.status(201).json(created);
  } catch (err) {
    res.status(500).json({ error: 'Failed to create summary', detail: err.message });
  }
});

// Get summaries for a session
app.get('/sessions/:id/summaries', (req, res) => {
  const { id } = req.params;
  res.json(getSummariesBySession(id));
});

// Update a summary (for cumulative updates); 404 when it does not exist.
app.patch('/summaries/:id', (req, res) => {
  const { id } = req.params;
  if (!getSummary(id)) {
    return res.status(404).json({ error: 'Not found' });
  }
  res.json(updateSummary(id, req.body));
});

// Delete a summary. No existence check is made: 204 is always returned
// once deleteSummary has been called.
app.delete('/summaries/:id', (req, res) => {
  const { id } = req.params;
  deleteSummary(id);
  res.status(204).send();
});
|
||||
|
||||
|
||||
|
||||
/********************************** */
|
||||
/********** Start Server ********** */
|
||||
/********************************** */
|
||||
app.listen(PORT, () => {
|
||||
console.log(`Memory Service is running on port ${PORT}`);
|
||||
logger.info(`Memory Service is running on port ${PORT}`);
|
||||
});
|
||||
@@ -1,5 +1,5 @@
|
||||
const {QdrantClient} = require('@qdrant/js-client-rest');
|
||||
const {QDRANT, COLLECTIONS, getEnv} = require('@nexusai/shared');
|
||||
const {QDRANT, COLLECTIONS, getEnv, logger} = require('@nexusai/shared');
|
||||
|
||||
let client;
|
||||
|
||||
@@ -24,9 +24,9 @@ async function initCollections() {
|
||||
distance: QDRANT.DISTANCE_METRIC
|
||||
}
|
||||
});
|
||||
console.log(`Created Qdrant collection: ${name}`);
|
||||
logger.info(`Created Qdrant collection: ${name}`);
|
||||
} else {
|
||||
console.log(`Qdrant collection already exists: ${name}`);
|
||||
logger.info(`Qdrant collection already exists: ${name}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -95,6 +95,11 @@ async function deleteVector(collection, id) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Delete an episode's vector by delegating to deleteVector with the
 * EPISODES collection.
 *
 * @param {*} id - Point identifier, passed through to deleteVector unchanged.
 * @returns {Promise<*>} Whatever deleteVector resolves with.
 */
async function deleteEpisode(id) {
  const outcome = await deleteVector(COLLECTIONS.EPISODES, id);
  return outcome;
}
|
||||
|
||||
|
||||
module.exports = {
|
||||
initCollections,
|
||||
upsertEpisode,
|
||||
@@ -103,5 +108,6 @@ module.exports = {
|
||||
searchEpisodes,
|
||||
searchEntities,
|
||||
searchSummaries,
|
||||
deleteVector
|
||||
deleteVector,
|
||||
deleteEpisode
|
||||
};
|
||||
142
packages/memory-service/src/summarization/project.js
Normal file
142
packages/memory-service/src/summarization/project.js
Normal file
@@ -0,0 +1,142 @@
|
||||
const { SERVICES, getEnv, SUMMARIES } = require('@nexusai/shared');
|
||||
const {
|
||||
getSessionSummariesForProject,
|
||||
getProjectOverviewSummary,
|
||||
createSummary,
|
||||
updateSummary,
|
||||
|
||||
} = require('../db/summaries');
|
||||
const { getEpisodesByProject } = require('../episodic');
|
||||
const { getProject } = require('../db/projects');
|
||||
|
||||
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');
|
||||
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b');
|
||||
|
||||
const MAX_SUMMARY_CHARS = SUMMARIES.MAX_SUMMARY_CHARS; // generous ceiling before we truncate input
|
||||
|
||||
/**
 * Build the ChatML prompt that asks the model for a project overview based
 * on the project's session summaries.
 *
 * @param {string} projectName - Project name interpolated into the instructions.
 * @param {Array<{content: string}>} sessionSummaries - Each entry's `content`
 *   is rendered as a "Session N:" section, numbered in array order.
 * @returns {string} Prompt text: a user turn (instructions + summaries)
 *   followed by an open assistant turn.
 */
function buildProjectSummaryPrompt(projectName, sessionSummaries) {
  let summaryBlock = sessionSummaries
    .map((s, i) => `Session ${i + 1}:\n${s.content}`)
    .join('\n\n');

  // Guard against very large inputs: keep only the trailing
  // MAX_SUMMARY_CHARS characters. The cut is by character, so the kept text
  // may begin part-way through a session.
  if (summaryBlock.length > MAX_SUMMARY_CHARS) {
    summaryBlock = summaryBlock.slice(-MAX_SUMMARY_CHARS);
  }

  return [
    '<|im_start|>user',
    `The following are session summaries from a project called "${projectName}".`,
    'Write a project overview covering: goals, progress, key decisions, and current state.',
    'Scale the length to the material — use multiple paragraphs for complex projects, a few sentences for simple ones.',
    'Be comprehensive but avoid padding. Do not repeat the same point twice.',
    'Write in third person. Output only the overview text, no headings or labels.',
    '',
    // The summaries themselves, then close the user turn and open the
    // assistant turn (same layout as the episodes-based prompt builder).
    summaryBlock,
    '<|im_end|>',
    '<|im_start|>assistant',
  ].join('\n');
}
|
||||
|
||||
/**
 * Build the ChatML prompt that asks the model for a project overview based
 * on raw conversation episodes.
 *
 * @param {string} projectName - Project name interpolated into the instructions.
 * @param {Array<{user_message: string, ai_response: string}>} episodes -
 *   Rendered in array order as "User: ... / Assistant: ..." pairs.
 * @returns {string} Prompt text: a user turn (instructions + transcript)
 *   followed by an open assistant turn.
 */
function buildProjectSummaryFromEpisodesPrompt(projectName, episodes) {
  // Condense episodes into a readable transcript.
  const transcript = episodes
    .map((ep) => `User: ${ep.user_message}\nAssistant: ${ep.ai_response}`)
    .join('\n\n');

  // Over the ceiling: keep the tail of the transcript (the end of the array),
  // cutting by character rather than at an episode boundary.
  const episodeBlock = transcript.length > MAX_SUMMARY_CHARS
    ? transcript.slice(-MAX_SUMMARY_CHARS)
    : transcript;

  return [
    '<|im_start|>user',
    `The following are conversations from a project called "${projectName}".`,
    'Write a project overview covering: goals, progress, key decisions, and current state.',
    'Scale the length to the material — use multiple paragraphs for complex projects, a few sentences for simple ones.',
    'Be comprehensive but avoid padding. Do not repeat the same point twice.',
    'Write in third person. Output only the overview text, no headings or labels.',
    '',
    episodeBlock,
    '<|im_end|>',
    '<|im_start|>assistant',
  ].join('\n');
}
|
||||
|
||||
/**
 * Request a free-text project overview from the extraction model, using raw
 * episodes as the source material.
 *
 * @param {string} projectName - Project name used in the prompt.
 * @param {Array<Object>} episodes - Episodes handed to
 *   buildProjectSummaryFromEpisodesPrompt.
 * @returns {Promise<string>} Model output with ChatML control tokens removed
 *   and whitespace trimmed; empty string when the reply has no `response`.
 * @throws {Error} When the endpoint answers with a non-OK HTTP status.
 */
async function generateProjectSummaryFromEpisodes(projectName, episodes) {
  const requestBody = {
    model: EXTRACTION_MODEL,
    prompt: buildProjectSummaryFromEpisodesPrompt(projectName, episodes),
    stream: false,
    options: { temperature: 0.2, num_predict: 1200 },
  };

  const response = await fetch(`${EXTRACTION_URL}/api/generate`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(requestBody),
  });
  if (!response.ok) {
    throw new Error(`Ollama responded ${response.status}`);
  }

  const payload = await response.json();
  const text = payload.response?.trim() ?? '';

  // First drop any complete <|im_start|>...<|im_end|> span, then any stray
  // control tokens that remain.
  const withoutTurns = text.replace(/<\|im_start\|>.*?<\|im_end\|>/gs, '');
  const withoutTokens = withoutTurns.replace(/<\|im_start\|>|<\|im_end\|>|<\|im_sep\|>/g, '');
  return withoutTokens.trim();
}
|
||||
|
||||
/**
 * Request a free-text project overview from the extraction model, using the
 * project's session summaries as the source material.
 *
 * @param {string} projectName - Project name used in the prompt.
 * @param {Array<Object>} sessionSummaries - Summaries handed to
 *   buildProjectSummaryPrompt.
 * @returns {Promise<string>} Model output with ChatML control tokens removed
 *   and whitespace trimmed; empty string when the reply has no `response`.
 * @throws {Error} When the endpoint answers with a non-OK HTTP status.
 */
async function generateProjectSummary(projectName, sessionSummaries) {
  const requestBody = {
    model: EXTRACTION_MODEL,
    prompt: buildProjectSummaryPrompt(projectName, sessionSummaries),
    stream: false,
    // No format: 'json' — we want free-text narrative, same as session summarization
    options: { temperature: 0.2, num_predict: 1200 },
  };

  const response = await fetch(`${EXTRACTION_URL}/api/generate`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(requestBody),
  });
  if (!response.ok) {
    throw new Error(`Ollama responded ${response.status}`);
  }

  const payload = await response.json();
  const text = payload.response?.trim() ?? '';

  // First drop any complete <|im_start|>...<|im_end|> span, then any stray
  // control tokens that remain.
  const withoutTurns = text.replace(/<\|im_start\|>.*?<\|im_end\|>/gs, '');
  const withoutTokens = withoutTurns.replace(/<\|im_start\|>|<\|im_end\|>|<\|im_sep\|>/g, '');
  return withoutTokens.trim();
}
|
||||
|
||||
/**
 * Main entry point — called by the route handler.
 *
 * Generates an overview for the project and persists it: session summaries
 * are used when any exist, otherwise the project's raw episodes. An existing
 * overview row is updated in place; otherwise a new summary is created.
 *
 * @param {number} projectId - ID of the project to summarize.
 * @returns {Promise<Object>} The value returned by updateSummary or createSummary.
 * @throws {Error} 'Project not found', 'No session summaries or episodes
 *   found for this project', or 'Model returned empty summary'; errors from
 *   the generation call also propagate.
 */
async function generateAndStoreProjectSummary(projectId) {
  const project = getProject(projectId);
  if (!project) {
    throw new Error('Project not found');
  }

  const sessionSummaries = getSessionSummariesForProject(projectId);

  let content;
  if (sessionSummaries.length > 0) {
    // Preferred path — summarize the summaries
    content = await generateProjectSummary(project.name, sessionSummaries);
  } else {
    // Fallback — summarize raw episodes directly
    const episodes = getEpisodesByProject(projectId);
    if (!episodes.length) {
      throw new Error('No session summaries or episodes found for this project');
    }
    content = await generateProjectSummaryFromEpisodes(project.name, episodes);
  }

  if (!content) {
    throw new Error('Model returned empty summary');
  }

  // Reuse the existing overview row when there is one, clearing its token
  // count and episode range; otherwise insert a project-level summary.
  const existing = getProjectOverviewSummary(projectId);
  return existing
    ? updateSummary(existing.id, { content, tokenCount: null, episodeRange: null })
    : createSummary({ projectId, content, sessionId: null });
}
|
||||
|
||||
module.exports = { generateAndStoreProjectSummary };
|
||||
156
packages/orchestration-service/CLAUDE.md
Normal file
156
packages/orchestration-service/CLAUDE.md
Normal file
@@ -0,0 +1,156 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
See the root [CLAUDE.md](../../CLAUDE.md) for overall architecture, service roles, and the end-to-end chat flow.
|
||||
|
||||
## Running This Service
|
||||
|
||||
```bash
|
||||
npm run orchestration # From repo root (node src/index.js)
|
||||
npm -w packages/orchestration-service run dev # With --watch
|
||||
```
|
||||
|
||||
Default port: **4000**. Depends on memory-service, embedding-service, inference-service, and Qdrant.
|
||||
|
||||
## Context Assembly (`src/chat/index.js`)
|
||||
|
||||
`assembleContext(externalId, userMessage)` is the core function that builds the inference prompt. Order of operations:
|
||||
|
||||
1. Resolve session by `externalId` (creates it if missing — every chat call is self-healing).
|
||||
2. If session has a `project_id`, load the project and fetch all sibling sessions (via `getProjectSessions`, hardcoded `limit=200`).
|
||||
3. Fetch `recentEpisodeLimit` recent episodes from memory-service.
|
||||
4. Embed the user message; search Qdrant EPISODES with `scoreThreshold`:
|
||||
- No project: `must: [sessionId == this session]`
|
||||
- Project: `should: [sessionId == s1, sessionId == s2, ...]` across all project sessions
|
||||
- Dedup against recent episode IDs before including.
|
||||
5. Run **fused episode retrieval** via `getFusedEpisodes` — Qdrant semantic search and FTS5 keyword search run in parallel, both filtered against `recentIds`, then merged via Reciprocal Rank Fusion (RRF). If `keywordWeight` is `0`, the FTS call is skipped. Returns top `semanticLimit` episodes by fused score.
|
||||
6. Embed and search Qdrant ENTITIES (filtered by `projectId` if in a project). Returns entity IDs alongside payload — the Qdrant point ID equals the SQLite entity ID.
|
||||
7. Expand matched entities into a 1-hop graph neighborhood via `POST /graph/neighbors` on the memory-service. Returns `{ nodes, edges }` — the full entity objects plus connecting relationships. Falls back to flat entity list (no edges) if the graph call fails.
|
||||
8. Build prompt in this fixed order: **system prompt → graph context → fused episodes → recent episodes → user message → "Assistant:"**
|
||||
|
||||
The ordering prioritizes established facts (graph context) and relevant past context (semantic) over pure recency.
|
||||
|
||||
## Graph Context Format
|
||||
|
||||
`formatGraphContext(nodes, edges)` in `src/chat/index.js` formats the neighborhood as:
|
||||
|
||||
```
|
||||
- Alice (person): software engineer working on NexusAI
|
||||
→ works_on NexusAI (project)
|
||||
→ knows Bob (person)
|
||||
- NexusAI (project): AI assistant framework
|
||||
- Bob (person): Alice's colleague
|
||||
```
|
||||
|
||||
Each node shows its notes on the first line. Outbound edges are indented below with `→ label target (type)`. Nodes with only inbound edges (neighbors pulled in by traversal) appear without connection lines.
|
||||
|
||||
## System Prompt Resolution
|
||||
|
||||
Priority from highest to lowest:
|
||||
1. `project.system_prompt` (stored on the project row in memory-service)
|
||||
2. `settings.systemPrompt` (saved in `data/settings.json`)
|
||||
3. `ORCHESTRATION.SYSTEM_PROMPT` (shared constants fallback)
|
||||
|
||||
## Settings (`src/config/settings.js`)
|
||||
|
||||
Settings are loaded from `data/settings.json` merged with defaults at every `GET /settings` call. `PATCH /settings` validates each field individually with specific constraints:
|
||||
|
||||
| Field | Constraint |
|
||||
|---|---|
|
||||
| `recentEpisodeLimit` | integer, 1–20 |
|
||||
| `semanticLimit` | integer, 1–20 |
|
||||
| `scoreThreshold` | number, 0–1 |
|
||||
| `temperature` | number, 0–2 |
|
||||
| `repeatPenalty` | number, 1–2 |
|
||||
| `topP` | number, 0–1 |
|
||||
| `topK` | integer, 1–100 |
|
||||
| `modelsFolderPath` | path must exist and be readable |
|
||||
| `systemPrompt` | string (trimmed); `null` reverts to shared default |
|
||||
|
||||
`data/settings.json` is created on first save. Parent directories are created if missing.
|
||||
|
||||
## Streaming SSE (`src/chat/index.js` — `chatStream`)
|
||||
|
||||
The route sets SSE headers and delegates to `chatStream`, which:
|
||||
1. Calls `inference.completeStream()` → receives a raw HTTP Response with a readable body.
|
||||
2. Reads the body in chunks, buffers across chunk boundaries, splits on `\n\n`.
|
||||
3. For each event line starting with `data: `, parses the JSON and calls `onChunk(data.response)`.
|
||||
4. The `[DONE]` sentinel (used by some llama-server versions) is explicitly ignored.
|
||||
5. After stream ends, saves the assembled full response as an episode (same as non-streaming).
|
||||
|
||||
If a chunk parse fails the error is logged and the stream continues. If the response body closes with no text accumulated, the episode is not saved (logged as warning).
|
||||
|
||||
## Fire-and-Forget Tasks
|
||||
|
||||
After every successful chat turn:
|
||||
- **Summarization** (`services/summarization.js` → `triggerSummary`): checks token threshold → recency guard → calls Ollama → POSTs to memory-service. Only runs if `SUMMARIES.THRESHOLD_TOKENS` is exceeded AND at least `SUMMARIES.MIN_EPISODES_SINCE` new episodes have occurred since the last summary.
|
||||
- **Auto-naming** (`chat/index.js` → `autoNameSession`): only fires on the first message of a session. Uses temperature 0.3, `maxTokens=20`, prompts for a ≤5-word title.
|
||||
|
||||
Both tasks catch all errors and log warnings without surfacing to the client.
|
||||
|
||||
## Summarization Recency Guard
|
||||
|
||||
`src/services/summarization.js` reads the `episode_range` field of the latest existing summary (format: `"<startId>-<endId>"`). It counts SQLite episodes with `id > endId`; if fewer than `SUMMARIES.MIN_EPISODES_SINCE`, it skips. This prevents rapid re-summarization on high-traffic sessions.
|
||||
|
||||
When the existing summary's token count exceeds `SUMMARIES.MAX_SUMMARY_TOKENS`, it is treated as "expired" — a fresh summary is generated instead of an incremental update.
|
||||
|
||||
## Qdrant Calls (Direct, Not Via Memory-Service)
|
||||
|
||||
`src/services/qdrant.js` makes REST calls to Qdrant directly at `QDRANT_URL`. This bypasses memory-service for semantic search performance. Orchestration fetches episode/entity content from memory-service by ID *after* getting vector search results from Qdrant.
|
||||
|
||||
`searchEntities` checks `projectId !== null && projectId !== undefined` before applying the filter — a session with no project skips the filter entirely and searches globally.
|
||||
|
||||
## Retrieval Fusion (`src/chat/index.js`)
|
||||
|
||||
Three functions handle fusion — all pure or lightly async, all non-critical:
|
||||
|
||||
- **`getFTSResults(userMessage, { limit, sessionIds })`** — calls `memory.searchEpisodes`; returns `[]` and logs a warning on failure
|
||||
- **`fuseEpisodeResults(semanticEps, keywordEps, { semanticWeight, keywordWeight, limit })`** — pure RRF implementation. Key guard: FTS-only episodes are only added to the scores Map if `contrib > 0` (prevents score-0 bleed-through when `keywordWeight: 0`)
|
||||
- **`getFusedEpisodes(userMessage, session, recentIds, projectSessionIds, settings)`** — orchestrates both paths in `Promise.all`, applies `recentIds` filter to FTS results, calls fusion. Short-circuits FTS call entirely if `keywordWeight === 0`
|
||||
|
||||
FTS is scoped to `projectSessionIds` if in a project, otherwise `[session.id]` — mirrors Qdrant scoping exactly.
|
||||
|
||||
> For RRF formula, weight semantics, and enabling keyword search, see `docs/services/retrieval-fusion.md`.
|
||||
|
||||
## Graph Service Client (`src/services/graph.js`)
|
||||
|
||||
Thin HTTP client for memory-service graph endpoints. One function:
|
||||
|
||||
- **`getNeighbors(entityIds[])`** — POSTs to `memory-service/graph/neighbors` with the entity IDs from Qdrant entity search. Returns `{ nodes, edges }`. Throws on non-2xx — caller wraps in try/catch with graceful fallback.
|
||||
|
||||
## Models Endpoint
|
||||
|
||||
`GET /models` scans `modelsFolderPath` for `.gguf` files and optionally reads a `models.json` manifest (keyed by filename) for labels and descriptions. File size is reported in GB. Returns 500 if the folder is inaccessible.
|
||||
|
||||
`GET /models/props` proxies `/props` from llama-server and returns `{contextWindow, modelAlias}`. Returns 503 if llama-server is unreachable.
|
||||
|
||||
## Health Check
|
||||
|
||||
`GET /health/services` runs parallel fetch calls to all four dependent services with a 3-second `AbortSignal.timeout` each. Results are returned as an array — the endpoint never returns a non-2xx itself regardless of downstream status.
|
||||
|
||||
## Background Model (qwen2.5:3b)
|
||||
Used for entity/relationship extraction and summarization via Ollama on Mini PC 1. Uses **ChatML format** (`<|im_start|>` / `<|im_end|>`) — not Phi3 format. Use `format: 'json'` only for structured extraction, never for free-text summarization.
|
||||
|
||||
## API Endpoints Quick Reference
|
||||
|
||||
| Method | Path | Notes |
|
||||
|---|---|---|
|
||||
| GET | `/health` | Returns service URLs |
|
||||
| GET | `/health/services` | Parallel status of all dependencies |
|
||||
| POST | `/chat` | Blocking completion |
|
||||
| POST | `/chat/stream` | SSE streaming |
|
||||
| GET/PATCH | `/settings` | Persistent settings |
|
||||
| GET | `/models` | `.gguf` file scan |
|
||||
| GET | `/models/props` | llama-server model info |
|
||||
| GET | `/sessions` | Delegates to memory-service |
|
||||
| GET | `/sessions/:sessionId/history` | Paginated episodes by external ID |
|
||||
| PATCH | `/sessions/:sessionId` | `name` and/or `projectId` |
|
||||
| DELETE | `/sessions/:sessionId` | |
|
||||
| GET | `/episodes` | Delegates; supports `q` for FTS |
|
||||
| DELETE | `/episodes/:id` | Delegates |
|
||||
| GET/POST/PATCH/DELETE | `/projects` and `/projects/:id` | Delegates |
|
||||
| POST | `/summaries/project/:projectId/generate` | On-demand; 422 if no data |
|
||||
| GET | `/summaries/project/:projectId/overview` | |
|
||||
| GET | `/summaries/session/:sessionId` | Resolves external ID first |
|
||||
| GET | `/summaries/project/:projectId` | |
|
||||
@@ -8,6 +8,7 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@nexusai/shared": "^1.0.0",
|
||||
"cors": "^2.8.6",
|
||||
"dotenv": "^17.4.0",
|
||||
"express": "^5.2.1",
|
||||
"node-fetch": "^2.7.0"
|
||||
|
||||
@@ -1,63 +1,413 @@
|
||||
const memory = require('../services/memory');
|
||||
const inference = require('../services/inference');
|
||||
const memory = require("../services/memory");
|
||||
const inference = require("../services/inference");
|
||||
const embedding = require("../services/embedding");
|
||||
const qdrant = require("../services/qdrant");
|
||||
const { ORCHESTRATION, RETRIEVAL, logger } = require("@nexusai/shared");
|
||||
const appSettings = require("../config/settings");
|
||||
const {triggerSummary} = require('../services/summarization')
|
||||
const graph = require('../services/graph');
|
||||
|
||||
const SYSTEM_PROMPT = `You are a helpful, context-aware AI assistant.
|
||||
You have access to memories of past conversations with the user.
|
||||
Use them to provide consistent, personalised responses.`;
|
||||
function buildPrompt(guaranteed, selected, neighborhood, userMessage, systemPrompt) {
|
||||
const parts = [systemPrompt ?? ORCHESTRATION.SYSTEM_PROMPT];
|
||||
|
||||
const RECENT_EPISODE_LIMIT = 10; // Number of recent episodes to retrieve for context
|
||||
|
||||
function buildPrompt(recentEpisodes, userMessage) {
|
||||
const parts = [SYSTEM_PROMPT];
|
||||
|
||||
if (recentEpisodes.length > 0) {
|
||||
parts.push(`Here are some relevant memories from your past conversations:`);
|
||||
for (const ep of recentEpisodes) {
|
||||
parts.push(`User: ${ep.user_message}\nAssistant: ${ep.ai_response}`);
|
||||
}
|
||||
parts.push('--- End of recent memories ---\n');
|
||||
const graphText = formatGraphContext(neighborhood.nodes ?? [], neighborhood.edges ?? []);
|
||||
if (graphText) {
|
||||
parts.push("Here is what you know about entities relevant to this conversation and their connections:");
|
||||
parts.push(graphText);
|
||||
parts.push("---");
|
||||
}
|
||||
|
||||
parts.push(`User: ${userMessage}`);
|
||||
parts.push('Assistant:');
|
||||
if (selected.length > 0) {
|
||||
parts.push("Relevant memories from earlier conversations:");
|
||||
for (const ep of selected) {
|
||||
parts.push(`User: ${ep.user_message}\nAssistant: ${ep.ai_response}`);
|
||||
}
|
||||
parts.push("---");
|
||||
}
|
||||
|
||||
return parts.join('\n');
|
||||
if (guaranteed.length > 0) {
|
||||
parts.push("Recent conversation history (most recent exchanges):");
|
||||
for (const ep of guaranteed) {
|
||||
parts.push(`User: ${ep.user_message}\nAssistant: ${ep.ai_response}`);
|
||||
}
|
||||
parts.push("--- End of recent memories ---\n");
|
||||
}
|
||||
|
||||
parts.push(`User: ${userMessage}`);
|
||||
parts.push("Assistant:");
|
||||
|
||||
return parts.join("\n");
|
||||
}
|
||||
|
||||
/**
 * Build the prompt used to generate a short session title from the first
 * exchange of a conversation.
 *
 * @param {string} userMessage - The user's first message.
 * @param {string} aiResponse - The assistant's reply to it.
 * @returns {string} Newline-joined prompt ending in "Title:" so the model
 *   completes with the title itself.
 */
function buildNamingPrompt(userMessage, aiResponse) {
  const instructions = [
    'Your task is to generate a short title for a conversation based on its first exchange.',
    'Rules: maximum 5 words, no punctuation, no quotes, plain text only.',
    'Examples: "Setting up a Raspberry Pi", "Help with Python list comprehension", "Planning a trip to Japan"',
  ];
  const exchange = [
    `User: ${userMessage}`,
    `Assistant: ${aiResponse}`,
  ];

  // Blank lines separate the instructions, the exchange and the final cue.
  return [...instructions, '', ...exchange, '', 'Title:'].join('\n');
}
|
||||
|
||||
/**
 * Render a graph neighborhood as a bulleted text block for the prompt.
 *
 * Every node becomes a "- name (type): notes" line, followed by one
 * " → label target (type)" line per outbound edge whose source and target
 * are both in `nodes`. Edges that touch a node outside `nodes` are ignored.
 *
 * @param {Array<{id: *, name: string, type: string, notes?: string|null}>} [nodes=[]]
 * @param {Array<{from_id: *, to_id: *, label: string}>} [edges=[]]
 * @returns {string|null} The formatted block, or null when there are no nodes.
 */
function formatGraphContext(nodes = [], edges = []) {
  if (!nodes.length) return null;

  const nodeById = new Map(nodes.map((n) => [n.id, n]));

  // Outbound adjacency: source node id -> rendered connection strings.
  const outbound = new Map(nodes.map((n) => [n.id, []]));
  for (const edge of edges) {
    const connections = outbound.get(edge.from_id);
    const target = nodeById.get(edge.to_id);
    if (!connections || !target) continue;
    connections.push(`${edge.label} ${target.name} (${target.type})`);
  }

  return nodes
    .map((n) => {
      const lines = [`- ${n.name} (${n.type}): ${n.notes ?? '(no notes)'}`];
      for (const conn of outbound.get(n.id) ?? []) lines.push(` → ${conn}`);
      return lines.join('\n');
    })
    .join('\n');
}
|
||||
|
||||
|
||||
/**
 * Ask the inference service for a short title based on the first exchange
 * and store it as the session name. Best-effort: any failure is logged as a
 * warning and swallowed, so this never rejects.
 *
 * @param {string} externalId - Session identifier passed to memory.updateSession.
 * @param {string} userMessage - First user message of the session.
 * @param {string} aiResponse - Assistant reply to that message.
 * @returns {Promise<void>}
 */
async function autoNameSession(externalId, userMessage, aiResponse) {
  try {
    const completion = await inference.complete(
      buildNamingPrompt(userMessage, aiResponse),
      {
        maxTokens: 20, // title only needs a handful of tokens
        temperature: 0.3, // low temperature for consistent, factual naming
      },
    );

    // Strip a leading and/or trailing quote character the model may add.
    const title = completion.text?.trim().replace(/^["']|["']$/g, '');
    if (!title) return;

    await memory.updateSession(externalId, { name: title });
    logger.info(`[orchestration] Auto-named session "${externalId}": "${title}"`);
  } catch (err) {
    logger.warn('[orchestration] Auto-naming failed (non-critical):', err.message);
  }
}
|
||||
|
||||
/**
 * Embed the user message, run a vector search over episodes, and load the
 * matching episodes from the memory service.
 *
 * The search is restricted to `projectSessionIds` when given, otherwise to
 * the single `sessionId`. Hits whose id is in `recentIds` are skipped.
 * Best-effort: on any error a warning is logged and [] is returned.
 *
 * @param {string} userMessage - Text to embed and search with.
 * @param {*} sessionId - Current session id (used only without project scope).
 * @param {Set<*>} recentIds - Episode ids to exclude; only `.has()` is used.
 * @param {Array<*>|null} [projectSessionIds=null] - Session ids of the project.
 * @param {{semanticLimit?: number, scoreThreshold?: number}} [options]
 * @returns {Promise<Array<Object>>} Episodes that were found (falsy lookups dropped).
 */
async function getSemanticEpisodes(
  userMessage,
  sessionId,
  recentIds,
  projectSessionIds = null,
  { semanticLimit, scoreThreshold } = {},
) {
  try {
    const queryVector = await embedding.embed(userMessage);
    const searchOptions = {
      limit: semanticLimit,
      scoreThreshold,
      sessionId: projectSessionIds ? null : sessionId,
      projectSessionIds,
    };
    const hits = await qdrant.searchEpisodes(queryVector, searchOptions);

    // Start every episode lookup before awaiting so they run concurrently.
    const lookups = hits
      .filter((hit) => !recentIds.has(hit.id))
      .map((hit) => memory.getEpisodeById(hit.id));
    const episodes = await Promise.all(lookups);

    return episodes.filter((episode) => Boolean(episode));
  } catch (err) {
    logger.warn(`[orchestration] Semantic search failed, continuing without: `, err.message);
    return [];
  }
}
|
||||
|
||||
/**
 * Embed the user message and search the entity vectors, optionally scoped
 * to a project. Best-effort: on any error a debug message is logged and []
 * is returned.
 *
 * @param {string} userMessage - Text to embed and search with.
 * @param {*} [projectId=null] - Project scope forwarded to qdrant.searchEntities.
 * @returns {Promise<Array<Object>>} One `{ id, ...payload }` object per hit
 *   that carries a payload; hits without a payload are dropped.
 */
async function getRelevantEntities(userMessage, projectId = null) {
  try {
    const queryVector = await embedding.embed(userMessage);
    const hits = await qdrant.searchEntities(queryVector, { projectId });

    const scoreSummary = hits.map((hit) => ({ name: hit.payload?.name, score: hit.score }));
    logger.info('[orchestration] Entity search results:', scoreSummary);

    // Include the Qdrant point ID (== SQLite entity ID) for graph traversal
    const entitiesFound = [];
    for (const hit of hits) {
      if (hit.payload) entitiesFound.push({ id: hit.id, ...hit.payload });
    }
    return entitiesFound;
  } catch (err) {
    logger.debug('[orchestration] Entity search failed, continuing without:', err.message);
    return [];
  }
}
|
||||
|
||||
/**
 * Run the memory service's episode text search for the user message.
 * Best-effort: on any error a warning is logged and [] is returned.
 *
 * @param {string} userMessage - Query text.
 * @param {{limit: number, sessionIds: Array<*>}} options - Forwarded to
 *   memory.searchEpisodes unchanged.
 * @returns {Promise<Array<Object>>} Whatever memory.searchEpisodes resolves
 *   with, or [] on failure.
 */
async function getFTSResults(userMessage, { limit, sessionIds }) {
  let matches;
  try {
    matches = await memory.searchEpisodes(userMessage, { limit, sessionIds });
  } catch (err) {
    logger.warn('[orchestration] FTS search failed, continuing without:', err.message);
    return [];
  }
  return matches;
}
|
||||
|
||||
/**
 * Merge semantic and keyword hit lists with weighted Reciprocal Rank Fusion.
 *
 * An episode at 0-based position i in a list contributes
 * weight / (RETRIEVAL.RRF_K + i + 1). Contributions from both lists are
 * summed per episode id. Keyword-only episodes are added only when their
 * contribution is positive, so a keywordWeight of 0 cannot introduce them.
 * If an id appears more than once within one list, only its first (best
 * ranked) occurrence counts.
 *
 * Returns {episode, score}[] — scores needed for buildScoredPool downstream
 *
 * @param {Array<{id: *}>} semanticEps - Semantic hits, best first.
 * @param {Array<{id: *}>} keywordEps - Keyword hits, best first.
 * @param {{semanticWeight: number, keywordWeight: number, limit: number}} options
 * @returns {Array<{episode: Object, score: number}>} Top `limit` entries by
 *   descending fused score.
 */
function fuseEpisodeResults(semanticEps, keywordEps, { semanticWeight, keywordWeight, limit }) {
  const k = RETRIEVAL.RRF_K;
  const scores = new Map();

  semanticEps.forEach((ep, i) => {
    // First occurrence wins: a later duplicate must not overwrite the
    // better-ranked score.
    if (scores.has(ep.id)) return;
    scores.set(ep.id, { episode: ep, score: semanticWeight / (k + i + 1) });
  });

  const countedKeywordIds = new Set();
  keywordEps.forEach((ep, i) => {
    // Count each id once per list so duplicates are not double-scored.
    if (countedKeywordIds.has(ep.id)) return;
    countedKeywordIds.add(ep.id);

    const contrib = keywordWeight / (k + i + 1);
    const entry = scores.get(ep.id);
    if (entry) {
      entry.score += contrib;
    } else if (contrib > 0) {
      scores.set(ep.id, { episode: ep, score: contrib });
    }
  });

  return [...scores.values()]
    .sort((a, b) => b.score - a.score)
    .slice(0, limit);
}
|
||||
|
||||
/**
 * Token cost of an episode: its stored `token_count` when present, otherwise
 * an estimate of one token per four characters of the combined user message
 * and AI response, rounded up.
 *
 * @param {{token_count?: number|null, user_message?: string|null, ai_response?: string|null}} episode
 * @returns {number} Token count or estimate.
 */
function estimateTokens(episode) {
  // A missing message counts as zero characters instead of throwing.
  const userChars = episode.user_message?.length ?? 0;
  const responseChars = episode.ai_response?.length ?? 0;
  return episode.token_count ?? Math.ceil((userChars + responseChars) / 4);
}
|
||||
|
||||
/**
 * Combine fused retrieval results, recent episodes and entity boosts into
 * one scored candidate pool.
 *
 * - Each fused entry starts with its fused score.
 * - Each recent episode at 0-based position i adds 1 / (RETRIEVAL.RRF_K + i + 1),
 *   and is added to the pool if it was not already there.
 * - Each id in `entityBoostedIds` that is already in the pool gains
 *   `entityWeight`; ids not in the pool are ignored.
 *
 * The input entries are not mutated; the pool holds fresh {episode, score} objects.
 *
 * @param {Array<{episode: {id: *}, score: number}>} fusedWithScores
 * @param {Array<{id: *}>} recentEpisodes
 * @param {Iterable<*>} entityBoostedIds - Episode ids to boost.
 * @param {{entityWeight?: number}} [options] - Boost per matching id; 0 when omitted.
 * @returns {Array<{episode: Object, score: number}>} Pool sorted by descending score.
 */
function buildScoredPool(fusedWithScores, recentEpisodes, entityBoostedIds, { entityWeight = 0 } = {}) {
  const k = RETRIEVAL.RRF_K;
  const pool = new Map(); // episode.id → {episode, score}

  for (const { episode, score } of fusedWithScores) {
    pool.set(episode.id, { episode, score });
  }

  recentEpisodes.forEach((ep, i) => {
    const recencyScore = 1.0 / (k + i + 1);
    const entry = pool.get(ep.id);
    if (entry) {
      entry.score += recencyScore;
    } else {
      pool.set(ep.id, { episode: ep, score: recencyScore });
    }
  });

  for (const id of entityBoostedIds) {
    const entry = pool.get(id);
    if (entry) entry.score += entityWeight;
  }

  return [...pool.values()].sort((a, b) => b.score - a.score);
}
|
||||
|
||||
/**
 * Chooses which episodes go into the prompt without exceeding a token budget.
 *
 * The first `minRecentEpisodes` entries of `recentEpisodes` are always kept
 * and charged against the budget first (the budget may go negative because
 * of them). The rest of the budget is then filled from `scoredPool` in its
 * given order.
 *
 * @param {Array<{episode: object}>} scoredPool - Candidates, highest priority first.
 * @param {number} contextBudget - Token budget for all returned episodes.
 * @param {number} minRecentEpisodes - How many leading recent episodes are guaranteed.
 * @param {Array<object>} recentEpisodes - NOTE(review): presumably newest first,
 *   since the leading entries are the ones guaranteed; confirm against the caller.
 * @returns {{guaranteed: object[], selected: object[]}} Both lists ordered by
 *   ascending `created_at`.
 */
function selectWithinBudget(scoredPool, contextBudget, minRecentEpisodes, recentEpisodes) {
  let budget = contextBudget;

  // `created_at` may be a number, a Date or a timestamp string. Subtracting
  // non-numeric strings gives NaN, which sort() treats as "equal", so the
  // lists would silently stay in score order. Normalise to a number first.
  const toTime = (value) => {
    if (typeof value !== 'string') return Number(value);
    const numeric = Number(value);
    return Number.isNaN(numeric) ? Date.parse(value) : numeric;
  };
  const sortByTime = (a, b) => toTime(a.created_at) - toTime(b.created_at);

  // Guarantee floor: always include the N leading recent episodes.
  const guaranteed = recentEpisodes.slice(0, minRecentEpisodes);
  const guaranteedIds = new Set(guaranteed.map((ep) => ep.id));
  for (const ep of guaranteed) budget -= estimateTokens(ep);

  // Fill remaining budget from the scored pool, highest priority first.
  const selected = [];
  for (const { episode } of scoredPool) {
    if (guaranteedIds.has(episode.id)) continue;
    const cost = estimateTokens(episode);

    // Break rather than skip: lower-priority episodes aren't worth fitting
    // over higher-priority ones.
    if (budget - cost < 0) break;
    selected.push(episode);
    budget -= cost;
  }

  return {
    guaranteed: [...guaranteed].sort(sortByTime),
    selected: selected.sort(sortByTime),
  };
}
|
||||
|
||||
|
||||
/**
 * Runs keyword and semantic retrieval concurrently and fuses the results.
 *
 * Keyword search is skipped when `keywordWeight` is not positive. Keyword
 * hits whose id is in `recentIds` are dropped before fusion.
 *
 * @param {string} userMessage - Query text.
 * @param {{id: *}} session - Current session.
 * @param {Set<*>} recentIds - Ids of episodes already included as recent.
 * @param {Array<*>|null|undefined} projectSessionIds - Session ids to search
 *   across; when nullish the keyword search is limited to the current session.
 * @param {{semanticLimit: number, scoreThreshold: number, semanticWeight: number, keywordWeight: number}} settings
 * @returns {Promise<Array<{episode: object, score: number}>>} Result of fuseEpisodeResults.
 */
async function getFusedEpisodes(userMessage, session, recentIds, projectSessionIds, settings) {
  const { semanticLimit, scoreThreshold, semanticWeight, keywordWeight } = settings;
  const keywordScope = projectSessionIds ?? [session.id];

  let keywordSearch = Promise.resolve([]);
  if (keywordWeight > 0) {
    // Keyword and semantic hits can overlap heavily, so over-fetch keyword
    // hits to leave the fusion step enough candidates after deduplication.
    keywordSearch = getFTSResults(userMessage, {
      limit: semanticLimit * 2,
      sessionIds: keywordScope,
    });
  }

  const semanticSearch = getSemanticEpisodes(
    userMessage,
    session.id,
    recentIds,
    projectSessionIds,
    { semanticLimit, scoreThreshold },
  );

  const [semanticEps, keywordHits] = await Promise.all([semanticSearch, keywordSearch]);
  const unseenKeywordHits = keywordHits.filter((hit) => !recentIds.has(hit.id));

  return fuseEpisodeResults(semanticEps, unseenKeywordHits, {
    semanticWeight,
    keywordWeight,
    limit: semanticLimit,
  });
}
|
||||
|
||||
/**
 * Builds everything needed to run inference for one user message: resolves
 * the session, gathers recent and retrieved episodes within the token budget,
 * expands related entities and renders the prompt.
 *
 * Failures while resolving the project, looking up entity-linked episodes or
 * fetching the graph neighbourhood are logged and degrade gracefully; other
 * failures propagate to the caller.
 *
 * @param {string} externalId - External session identifier; a session is
 *   created when none exists for it.
 * @param {string} userMessage - The incoming user message.
 * @returns {Promise<{session: object, prompt: *, isFirstMessage: boolean,
 *   inferenceOptions: {temperature: *, repeatPenalty: *, topP: *, topK: *}}>}
 */
async function assembleContext(externalId, userMessage) {
  const settings = appSettings.load();
  const {
    recentEpisodeLimit, semanticLimit, scoreThreshold,
    temperature, repeatPenalty, topP, topK, systemPrompt,
    semanticWeight, keywordWeight,
    contextBudget, entityWeight, minRecentEpisodes,
  } = settings;

  // 1. Resolve or create session
  let session = await memory.getSessionByExternalId(externalId);
  if (!session) session = await memory.createSession(externalId);

  // 2. Resolve project context
  let projectSessionIds = null;
  let activeSystemPrompt = systemPrompt ?? ORCHESTRATION.SYSTEM_PROMPT;
  if (session.project_id) {
    try {
      const project = await memory.getProject(session.project_id);
      if (project) {
        const projectSessions = await memory.getProjectSessions(session.project_id);
        if (project.system_prompt) activeSystemPrompt = project.system_prompt;
        projectSessionIds = projectSessions.map((s) => s.id);
      }
    } catch (err) {
      logger.warn('[orchestration] Failed to resolve project context:', err.message);
    }
  }

  // 3. Fetch recent episodes
  const recentEpisodes = await memory.getRecentEpisodes(session.id, recentEpisodeLimit);
  const isFirstMessage = recentEpisodes.length === 0;
  const recentIds = new Set(recentEpisodes.map((e) => e.id));

  // 4. Fused retrieval + entity search in parallel (both are independent)
  const [fusedWithScores, entityResults] = await Promise.all([
    getFusedEpisodes(userMessage, session, recentIds, projectSessionIds, { semanticLimit, scoreThreshold, semanticWeight, keywordWeight }),
    getRelevantEntities(userMessage, session.project_id ?? null),
  ]);

  // 5. Entity-linked episode ids (scoring bonus) and graph neighbourhood.
  // Both depend only on the entity ids, so they run concurrently. Each has
  // its own fallback so one failing never rejects the pair.
  const entityIds = entityResults.map((e) => e.id);

  const fetchEntityBoostedIds = async () => {
    try {
      const result = await memory.getEpisodesByEntities(entityIds);
      return new Set(result.episodeIds);
    } catch (err) {
      logger.debug('[orchestration] Entity-episode lookup failed, skipping bonus:', err.message);
      return new Set();
    }
  };

  const fetchNeighborhood = async () => {
    try {
      return await graph.getNeighbors(entityIds);
    } catch (err) {
      logger.warn('[orchestration] Graph neighborhood fetch failed, falling back to flat entities:', err.message);
      return { nodes: entityResults, edges: [] };
    }
  };

  const [entityBoostedIds, neighborhood] = entityIds.length > 0
    ? await Promise.all([fetchEntityBoostedIds(), fetchNeighborhood()])
    : [new Set(), { nodes: [], edges: [] }];

  // 6. Build unified scored pool and select within token budget
  const scoredPool = buildScoredPool(fusedWithScores, recentEpisodes, entityBoostedIds, { entityWeight });
  const { guaranteed, selected } = selectWithinBudget(scoredPool, contextBudget, minRecentEpisodes, recentEpisodes);

  // 7. Assemble prompt
  const prompt = buildPrompt(guaranteed, selected, neighborhood, userMessage, activeSystemPrompt);

  return {
    session,
    prompt,
    isFirstMessage,
    inferenceOptions: { temperature, repeatPenalty, topP, topK },
  };
}
|
||||
|
||||
async function chat(externalId, userMessage, options = {}) {
|
||||
// 1. Resolve or create session
|
||||
let session = await memory.getSessionByExternalId(externalId);
|
||||
if (!session) {
|
||||
session = await memory.createSession(externalId);
|
||||
const { session, prompt, isFirstMessage, inferenceOptions } = await assembleContext(externalId, userMessage);
|
||||
|
||||
const result = await inference.complete(prompt, { ...options, ...inferenceOptions });
|
||||
|
||||
try {
|
||||
await memory.createEpisode(
|
||||
session.id, userMessage, result.text,
|
||||
(result.evalCount || 0) + (result.promptEvalCount || 0),
|
||||
session.project_id ?? null,
|
||||
);
|
||||
} catch (err) {
|
||||
logger.error('[orchestration] Failed to save episode:', err.message);
|
||||
}
|
||||
|
||||
// 2. Fetch recent episodes for context
|
||||
const recentEpisodes = await memory.getRecentEpisodes(
|
||||
session.id,
|
||||
RECENT_EPISODE_LIMIT
|
||||
);
|
||||
const allEpisodes = await memory.getRecentEpisodes(session.id, 9999);
|
||||
triggerSummary(session, allEpisodes);
|
||||
|
||||
// 3. Assemble prompt
|
||||
const prompt = buildPrompt(recentEpisodes, userMessage);
|
||||
if (isFirstMessage && !session.name) {
|
||||
autoNameSession(externalId, userMessage, result.text).catch(() => {});
|
||||
}
|
||||
|
||||
// 4. Run inference
|
||||
const result = await inference.complete(prompt, options);
|
||||
|
||||
// 5. Write episode back to memory
|
||||
memory.createEpisode(
|
||||
session.id,
|
||||
userMessage,
|
||||
result.text,
|
||||
(result.evalCount || 0) + (result.promptEvalCount || 0 )
|
||||
).catch(err => console.error(`[orchestration] Failed to save episode`, err.message));
|
||||
|
||||
// 6. Return response
|
||||
return {
|
||||
sessionId: externalId,
|
||||
response: result.text,
|
||||
model: result.model,
|
||||
tokenCount: (result.evalCount || 0 ) + (result.promptEvalCount || 0 ),
|
||||
tokenCount: (result.evalCount || 0) + (result.promptEvalCount || 0),
|
||||
};
|
||||
}
|
||||
|
||||
module.exports = { chat };
|
||||
/**
 * Streams a chat completion for one user message.
 *
 * Assembles the prompt, consumes the inference service's SSE body, forwards
 * every text delta to `onChunk`, then stores the finished exchange as an
 * episode (only when some text was produced) and triggers summarisation.
 *
 * @param {string} externalId - External session identifier.
 * @param {string} userMessage - The incoming user message.
 * @param {(delta: string) => void} onChunk - Called with each text delta as it arrives.
 * @param {object} [options] - Per-request inference options.
 * @returns {Promise<{model: string, tokenCount: number}>}
 * @throws {Error} When context assembly, the inference call or episode
 *   persistence fails, or when the stream carries an `error` event. The
 *   error is logged before being rethrown.
 */
async function chatStream(externalId, userMessage, onChunk, options = {}) {
  try {
    const { session, prompt, isFirstMessage, inferenceOptions } = await assembleContext(externalId, userMessage);

    // NOTE(review): inferenceOptions is spread last, so values from saved
    // settings override per-request options of the same name. Confirm this
    // precedence is intended.
    const res = await inference.completeStream(prompt, { ...options, ...inferenceOptions });

    // A streaming decoder keeps multi-byte UTF-8 sequences intact when they
    // are split across network chunks; decoding each chunk on its own would
    // turn the halves into replacement characters.
    const decoder = new TextDecoder('utf-8');
    let fullText = '';
    let model = '';
    let tokenCount = 0;
    let buffer = '';

    const handleEvent = (event) => {
      const dataLines = event.split(/\r?\n/)
        .filter((line) => line.startsWith('data: '))
        .map((line) => line.slice(6));
      if (dataLines.length === 0) return;

      const raw = dataLines.join('\n').trim();
      if (raw === '[DONE]') return;

      let data;
      try {
        data = JSON.parse(raw);
      } catch (err) {
        // A single malformed event is logged and skipped; the stream goes on.
        logger.error('[orchestration] Failed to parse SSE event:', raw, err.message);
        return;
      }
      if (data === null || typeof data !== 'object') return;

      if (data.response) {
        fullText += data.response;
        onChunk(data.response);
      }
      if (data.model) model = data.model;
      if (data.done && data.tokenCount !== undefined) tokenCount = data.tokenCount;

      // Upstream failures must reach the caller; this check sits outside the
      // JSON.parse try/catch so it is not mistaken for a parse failure.
      if (data.error) {
        throw new Error(typeof data.error === 'string' ? data.error : JSON.stringify(data.error));
      }
    };

    for await (const chunk of res.body) {
      buffer += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
      const events = buffer.split(/\r?\n\r?\n/);
      buffer = events.pop() || '';
      for (const event of events) handleEvent(event);
    }

    // Flush the decoder and handle a final event that arrived without a
    // trailing blank line.
    buffer += decoder.decode();
    if (buffer.trim()) handleEvent(buffer);

    if (fullText.trim()) {
      await memory.createEpisode(session.id, userMessage, fullText, tokenCount, session.project_id ?? null);
      const allEpisodes = await memory.getRecentEpisodes(session.id, 9999);
      triggerSummary(session, allEpisodes);
    } else {
      logger.warn('[orchestration] Stream finished with no assistant text; episode not saved');
    }

    if (isFirstMessage && !session.name) {
      // Fire-and-forget: naming the session is cosmetic and must not fail the chat.
      autoNameSession(externalId, userMessage, fullText).catch(() => {});
    }

    return { model, tokenCount };
  } catch (err) {
    logger.error('[orchestration] chatStream fatal error:', err.message, err.stack);
    throw err;
  }
}
|
||||
|
||||
module.exports = { chat, chatStream };
|
||||
|
||||
41
packages/orchestration-service/src/config/settings.js
Normal file
41
packages/orchestration-service/src/config/settings.js
Normal file
@@ -0,0 +1,41 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const { getEnv, ORCHESTRATION, INFERENCE_DEFAULTS, RETRIEVAL } = require('@nexusai/shared');
|
||||
|
||||
const SETTINGS_PATH = path.join(__dirname, '../../data/settings.json');
|
||||
|
||||
const DEFAULTS = {
|
||||
recentEpisodeLimit: ORCHESTRATION.RECENT_EPISODE_LIMIT,
|
||||
semanticLimit: ORCHESTRATION.SEMANTIC_LIMIT,
|
||||
scoreThreshold: ORCHESTRATION.SCORE_THRESHOLD,
|
||||
modelsFolderPath: getEnv('MODELS_MANIFEST_PATH', '/mnt/nexus-models'),
|
||||
temperature: INFERENCE_DEFAULTS.TEMPERATURE,
|
||||
repeatPenalty: INFERENCE_DEFAULTS.REPEAT_PENALTY,
|
||||
topP: INFERENCE_DEFAULTS.TOP_P,
|
||||
topK: INFERENCE_DEFAULTS.TOP_K,
|
||||
systemPrompt: ORCHESTRATION.SYSTEM_PROMPT,
|
||||
semanticWeight: RETRIEVAL.SEMANTIC_WEIGHT,
|
||||
keywordWeight: RETRIEVAL.KEYWORD_WEIGHT,
|
||||
contextBudget: ORCHESTRATION.CONTEXT_BUDGET,
|
||||
entityWeight: ORCHESTRATION.ENTITY_WEIGHT,
|
||||
minRecentEpisodes: ORCHESTRATION.MIN_RECENT_EPISODES,
|
||||
};
|
||||
|
||||
/**
 * Loads persisted settings layered over DEFAULTS.
 *
 * Never throws: a missing file silently yields the defaults, while an
 * unreadable or invalid file is reported with a warning and then also yields
 * the defaults, so a corrupted file is not mistaken for a fresh install.
 *
 * @returns {object} A new settings object (safe for the caller to mutate).
 */
function load() {
  let raw;
  try {
    raw = fs.readFileSync(SETTINGS_PATH, 'utf8');
  } catch (err) {
    // ENOENT just means nothing has been saved yet.
    if (err.code !== 'ENOENT') {
      console.warn(`[settings] Could not read ${SETTINGS_PATH}, using defaults: ${err.message}`);
    }
    return { ...DEFAULTS };
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    console.warn(`[settings] Ignoring invalid JSON in ${SETTINGS_PATH}, using defaults: ${err.message}`);
    return { ...DEFAULTS };
  }

  // Only a plain JSON object can be layered over the defaults; spreading an
  // array or a string would inject numeric keys.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    console.warn(`[settings] Ignoring ${SETTINGS_PATH}: expected a JSON object, using defaults`);
    return { ...DEFAULTS };
  }

  return { ...DEFAULTS, ...parsed };
}
|
||||
|
||||
/**
 * Merges `updates` into the current settings and persists the result.
 *
 * The file is written to a temporary sibling and then renamed over the real
 * path, so an interrupted write cannot leave a truncated settings file (which
 * load() would otherwise replace with defaults).
 *
 * @param {object} updates - Setting keys to override.
 * @returns {object} The full settings object that was written.
 * @throws {Error} On file-system failures while creating the directory,
 *   writing or renaming.
 */
function save(updates) {
  const next = { ...load(), ...updates };

  fs.mkdirSync(path.dirname(SETTINGS_PATH), { recursive: true });

  const tempPath = `${SETTINGS_PATH}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(tempPath, JSON.stringify(next, null, 2));
    fs.renameSync(tempPath, SETTINGS_PATH);
  } catch (err) {
    // Do not leave a stray temp file behind when the write or rename fails.
    fs.rmSync(tempPath, { force: true });
    throw err;
  }

  return next;
}
|
||||
|
||||
module.exports = { load, save, DEFAULTS };
|
||||
@@ -1,27 +1,57 @@
|
||||
require ('dotenv').config();
|
||||
const express = require('express');
|
||||
const {getEnv} = require('@nexusai/shared');
|
||||
const {getEnv, PORTS, SERVICES, ORCHESTRATION, logger} = require('@nexusai/shared');
|
||||
|
||||
/**** ROUTERS *** */
|
||||
const chatRouter = require('./routes/chat');
|
||||
const sessionsRouter = require('./routes/sessions');
|
||||
const modelsRouter = require('./routes/models');
|
||||
const projectsRouter = require('./routes/projects');
|
||||
const episodesRouter = require('./routes/episodes');
|
||||
const settingsRouter = require('./routes/settings');
|
||||
const healthRouter = require('./routes/health');
|
||||
const summariesRouter = require('./routes/summaries')
|
||||
|
||||
const cors = require('cors');
|
||||
|
||||
const app = express();
|
||||
app.use(express.json());
|
||||
app.use(express.json({ limit: '2mb' }));
|
||||
|
||||
const PORT = getEnv('PORT', '4000'); // Default to 4000 if PORT is not set
|
||||
app.use(cors({
|
||||
origin: [
|
||||
getEnv('CORS_ORIGIN', ORCHESTRATION.CORS_ORIGIN),
|
||||
ORCHESTRATION.CORS_ORIGIN,
|
||||
],
|
||||
methods: ['GET', 'POST', 'DELETE'],
|
||||
allowedHeaders: ['Content-Type'],
|
||||
}))
|
||||
|
||||
const PORT = getEnv('PORT', PORTS.ORCHESTRATION);
|
||||
const MEMORY_URL = getEnv('MEMORY_SERVICE_URL', SERVICES.MEMORY_URL);
|
||||
const EMBEDDING_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING_URL);
|
||||
const INFERENCE_URL = getEnv('INFERENCE_SERVICE_URL', SERVICES.INFERENCE_URL);
|
||||
|
||||
// Health check endpoint
|
||||
app.get('/health', (req, res) => {
|
||||
res.json({
|
||||
service: 'Orchestration Service',
|
||||
status: 'healthy',
|
||||
memoryService: getEnv('MEMORY_SERVICE_URL', 'http://localhost:3002'),
|
||||
embeddingService: getEnv('EMBEDDING_SERVICE_URL', 'http://localhost:3003'),
|
||||
inferenceService: getEnv('INFERENCE_SERVICE_URL', 'http://localhost:3001'),
|
||||
service: 'Orchestration Service',
|
||||
status: 'healthy',
|
||||
memoryService: MEMORY_URL,
|
||||
embeddingService: EMBEDDING_URL,
|
||||
inferenceService: INFERENCE_URL,
|
||||
});
|
||||
});
|
||||
|
||||
app.use('/chat', chatRouter);
|
||||
app.use('/sessions', sessionsRouter);
|
||||
app.use('/models', modelsRouter);
|
||||
app.use('/projects', projectsRouter);
|
||||
app.use('/episodes', episodesRouter);
|
||||
app.use('/settings', settingsRouter);
|
||||
app.use('/health/services', healthRouter);
|
||||
app.use('/summaries', summariesRouter)
|
||||
|
||||
/******* Start the server ************/
|
||||
app.listen(PORT, () => {
|
||||
console.log(`Orchestration Service is running on port ${PORT}`);
|
||||
logger.info(`Orchestration Service is running on port ${PORT}`);
|
||||
});
|
||||
@@ -1,5 +1,8 @@
|
||||
const { Router } = require('express')
|
||||
const { chat } = require('../chat/index');
|
||||
const { chat, chatStream } = require('../chat/index');
|
||||
const memory = require('../services/memory')
|
||||
// `logger` is a named export of the shared package; requiring the module
// without destructuring would bind the whole export object instead.
const { logger } = require('@nexusai/shared');
|
||||
|
||||
|
||||
const router = Router();
|
||||
|
||||
@@ -16,8 +19,37 @@ router.post('/', async (req, res) => {
|
||||
});
|
||||
res.json(result)
|
||||
} catch (err) {
|
||||
console.error(`[orchestration] chat error: `, err.message)
|
||||
res.status(500).json ({ error: err.message})
|
||||
logger.error(`[orchestration] chat error: `, err.message)
|
||||
res.status(500).json ({ error: 'Chat failed', detail: err.message })
|
||||
}
|
||||
});
|
||||
|
||||
// POST /stream: streams a chat reply as server-sent events.
// Emits `{ text }` for each delta, then `{ done, model, tokenCount }` on
// success or `{ error }` on failure; the response is always ended.
router.post('/stream', async (req, res) => {
  const { sessionId, message } = req.body;
  const hasRequiredFields = Boolean(sessionId) && Boolean(message);
  if (!hasRequiredFields) {
    return res.status(400).json({
      error: 'sessionId and message are required'
    });
  }

  const sseHeaders = [
    ['Content-Type', 'text/event-stream'],
    ['Cache-Control', 'no-cache'],
    ['Connection', 'keep-alive'],
  ];
  for (const [header, value] of sseHeaders) {
    res.setHeader(header, value);
  }
  res.flushHeaders();

  // Every event is one `data:` line holding a JSON payload.
  const sendEvent = (payload) => {
    res.write(`data: ${JSON.stringify(payload)}\n\n`);
  };

  try {
    const perRequestOptions = { model: req.body.model, temperature: req.body.temperature };
    const forwardDelta = (delta) => { sendEvent({ text: delta }); };
    const { model, tokenCount } = await chatStream(sessionId, message, forwardDelta, perRequestOptions);

    sendEvent({ done: true, model, tokenCount });
  } catch (err) {
    // Headers are already sent, so the failure is reported in-band.
    sendEvent({ error: err.message });
  } finally {
    res.end();
  }
});
|
||||
|
||||
|
||||
25
packages/orchestration-service/src/routes/episodes.js
Normal file
25
packages/orchestration-service/src/routes/episodes.js
Normal file
@@ -0,0 +1,25 @@
|
||||
const { Router } = require('express');
|
||||
const memory = require('../services/memory');
|
||||
|
||||
const router = Router();
|
||||
|
||||
// GET /: lists episodes, forwarding the raw paging/filter query parameters
// (limit, offset, sessionId, q) to the memory service client.
async function listEpisodes(req, res) {
  const { limit, offset, sessionId, q } = req.query;
  const filters = { limit, offset, sessionId, q };
  try {
    const episodes = await memory.getEpisodes(filters);
    res.json(episodes);
  } catch (err) {
    res.status(500).json({ error: 'Failed to fetch episodes', detail: err.message });
  }
}

// DELETE /:id: removes one episode and answers 204 with an empty body.
async function removeEpisode(req, res) {
  const episodeId = req.params.id;
  try {
    await memory.deleteEpisode(episodeId);
    res.status(204).send();
  } catch (err) {
    res.status(500).json({ error: 'Failed to delete episode', detail: err.message });
  }
}

router.get('/', listEpisodes);
router.delete('/:id', removeEpisode);
|
||||
|
||||
module.exports = router;
|
||||
30
packages/orchestration-service/src/routes/health.js
Normal file
30
packages/orchestration-service/src/routes/health.js
Normal file
@@ -0,0 +1,30 @@
|
||||
const { Router } = require('express');
|
||||
const fetch = require('node-fetch');
|
||||
const { getEnv, SERVICES, PORTS } = require('@nexusai/shared');
|
||||
|
||||
const router = Router();
|
||||
|
||||
const SERVICES_MAP = [
|
||||
{ key: 'inference', label: 'Inference', url: `${getEnv('INFERENCE_SERVICE_URL', SERVICES.INFERENCE_URL)}/health` },
|
||||
{ key: 'memory', label: 'Memory', url: `${getEnv('MEMORY_SERVICE_URL', SERVICES.MEMORY_URL)}/health` },
|
||||
{ key: 'embedding', label: 'Embedding', url: `${getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING_URL)}/health` },
|
||||
{ key: 'orchestration', label: 'Orchestration', url: `http://localhost:${getEnv('PORT', PORTS.ORCHESTRATION)}/health` },
|
||||
];
|
||||
|
||||
// GET /: probes every service in SERVICES_MAP concurrently and reports one
// entry per service: { key, label, status, latency, detail }.
//   - 'healthy':     the service answered 2xx with a JSON body (returned as detail)
//   - 'unhealthy':   the service answered with a non-2xx status
//   - 'unreachable': the request failed, timed out (3 s) or returned non-JSON
router.get('/', async (req, res) => {
  const probe = async ({ key, label, url }) => {
    const start = Date.now();
    try {
      const r = await fetch(url, { signal: AbortSignal.timeout(3000) });
      if (!r.ok) {
        // An error response that happens to carry JSON is still a failure;
        // it must not be reported as healthy.
        return { key, label, status: 'unhealthy', latency: Date.now() - start, detail: { httpStatus: r.status } };
      }
      const data = await r.json();
      return { key, label, status: 'healthy', latency: Date.now() - start, detail: data };
    } catch (err) {
      return { key, label, status: 'unreachable', latency: Date.now() - start, detail: null };
    }
  };

  const results = await Promise.all(SERVICES_MAP.map(probe));
  res.json(results);
});
|
||||
|
||||
module.exports = router;
|
||||
70
packages/orchestration-service/src/routes/models.js
Normal file
70
packages/orchestration-service/src/routes/models.js
Normal file
@@ -0,0 +1,70 @@
|
||||
const express = require('express');
|
||||
const router = express.Router();
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const appSettings = require('../config/settings');
|
||||
|
||||
const { getEnv, LLAMACPP, logger } = require('@nexusai/shared');
|
||||
const LLAMA_URL = getEnv('LLAMA_SERVER_URL', LLAMACPP.DEFAULT_URL);
|
||||
|
||||
// GET /: lists the .gguf model files found in the configured models folder.
// When the folder also holds a readable `models.json`, its entries (matched
// on `value` === filename) supply `label` and `description`.
// Answers 500 when the folder itself cannot be read.
router.get('/', (req, res) => {
  const { modelsFolderPath } = appSettings.load();
  const MODEL_EXT = '.gguf';

  try {
    // Scan the folder for .gguf files
    const files = fs.readdirSync(modelsFolderPath)
      .filter((f) => f.endsWith(MODEL_EXT));

    // Optional manifest for richer metadata, indexed by filename. A Map keeps
    // manifest values such as "__proto__" from touching an object prototype.
    const manifest = new Map();
    try {
      const manifestPath = path.join(modelsFolderPath, 'models.json');
      const list = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
      for (const m of list) {
        manifest.set(m.value, m);
      }
    } catch {
      // No usable manifest: fall back to scan-only results, that's fine
    }

    const models = files.map((filename) => {
      const entry = manifest.get(filename);
      return {
        value: filename,
        // Strip only the trailing extension; replace('.gguf', '') would
        // remove the first occurrence anywhere in the name.
        label: entry?.label ?? filename.slice(0, -MODEL_EXT.length),
        description: entry?.description ?? null,
        size: getFileSizeMB(path.join(modelsFolderPath, filename)),
      };
    });

    res.json(models);
  } catch (err) {
    logger.error('[models] Failed to scan folder:', err.message);
    res.status(500).json({ error: `Could not read models folder: ${modelsFolderPath}` });
  }
});
|
||||
|
||||
// GET /props: reports the context window and model alias read from the
// llama-server `/props` endpoint. Answers 503 when the server cannot be
// reached, answers with a non-2xx status, or does not reply within 3 s.
router.get('/props', async (req, res) => {
  try {
    // Bound the upstream call so a hung llama-server cannot hang this request.
    const response = await fetch(`${LLAMA_URL}/props`, { signal: AbortSignal.timeout(3000) });
    if (!response.ok) throw new Error(`llama-server error: ${response.status}`);

    const data = await response.json();
    res.json({
      contextWindow: data.default_generation_settings?.n_ctx ?? null,
      modelAlias: data.model_alias,
    });
  } catch (err) {
    logger.error('[models/props]', err.message);
    res.status(503).json({ error: 'Could not reach llama-server' });
  }
});
|
||||
|
||||
/**
 * Formats a file's size for display.
 *
 * Despite the name, the value is expressed in gibibytes (bytes / 1024^3)
 * with one decimal and a " GB" suffix.
 *
 * @param {string} filepath - Path of the file to measure.
 * @returns {string|null} e.g. "4.1 GB", or null when the file cannot be stat'ed.
 */
function getFileSizeMB(filepath) {
  const BYTES_PER_GB = 1024 ** 3;

  let stats;
  try {
    stats = fs.statSync(filepath);
  } catch {
    return null;
  }

  const gigabytes = stats.size / BYTES_PER_GB;
  return `${gigabytes.toFixed(1)} GB`;
}
|
||||
|
||||
module.exports = router;
|
||||
41
packages/orchestration-service/src/routes/projects.js
Normal file
41
packages/orchestration-service/src/routes/projects.js
Normal file
@@ -0,0 +1,41 @@
|
||||
const { Router } = require('express');
|
||||
const memory = require('../services/memory');
|
||||
|
||||
const router = Router();
|
||||
|
||||
// GET /: lists all projects.
async function listProjects(req, res) {
  try {
    const projects = await memory.getProjects();
    res.json(projects);
  } catch (err) {
    res.status(500).json({ error: 'Failed to fetch projects', detail: err.message });
  }
}

// POST /: creates a project; `name` is required and stored trimmed.
async function createProject(req, res) {
  const { name, description, colour, icon, isolated } = req.body;
  const trimmedName = name?.trim();
  if (!trimmedName) {
    return res.status(400).json({ error: 'name is required' });
  }

  try {
    const created = await memory.createProject({ name: trimmedName, description, colour, icon, isolated });
    res.status(201).json(created);
  } catch (err) {
    res.status(500).json({ error: 'Failed to create project', detail: err.message });
  }
}

// PATCH /:id: forwards the request body as the update payload.
async function updateProject(req, res) {
  const projectId = req.params.id;
  try {
    const updated = await memory.updateProject(projectId, req.body);
    res.json(updated);
  } catch (err) {
    res.status(500).json({ error: 'Failed to update project', detail: err.message });
  }
}

// DELETE /:id: removes a project and answers 204 with an empty body.
async function removeProject(req, res) {
  const projectId = req.params.id;
  try {
    await memory.deleteProject(projectId);
    res.status(204).send();
  } catch (err) {
    res.status(500).json({ error: 'Failed to delete project', detail: err.message });
  }
}

router.get('/', listProjects);
router.post('/', createProject);
router.patch('/:id', updateProject);
router.delete('/:id', removeProject);
|
||||
|
||||
module.exports = router;
|
||||
61
packages/orchestration-service/src/routes/sessions.js
Normal file
61
packages/orchestration-service/src/routes/sessions.js
Normal file
@@ -0,0 +1,61 @@
|
||||
const { Router } = require('express');
|
||||
const memory = require('../services/memory');
|
||||
const { EPISODIC } = require('@nexusai/shared');
|
||||
|
||||
const router = Router();
|
||||
|
||||
// GET /:sessionId/history: paged episode history for the session with the
// given external id; 404 when no such session exists.
async function getSessionHistory(req, res) {
  const { sessionId } = req.params;
  const limit = req.query.limit === undefined ? EPISODIC.DEFAULT_PAGE_SIZE : req.query.limit;
  const offset = req.query.offset === undefined ? EPISODIC.DEFAULT_OFFSET : req.query.offset;

  try {
    const session = await memory.getSessionByExternalId(sessionId);
    if (!session) {
      return res.status(404).json({ error: 'Session not found' });
    }

    const episodes = await memory.getSessionHistory(session.id, Number(limit), Number(offset));
    res.json({ sessionId, episodes });
  } catch (err) {
    res.status(500).json({ error: 'Failed to fetch session history', detail: err.message });
  }
}

// GET /: paged session list, optionally filtered by `projectId`. A missing
// or empty projectId, or the literal string 'null', means "no project filter".
async function listSessions(req, res) {
  const { projectId } = req.query;
  const limit = req.query.limit === undefined ? EPISODIC.DEFAULT_PAGE_SIZE : req.query.limit;
  const offset = req.query.offset === undefined ? EPISODIC.DEFAULT_OFFSET : req.query.offset;

  let projectFilter = null;
  if (projectId && projectId !== 'null') {
    projectFilter = projectId;
  }

  try {
    const sessions = await memory.getSessions(Number(limit), Number(offset), projectFilter);
    res.json(sessions);
  } catch (err) {
    res.status(500).json({ error: 'Failed to fetch sessions', detail: err.message });
  }
}

router.get('/:sessionId/history', getSessionHistory);
router.get('/', listSessions);
|
||||
|
||||
// PATCH /:sessionId: renames a session and/or moves it to a project.
// Accepts `name`, `projectId`, or both. Answers 400 when neither is usable
// or when `name` is not a string, and 500 when the update itself fails.
router.patch('/:sessionId', async (req, res) => {
  const { name, projectId } = req.body ?? {};

  // Reject non-string names up front: calling .trim() on e.g. a number would
  // throw outside the try block and leave the request without a response.
  if (name !== undefined && name !== null && typeof name !== 'string') {
    return res.status(400).json({ error: 'name must be a string' });
  }

  const trimmedName = name?.trim() || undefined;

  // Allow patch with just projectId, or just name, or both
  if (!trimmedName && projectId === undefined) {
    return res.status(400).json({ error: 'name or projectId is required' });
  }

  try {
    const session = await memory.updateSession(req.params.sessionId, {
      name: trimmedName,
      projectId
    });
    res.json(session);
  } catch (err) {
    res.status(500).json({ error: 'Failed to update session', detail: err.message });
  }
});
|
||||
|
||||
// DELETE /:sessionId: removes a session and answers 204 with an empty body;
// answers 500 with the error message when the memory service call fails.
async function removeSession(req, res) {
  const { sessionId } = req.params;
  try {
    await memory.deleteSession(sessionId);
    res.status(204).send();
  } catch (err) {
    res.status(500).json({ error: 'Failed to delete session', detail: err.message });
  }
}

router.delete('/:sessionId', removeSession);
|
||||
|
||||
module.exports = router;
|
||||
121
packages/orchestration-service/src/routes/settings.js
Normal file
121
packages/orchestration-service/src/routes/settings.js
Normal file
@@ -0,0 +1,121 @@
|
||||
const { Router } = require('express');
|
||||
const settings = require('../config/settings');
|
||||
const fs = require('fs');
|
||||
|
||||
const router = Router();
|
||||
|
||||
// GET /: returns the effective settings (defaults merged with saved values).
router.get('/', (req, res) => {
  res.json(settings.load());
});

/**
 * Builds a validator for a numeric setting constrained to [min, max].
 * Only real numbers and non-blank numeric strings are accepted; values such
 * as null, '' or true are rejected instead of being coerced to 0 or 1.
 *
 * @param {string} key - Setting name, used in the error message.
 * @param {number} min - Inclusive lower bound.
 * @param {number} max - Inclusive upper bound.
 * @param {{integer?: boolean}} [options] - Require a whole number.
 * @returns {(raw: *) => ({value: number} | {error: string})}
 */
function numericSetting(key, min, max, { integer = false } = {}) {
  return (raw) => {
    const convertible = typeof raw === 'number' || (typeof raw === 'string' && raw.trim() !== '');
    const val = convertible ? Number(raw) : NaN;
    const wellFormed = integer ? Number.isInteger(val) : !Number.isNaN(val);
    if (!wellFormed || val < min || val > max) {
      return { error: `${key} must be ${min}–${max}` };
    }
    return { value: val };
  };
}

// Validates the models folder: a non-empty string naming a readable directory.
function modelsFolderPathSetting(raw) {
  if (typeof raw !== 'string') return { error: 'modelsFolderPath must be a string' };
  const val = raw.trim();
  if (!val) return { error: 'modelsFolderPath cannot be empty' };
  // Verify the path exists and is readable
  try {
    fs.readdirSync(val);
  } catch {
    return { error: `Path not accessible: ${val}` };
  }
  return { value: val };
}

// Validates the system prompt: a string; blank is stored as null, which
// reverts to the default prompt.
function systemPromptSetting(raw) {
  if (typeof raw !== 'string') return { error: 'systemPrompt must be a string' };
  return { value: raw.trim() || null };
}

// Checked in this order; the first failing field determines the 400 response.
const SETTING_VALIDATORS = [
  ['recentEpisodeLimit', numericSetting('recentEpisodeLimit', 1, 20, { integer: true })],
  ['semanticLimit', numericSetting('semanticLimit', 1, 20, { integer: true })],
  ['scoreThreshold', numericSetting('scoreThreshold', 0, 1)],
  ['modelsFolderPath', modelsFolderPathSetting],
  ['temperature', numericSetting('temperature', 0, 2)],
  ['repeatPenalty', numericSetting('repeatPenalty', 1, 2)],
  ['topP', numericSetting('topP', 0, 1)],
  ['topK', numericSetting('topK', 1, 100, { integer: true })],
  ['systemPrompt', systemPromptSetting],
  ['semanticWeight', numericSetting('semanticWeight', 0, 5)],
  ['keywordWeight', numericSetting('keywordWeight', 0, 5)],
  ['contextBudget', numericSetting('contextBudget', 512, 32768, { integer: true })],
  ['entityWeight', numericSetting('entityWeight', 0, 2)],
  ['minRecentEpisodes', numericSetting('minRecentEpisodes', 0, 10, { integer: true })],
];

// PATCH /: validates the supplied fields, saves them and returns the full
// settings. Fields that are absent are left untouched; unknown fields are
// ignored. Answers 400 with the first validation error.
router.patch('/', (req, res) => {
  const body = req.body ?? {};
  const updates = {};

  for (const [key, validate] of SETTING_VALIDATORS) {
    if (body[key] === undefined) continue;
    const { error, value } = validate(body[key]);
    if (error !== undefined) {
      return res.status(400).json({ error });
    }
    updates[key] = value;
  }

  res.json(settings.save(updates));
});
|
||||
|
||||
module.exports = router;
|
||||
48
packages/orchestration-service/src/routes/summaries.js
Normal file
48
packages/orchestration-service/src/routes/summaries.js
Normal file
@@ -0,0 +1,48 @@
|
||||
const { Router } = require('express');
|
||||
const memory = require('../services/memory');
|
||||
|
||||
const router = Router();
|
||||
|
||||
// POST /project/:projectId/generate: triggers on-demand project summary
// generation and answers 201 with the new summary.
async function generateProjectSummary(req, res) {
  const { projectId } = req.params;
  try {
    const summary = await memory.generateProjectSummary(projectId);
    res.status(201).json(summary);
  } catch (err) {
    // Pass through 422 from memory-service ("no session summaries yet");
    // detection relies on the status code appearing in the error message.
    let status = 500;
    if (err.message.includes('422')) {
      status = 422;
    }
    res.status(status).json({ error: err.message });
  }
}

// GET /project/:projectId/overview: current overview summary of a project.
async function getProjectOverview(req, res) {
  const { projectId } = req.params;
  try {
    const overview = await memory.getProjectOverviewSummary(projectId);
    res.json(overview);
  } catch (err) {
    res.status(500).json({ error: 'Failed to fetch project overview summary', detail: err.message });
  }
}

// GET /session/:sessionId: summaries of the session with the given external
// id; 404 when no such session exists.
async function getSessionSummaries(req, res) {
  try {
    const session = await memory.getSessionByExternalId(req.params.sessionId);
    if (!session) {
      return res.status(404).json({ error: 'Session not found' });
    }
    const sessionSummaries = await memory.getSummariesBySession(session.id);
    res.json(sessionSummaries);
  } catch (err) {
    res.status(500).json({ error: 'Failed to fetch session summaries', detail: err.message });
  }
}

// GET /project/:projectId: all summaries stored for a project.
async function getProjectSummaries(req, res) {
  const { projectId } = req.params;
  try {
    const projectSummaries = await memory.getSummariesByProject(projectId);
    res.json(projectSummaries);
  } catch (err) {
    res.status(500).json({ error: 'Failed to fetch project summaries', detail: err.message });
  }
}

router.post('/project/:projectId/generate', generateProjectSummary);
router.get('/project/:projectId/overview', getProjectOverview);
router.get('/session/:sessionId', getSessionSummaries);
router.get('/project/:projectId', getProjectSummaries);
|
||||
|
||||
module.exports = router;
|
||||
18
packages/orchestration-service/src/services/embedding.js
Normal file
18
packages/orchestration-service/src/services/embedding.js
Normal file
@@ -0,0 +1,18 @@
|
||||
const {getEnv, SERVICES } = require('@nexusai/shared')
|
||||
|
||||
const BASE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING_URL);
|
||||
|
||||
/**
 * Ask the embedding service for the vector of a piece of text.
 * @param {string} text - Text to embed; sent as `{ text }` in the JSON body.
 * @returns {Promise<*>} The `embedding` field of the service's JSON reply.
 * @throws {Error} When the service answers with a non-OK status.
 */
async function embed(text) {
  const response = await fetch(`${BASE_URL}/embed`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ text }),
  });

  if (!response.ok) {
    throw new Error(`Embedding service error: ${response.status}`);
  }

  const { embedding } = await response.json();
  return embedding;
}
|
||||
|
||||
module.exports = { embed };
|
||||
15
packages/orchestration-service/src/services/graph.js
Normal file
15
packages/orchestration-service/src/services/graph.js
Normal file
@@ -0,0 +1,15 @@
|
||||
const { getEnv, SERVICES } = require('@nexusai/shared');
|
||||
|
||||
const MEMORY_URL = getEnv('MEMORY_SERVICE_URL', SERVICES.MEMORY_URL);
|
||||
|
||||
/**
 * Look up graph neighbours for a set of entities via the memory service.
 * @param {Array} entityIds - Entity ids, sent as `{ entityIds }`.
 * @returns {Promise<*>} Parsed JSON reply of `POST /graph/neighbors`.
 * @throws {Error} When the service answers with a non-OK status.
 */
async function getNeighbors(entityIds) {
  const request = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ entityIds }),
  };
  const response = await fetch(`${MEMORY_URL}/graph/neighbors`, request);
  if (response.ok) return response.json();
  throw new Error(`Graph neighbors error: ${response.status}`);
}
|
||||
|
||||
module.exports = { getNeighbors };
|
||||
@@ -1,7 +1,6 @@
|
||||
const fetch = require('node-fetch');
|
||||
const { getEnv } = require('@nexusai/shared');
|
||||
const { getEnv, SERVICES } = require('@nexusai/shared');
|
||||
|
||||
const BASE_URL = getEnv('INFERENCE_SERVICE_URL', 'http://localhost:3001');
|
||||
const BASE_URL = getEnv('INFERENCE_SERVICE_URL', SERVICES.INFERENCE_URL);
|
||||
|
||||
async function complete(prompt, options ={}) {
|
||||
const res = await fetch(`${BASE_URL}/complete`, {
|
||||
@@ -13,6 +12,17 @@ async function complete(prompt, options ={}) {
|
||||
return res.json();
|
||||
}
|
||||
|
||||
/**
 * Start a streaming completion on the inference service.
 * @param {string} prompt - Prompt text.
 * @param {object} [options] - Extra fields merged into the JSON body next to `prompt`.
 * @returns {Promise<Response>} The raw fetch response; the caller consumes the stream.
 * @throws {Error} When the service answers with a non-OK status.
 */
async function completeStream(prompt, options={}) {
  const payload = { prompt, ...options };
  const response = await fetch(`${BASE_URL}/complete/stream`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  });
  if (response.ok) return response;
  throw new Error(`Inference service error: ${response.status}`);
}
|
||||
|
||||
module.exports = {
|
||||
complete
|
||||
complete,
|
||||
completeStream
|
||||
}
|
||||
@@ -1,13 +1,12 @@
|
||||
const fetch = require('node-fetch');
|
||||
const { getEnv } = require('@nexusai/shared');
|
||||
const { getEnv, SERVICES, EPISODIC } = require('@nexusai/shared');
|
||||
|
||||
const BASE_URL = getEnv('MEMORY_SERVICE_URL', 'http://localhost:3002');
|
||||
const BASE_URL = getEnv('MEMORY_SERVICE_URL', SERVICES.MEMORY_URL);
|
||||
|
||||
//function to get session by external id, returns null if not found, throws error for other issues
|
||||
async function getSessionByExternalId(externalId) {
|
||||
const res = await fetch(`${BASE_URL}/sessions/by-external/${externalId}`);
|
||||
|
||||
if (!res.status === 404) return null; // Not found or bad request
|
||||
if (res.status === 404) return null; // Not found or bad request
|
||||
if (!res.ok) throw new Error(`Memory service error: ${res.status} ${res.statusText}`); // Other errors
|
||||
|
||||
return res.json();
|
||||
@@ -24,25 +23,223 @@ async function createSession(externalId) {
|
||||
return res.json();
|
||||
}
|
||||
|
||||
/**
 * Fetch the most recent episodes of a session.
 * @param {string|number} sessionId - Session id; URL-encoded before use in the path.
 * @param {number} [limit=EPISODIC.DEFAULT_SESSIONS_LIMIT] - Maximum number of episodes.
 * @returns {Promise<*>} Parsed JSON reply.
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function getRecentEpisodes(sessionId, limit = EPISODIC.DEFAULT_SESSIONS_LIMIT) {
  // Only the current signature is kept; the superseded `limit = 10` line is dropped.
  const res = await fetch(`${BASE_URL}/sessions/${encodeURIComponent(sessionId)}/episodes?limit=${limit}`);
  if (!res.ok) throw new Error(`Failed to fetch episodes: ${res.status} ${res.statusText}`);
  return res.json();
}
|
||||
|
||||
/**
 * Store a new episode (one user/assistant exchange) in the memory service.
 * @param {string|number} sessionId - Owning session.
 * @param {string} userMessage - The user's message.
 * @param {string} aiResponse - The assistant's reply.
 * @param {number} tokenCount - Token count recorded for the exchange.
 * @param {string|number|null} [projectId=null] - Optional project to associate.
 * @returns {Promise<*>} Parsed JSON reply.
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function createEpisode(sessionId, userMessage, aiResponse, tokenCount, projectId=null) {
  // Only the current signature and body are kept; the superseded lines without
  // `projectId` are dropped.
  const res = await fetch(`${BASE_URL}/episodes`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ sessionId, userMessage, aiResponse, tokenCount, projectId }),
  });
  if (!res.ok) throw new Error(`Failed to create episode: ${res.status} ${res.statusText}`);
  return res.json();
}
|
||||
|
||||
/**
 * Fetch a single episode.
 * @param {string|number} episodeId - Episode id; URL-encoded before use in the path.
 * @returns {Promise<*|null>} Parsed JSON reply, or null when the service answers 404.
 * @throws {Error} On any other non-OK status.
 */
async function getEpisodeById(episodeId) {
  const res = await fetch(`${BASE_URL}/episodes/${encodeURIComponent(episodeId)}`);
  if (res.status === 404) return null;
  if (!res.ok) throw new Error(`Failed to fetch episode: ${res.status}`);
  return res.json();
}
|
||||
|
||||
/**
 * Fetch one page of a session's episodes.
 * @param {string|number} sessionId - Session id; URL-encoded before use in the path.
 * @param {number} [limit=EPISODIC.DEFAULT_SESSIONS_LIMIT] - Page size.
 * @param {number} [offset=EPISODIC.DEFAULT_OFFSET] - Number of episodes to skip.
 * @returns {Promise<*|null>} Parsed JSON reply, or null when the service answers 404.
 * @throws {Error} On any other non-OK status.
 */
async function getSessionHistory(sessionId, limit = EPISODIC.DEFAULT_SESSIONS_LIMIT, offset = EPISODIC.DEFAULT_OFFSET) {
  const res = await fetch(
    `${BASE_URL}/sessions/${encodeURIComponent(sessionId)}/episodes?limit=${limit}&offset=${offset}`
  );
  if (res.status === 404) return null;
  if (!res.ok) throw new Error(`Failed to fetch history: ${res.status}`);
  return res.json();
}
|
||||
|
||||
/**
 * List sessions, optionally restricted to one project.
 * @param {number} [limit=EPISODIC.DEFAULT_SESSIONS_LIMIT] - Page size.
 * @param {number} [offset=EPISODIC.DEFAULT_OFFSET] - Number of sessions to skip.
 * @param {string|number|null} [projectId=null] - Added as a query parameter only when truthy.
 * @returns {Promise<*>} Parsed JSON reply.
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function getSessions(limit = EPISODIC.DEFAULT_SESSIONS_LIMIT, offset = EPISODIC.DEFAULT_OFFSET, projectId = null) {
  const endpoint = new URL(`${BASE_URL}/sessions`);
  const query = endpoint.searchParams;
  query.set('limit', limit);
  query.set('offset', offset);
  if (projectId) {
    query.set('projectId', projectId);
  }

  const response = await fetch(endpoint.toString());
  if (response.ok) return response.json();
  throw new Error(`Failed to fetch sessions: ${response.status}`);
}
|
||||
|
||||
/**
 * Update a session's name and/or project, addressed by external id.
 * @param {string} externalId - External session id; URL-encoded before use in the path.
 * @param {{name?: string, projectId?: (string|number|null)}} fields - Values sent in the PATCH body.
 * @returns {Promise<*>} Parsed JSON reply.
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function updateSession(externalId, { name, projectId }) {
  const res = await fetch(`${BASE_URL}/sessions/by-external/${encodeURIComponent(externalId)}`, {
    method: 'PATCH',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ name, projectId }),
  });
  if (!res.ok) throw new Error(`Failed to update session: ${res.status}`);
  return res.json();
}
|
||||
|
||||
/**
 * Delete a session, addressed by external id.
 * @param {string} externalId - External session id; URL-encoded before use in the path.
 * @returns {Promise<void>}
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function deleteSession(externalId) {
  const res = await fetch(`${BASE_URL}/sessions/by-external/${encodeURIComponent(externalId)}`, {
    method: 'DELETE',
  });
  if (!res.ok) throw new Error(`Failed to delete session: ${res.status}`);
}
|
||||
|
||||
/******** PROJECTS ********* */
|
||||
/**
 * Create a project in the memory service.
 * @param {{name: *, description: *, colour: *, icon: *}} project - Only these four fields are forwarded.
 * @returns {Promise<*>} Parsed JSON reply.
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function createProject({ name, description, colour, icon }) {
  const request = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ name, description, colour, icon }),
  };
  const response = await fetch(`${BASE_URL}/projects`, request);
  if (response.ok) return response.json();
  throw new Error(`Failed to create project: ${response.status}`);
}
|
||||
|
||||
/**
 * List all projects known to the memory service.
 * @returns {Promise<*>} Parsed JSON reply.
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function getProjects() {
  const response = await fetch(`${BASE_URL}/projects`);
  if (response.ok) return response.json();
  throw new Error(`Failed to fetch projects: ${response.status}`);
}
|
||||
|
||||
/**
 * Patch a project with the given fields.
 * @param {string|number} id - Project id; URL-encoded before use in the path.
 * @param {object} [fields={}] - Sent verbatim as the JSON PATCH body.
 * @returns {Promise<*>} Parsed JSON reply.
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function updateProject(id, fields = {}) {
  const res = await fetch(`${BASE_URL}/projects/${encodeURIComponent(id)}`, {
    method: 'PATCH',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(fields),
  });
  if (!res.ok) throw new Error(`Failed to update project: ${res.status}`);
  return res.json();
}
|
||||
|
||||
/**
 * Delete a project.
 * @param {string|number} id - Project id; URL-encoded before use in the path.
 * @returns {Promise<void>}
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function deleteProject(id) {
  const res = await fetch(`${BASE_URL}/projects/${encodeURIComponent(id)}`, { method: 'DELETE' });
  if (!res.ok) throw new Error(`Failed to delete project: ${res.status}`);
}
|
||||
|
||||
/**
 * List the sessions that belong to a project, in a single request.
 * @param {string|number} projectId - Project to filter by.
 * @returns {Promise<*>} Parsed JSON reply (the original comment describes it as an array of session objects).
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function getProjectSessions(projectId) {
  const endpoint = new URL(`${BASE_URL}/sessions`);
  const query = endpoint.searchParams;
  query.set('limit', 200); // generous upper bound; no further pages are requested
  query.set('offset', 0);
  query.set('projectId', projectId);

  const response = await fetch(endpoint.toString());
  if (response.ok) return response.json();
  throw new Error(`Failed to fetch project sessions: ${response.status}`);
}
|
||||
|
||||
/**
 * Fetch a single project.
 * @param {string|number} id - Project id; URL-encoded before use in the path.
 * @returns {Promise<*|null>} Parsed JSON reply, or null when the service answers 404.
 * @throws {Error} On any other non-OK status.
 */
async function getProject(id) {
  const res = await fetch(`${BASE_URL}/projects/${encodeURIComponent(id)}`);
  if (res.status === 404) return null;
  if (!res.ok) throw new Error(`Failed to fetch project: ${res.status}`);
  return res.json();
}
|
||||
|
||||
/**
 * List episodes with optional session filter and search text.
 * @param {object} [params]
 * @param {number} [params.limit=50] - Page size.
 * @param {number} [params.offset=0] - Number of episodes to skip.
 * @param {string|number} [params.sessionId] - Added to the query only when truthy.
 * @param {string} [params.q] - Added to the query only when truthy.
 * @returns {Promise<*>} Parsed JSON reply.
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function getEpisodes({ limit = 50, offset = 0, sessionId, q } = {}) {
  const endpoint = new URL(`${BASE_URL}/episodes`);
  const query = endpoint.searchParams;
  query.set('limit', limit);
  query.set('offset', offset);
  if (sessionId) {
    query.set('sessionId', sessionId);
  }
  if (q) {
    query.set('q', q);
  }

  const response = await fetch(endpoint.toString());
  if (response.ok) return response.json();
  throw new Error(`Failed to fetch episodes: ${response.status}`);
}
|
||||
|
||||
/**
 * Delete an episode.
 * @param {string|number} id - Episode id; URL-encoded before use in the path.
 * @returns {Promise<void>}
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function deleteEpisode(id) {
  const res = await fetch(`${BASE_URL}/episodes/${encodeURIComponent(id)}`, { method: 'DELETE' });
  if (!res.ok) throw new Error(`Failed to delete episode: ${res.status}`);
}
|
||||
|
||||
/**
 * List the summaries stored for a session.
 * @param {string|number} sessionId - Session id; URL-encoded before use in the path.
 * @returns {Promise<*>} Parsed JSON reply.
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function getSummariesBySession(sessionId) {
  const res = await fetch(`${BASE_URL}/sessions/${encodeURIComponent(sessionId)}/summaries`);
  if (!res.ok) throw new Error(`Failed to fetch summaries: ${res.status}`);
  return res.json();
}
|
||||
|
||||
/**
 * Store a new summary row in the memory service.
 * @param {object} summary - Only sessionId, projectId, content, tokenCount and
 *   episodeRange are forwarded in the JSON body.
 * @returns {Promise<*>} Parsed JSON reply.
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function createSummary({ sessionId, projectId, content, tokenCount, episodeRange }) {
  const payload = { sessionId, projectId, content, tokenCount, episodeRange };
  const response = await fetch(`${BASE_URL}/summaries`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  });
  if (response.ok) return response.json();
  throw new Error(`Failed to create summary: ${response.status}`);
}
|
||||
|
||||
/**
 * Patch an existing summary.
 * @param {string|number} id - Summary id; URL-encoded before use in the path.
 * @param {{content: *, tokenCount: *, episodeRange: *}} fields - Values sent in the PATCH body.
 * @returns {Promise<*>} Parsed JSON reply.
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function updateSummary(id, { content, tokenCount, episodeRange }) {
  const res = await fetch(`${BASE_URL}/summaries/${encodeURIComponent(id)}`, {
    method: 'PATCH',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ content, tokenCount, episodeRange }),
  });
  if (!res.ok) throw new Error(`Failed to update summary: ${res.status}`);
  return res.json();
}
|
||||
|
||||
/**
 * List the summaries stored for a project.
 * @param {string|number} projectId - Project id; URL-encoded before use in the
 *   path (the summaries router passes it straight from `req.params`).
 * @returns {Promise<*>} Parsed JSON reply.
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function getSummariesByProject(projectId) {
  const res = await fetch(`${BASE_URL}/projects/${encodeURIComponent(projectId)}/summaries`);
  if (!res.ok) throw new Error(`Failed to fetch summaries: ${res.status}`);
  return res.json();
}
|
||||
|
||||
/**
 * Ask the memory service to generate a summary for a project.
 * @param {string|number} projectId - Project id; URL-encoded before use in the path.
 * @returns {Promise<*>} Parsed JSON reply.
 * @throws {Error} On a non-OK status. The status code is part of the message,
 *   which the summaries router inspects to pass a 422 through.
 */
async function generateProjectSummary(projectId) {
  const res = await fetch(`${BASE_URL}/projects/${encodeURIComponent(projectId)}/summarize`, {
    method: 'POST',
  });
  if (!res.ok) throw new Error(`Failed to generate project summary: ${res.status}`);
  return res.json();
}
|
||||
|
||||
/**
 * Fetch the current overview summary of a project.
 * @param {string|number} projectId - Project id; URL-encoded before use in the path.
 * @returns {Promise<*>} Parsed JSON reply; per the original comment this is
 *   null when no overview exists yet.
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function getProjectOverviewSummary(projectId) {
  const res = await fetch(`${BASE_URL}/projects/${encodeURIComponent(projectId)}/overview`);
  if (!res.ok) throw new Error(`Failed to fetch project overview: ${res.status}`);
  return res.json(); // null if none exists yet
}
|
||||
|
||||
/**
 * Keyword search over episodes via the memory service.
 * @param {string} query - Search text, sent as `q`.
 * @param {object} [options]
 * @param {number} [options.limit=10] - Maximum number of hits.
 * @param {Array|null} [options.sessionIds=null] - When non-empty, sent as a
 *   comma-joined `sessionIds` query parameter.
 * @returns {Promise<*>} Parsed JSON reply.
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function searchEpisodes(query, { limit = 10, sessionIds = null } = {}) {
  const endpoint = new URL(`${BASE_URL}/episodes/search`);
  const params = endpoint.searchParams;
  params.set('q', query);
  params.set('limit', limit);
  if (sessionIds?.length) {
    params.set('sessionIds', sessionIds.join(','));
  }
  const response = await fetch(endpoint.toString());
  if (response.ok) return response.json();
  throw new Error(`FTS search error: ${response.status}`);
}
|
||||
|
||||
/**
 * Resolve the episodes linked to a set of entities.
 * @param {Array} entityIds - Entity ids, sent as `{ entityIds }`.
 * @returns {Promise<*>} Parsed JSON reply; the original comment gives its shape as `{ episodeIds: [...] }`.
 * @throws {Error} When the memory service answers with a non-OK status.
 */
async function getEpisodesByEntities(entityIds) {
  const request = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ entityIds }),
  };
  const response = await fetch(`${BASE_URL}/episodes/by-entities`, request);
  if (response.ok) return response.json(); // { episodeIds: [...] }
  throw new Error(`Episodes-by-entities error: ${response.status}`);
}
|
||||
|
||||
module.exports = {
|
||||
getSessionByExternalId,
|
||||
createSession,
|
||||
getRecentEpisodes,
|
||||
createEpisode
|
||||
createEpisode,
|
||||
getEpisodeById,
|
||||
getSessionHistory,
|
||||
getSessions,
|
||||
updateSession,
|
||||
deleteSession,
|
||||
createProject,
|
||||
getProjects,
|
||||
updateProject,
|
||||
deleteProject,
|
||||
getProjectSessions,
|
||||
getProject,
|
||||
getEpisodes,
|
||||
deleteEpisode,
|
||||
getSummariesBySession,
|
||||
createSummary,
|
||||
updateSummary,
|
||||
getSummariesByProject,
|
||||
generateProjectSummary,
|
||||
getProjectOverviewSummary,
|
||||
searchEpisodes,
|
||||
getEpisodesByEntities,
|
||||
}
|
||||
58
packages/orchestration-service/src/services/qdrant.js
Normal file
58
packages/orchestration-service/src/services/qdrant.js
Normal file
@@ -0,0 +1,58 @@
|
||||
const {getEnv, QDRANT, COLLECTIONS, ORCHESTRATION } = require('@nexusai/shared')
|
||||
|
||||
const BASE_URL = getEnv('QDRANT_URL', QDRANT.DEFAULT_URL);
|
||||
|
||||
/**
 * Vector search over the episodes collection in Qdrant.
 *
 * @param {number[]} vector - Query embedding.
 * @param {object} [options]
 * @param {number} [options.limit=ORCHESTRATION.RECENT_EPISODE_LIMIT] - Maximum hits.
 * @param {number} [options.scoreThreshold=ORCHESTRATION.SCORE_THRESHOLD] - Sent as `score_threshold`.
 * @param {string|number} [options.sessionId] - Restrict to one session (`must` filter).
 * @param {Array} [options.projectSessionIds] - Restrict to any of these sessions
 *   (`should` filter); takes precedence over `sessionId` when provided.
 * @returns {Promise<*>} The `result` field of Qdrant's JSON reply.
 * @throws {Error} On a non-OK status; the message carries the status and the response body.
 */
async function searchEpisodes( vector, {limit = ORCHESTRATION.RECENT_EPISODE_LIMIT, scoreThreshold = ORCHESTRATION.SCORE_THRESHOLD, sessionId, projectSessionIds } = {}) {
  const body = { vector, limit, score_threshold: scoreThreshold, with_payload: true };

  if (projectSessionIds) {
    // NOTE(review): an empty array still takes this branch and sends
    // `should: []` — confirm how Qdrant treats an empty clause list.
    body.filter = {
      should: projectSessionIds.map((id) => ({
        key: 'sessionId', match: { value: id },
      })),
    };
  } else if (sessionId) {
    body.filter = { must: [{ key: 'sessionId', match: { value: sessionId } }] };
  }

  const res = await fetch(
    `${BASE_URL}/collections/${COLLECTIONS.EPISODES}/points/search`,
    {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    }
  );

  if (!res.ok) {
    // Include the response body, as searchEntities does, so the reason for a
    // rejected query is not lost.
    const detail = await res.text();
    throw new Error(`QDrant error: ${res.status} - ${detail}`);
  }

  const data = await res.json();
  return data.result;
}
|
||||
|
||||
/**
 * Vector search over the entities collection in Qdrant.
 *
 * @param {number[]} vector - Query embedding.
 * @param {object} [options]
 * @param {number} [options.limit=ORCHESTRATION.ENTITIES_LIMIT] - Maximum hits.
 * @param {number} [options.scoreThreshold=ORCHESTRATION.ENTITIES_THRESHOLD] - Sent as `score_threshold`.
 * @param {string|number} [options.projectId] - When neither null nor undefined,
 *   restricts hits to that project (`must` filter).
 * @returns {Promise<*>} The `result` field of Qdrant's JSON reply.
 * @throws {Error} On a non-OK status; the message carries the status and the response body.
 */
async function searchEntities(vector, { limit = ORCHESTRATION.ENTITIES_LIMIT, scoreThreshold = ORCHESTRATION.ENTITIES_THRESHOLD, projectId = undefined } = {}) {
  const payload = { vector, limit, score_threshold: scoreThreshold, with_payload: true };

  // `!= null` matches both null and undefined.
  if (projectId != null) {
    payload.filter = {
      must: [{ key: 'projectId', match: { value: projectId } }],
    };
  }

  const endpoint = `${BASE_URL}/collections/${COLLECTIONS.ENTITIES}/points/search`;
  const response = await fetch(endpoint, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  });

  if (!response.ok) {
    const detail = await response.text();
    throw new Error(`Qdrant error: ${response.status} - ${detail}`);
  }

  const { result } = await response.json();
  return result;
}
|
||||
|
||||
module.exports = { searchEpisodes, searchEntities };
|
||||
151
packages/orchestration-service/src/services/summarization.js
Normal file
151
packages/orchestration-service/src/services/summarization.js
Normal file
@@ -0,0 +1,151 @@
|
||||
const { getEnv, SERVICES, SUMMARIES, logger } = require('@nexusai/shared');
|
||||
|
||||
// Endpoint and model used to generate summaries (called through `/api/generate`).
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b');
const MEMORY_URL = getEnv('MEMORY_SERVICE_URL', SERVICES.MEMORY_URL);

// Numeric tuning knobs. Environment values arrive as strings, so parse them
// explicitly as base-10 integers.
const THRESHOLD_TOKENS = Number.parseInt(getEnv('SUMMARY_THRESHOLD_TOKENS', SUMMARIES.THRESHOLD_TOKENS), 10);
const MAX_SUMMARY_TOKENS = Number.parseInt(getEnv('SUMMARY_MAX_TOKENS', SUMMARIES.MAX_SUMMARY_TOKENS), 10);
const MIN_EPISODES_SINCE = Number.parseInt(getEnv('SUMMARY_MIN_EPISODES', SUMMARIES.MIN_EPISODES_SINCE), 10);
|
||||
|
||||
/**
 * Build the ChatML prompt that asks the model for a session summary.
 *
 * Episodes are rendered as "User: … / Assistant: …" exchanges. When the
 * rendered text is longer than `maxChars`, only its tail is kept, so the most
 * recent exchanges survive.
 *
 * @param {Array<{user_message: string, ai_response: string}>} episodes - Exchanges to summarise.
 * @param {string|null} [existingSummary=null] - When truthy, the prompt asks
 *   for an update of this summary instead of a fresh one.
 * @param {number} [maxChars=3000] - Character budget for the rendered exchanges.
 * @returns {string} Prompt text ending with an open assistant turn.
 */
function buildSummaryPrompt(episodes, existingSummary = null, maxChars = 3000) {
  let context = episodes
    .map((ep) => `User: ${ep.user_message}\nAssistant: ${ep.ai_response}`)
    .join('\n\n');

  if (context.length > maxChars) {
    // Keep the tail. A positive start index is used because slice(-0) would
    // return the whole string.
    context = context.slice(context.length - Math.max(0, maxChars));
  }

  const instruction = existingSummary
    ? `Update the summary below to incorporate the new exchanges.
Write 3-5 sentences in third person. Do not quote directly — paraphrase only.
Do not include greetings, sign-offs, or filler. Output only the updated summary text.

Previous summary:
${existingSummary}

New exchanges:
${context}`
    : `Summarize the conversation below in 3-5 sentences.
Write in third person. Do not quote directly — paraphrase only.
Do not include greetings, sign-offs, or filler. Output only the summary text.

Conversation:
${context}`;

  return [
    '<|im_start|>user', // ChatML for qwen2.5
    instruction,
    '<|im_end|>',
    '<|im_start|>assistant',
  ].join('\n');
}
|
||||
|
||||
/**
 * Generate (or update) a summary by calling the extraction model.
 *
 * @param {Array} episodes - Exchanges passed on to buildSummaryPrompt.
 * @param {string|null} [existingSummary=null] - Previous summary to update, if any.
 * @returns {Promise<string>} Cleaned summary text; empty when the model returned nothing.
 * @throws {Error} When the model endpoint answers with a non-OK status.
 */
async function generateSummary(episodes, existingSummary = null) {
  const requestBody = {
    model: EXTRACTION_MODEL,
    prompt: buildSummaryPrompt(episodes, existingSummary),
    stream: false,
    options: {
      temperature: 0.2, // slightly higher than entities — summaries benefit from some fluency
      num_predict: 500, // generous but bounded — keeps summaries from running long
    },
  };

  const response = await fetch(`${EXTRACTION_URL}/api/generate`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(requestBody),
  });
  if (!response.ok) {
    throw new Error(`Ollama responded ${response.status}`);
  }

  const payload = await response.json();
  const text = payload.response?.trim() ?? '';

  // Strip any ChatML tokens the model echoes back: first whole
  // start…end spans, then any stray markers that remain.
  const withoutSpans = text.replace(/<\|im_start\|>.*?<\|im_end\|>/gs, '');
  const withoutMarkers = withoutSpans.replace(/<\|im_start\|>|<\|im_end\|>|<\|im_sep\|>/g, '');
  return withoutMarkers.trim();
}
|
||||
|
||||
/**
 * Summarise a session once it has grown past the token threshold.
 *
 * Flow: sum the session's tokens → load existing summaries → pick the episodes
 * not yet covered by the latest summary → generate text → create a new summary
 * row, or update the latest one when it is still below the size limit.
 *
 * @param {{id: (string|number)}} session - Session whose `id` addresses the memory service.
 * @param {Array<{id: number, token_count?: number}>} allEpisodes - Every episode of the session.
 * @returns {Promise<void>}
 * @throws {Error} When the summary write is rejected by the memory service, or
 *   when summary generation fails.
 */
async function maybeSummarize(session, allEpisodes) {
  // 1. Sum total tokens for this session
  const totalTokens = allEpisodes.reduce((sum, ep) => sum + (ep.token_count || 0), 0);
  if (totalTokens < THRESHOLD_TOKENS) return; // under threshold — nothing to do

  // 2. Fetch existing summaries for session
  const summariesRes = await fetch(`${MEMORY_URL}/sessions/${session.id}/summaries`);
  if (!summariesRes.ok) {
    logger.warn(`[summarization] Could not load summaries for session ${session.id}: ${summariesRes.status}`);
    return;
  }
  const summaries = await summariesRes.json();

  const latest = summaries.at(-1) ?? null;
  // The last id in "first-last" is the newest episode already summarised.
  const lastCoveredId = latest
    ? Number.parseInt(latest.episode_range?.split('-').at(-1), 10) || 0
    : 0;

  // 3. Determine episodes to summarize (only the uncovered ones when a summary exists)
  const episodesToSummarize = latest
    ? allEpisodes.filter((ep) => ep.id > lastCoveredId)
    : allEpisodes;

  // 4. Guard — don't re-summarize until MIN_EPISODES_SINCE new episodes have accumulated
  if (latest && episodesToSummarize.length < MIN_EPISODES_SINCE) return;
  // Nothing to summarise would otherwise produce an "undefined-undefined" range.
  if (episodesToSummarize.length === 0) return;

  // 5. Determine episode range from the episodes actually being summarized
  const summarizedIds = episodesToSummarize.map((ep) => ep.id).sort((a, b) => a - b);
  const episodeRange = `${summarizedIds.at(0)}-${summarizedIds.at(-1)}`;

  logger.debug('[summarization] episodes to summarize:', episodesToSummarize.length);

  // If the existing summary is already large, start fresh rather than
  // appending to a huge blob.
  // NOTE(review): this compares a character length against a limit named in
  // tokens — confirm the intended unit.
  const startFresh = !latest || latest.content.length >= MAX_SUMMARY_TOKENS;

  const content = await generateSummary(
    episodesToSummarize,
    startFresh ? null : latest.content
  );
  if (!content) return;

  // 6. Create new row or update existing
  // NOTE(review): on update, the stored range covers only the newly summarised
  // episodes — confirm that is intended.
  const fields = { content, tokenCount: totalTokens, episodeRange };
  const writeRes = startFresh
    ? await fetch(`${MEMORY_URL}/summaries`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ sessionId: session.id, ...fields }),
      })
    : await fetch(`${MEMORY_URL}/summaries/${latest.id}`, {
        method: 'PATCH',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(fields),
      });

  // Without this check a rejected write would still be logged as a success.
  if (!writeRes.ok) {
    throw new Error(`Failed to ${startFresh ? 'create' : 'update'} summary for session ${session.id}: ${writeRes.status}`);
  }

  if (startFresh) {
    logger.debug(`[summarization] Created new summary for session ${session.id}`);
  } else {
    logger.debug(`[summarization] Updated summary ${latest.id} for session ${session.id}`);
  }
}
|
||||
|
||||
/**
 * Kick off summarisation in the background.
 *
 * The summarisation promise is deliberately not awaited; a failure is only
 * logged as a warning and never reaches the caller.
 *
 * @param {object} session - Passed through to maybeSummarize.
 * @param {Array} allEpisodes - Passed through to maybeSummarize.
 * @returns {Promise<void>} Resolves as soon as the work has been started.
 */
async function triggerSummary(session, allEpisodes) {
  const reportFailure = (err) =>
    logger.warn('[summarization] Summary failed (non-critical):', err.message);

  // Intentionally fire-and-forget.
  maybeSummarize(session, allEpisodes).catch(reportFailure);
}
|
||||
|
||||
module.exports = { triggerSummary };
|
||||
@@ -17,15 +17,106 @@ const EPISODIC = {
|
||||
DEFAULT_RECENT_LIMIT: 10, // Default number of recent episodes to retrieve
|
||||
DEFAULT_PAGE_SIZE: 20, // Default number of episodes per page for pagination
|
||||
DEFAULT_SEARCH_LIMIT: 10, // Default number of search results to return
|
||||
DEFAULT_OFFSET: 0,
|
||||
DEFAULT_SESSIONS_LIMIT: 20,
|
||||
};
|
||||
|
||||
// Defaults for the orchestration service.
const ORCHESTRATION = {
  // Episode vector search defaults (limit / score threshold).
  RECENT_EPISODE_LIMIT: 5,
  SEMANTIC_LIMIT: 5,
  SCORE_THRESHOLD: 0.5,

  // Entity vector search defaults (limit / score threshold).
  ENTITIES_LIMIT: 5,
  ENTITIES_THRESHOLD: 0.55,

  TEMPERATURE: 0.7,
  CONTEXT_BUDGET: 4096,
  ENTITY_WEIGHT: 0.5,
  MIN_RECENT_EPISODES: 2,
  CORS_ORIGIN: 'http://localhost:5173',
  SYSTEM_PROMPT: `You are a helpful, context-aware AI assistant. You have access to memories of past conversations with the user. Use them to provide consistent, personalised responses.`,
};
|
||||
|
||||
// Defaults for an Ollama backend: server URL, embedding model, chat model.
const OLLAMA = {
  DEFAULT_URL: 'http://localhost:11434',
  EMBED_MODEL: 'nomic-embed-text',
  OLLAMA_MODEL: 'companion:latest',
};
|
||||
|
||||
// Defaults for a llama.cpp backend: server URL and model name.
const LLAMACPP = {
  DEFAULT_URL: 'http://localhost:8080',
  DEFAULT_MODEL: 'qwen/qwen3.6-35b-a3b',
};
|
||||
|
||||
// Default port of each service. Kept as strings; they are interpolated into
// the default service URLs.
const PORTS = {
  INFERENCE: '3001',
  MEMORY: '3002',
  EMBEDDING: '3003',
  ORCHESTRATION: '4000',
};
|
||||
|
||||
// Default base URLs of the sibling services, derived from PORTS so a port is
// defined in one place only. The stale hard-coded EMBEDDING_URL entry is
// dropped; without a trailing comma it was a syntax error and duplicated the key.
const SERVICES = {
  EMBEDDING_URL: `http://localhost:${PORTS.EMBEDDING}`,
  MEMORY_URL: `http://localhost:${PORTS.MEMORY}`,
  INFERENCE_URL: `http://localhost:${PORTS.INFERENCE}`,
};
|
||||
|
||||
// Default sampling parameters for inference requests.
const INFERENCE_DEFAULTS = {
  // Randomness: 0 = deterministic, 1 = creative.
  TEMPERATURE: 0.7,
  // Upper bound on tokens generated per response.
  MAX_TOKENS: 1024,
  // Nucleus sampling: consider tokens making up the top 90% of probability mass.
  TOP_P: 0.9,
  // Restrict each step to the K most likely candidates.
  TOP_K: 40,
  // Penalise recently used tokens to reduce repetition.
  REPEAT_PENALTY: 1.1,
  // null = random; set an integer for reproducible output.
  SEED: null,
};
|
||||
|
||||
// Default location of the SQLite database file.
const SQLITE = {
  DEFAULT_PATH: './data/nexusai.db',
};
|
||||
|
||||
// Tuning knobs for session and project summarisation.
const SUMMARIES = {
  // Trigger a summary once a session reaches this many tokens.
  THRESHOLD_TOKENS: 200,
  // When the existing summary exceeds this, create a new one instead of updating.
  MAX_SUMMARY_TOKENS: 800,
  // Don't re-summarise until this many new episodes exist since the last summary.
  MIN_EPISODES_SINCE: 5,
  // Max characters of recent episodes to include in a summary prompt.
  MAX_SUMMARY_CHARS: 8000,
  // Max episodes considered across a whole project when generating a summary.
  MAX_PROJECT_EPISODE_LIMIT: 200,
};
|
||||
|
||||
// Settings for entity extraction and the entity graph.
const ENTITIES = {
  // Low temperature: more precise extraction, less creative.
  TEMPERATURE: 0.1,
  // Token budget for entity extraction.
  NUM_PREDICT: 1500,
  // Minimum confidence score for an extracted entity to be kept.
  THRESHOLD: 0.55,
  // mention_count at which an entity counts as well-established.
  PROMOTION_THRESHOLD: 3,
  // Default traversal depth for neighbourhood queries.
  GRAPH_HOP_DEPTH: 1,
  // Entity categories.
  TYPES: [
    'person', 'place', 'project',
    'technology', 'concept', 'organization',
    'character', 'event', 'topic',
  ],
};
|
||||
|
||||
// Settings for fusing semantic and keyword retrieval results.
const RETRIEVAL = {
  // Reciprocal Rank Fusion smoothing constant; softens the rank-1 advantage.
  // Not exposed in settings.
  RRF_K: 60,
  // Weight applied to semantic (Qdrant) results.
  SEMANTIC_WEIGHT: 1.0,
  // Weight applied to keyword (SQLite) results. 0 disables them; set > 0 to
  // enable and tune the balance against semantic matches.
  KEYWORD_WEIGHT: 0,
};
|
||||
|
||||
module.exports = {
|
||||
QDRANT,
|
||||
COLLECTIONS,
|
||||
EPISODIC,
|
||||
SERVICES
|
||||
SERVICES,
|
||||
OLLAMA,
|
||||
PORTS,
|
||||
LLAMACPP,
|
||||
INFERENCE_DEFAULTS,
|
||||
SQLITE,
|
||||
ORCHESTRATION,
|
||||
SUMMARIES,
|
||||
ENTITIES,
|
||||
RETRIEVAL,
|
||||
};
|
||||
@@ -1,4 +1,24 @@
|
||||
const {getEnv} = require('./config/env');
|
||||
const {QDRANT, COLLECTIONS, EPISODIC, SERVICES } = require('./config/constants');
|
||||
const {QDRANT, COLLECTIONS, EPISODIC, SERVICES, OLLAMA, PORTS, LLAMACPP, INFERENCE_DEFAULTS, SQLITE, ORCHESTRATION, SUMMARIES, ENTITIES, RETRIEVAL } = require('./config/constants');
|
||||
const {parseRow, formatEpisodeText} = require('./utils')
|
||||
const logger = require('./utils/logger');
|
||||
|
||||
module.exports = {getEnv, QDRANT, COLLECTIONS, EPISODIC, SERVICES};
|
||||
module.exports = {
|
||||
getEnv,
|
||||
QDRANT,
|
||||
COLLECTIONS,
|
||||
EPISODIC,
|
||||
SERVICES,
|
||||
OLLAMA,
|
||||
PORTS,
|
||||
LLAMACPP,
|
||||
INFERENCE_DEFAULTS,
|
||||
SQLITE,
|
||||
ORCHESTRATION,
|
||||
parseRow,
|
||||
formatEpisodeText,
|
||||
SUMMARIES,
|
||||
ENTITIES,
|
||||
logger,
|
||||
RETRIEVAL,
|
||||
};
|
||||
13
packages/shared/src/utils.js
Normal file
13
packages/shared/src/utils.js
Normal file
@@ -0,0 +1,13 @@
|
||||
/**
 * Return a copy of a row with its `metadata` JSON string decoded.
 * @param {object|null|undefined} row - Row object; falsy input yields null.
 * @returns {object|null} Shallow copy of `row` whose `metadata` is the parsed
 *   value, or null when the row has no (truthy) metadata.
 * @throws {SyntaxError} When `metadata` is not valid JSON.
 */
function parseRow(row) {
  if (!row) {
    return null;
  }
  const metadata = row.metadata ? JSON.parse(row.metadata) : null;
  return { ...row, metadata };
}
|
||||
|
||||
/**
 * Render one exchange as two labelled lines.
 * @param {string} userMessage - What the user said.
 * @param {string} aiResponse - What the assistant replied.
 * @returns {string} "User: …" and "Assistant: …" joined by a newline.
 */
function formatEpisodeText(userMessage, aiResponse) {
  const userLine = `User: ${userMessage}`;
  const assistantLine = `Assistant: ${aiResponse}`;
  return [userLine, assistantLine].join('\n');
}
|
||||
|
||||
module.exports = { parseRow, formatEpisodeText };
|
||||
12
packages/shared/src/utils/logger.js
Normal file
12
packages/shared/src/utils/logger.js
Normal file
@@ -0,0 +1,12 @@
|
||||
// Severity ranks: a message is emitted when the configured level is at least
// as verbose as the message's own level.
const LEVELS = { error: 0, warn: 1, info: 2, debug: 3 };

// Level from LOG_LEVEL (case-insensitive); unset or unknown values fall back to info.
const requestedLevel = process.env.LOG_LEVEL?.toLowerCase();
const current = LEVELS[requestedLevel] ?? LEVELS.info;

// Minimal levelled logger over console. Each method prefixes its arguments
// with a tag, and evaluates to false when the message is filtered out.
const logger = {
  error(...args) {
    return current >= LEVELS.error && console.error('[ERROR]', ...args);
  },
  warn(...args) {
    return current >= LEVELS.warn && console.warn( '[WARN]', ...args);
  },
  info(...args) {
    return current >= LEVELS.info && console.log( '[INFO]', ...args);
  },
  debug(...args) {
    return current >= LEVELS.debug && console.log( '[DEBUG]', ...args);
  },
};
|
||||
|
||||
module.exports = logger;
|
||||
67
test-fusion.js
Normal file
67
test-fusion.js
Normal file
@@ -0,0 +1,67 @@
|
||||
// test-fusion.js
|
||||
const { RETRIEVAL } = require('./packages/shared/src/config/constants');
|
||||
|
||||
/**
 * Merge semantic and keyword hits with weighted Reciprocal Rank Fusion.
 *
 * An episode at 0-based rank i in a list contributes weight / (k + i + 1),
 * with k = RETRIEVAL.RRF_K. Episodes present in both lists get both
 * contributions. Keyword-only episodes are added only when their contribution
 * is positive, so a keyword weight of 0 yields pure semantic order.
 *
 * @param {Array<{id: *}>} semanticEps - Semantic hits, best first.
 * @param {Array<{id: *}>} keywordEps - Keyword hits, best first.
 * @param {object} [options]
 * @param {number} [options.semanticWeight=RETRIEVAL.SEMANTIC_WEIGHT]
 * @param {number} [options.keywordWeight=RETRIEVAL.KEYWORD_WEIGHT]
 * @param {number} [options.limit] - Maximum results; all when omitted.
 * @returns {Array} Episodes ordered by fused score, highest first.
 */
function fuseEpisodeResults(
  semanticEps,
  keywordEps,
  {
    // Defaults keep a missing weight from turning every score into NaN.
    semanticWeight = RETRIEVAL.SEMANTIC_WEIGHT,
    keywordWeight = RETRIEVAL.KEYWORD_WEIGHT,
    limit,
  } = {}
) {
  const k = RETRIEVAL.RRF_K;
  const scores = new Map();

  semanticEps.forEach((ep, i) => {
    scores.set(ep.id, { episode: ep, score: semanticWeight / (k + i + 1) });
  });

  keywordEps.forEach((ep, i) => {
    const contrib = keywordWeight / (k + i + 1);
    if (scores.has(ep.id)) {
      scores.get(ep.id).score += contrib;
    } else if (contrib > 0) {
      scores.set(ep.id, { episode: ep, score: contrib });
    }
  });

  // Array sort is stable, so ties keep insertion order (semantic hits first).
  return [...scores.values()]
    .sort((a, b) => b.score - a.score)
    .slice(0, limit)
    .map(({ episode }) => episode);
}
|
||||
|
||||
// Fail loudly: console.assert only prints a message and lets the script go on,
// so "PASS" and "All tests passed." used to be printed even on failure.
function check(condition, message) {
  if (!condition) throw new Error(message);
}

// --- Test 1: episodes in both lists rank highest ---
const semantic = [
  { id: 1, user_message: 'ep1 — semantic only, rank 1' },
  { id: 2, user_message: 'ep2 — in both lists, rank 2 semantic' },
  { id: 3, user_message: 'ep3 — in both lists, rank 3 semantic' },
];
const keyword = [
  { id: 3, user_message: 'ep3 — rank 1 FTS' },
  { id: 2, user_message: 'ep2 — rank 2 FTS' },
  { id: 4, user_message: 'ep4 — FTS only, rank 3' },
];

const result = fuseEpisodeResults(semantic, keyword, { semanticWeight: 1, keywordWeight: 1, limit: 5 });
console.log('Test 1 — equal weights, episodes in both lists should rank highest:');
result.forEach((ep, i) => console.log(`  ${i + 1}. id=${ep.id} "${ep.user_message}"`));
check(result[0].id === 2 || result[0].id === 3, 'FAIL: ep2 or ep3 should be rank 1');
const ep1Index = result.findIndex((e) => e.id === 1);
const ep2Index = result.findIndex((e) => e.id === 2);
// ep1 may be absent; when present it must come after ep2.
check(ep1Index === -1 || ep1Index > ep2Index, 'FAIL: ep1 (semantic only) should rank below ep2');
console.log('  PASS\n');

// --- Test 2: keywordWeight:0 → pure semantic passthrough ---
const result2 = fuseEpisodeResults(semantic, keyword, { semanticWeight: 1, keywordWeight: 0, limit: 5 });
console.log('Test 2 — keywordWeight:0 should return only semantic results in original order:');
result2.forEach((ep, i) => console.log(`  ${i + 1}. id=${ep.id}`));
check(result2.length === 3, `FAIL: expected 3, got ${result2.length}`);
check(result2[0].id === 1, 'FAIL: ep1 should be rank 1');
check(result2[1].id === 2, 'FAIL: ep2 should be rank 2');
console.log('  PASS\n');

// --- Test 3: limit is respected ---
const result3 = fuseEpisodeResults(semantic, keyword, { semanticWeight: 1, keywordWeight: 1, limit: 2 });
console.log('Test 3 — limit:2 should return exactly 2 results:');
check(result3.length === 2, `FAIL: expected 2, got ${result3.length}`);
console.log('  PASS\n');

// --- Test 4: no overlap → all unique episodes, ordered by individual contribution ---
const semOnly = [{ id: 10, user_message: 'sem' }];
const ftsOnly = [{ id: 20, user_message: 'fts' }];
const result4 = fuseEpisodeResults(semOnly, ftsOnly, { semanticWeight: 1, keywordWeight: 1, limit: 5 });
console.log('Test 4 — no overlap, both should appear:');
check(result4.length === 2, `FAIL: expected 2, got ${result4.length}`);
check(result4[0].id === 10, 'FAIL: semantic rank-1 should beat fts rank-1 (same weight, both rank 1, but semantic inserted first — tie goes to semantic)');
console.log('  PASS\n');

console.log('All tests passed.');
|
||||
Reference in New Issue
Block a user