From 77275cf476886728ac46c9d640dc032c09aec0db Mon Sep 17 00:00:00 2001 From: Storme-bit Date: Fri, 17 Apr 2026 23:13:36 -0700 Subject: [PATCH] memory settings implementation --- packages/chat-client/src/api/orchestration.js | 68 +--- .../src/components/AllChatsView.jsx | 3 +- packages/chat-client/src/config/constants.js | 5 + .../orchestration-service/src/chat/index.js | 312 ++++++++++-------- .../src/config/settings.js | 30 ++ packages/orchestration-service/src/index.js | 4 +- .../src/routes/settings.js | 38 +++ 7 files changed, 254 insertions(+), 206 deletions(-) create mode 100644 packages/orchestration-service/src/config/settings.js create mode 100644 packages/orchestration-service/src/routes/settings.js diff --git a/packages/chat-client/src/api/orchestration.js b/packages/chat-client/src/api/orchestration.js index 722e89a..eba2ade 100644 --- a/packages/chat-client/src/api/orchestration.js +++ b/packages/chat-client/src/api/orchestration.js @@ -33,60 +33,6 @@ export async function sendMessage(sessionId, message, model) { return res.json(); } -// onChunk(text) called for each token -// onDone({ model, tokenCount }) called when stream closes -// returns an abort function — call it to cancel mid-stream -/* -export function streamMessage(sessionId, message, model, { onChunk, onDone, onError }) { - const controller = new AbortController(); - - (async () => { - try { - const res = await fetch(`${BASE_URL}/chat/stream`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ sessionId, message, model }), - signal: controller.signal, - }); - - if (!res.ok) throw new Error(`Stream request failed: ${res.status}`); - - const reader = res.body.getReader(); - const decoder = new TextDecoder(); - let buffer = ''; - - while (true) { - const { done, value } = await reader.read(); - if (done) break; - - // Append to buffer and split on double newline (SSE event delimiter) - buffer += decoder.decode(value, { stream: true }); - const events = buffer.split('\n\n'); - buffer = events.pop(); // last item may be incomplete — keep in buffer - - for (const event of events) { - const line = event.trim(); - if (!line.startsWith('data: ')) continue; - - const raw = line.slice(6); - try { - const data = JSON.parse(raw); - if (data.text) onChunk(data.text); - if (data.done) onDone({ model: data.model ?? model, tokenCount: data.tokenCount ?? 0 }); - if (data.error) onError(new Error(data.error)); - } catch { - // malformed JSON — skip - } - } - } - } catch (err) { - if (err.name !== 'AbortError') onError(err); - } - })(); - - return () => controller.abort(); -} -*/ export function streamMessage(sessionId, message, model, { onChunk, onDone, onError }) { const controller = new AbortController(); @@ -148,17 +94,7 @@ export async function fetchModels() { if(!res.ok) throw new Error(`Failted to fetch models: ${res.status}`); return res.json(); } -/* -export async function renameSession(sessionId, name) { - const res = await fetch(`${BASE_URL}/sessions/${sessionId}`, { - method: 'PATCH', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ name }), - }); - if (!res.ok) throw new Error(`Failed to rename session: ${res.status}`); - return res.json(); -} -*/ + export async function updateSession(sessionId, { name, projectId } = {}) { const res = await fetch(`${BASE_URL}/sessions/${sessionId}`, { method: 'PATCH', @@ -221,7 +157,7 @@ export async function updateSessionProject(sessionId, projectId) { return res.json(); } -export async function getEpisodes({ limit = 50, offset = 0, sessionId, q } = {}) { +export async function getEpisodes({ limit = API_DEFAULTS.EPISODE_LIMIT, offset = API_DEFAULTS.OFFSET, sessionId, q } = {}) { const url = new URL(`${BASE_URL}/episodes`, window.location.origin); url.searchParams.set('limit', limit); url.searchParams.set('offset', offset); diff --git a/packages/chat-client/src/components/AllChatsView.jsx b/packages/chat-client/src/components/AllChatsView.jsx index 1872e89..845a15e 100644 --- a/packages/chat-client/src/components/AllChatsView.jsx +++ b/packages/chat-client/src/components/AllChatsView.jsx @@ -1,7 +1,8 @@ import React, { useState, useEffect } from 'react'; import { fetchSessions, deleteSession } from '../api/orchestration'; +import { API_DEFAULTS } from '../config/constants'; -const PAGE_SIZE = 20; +const PAGE_SIZE = API_DEFAULTS.PAGE_SIZE; export default function AllChatsView({ onSelectSession }) { const [sessions, setSessions] = useState([]); diff --git a/packages/chat-client/src/config/constants.js b/packages/chat-client/src/config/constants.js index 0b80cf5..5a2aab5 100644 --- a/packages/chat-client/src/config/constants.js +++ b/packages/chat-client/src/config/constants.js @@ -11,4 +11,9 @@ export const API_DEFAULTS = { SESSIONS_LIMIT: 20, HISTORY_LIMIT: 50, OFFSET: 0, + EPISODE_LIMIT: 50, +} + +export const CLIENT_DEFAULTS = { + PAGE_SIZE: 20, } \ No newline at end of file diff --git a/packages/orchestration-service/src/chat/index.js b/packages/orchestration-service/src/chat/index.js index 22d62b0..cf04cdd 100644 --- a/packages/orchestration-service/src/chat/index.js +++ b/packages/orchestration-service/src/chat/index.js @@ -3,20 +3,21 @@ const inference = require("../services/inference"); const embedding = require("../services/embedding"); const qdrant = require("../services/qdrant"); const { ORCHESTRATION } = require("@nexusai/shared"); - -const { RECENT_EPISODE_LIMIT, SEMANTIC_LIMIT, SCORE_THRESHOLD, SYSTEM_PROMPT } = - ORCHESTRATION; +const appSettings = require("../config/settings"); +const { SYSTEM_PROMPT } = ORCHESTRATION; function buildPrompt(recentEpisodes, semanticEpisodes, entities, userMessage) { const parts = [SYSTEM_PROMPT]; if (entities.length > 0) { - parts.push('Here is what you know about entities relevant to this conversation:'); - for (const e of entities) { - parts.push(`- ${e.name} (${e.type}): ${e.notes}`); - } - parts.push('---'); + parts.push( + "Here is what you know about entities relevant to this conversation:", + ); + for (const e of entities) { + parts.push(`- ${e.name} (${e.type}): ${e.notes}`); } + parts.push("---"); + } if (semanticEpisodes.length > 0) { parts.push("Here are some relevant memories from earlier conversations:"); @@ -80,12 +81,13 @@ async function getSemanticEpisodes( sessionId, recentIds, projectSessionIds = null, + { semanticLimit, scoreThreshold } = {}, ) { try { const vector = await embedding.embed(userMessage); const results = await qdrant.searchEpisodes(vector, { - limit: SEMANTIC_LIMIT, - scoreThreshold: SCORE_THRESHOLD, + limit: semanticLimit, + scoreThreshold: scoreThreshold, sessionId: projectSessionIds ? null : sessionId, projectSessionIds, }); @@ -106,45 +108,60 @@ async function getSemanticEpisodes( } async function getRelevantEntities(userMessage) { - try { - const vector = await embedding.embed(userMessage); - const results = await qdrant.searchEntities(vector); - console.log('[orchestration] Entity search results:', - results.map(r => ({ name: r.payload?.name, score: r.score })) - ); - return results.map(r => r.payload).filter(Boolean); - } catch (err) { - console.warn('[orchestration] Entity search failed, continuing without:', err.message); - return []; - } + try { + const vector = await embedding.embed(userMessage); + const results = await qdrant.searchEntities(vector); + console.log( + "[orchestration] Entity search results:", + results.map((r) => ({ name: r.payload?.name, score: r.score })), + ); + return results.map((r) => r.payload).filter(Boolean); + } catch (err) { + console.warn( + "[orchestration] Entity search failed, continuing without:", + err.message, + ); + return []; + } } async function chat(externalId, userMessage, options = {}) { + const { recentEpisodeLimit, semanticLimit, scoreThreshold } = + appSettings.load(); // 1. Resolve or create session let session = await memory.getSessionByExternalId(externalId); if (!session) session = await memory.createSession(externalId); -let projectSessionIds = null; -if (session.project_id) { - try { - const project = await memory.getProject(session.project_id); - if (project) { - const projectSessions = await memory.getProjectSessions(session.project_id); - projectSessionIds = projectSessions.map(s => s.id); - if (project.isolated === 1) { - console.log(`[orchestration] Isolated project — restricting to ${projectSessionIds.length} sessions`); - } else { - console.log(`[orchestration] Non-isolated project — expanding search to ${projectSessionIds.length} sessions`); + let projectSessionIds = null; + if (session.project_id) { + try { + const project = await memory.getProject(session.project_id); + if (project) { + const projectSessions = await memory.getProjectSessions( + session.project_id, + ); + projectSessionIds = projectSessions.map((s) => s.id); + if (project.isolated === 1) { + console.log( + `[orchestration] Isolated project — restricting to ${projectSessionIds.length} sessions`, + ); + } else { + console.log( + `[orchestration] Non-isolated project — expanding search to ${projectSessionIds.length} sessions`, + ); + } } + } catch (err) { + console.warn( + "[orchestration] Failed to resolve project context:", + err.message, + ); } - } catch (err) { - console.warn('[orchestration] Failed to resolve project context:', err.message); } -} // 2. Fetch recent episodes for context const recentEpisodes = await memory.getRecentEpisodes( session.id, - RECENT_EPISODE_LIMIT, + recentEpisodeLimit, ); const isFirstMessage = recentEpisodes.length === 0; const recentIds = new Set(recentEpisodes.map((e) => e.id)); @@ -154,14 +171,20 @@ if (session.project_id) { userMessage, session.id, recentIds, - projectSessionIds + projectSessionIds, + { semanticLimit, scoreThreshold }, ); // 3b. Entity Search - const entities = await getRelevantEntities(userMessage) + const entities = await getRelevantEntities(userMessage); // 4. Assemble prompt - const prompt = buildPrompt(recentEpisodes, semanticEpisodes, entities, userMessage); + const prompt = buildPrompt( + recentEpisodes, + semanticEpisodes, + entities, + userMessage, + ); // 5. Run inference const result = await inference.complete(prompt, options); @@ -193,118 +216,131 @@ if (session.project_id) { } async function chatStream(externalId, userMessage, onChunk, options = {}) { - -console.log('[orchestration] chatStream entry'); try { + const { recentEpisodeLimit, semanticLimit, scoreThreshold } = appSettings.load(); + let session = await memory.getSessionByExternalId(externalId); + if (!session) session = await memory.createSession(externalId); - console.log("[orchestration] chatStream called:", { - externalId, - userMessage: userMessage.slice(0, 50), - }); - let session = await memory.getSessionByExternalId(externalId); - if (!session) session = await memory.createSession(externalId); - -let projectSessionIds = null; -if (session.project_id) { - try { - const project = await memory.getProject(session.project_id); - if (project) { - const projectSessions = await memory.getProjectSessions(session.project_id); - projectSessionIds = projectSessions.map(s => s.id); - if (project.isolated === 1) { - console.log(`[orchestration] Isolated project — restricting to ${projectSessionIds.length} sessions`); - } else { - console.log(`[orchestration] Non-isolated project — expanding search to ${projectSessionIds.length} sessions`); - } - } - } catch (err) { - console.warn('[orchestration] Failed to resolve project context:', err.message); - } -} - - const recentEpisodes = await memory.getRecentEpisodes( - session.id, - RECENT_EPISODE_LIMIT, - ); - const isFirstMessage = recentEpisodes.length === 0; - const recentIds = new Set(recentEpisodes.map((e) => e.id)); - const semanticEpisodes = await getSemanticEpisodes( - userMessage, - session.id, - recentIds, - projectSessionIds - ); - - const entities = await getRelevantEntities(userMessage); - - const prompt = buildPrompt(recentEpisodes, semanticEpisodes, entities, userMessage); - const res = await inference.completeStream(prompt, options); - - let fullText = ""; - let model = ""; - let tokenCount = 0; - let buffer = ""; - - for await (const chunk of res.body) { - buffer += Buffer.from(chunk).toString("utf8"); - - const events = buffer.split("\n\n"); - buffer = events.pop() || ""; - - for (const event of events) { - const lines = event.split("\n"); - const dataLines = lines - .filter((line) => line.startsWith("data: ")) - .map((line) => line.slice(6)); - - if (dataLines.length === 0) continue; - - const raw = dataLines.join("\n").trim(); - if (raw === "[DONE]") continue; - + let projectSessionIds = null; + if (session.project_id) { try { - const data = JSON.parse(raw); - - if (data.response) { - fullText += data.response; - onChunk(data.response); - } - - if (data.model) model = data.model; - if (data.done && data.tokenCount !== undefined) { - tokenCount = data.tokenCount; - } - - if (data.error) { - throw new Error(data.error); + const project = await memory.getProject(session.project_id); + if (project) { + const projectSessions = await memory.getProjectSessions( + session.project_id, + ); + projectSessionIds = projectSessions.map((s) => s.id); + if (project.isolated === 1) { + console.log( + `[orchestration] Isolated project — restricting to ${projectSessionIds.length} sessions`, + ); + } else { + console.log( + `[orchestration] Non-isolated project — expanding search to ${projectSessionIds.length} sessions`, + ); + } } } catch (err) { - console.error( - "[orchestration] Failed to parse inference SSE event:", - raw, + console.warn( + "[orchestration] Failed to resolve project context:", err.message, ); } } - } - console.log("[orchestration] final streamed text length:", fullText.length); - - if (fullText.trim()) { - await memory.createEpisode(session.id, userMessage, fullText, tokenCount); - } else { - console.warn( - "[orchestration] Stream finished with no assistant text; episode not saved", + const recentEpisodes = await memory.getRecentEpisodes( + session.id, + recentEpisodeLimit, + ); + const isFirstMessage = recentEpisodes.length === 0; + const recentIds = new Set(recentEpisodes.map((e) => e.id)); + const semanticEpisodes = await getSemanticEpisodes( + userMessage, + session.id, + recentIds, + projectSessionIds, + {semanticLimit, scoreThreshold } ); - } - if (isFirstMessage && !session.name) { - autoNameSession(externalId, userMessage, fullText).catch(() => {}); - } + const entities = await getRelevantEntities(userMessage); - return { model, tokenCount }; - } catch (err) { - console.error('[orchestration] chatStream fatal error:', err.message, err.stack); + const prompt = buildPrompt( + recentEpisodes, + semanticEpisodes, + entities, + userMessage, + ); + const res = await inference.completeStream(prompt, options); + + let fullText = ""; + let model = ""; + let tokenCount = 0; + let buffer = ""; + + for await (const chunk of res.body) { + buffer += Buffer.from(chunk).toString("utf8"); + + const events = buffer.split("\n\n"); + buffer = events.pop() || ""; + + for (const event of events) { + const lines = event.split("\n"); + const dataLines = lines + .filter((line) => line.startsWith("data: ")) + .map((line) => line.slice(6)); + + if (dataLines.length === 0) continue; + + const raw = dataLines.join("\n").trim(); + if (raw === "[DONE]") continue; + + try { + const data = JSON.parse(raw); + + if (data.response) { + fullText += data.response; + onChunk(data.response); + } + + if (data.model) model = data.model; + if (data.done && data.tokenCount !== undefined) { + tokenCount = data.tokenCount; + } + + if (data.error) { + throw new Error(data.error); + } + } catch (err) { + console.error( + "[orchestration] Failed to parse inference SSE event:", + raw, + err.message, + ); + } + } + } + + console.log("[orchestration] final streamed text length:", fullText.length); + + if (fullText.trim()) { + await memory.createEpisode(session.id, userMessage, fullText, tokenCount); + } else { + console.warn( + "[orchestration] Stream finished with no assistant text; episode not saved", + ); + } + + if (isFirstMessage && !session.name) { + autoNameSession(externalId, userMessage, fullText).catch(() => {}); + } + + return { model, tokenCount }; + } catch (err) { + console.error( + "[orchestration] chatStream fatal error:", + err.message, + err.stack, + ); throw err; } } diff --git a/packages/orchestration-service/src/config/settings.js b/packages/orchestration-service/src/config/settings.js new file mode 100644 index 0000000..2ed0d51 --- /dev/null +++ b/packages/orchestration-service/src/config/settings.js @@ -0,0 +1,30 @@ +const fs = require('fs'); +const path = require('path'); +const { ORCHESTRATION } = require('@nexusai/shared'); + +const SETTINGS_PATH = path.join(__dirname, '../../data/settings.json'); + +const DEFAULTS = { + recentEpisodeLimit: ORCHESTRATION.RECENT_EPISODE_LIMIT, + semanticLimit: ORCHESTRATION.SEMANTIC_LIMIT, + scoreThreshold: ORCHESTRATION.SCORE_THRESHOLD, +}; + +function load() { + try { + const raw = fs.readFileSync(SETTINGS_PATH, 'utf8'); + return { ...DEFAULTS, ...JSON.parse(raw) }; + } catch { + return { ...DEFAULTS }; // file doesn't exist yet — use defaults + } +} + +function save(updates) { + const current = load(); + const next = { ...current, ...updates }; + fs.mkdirSync(path.dirname(SETTINGS_PATH), { recursive: true }); + fs.writeFileSync(SETTINGS_PATH, JSON.stringify(next, null, 2)); + return next; +} + +module.exports = { load, save, DEFAULTS }; \ No newline at end of file diff --git a/packages/orchestration-service/src/index.js b/packages/orchestration-service/src/index.js index c0be01e..c53aa94 100644 --- a/packages/orchestration-service/src/index.js +++ b/packages/orchestration-service/src/index.js @@ -8,6 +8,7 @@ const sessionsRouter = require('./routes/sessions'); const modelsRouter = require('./routes/models'); const projectsRouter = require('./routes/projects'); const episodesRouter = require('./routes/episodes') +const settingsRouter = require('./routes/settings') const cors = require('cors'); @@ -43,7 +44,8 @@ app.use('/chat', chatRouter); app.use('/sessions', sessionsRouter); app.use('/models', modelsRouter); app.use('/projects', projectsRouter); -app.use('/episodes', episodesRouter) +app.use('/episodes', episodesRouter); +app.use('/settings', settingsRouter); /******* Start the server ************/ app.listen(PORT, () => { diff --git a/packages/orchestration-service/src/routes/settings.js b/packages/orchestration-service/src/routes/settings.js new file mode 100644 index 0000000..8a7c977 --- /dev/null +++ b/packages/orchestration-service/src/routes/settings.js @@ -0,0 +1,38 @@ +const { Router } = require('express'); +const settings = require('../config/settings'); + +const router = Router(); + +router.get('/', (req, res) => { + res.json(settings.load()); +}); + +router.patch('/', (req, res) => { + const { recentEpisodeLimit, semanticLimit, scoreThreshold } = req.body; + const updates = {}; + + if (recentEpisodeLimit !== undefined) { + const val = Number(recentEpisodeLimit); + if (!Number.isInteger(val) || val < 1 || val > 20) + return res.status(400).json({ error: 'recentEpisodeLimit must be 1–20' }); + updates.recentEpisodeLimit = val; + } + + if (semanticLimit !== undefined) { + const val = Number(semanticLimit); + if (!Number.isInteger(val) || val < 1 || val > 20) + return res.status(400).json({ error: 'semanticLimit must be 1–20' }); + updates.semanticLimit = val; + } + + if (scoreThreshold !== undefined) { + const val = Number(scoreThreshold); + if (isNaN(val) || val < 0 || val > 1) + return res.status(400).json({ error: 'scoreThreshold must be 0–1' }); + updates.scoreThreshold = val; + } + + res.json(settings.save(updates)); +}); + +module.exports = router; \ No newline at end of file