memory settings implementation

This commit is contained in:
Storme-bit
2026-04-17 23:13:36 -07:00
parent 1cc7b62d79
commit 77275cf476
7 changed files with 254 additions and 206 deletions

View File

@@ -33,60 +33,6 @@ export async function sendMessage(sessionId, message, model) {
return res.json(); return res.json();
} }
// onChunk(text) called for each token
// onDone({ model, tokenCount }) called when stream closes
// returns an abort function — call it to cancel mid-stream
/*
export function streamMessage(sessionId, message, model, { onChunk, onDone, onError }) {
const controller = new AbortController();
(async () => {
try {
const res = await fetch(`${BASE_URL}/chat/stream`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ sessionId, message, model }),
signal: controller.signal,
});
if (!res.ok) throw new Error(`Stream request failed: ${res.status}`);
const reader = res.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
while (true) {
const { done, value } = await reader.read();
if (done) break;
// Append to buffer and split on double newline (SSE event delimiter)
buffer += decoder.decode(value, { stream: true });
const events = buffer.split('\n\n');
buffer = events.pop(); // last item may be incomplete — keep in buffer
for (const event of events) {
const line = event.trim();
if (!line.startsWith('data: ')) continue;
const raw = line.slice(6);
try {
const data = JSON.parse(raw);
if (data.text) onChunk(data.text);
if (data.done) onDone({ model: data.model ?? model, tokenCount: data.tokenCount ?? 0 });
if (data.error) onError(new Error(data.error));
} catch {
// malformed JSON — skip
}
}
}
} catch (err) {
if (err.name !== 'AbortError') onError(err);
}
})();
return () => controller.abort();
}
*/
export function streamMessage(sessionId, message, model, { onChunk, onDone, onError }) { export function streamMessage(sessionId, message, model, { onChunk, onDone, onError }) {
const controller = new AbortController(); const controller = new AbortController();
@@ -148,17 +94,7 @@ export async function fetchModels() {
if(!res.ok) throw new Error(`Failted to fetch models: ${res.status}`); if(!res.ok) throw new Error(`Failted to fetch models: ${res.status}`);
return res.json(); return res.json();
} }
/*
export async function renameSession(sessionId, name) {
const res = await fetch(`${BASE_URL}/sessions/${sessionId}`, {
method: 'PATCH',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ name }),
});
if (!res.ok) throw new Error(`Failed to rename session: ${res.status}`);
return res.json();
}
*/
export async function updateSession(sessionId, { name, projectId } = {}) { export async function updateSession(sessionId, { name, projectId } = {}) {
const res = await fetch(`${BASE_URL}/sessions/${sessionId}`, { const res = await fetch(`${BASE_URL}/sessions/${sessionId}`, {
method: 'PATCH', method: 'PATCH',
@@ -221,7 +157,7 @@ export async function updateSessionProject(sessionId, projectId) {
return res.json(); return res.json();
} }
export async function getEpisodes({ limit = 50, offset = 0, sessionId, q } = {}) { export async function getEpisodes({ limit = API_DEFAULTS.EPISODE_LIMIT, offset = API_DEFAULTS.OFFSET, sessionId, q } = {}) {
const url = new URL(`${BASE_URL}/episodes`, window.location.origin); const url = new URL(`${BASE_URL}/episodes`, window.location.origin);
url.searchParams.set('limit', limit); url.searchParams.set('limit', limit);
url.searchParams.set('offset', offset); url.searchParams.set('offset', offset);

View File

@@ -1,7 +1,8 @@
import React, { useState, useEffect } from 'react'; import React, { useState, useEffect } from 'react';
import { fetchSessions, deleteSession } from '../api/orchestration'; import { fetchSessions, deleteSession } from '../api/orchestration';
import { API_DEFAULTS } from '../config/constants';
const PAGE_SIZE = 20; const PAGE_SIZE = API_DEFAULTS.PAGE_SIZE;
export default function AllChatsView({ onSelectSession }) { export default function AllChatsView({ onSelectSession }) {
const [sessions, setSessions] = useState([]); const [sessions, setSessions] = useState([]);

View File

@@ -11,4 +11,9 @@ export const API_DEFAULTS = {
SESSIONS_LIMIT: 20, SESSIONS_LIMIT: 20,
HISTORY_LIMIT: 50, HISTORY_LIMIT: 50,
OFFSET: 0, OFFSET: 0,
EPISODE_LIMIT: 50,
}
export const CLIENT_DEFAULTS = {
PAGE_SIZE: 20,
} }

View File

@@ -3,20 +3,21 @@ const inference = require("../services/inference");
const embedding = require("../services/embedding"); const embedding = require("../services/embedding");
const qdrant = require("../services/qdrant"); const qdrant = require("../services/qdrant");
const { ORCHESTRATION } = require("@nexusai/shared"); const { ORCHESTRATION } = require("@nexusai/shared");
const appSettings = require("../config/settings");
const { RECENT_EPISODE_LIMIT, SEMANTIC_LIMIT, SCORE_THRESHOLD, SYSTEM_PROMPT } = const { SYSTEM_PROMPT } = ORCHESTRATION;
ORCHESTRATION;
function buildPrompt(recentEpisodes, semanticEpisodes, entities, userMessage) { function buildPrompt(recentEpisodes, semanticEpisodes, entities, userMessage) {
const parts = [SYSTEM_PROMPT]; const parts = [SYSTEM_PROMPT];
if (entities.length > 0) { if (entities.length > 0) {
parts.push('Here is what you know about entities relevant to this conversation:'); parts.push(
for (const e of entities) { "Here is what you know about entities relevant to this conversation:",
parts.push(`- ${e.name} (${e.type}): ${e.notes}`); );
} for (const e of entities) {
parts.push('---'); parts.push(`- ${e.name} (${e.type}): ${e.notes}`);
} }
parts.push("---");
}
if (semanticEpisodes.length > 0) { if (semanticEpisodes.length > 0) {
parts.push("Here are some relevant memories from earlier conversations:"); parts.push("Here are some relevant memories from earlier conversations:");
@@ -80,12 +81,13 @@ async function getSemanticEpisodes(
sessionId, sessionId,
recentIds, recentIds,
projectSessionIds = null, projectSessionIds = null,
{ semanticLimit, scoreThreshold } = {},
) { ) {
try { try {
const vector = await embedding.embed(userMessage); const vector = await embedding.embed(userMessage);
const results = await qdrant.searchEpisodes(vector, { const results = await qdrant.searchEpisodes(vector, {
limit: SEMANTIC_LIMIT, limit: semanticLimit,
scoreThreshold: SCORE_THRESHOLD, scoreThreshold: scoreThreshold,
sessionId: projectSessionIds ? null : sessionId, sessionId: projectSessionIds ? null : sessionId,
projectSessionIds, projectSessionIds,
}); });
@@ -106,45 +108,60 @@ async function getSemanticEpisodes(
} }
async function getRelevantEntities(userMessage) { async function getRelevantEntities(userMessage) {
try { try {
const vector = await embedding.embed(userMessage); const vector = await embedding.embed(userMessage);
const results = await qdrant.searchEntities(vector); const results = await qdrant.searchEntities(vector);
console.log('[orchestration] Entity search results:', console.log(
results.map(r => ({ name: r.payload?.name, score: r.score })) "[orchestration] Entity search results:",
); results.map((r) => ({ name: r.payload?.name, score: r.score })),
return results.map(r => r.payload).filter(Boolean); );
} catch (err) { return results.map((r) => r.payload).filter(Boolean);
console.warn('[orchestration] Entity search failed, continuing without:', err.message); } catch (err) {
return []; console.warn(
} "[orchestration] Entity search failed, continuing without:",
err.message,
);
return [];
}
} }
async function chat(externalId, userMessage, options = {}) { async function chat(externalId, userMessage, options = {}) {
const { recentEpisodeLimit, semanticLimit, scoreThreshold } =
appSettings.load();
// 1. Resolve or create session // 1. Resolve or create session
let session = await memory.getSessionByExternalId(externalId); let session = await memory.getSessionByExternalId(externalId);
if (!session) session = await memory.createSession(externalId); if (!session) session = await memory.createSession(externalId);
let projectSessionIds = null; let projectSessionIds = null;
if (session.project_id) { if (session.project_id) {
try { try {
const project = await memory.getProject(session.project_id); const project = await memory.getProject(session.project_id);
if (project) { if (project) {
const projectSessions = await memory.getProjectSessions(session.project_id); const projectSessions = await memory.getProjectSessions(
projectSessionIds = projectSessions.map(s => s.id); session.project_id,
if (project.isolated === 1) { );
console.log(`[orchestration] Isolated project — restricting to ${projectSessionIds.length} sessions`); projectSessionIds = projectSessions.map((s) => s.id);
} else { if (project.isolated === 1) {
console.log(`[orchestration] Non-isolated project — expanding search to ${projectSessionIds.length} sessions`); console.log(
`[orchestration] Isolated project — restricting to ${projectSessionIds.length} sessions`,
);
} else {
console.log(
`[orchestration] Non-isolated project — expanding search to ${projectSessionIds.length} sessions`,
);
}
} }
} catch (err) {
console.warn(
"[orchestration] Failed to resolve project context:",
err.message,
);
} }
} catch (err) {
console.warn('[orchestration] Failed to resolve project context:', err.message);
} }
}
// 2. Fetch recent episodes for context // 2. Fetch recent episodes for context
const recentEpisodes = await memory.getRecentEpisodes( const recentEpisodes = await memory.getRecentEpisodes(
session.id, session.id,
RECENT_EPISODE_LIMIT, recentEpisodeLimit,
); );
const isFirstMessage = recentEpisodes.length === 0; const isFirstMessage = recentEpisodes.length === 0;
const recentIds = new Set(recentEpisodes.map((e) => e.id)); const recentIds = new Set(recentEpisodes.map((e) => e.id));
@@ -154,14 +171,20 @@ if (session.project_id) {
userMessage, userMessage,
session.id, session.id,
recentIds, recentIds,
projectSessionIds projectSessionIds,
{ semanticLimit, scoreThreshold },
); );
// 3b. Entity Search // 3b. Entity Search
const entities = await getRelevantEntities(userMessage) const entities = await getRelevantEntities(userMessage);
// 4. Assemble prompt // 4. Assemble prompt
const prompt = buildPrompt(recentEpisodes, semanticEpisodes, entities, userMessage); const prompt = buildPrompt(
recentEpisodes,
semanticEpisodes,
entities,
userMessage,
);
// 5. Run inference // 5. Run inference
const result = await inference.complete(prompt, options); const result = await inference.complete(prompt, options);
@@ -193,118 +216,131 @@ if (session.project_id) {
} }
async function chatStream(externalId, userMessage, onChunk, options = {}) { async function chatStream(externalId, userMessage, onChunk, options = {}) {
console.log('[orchestration] chatStream entry');
try { try {
const { recentEpisodeLimit, semanticLimit, scoreThreshold } = appSettings.load();
let session = await memory.getSessionByExternalId(externalId);
if (!session) session = await memory.createSession(externalId);
console.log("[orchestration] chatStream called:", { let projectSessionIds = null;
externalId, if (session.project_id) {
userMessage: userMessage.slice(0, 50),
});
let session = await memory.getSessionByExternalId(externalId);
if (!session) session = await memory.createSession(externalId);
let projectSessionIds = null;
if (session.project_id) {
try {
const project = await memory.getProject(session.project_id);
if (project) {
const projectSessions = await memory.getProjectSessions(session.project_id);
projectSessionIds = projectSessions.map(s => s.id);
if (project.isolated === 1) {
console.log(`[orchestration] Isolated project — restricting to ${projectSessionIds.length} sessions`);
} else {
console.log(`[orchestration] Non-isolated project — expanding search to ${projectSessionIds.length} sessions`);
}
}
} catch (err) {
console.warn('[orchestration] Failed to resolve project context:', err.message);
}
}
const recentEpisodes = await memory.getRecentEpisodes(
session.id,
RECENT_EPISODE_LIMIT,
);
const isFirstMessage = recentEpisodes.length === 0;
const recentIds = new Set(recentEpisodes.map((e) => e.id));
const semanticEpisodes = await getSemanticEpisodes(
userMessage,
session.id,
recentIds,
projectSessionIds
);
const entities = await getRelevantEntities(userMessage);
const prompt = buildPrompt(recentEpisodes, semanticEpisodes, entities, userMessage);
const res = await inference.completeStream(prompt, options);
let fullText = "";
let model = "";
let tokenCount = 0;
let buffer = "";
for await (const chunk of res.body) {
buffer += Buffer.from(chunk).toString("utf8");
const events = buffer.split("\n\n");
buffer = events.pop() || "";
for (const event of events) {
const lines = event.split("\n");
const dataLines = lines
.filter((line) => line.startsWith("data: "))
.map((line) => line.slice(6));
if (dataLines.length === 0) continue;
const raw = dataLines.join("\n").trim();
if (raw === "[DONE]") continue;
try { try {
const data = JSON.parse(raw); const project = await memory.getProject(session.project_id);
if (project) {
if (data.response) { const projectSessions = await memory.getProjectSessions(
fullText += data.response; session.project_id,
onChunk(data.response); );
} projectSessionIds = projectSessions.map((s) => s.id);
if (project.isolated === 1) {
if (data.model) model = data.model; console.log(
if (data.done && data.tokenCount !== undefined) { `[orchestration] Isolated project — restricting to ${projectSessionIds.length} sessions`,
tokenCount = data.tokenCount; );
} } else {
console.log(
if (data.error) { `[orchestration] Non-isolated project — expanding search to ${projectSessionIds.length} sessions`,
throw new Error(data.error); );
}
} }
} catch (err) { } catch (err) {
console.error( console.warn(
"[orchestration] Failed to parse inference SSE event:", "[orchestration] Failed to resolve project context:",
raw,
err.message, err.message,
); );
} }
} }
}
console.log("[orchestration] final streamed text length:", fullText.length); const recentEpisodes = await memory.getRecentEpisodes(
session.id,
if (fullText.trim()) { recentEpisodeLimit,
await memory.createEpisode(session.id, userMessage, fullText, tokenCount); );
} else { const isFirstMessage = recentEpisodes.length === 0;
console.warn( const recentIds = new Set(recentEpisodes.map((e) => e.id));
"[orchestration] Stream finished with no assistant text; episode not saved", const semanticEpisodes = await getSemanticEpisodes(
userMessage,
session.id,
recentIds,
projectSessionIds,
{semanticLimit, scoreThreshold }
); );
}
if (isFirstMessage && !session.name) { const entities = await getRelevantEntities(userMessage);
autoNameSession(externalId, userMessage, fullText).catch(() => {});
}
return { model, tokenCount }; const prompt = buildPrompt(
} catch (err) { recentEpisodes,
console.error('[orchestration] chatStream fatal error:', err.message, err.stack); semanticEpisodes,
entities,
userMessage,
);
const res = await inference.completeStream(prompt, options);
let fullText = "";
let model = "";
let tokenCount = 0;
let buffer = "";
for await (const chunk of res.body) {
buffer += Buffer.from(chunk).toString("utf8");
const events = buffer.split("\n\n");
buffer = events.pop() || "";
for (const event of events) {
const lines = event.split("\n");
const dataLines = lines
.filter((line) => line.startsWith("data: "))
.map((line) => line.slice(6));
if (dataLines.length === 0) continue;
const raw = dataLines.join("\n").trim();
if (raw === "[DONE]") continue;
try {
const data = JSON.parse(raw);
if (data.response) {
fullText += data.response;
onChunk(data.response);
}
if (data.model) model = data.model;
if (data.done && data.tokenCount !== undefined) {
tokenCount = data.tokenCount;
}
if (data.error) {
throw new Error(data.error);
}
} catch (err) {
console.error(
"[orchestration] Failed to parse inference SSE event:",
raw,
err.message,
);
}
}
}
console.log("[orchestration] final streamed text length:", fullText.length);
if (fullText.trim()) {
await memory.createEpisode(session.id, userMessage, fullText, tokenCount);
} else {
console.warn(
"[orchestration] Stream finished with no assistant text; episode not saved",
);
}
if (isFirstMessage && !session.name) {
autoNameSession(externalId, userMessage, fullText).catch(() => {});
}
return { model, tokenCount };
} catch (err) {
console.error(
"[orchestration] chatStream fatal error:",
err.message,
err.stack,
);
throw err; throw err;
} }
} }

View File

@@ -0,0 +1,30 @@
const fs = require('fs');
const path = require('path');
const { ORCHESTRATION } = require('@nexusai/shared');
const SETTINGS_PATH = path.join(__dirname, '../../data/settings.json');
const DEFAULTS = {
recentEpisodeLimit: ORCHESTRATION.RECENT_EPISODE_LIMIT,
semanticLimit: ORCHESTRATION.SEMANTIC_LIMIT,
scoreThreshold: ORCHESTRATION.SCORE_THRESHOLD,
};
/**
 * Read the persisted settings and overlay them on DEFAULTS.
 *
 * Never throws: any failure to read or parse the settings file falls back to
 * the defaults so callers always receive a complete settings object.
 *
 * @returns {object} A fresh object holding every DEFAULTS key, overridden by
 *   the keys found in the settings file (when it holds a JSON object).
 */
function load() {
  let raw;
  try {
    raw = fs.readFileSync(SETTINGS_PATH, 'utf8');
  } catch (err) {
    // A missing file is the normal state before the first save(); any other
    // read failure is unexpected and should not be hidden.
    if (err.code !== 'ENOENT') {
      console.warn('[settings] Failed to read settings file, using defaults:', err.message);
    }
    return { ...DEFAULTS };
  }

  let stored;
  try {
    stored = JSON.parse(raw);
  } catch (err) {
    console.warn('[settings] Settings file is not valid JSON, using defaults:', err.message);
    return { ...DEFAULTS };
  }

  // Spreading an array or string would inject index keys ("0", "1", ...) into
  // the result, so only a plain JSON object is accepted as overrides.
  if (stored === null || typeof stored !== 'object' || Array.isArray(stored)) {
    console.warn('[settings] Settings file does not contain a JSON object, using defaults');
    return { ...DEFAULTS };
  }

  return { ...DEFAULTS, ...stored };
}
/**
 * Persist a partial settings update.
 *
 * Shallow-merges `updates` over the result of load(), makes sure the directory
 * holding the settings file exists, and writes the merged object to
 * SETTINGS_PATH as 2-space-indented JSON.
 *
 * @param {object} updates - Keys to overwrite in the stored settings.
 * @returns {object} The merged settings object that was written.
 */
function save(updates) {
  const merged = { ...load(), ...updates };
  const targetDir = path.dirname(SETTINGS_PATH);

  fs.mkdirSync(targetDir, { recursive: true });

  const serialized = JSON.stringify(merged, null, 2);
  fs.writeFileSync(SETTINGS_PATH, serialized);

  return merged;
}
module.exports = { load, save, DEFAULTS };

View File

@@ -8,6 +8,7 @@ const sessionsRouter = require('./routes/sessions');
const modelsRouter = require('./routes/models'); const modelsRouter = require('./routes/models');
const projectsRouter = require('./routes/projects'); const projectsRouter = require('./routes/projects');
const episodesRouter = require('./routes/episodes') const episodesRouter = require('./routes/episodes')
const settingsRouter = require('./routes/settings')
const cors = require('cors'); const cors = require('cors');
@@ -43,7 +44,8 @@ app.use('/chat', chatRouter);
app.use('/sessions', sessionsRouter); app.use('/sessions', sessionsRouter);
app.use('/models', modelsRouter); app.use('/models', modelsRouter);
app.use('/projects', projectsRouter); app.use('/projects', projectsRouter);
app.use('/episodes', episodesRouter) app.use('/episodes', episodesRouter);
app.use('/settings', settingsRouter);
/******* Start the server ************/ /******* Start the server ************/
app.listen(PORT, () => { app.listen(PORT, () => {

View File

@@ -0,0 +1,38 @@
const { Router } = require('express');
const settings = require('../config/settings');
const router = Router();
// GET / on this router — respond with the settings returned by settings.load().
router.get('/', (_req, res) => {
  const current = settings.load();
  res.json(current);
});
/**
 * Convert a request-body value to a number without the silent coercions that
 * `Number()` applies to non-numeric input (`null`, `''` and `[]` become 0,
 * `true` becomes 1), which would otherwise slip past the range checks.
 *
 * @param {unknown} value - Raw value taken from the request body.
 * @returns {number} The numeric value, or NaN when `value` is neither a number
 *   nor a non-blank string.
 */
function toSettingNumber(value) {
  if (typeof value === 'number') return value;
  if (typeof value === 'string' && value.trim() !== '') return Number(value);
  return NaN;
}

/**
 * PATCH / on this router — validate and persist a partial settings update.
 *
 * Accepted body keys (all optional):
 *   - recentEpisodeLimit: integer in [1, 20]
 *   - semanticLimit:      integer in [1, 20]
 *   - scoreThreshold:     number in [0, 1]
 *
 * Responds 400 with `{ error }` on the first invalid key; otherwise responds
 * with the object returned by settings.save(). Keys not listed are ignored.
 */
router.patch('/', (req, res) => {
  // Guard against a missing body so destructuring cannot throw.
  const { recentEpisodeLimit, semanticLimit, scoreThreshold } = req.body ?? {};
  const updates = {};

  if (recentEpisodeLimit !== undefined) {
    const val = toSettingNumber(recentEpisodeLimit);
    if (!Number.isInteger(val) || val < 1 || val > 20) {
      return res.status(400).json({ error: 'recentEpisodeLimit must be 1-20' });
    }
    updates.recentEpisodeLimit = val;
  }

  if (semanticLimit !== undefined) {
    const val = toSettingNumber(semanticLimit);
    if (!Number.isInteger(val) || val < 1 || val > 20) {
      return res.status(400).json({ error: 'semanticLimit must be 1-20' });
    }
    updates.semanticLimit = val;
  }

  if (scoreThreshold !== undefined) {
    const val = toSettingNumber(scoreThreshold);
    // NaN fails both comparisons below, so it needs its own check.
    if (Number.isNaN(val) || val < 0 || val > 1) {
      return res.status(400).json({ error: 'scoreThreshold must be 0-1' });
    }
    updates.scoreThreshold = val;
  }

  res.json(settings.save(updates));
});
module.exports = router;