From 4cc87d96b63401265558dcdc295dfdfc294b2930 Mon Sep 17 00:00:00 2001 From: Storme-bit Date: Sun, 19 Apr 2026 07:19:27 -0700 Subject: [PATCH] summary system backend implementation --- .../orchestration-service/src/chat/index.js | 13 +- .../src/services/memory.js | 29 ++++ .../src/services/summarization.js | 129 ++++++++++++++++++ packages/shared/src/config/constants.js | 8 +- packages/shared/src/index.js | 3 +- 5 files changed, 178 insertions(+), 4 deletions(-) create mode 100644 packages/orchestration-service/src/services/summarization.js diff --git a/packages/orchestration-service/src/chat/index.js b/packages/orchestration-service/src/chat/index.js index d5ffbb2..e525315 100644 --- a/packages/orchestration-service/src/chat/index.js +++ b/packages/orchestration-service/src/chat/index.js @@ -4,6 +4,7 @@ const embedding = require("../services/embedding"); const qdrant = require("../services/qdrant"); const { ORCHESTRATION } = require("@nexusai/shared"); const appSettings = require("../config/settings"); +const {triggerSummary} = require('../services/summarization') function buildPrompt(recentEpisodes, semanticEpisodes, entities, userMessage, systemPrompt) { const parts = [systemPrompt ?? ORCHESTRATION.SYSTEM_PROMPT]; @@ -193,12 +194,18 @@ async function chat(externalId, userMessage, options = {}) { console.error(`[orchestration] Failed to save episode`, err.message), ); - // 7. Auto-name on first message + // 7. Trigger summarization check (fire-and-forget) + // Pass full episodes list so summarization can sum tokens accurately + const allEpisodes = await memory.getRecentEpisodes(session.id, 9999); + triggerSummary(session, allEpisodes); + + + // 8. Auto-name on first message if (isFirstMessage && !session.name) { autoNameSession(externalId, userMessage, result.text).catch(() => {}); // already logged inside autoNameSession } - // 8. Return response + // 9. Return response return { sessionId: externalId, response: result.text, @@ -311,6 +318,8 @@ async function chatStream(externalId, userMessage, onChunk, options = {}) { if (fullText.trim()) { await memory.createEpisode(session.id, userMessage, fullText, tokenCount, session.project_id ?? null); + const allEpisodes = await memory.getRecentEpisodes(session.id, 9999); + triggerSummary(session, allEpisodes); } else { console.warn( "[orchestration] Stream finished with no assistant text; episode not saved", diff --git a/packages/orchestration-service/src/services/memory.js b/packages/orchestration-service/src/services/memory.js index dc2697b..749096b 100644 --- a/packages/orchestration-service/src/services/memory.js +++ b/packages/orchestration-service/src/services/memory.js @@ -150,6 +150,32 @@ async function deleteEpisode(id) { if (!res.ok) throw new Error(`Failed to delete episode: ${res.status}`); } +async function getSummariesBySession(sessionId) { + const res = await fetch(`${BASE_URL}/sessions/${sessionId}/summaries`); + if (!res.ok) throw new Error(`Failed to fetch summaries: ${res.status}`); + return res.json(); +} + +async function createSummary({ sessionId, projectId, content, tokenCount, episodeRange }) { + const res = await fetch(`${BASE_URL}/summaries`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ sessionId, projectId, content, tokenCount, episodeRange }), + }); + if (!res.ok) throw new Error(`Failed to create summary: ${res.status}`); + return res.json(); +} + +async function updateSummary(id, { content, tokenCount, episodeRange }) { + const res = await fetch(`${BASE_URL}/summaries/${id}`, { + method: 'PATCH', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ content, tokenCount, episodeRange }), + }); + if (!res.ok) throw new Error(`Failed to update summary: ${res.status}`); + return res.json(); +} + module.exports = { getSessionByExternalId, createSession, @@ -168,4 +194,7 @@ module.exports = { getProject, getEpisodes, deleteEpisode, + getSummariesBySession, + createSummary, + updateSummary, } \ No newline at end of file diff --git a/packages/orchestration-service/src/services/summarization.js b/packages/orchestration-service/src/services/summarization.js new file mode 100644 index 0000000..0ee9088 --- /dev/null +++ b/packages/orchestration-service/src/services/summarization.js @@ -0,0 +1,129 @@ +const { getEnv, SERVICES, SUMMARIES } = require('@nexusai/shared'); + +const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434'); +const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b'); +const MEMORY_URL = getEnv('MEMORY_SERVICE_URL', SERVICES.MEMORY_URL); + +const THRESHOLD_TOKENS = parseInt(getEnv('SUMMARY_THRESHOLD_TOKENS', SUMMARIES.THRESHOLD_TOKENS)); +const MAX_SUMMARY_TOKENS = parseInt(getEnv('SUMMARY_MAX_TOKENS', SUMMARIES.MAX_SUMMARY_TOKENS)); +const MIN_EPISODES_SINCE = parseInt(getEnv('SUMMARY_MIN_EPISODES', SUMMARIES.MIN_EPISODES_SINCE)); + +function buildSummaryPrompt(episodes, existingSummary = null) { + const context = episodes + .map(ep => `User: ${ep.user_message}\nAssistant: ${ep.ai_response}`) + .join('\n\n'); + + const instruction = existingSummary + ? `You have a previous summary of this conversation. Update it to include the new exchanges below. Keep it concise — plain prose, no markdown, no headers, third person, max 200 words. + +Previous summary: +${existingSummary} + +New exchanges to incorporate:` + : `Summarise this conversation. Be concise — plain prose, no markdown, no headers, third person, max 200 words. Preserve key decisions, facts, named entities, and unresolved questions. + +Conversation:`; + + return [ + '<|im_start|>system', + 'You are a conversation summarisation assistant. You write concise, factual summaries for long-term memory storage. Output only the summary text — no preamble, no labels.<|im_end|>', + '<|im_start|>user', + instruction, + '', + context, + '<|im_end|>', + '<|im_start|>assistant', + ].join('\n'); +} + +async function generateSummary(episodes, existingSummary = null) { + const prompt = buildSummaryPrompt(episodes, existingSummary); + + const res = await fetch(`${EXTRACTION_URL}/api/generate`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + model: EXTRACTION_MODEL, + prompt, + stream: false, + options: { + temperature: 0.3, // slightly higher than entities — summaries benefit from some fluency + num_predict: 300, // generous but bounded — keeps summaries from running long + }, + }), + }); + + if (!res.ok) throw new Error(`Ollama responded ${res.status}`); + const data = await res.json(); + return data.response?.trim() ?? ''; +} + +async function maybeSummarize(session, allEpisodes) { + // 1. Sum total tokens for this session + const totalTokens = allEpisodes.reduce((sum, ep) => sum + (ep.token_count || 0), 0); + if (totalTokens < THRESHOLD_TOKENS) return; // under threshold — nothing to do + + // 2. Fetch existing summaries for session + const summariesRes = await fetch(`${MEMORY_URL}/sessions/${session.id}/summaries`); + if (!summariesRes.ok) return; + const summaries = await summariesRes.json(); + + const latest = summaries.at(-1) ?? null; + + // 3. Guard — don't re-summarize until MIN_EPISODES_SINCE new episodes have accumulated + if (latest) { + const lastEpisodeRange = latest.episode_range ?? '0'; + const lastCoveredId = parseInt(lastEpisodeRange.split('-').at(-1)) || 0; + const newEpisodes = allEpisodes.filter(ep => ep.id > lastCoveredId); + if (newEpisodes.length < MIN_EPISODES_SINCE) return; + } + + // 4. Determine episode range string e.g. "1-42" + const ids = allEpisodes.map(ep => ep.id); + const episodeRange = `${ids.at(0)}-${ids.at(-1)}`; + const totalEpisodeTokens = allEpisodes.reduce((sum, ep) => sum + (ep.token_count || 0), 0); + + // 5. Generate summary — pass existing content if updating + const content = await generateSummary( + allEpisodes, + latest && latest.content.length < MAX_SUMMARY_TOKENS ? latest.content : null + // if existing summary is already large, treat as fresh rather than appending to a huge blob + ); + + if (!content) return; + + // 6. Create new row or update existing + if (!latest || latest.content.length >= MAX_SUMMARY_TOKENS) { + await fetch(`${MEMORY_URL}/summaries`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + sessionId: session.id, + content, + tokenCount: totalEpisodeTokens, + episodeRange, + }), + }); + console.log(`[summarization] Created new summary for session ${session.id}`); + } else { + await fetch(`${MEMORY_URL}/summaries/${latest.id}`, { + method: 'PATCH', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + content, + tokenCount: totalEpisodeTokens, + episodeRange, + }), + }); + console.log(`[summarization] Updated summary ${latest.id} for session ${session.id}`); + } +} + +async function triggerSummary(session, allEpisodes) { + // Intentionally fire-and-forget — caller doesn't await this + maybeSummarize(session, allEpisodes).catch(err => + console.warn('[summarization] Summary failed (non-critical):', err.message) + ); +} + +module.exports = { triggerSummary }; \ No newline at end of file diff --git a/packages/shared/src/config/constants.js b/packages/shared/src/config/constants.js index 0076869..7257037 100644 --- a/packages/shared/src/config/constants.js +++ b/packages/shared/src/config/constants.js @@ -69,6 +69,11 @@ const SQLITE = { DEFAULT_PATH: './data/nexusai.db' } +const SUMMARIES = { + THRESHOLD_TOKENS: 5000, //trigger summary when session hits this many tokens + MAX_SUMMARY_TOKENS: 800, //if existing summary exceeds this, create new instead of update + MIN_EPISODES_SINCE: 5, // don't resummarize until N new episodes since last summary +} module.exports = { QDRANT, COLLECTIONS, @@ -79,5 +84,6 @@ module.exports = { LLAMACPP, INFERENCE_DEFAULTS, SQLITE, - ORCHESTRATION + ORCHESTRATION, + SUMMARIES }; \ No newline at end of file diff --git a/packages/shared/src/index.js b/packages/shared/src/index.js index 6740237..20e2502 100644 --- a/packages/shared/src/index.js +++ b/packages/shared/src/index.js @@ -1,5 +1,5 @@ const {getEnv} = require('./config/env'); -const {QDRANT, COLLECTIONS, EPISODIC, SERVICES, OLLAMA, PORTS, LLAMACPP, INFERENCE_DEFAULTS, SQLITE, ORCHESTRATION } = require('./config/constants'); +const {QDRANT, COLLECTIONS, EPISODIC, SERVICES, OLLAMA, PORTS, LLAMACPP, INFERENCE_DEFAULTS, SQLITE, ORCHESTRATION, SUMMARIES } = require('./config/constants'); const {parseRow, formatEpisodeText} = require('./utils') module.exports = { @@ -16,4 +16,5 @@ module.exports = { ORCHESTRATION, parseRow, formatEpisodeText, + SUMMARIES, }; \ No newline at end of file