summary system backend implementation
This commit is contained in:
@@ -4,6 +4,7 @@ const embedding = require("../services/embedding");
|
||||
const qdrant = require("../services/qdrant");
|
||||
const { ORCHESTRATION } = require("@nexusai/shared");
|
||||
const appSettings = require("../config/settings");
|
||||
const {triggerSummary} = require('../services/summarization')
|
||||
|
||||
function buildPrompt(recentEpisodes, semanticEpisodes, entities, userMessage, systemPrompt) {
|
||||
const parts = [systemPrompt ?? ORCHESTRATION.SYSTEM_PROMPT];
|
||||
@@ -193,12 +194,18 @@ async function chat(externalId, userMessage, options = {}) {
|
||||
console.error(`[orchestration] Failed to save episode`, err.message),
|
||||
);
|
||||
|
||||
// 7. Auto-name on first message
|
||||
// 7. Trigger summarization check (fire-and-forget)
|
||||
// Pass full episodes list so summarization can sum tokens accurately
|
||||
const allEpisodes = await memory.getRecentEpisodes(session.id, 9999);
|
||||
triggerSummary(session, allEpisodes);
|
||||
|
||||
|
||||
// 8. Auto-name on first message
|
||||
if (isFirstMessage && !session.name) {
|
||||
autoNameSession(externalId, userMessage, result.text).catch(() => {}); // already logged inside autoNameSession
|
||||
}
|
||||
|
||||
// 8. Return response
|
||||
// 9. Return response
|
||||
return {
|
||||
sessionId: externalId,
|
||||
response: result.text,
|
||||
@@ -311,6 +318,8 @@ async function chatStream(externalId, userMessage, onChunk, options = {}) {
|
||||
|
||||
if (fullText.trim()) {
|
||||
await memory.createEpisode(session.id, userMessage, fullText, tokenCount, session.project_id ?? null);
|
||||
const allEpisodes = await memory.getRecentEpisodes(session.id, 9999);
|
||||
triggerSummary(session, allEpisodes);
|
||||
} else {
|
||||
console.warn(
|
||||
"[orchestration] Stream finished with no assistant text; episode not saved",
|
||||
|
||||
@@ -150,6 +150,32 @@ async function deleteEpisode(id) {
|
||||
if (!res.ok) throw new Error(`Failed to delete episode: ${res.status}`);
|
||||
}
|
||||
|
||||
async function getSummariesBySession(sessionId) {
|
||||
const res = await fetch(`${BASE_URL}/sessions/${sessionId}/summaries`);
|
||||
if (!res.ok) throw new Error(`Failed to fetch summaries: ${res.status}`);
|
||||
return res.json();
|
||||
}
|
||||
|
||||
async function createSummary({ sessionId, projectId, content, tokenCount, episodeRange }) {
|
||||
const res = await fetch(`${BASE_URL}/summaries`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ sessionId, projectId, content, tokenCount, episodeRange }),
|
||||
});
|
||||
if (!res.ok) throw new Error(`Failed to create summary: ${res.status}`);
|
||||
return res.json();
|
||||
}
|
||||
|
||||
async function updateSummary(id, { content, tokenCount, episodeRange }) {
|
||||
const res = await fetch(`${BASE_URL}/summaries/${id}`, {
|
||||
method: 'PATCH',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ content, tokenCount, episodeRange }),
|
||||
});
|
||||
if (!res.ok) throw new Error(`Failed to update summary: ${res.status}`);
|
||||
return res.json();
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
getSessionByExternalId,
|
||||
createSession,
|
||||
@@ -168,4 +194,7 @@ module.exports = {
|
||||
getProject,
|
||||
getEpisodes,
|
||||
deleteEpisode,
|
||||
getSummariesBySession,
|
||||
createSummary,
|
||||
updateSummary,
|
||||
}
|
||||
129
packages/orchestration-service/src/services/summarization.js
Normal file
129
packages/orchestration-service/src/services/summarization.js
Normal file
@@ -0,0 +1,129 @@
|
||||
const { getEnv, SERVICES, SUMMARIES } = require('@nexusai/shared');
|
||||
|
||||
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');
|
||||
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b');
|
||||
const MEMORY_URL = getEnv('MEMORY_SERVICE_URL', SERVICES.MEMORY_URL);
|
||||
|
||||
const THRESHOLD_TOKENS = parseInt(getEnv('SUMMARY_THRESHOLD_TOKENS', SUMMARIES.THRESHOLD_TOKENS));
|
||||
const MAX_SUMMARY_TOKENS = parseInt(getEnv('SUMMARY_MAX_TOKENS', SUMMARIES.MAX_SUMMARY_TOKENS));
|
||||
const MIN_EPISODES_SINCE = parseInt(getEnv('SUMMARY_MIN_EPISODES', SUMMARIES.MIN_EPISODES_SINCE));
|
||||
|
||||
function buildSummaryPrompt(episodes, existingSummary = null) {
|
||||
const context = episodes
|
||||
.map(ep => `User: ${ep.user_message}\nAssistant: ${ep.ai_response}`)
|
||||
.join('\n\n');
|
||||
|
||||
const instruction = existingSummary
|
||||
? `You have a previous summary of this conversation. Update it to include the new exchanges below. Keep it concise — plain prose, no markdown, no headers, third person, max 200 words.
|
||||
|
||||
Previous summary:
|
||||
${existingSummary}
|
||||
|
||||
New exchanges to incorporate:`
|
||||
: `Summarise this conversation. Be concise — plain prose, no markdown, no headers, third person, max 200 words. Preserve key decisions, facts, named entities, and unresolved questions.
|
||||
|
||||
Conversation:`;
|
||||
|
||||
return [
|
||||
'<|im_start|>system',
|
||||
'You are a conversation summarisation assistant. You write concise, factual summaries for long-term memory storage. Output only the summary text — no preamble, no labels.<|im_end|>',
|
||||
'<|im_start|>user',
|
||||
instruction,
|
||||
'',
|
||||
context,
|
||||
'<|im_end|>',
|
||||
'<|im_start|>assistant',
|
||||
].join('\n');
|
||||
}
|
||||
|
||||
async function generateSummary(episodes, existingSummary = null) {
|
||||
const prompt = buildSummaryPrompt(episodes, existingSummary);
|
||||
|
||||
const res = await fetch(`${EXTRACTION_URL}/api/generate`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
model: EXTRACTION_MODEL,
|
||||
prompt,
|
||||
stream: false,
|
||||
options: {
|
||||
temperature: 0.3, // slightly higher than entities — summaries benefit from some fluency
|
||||
num_predict: 300, // generous but bounded — keeps summaries from running long
|
||||
},
|
||||
}),
|
||||
});
|
||||
|
||||
if (!res.ok) throw new Error(`Ollama responded ${res.status}`);
|
||||
const data = await res.json();
|
||||
return data.response?.trim() ?? '';
|
||||
}
|
||||
|
||||
async function maybeSummarize(session, allEpisodes) {
|
||||
// 1. Sum total tokens for this session
|
||||
const totalTokens = allEpisodes.reduce((sum, ep) => sum + (ep.token_count || 0), 0);
|
||||
if (totalTokens < THRESHOLD_TOKENS) return; // under threshold — nothing to do
|
||||
|
||||
// 2. Fetch existing summaries for session
|
||||
const summariesRes = await fetch(`${MEMORY_URL}/sessions/${session.id}/summaries`);
|
||||
if (!summariesRes.ok) return;
|
||||
const summaries = await summariesRes.json();
|
||||
|
||||
const latest = summaries.at(-1) ?? null;
|
||||
|
||||
// 3. Guard — don't re-summarize until MIN_EPISODES_SINCE new episodes have accumulated
|
||||
if (latest) {
|
||||
const lastEpisodeRange = latest.episode_range ?? '0';
|
||||
const lastCoveredId = parseInt(lastEpisodeRange.split('-').at(-1)) || 0;
|
||||
const newEpisodes = allEpisodes.filter(ep => ep.id > lastCoveredId);
|
||||
if (newEpisodes.length < MIN_EPISODES_SINCE) return;
|
||||
}
|
||||
|
||||
// 4. Determine episode range string e.g. "1-42"
|
||||
const ids = allEpisodes.map(ep => ep.id);
|
||||
const episodeRange = `${ids.at(0)}-${ids.at(-1)}`;
|
||||
const totalEpisodeTokens = allEpisodes.reduce((sum, ep) => sum + (ep.token_count || 0), 0);
|
||||
|
||||
// 5. Generate summary — pass existing content if updating
|
||||
const content = await generateSummary(
|
||||
allEpisodes,
|
||||
latest && latest.content.length < MAX_SUMMARY_TOKENS ? latest.content : null
|
||||
// if existing summary is already large, treat as fresh rather than appending to a huge blob
|
||||
);
|
||||
|
||||
if (!content) return;
|
||||
|
||||
// 6. Create new row or update existing
|
||||
if (!latest || latest.content.length >= MAX_SUMMARY_TOKENS) {
|
||||
await fetch(`${MEMORY_URL}/summaries`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
sessionId: session.id,
|
||||
content,
|
||||
tokenCount: totalEpisodeTokens,
|
||||
episodeRange,
|
||||
}),
|
||||
});
|
||||
console.log(`[summarization] Created new summary for session ${session.id}`);
|
||||
} else {
|
||||
await fetch(`${MEMORY_URL}/summaries/${latest.id}`, {
|
||||
method: 'PATCH',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
content,
|
||||
tokenCount: totalEpisodeTokens,
|
||||
episodeRange,
|
||||
}),
|
||||
});
|
||||
console.log(`[summarization] Updated summary ${latest.id} for session ${session.id}`);
|
||||
}
|
||||
}
|
||||
|
||||
async function triggerSummary(session, allEpisodes) {
|
||||
// Intentionally fire-and-forget — caller doesn't await this
|
||||
maybeSummarize(session, allEpisodes).catch(err =>
|
||||
console.warn('[summarization] Summary failed (non-critical):', err.message)
|
||||
);
|
||||
}
|
||||
|
||||
module.exports = { triggerSummary };
|
||||
@@ -69,6 +69,11 @@ const SQLITE = {
|
||||
DEFAULT_PATH: './data/nexusai.db'
|
||||
}
|
||||
|
||||
const SUMMARIES = {
|
||||
THRESHOLD_TOKENS: 5000, //trigger summary when session hits this many tokens
|
||||
MAX_SUMMARY_TOKENS: 800, //if existing summary exceeds this, create new instead of update
|
||||
MIN_EPISODES_SINCE: 5, // don't resummarize until N new episodes since last summary
|
||||
}
|
||||
module.exports = {
|
||||
QDRANT,
|
||||
COLLECTIONS,
|
||||
@@ -79,5 +84,6 @@ module.exports = {
|
||||
LLAMACPP,
|
||||
INFERENCE_DEFAULTS,
|
||||
SQLITE,
|
||||
ORCHESTRATION
|
||||
ORCHESTRATION,
|
||||
SUMMARIES
|
||||
};
|
||||
@@ -1,5 +1,5 @@
|
||||
const {getEnv} = require('./config/env');
|
||||
const {QDRANT, COLLECTIONS, EPISODIC, SERVICES, OLLAMA, PORTS, LLAMACPP, INFERENCE_DEFAULTS, SQLITE, ORCHESTRATION } = require('./config/constants');
|
||||
const {QDRANT, COLLECTIONS, EPISODIC, SERVICES, OLLAMA, PORTS, LLAMACPP, INFERENCE_DEFAULTS, SQLITE, ORCHESTRATION, SUMMARIES } = require('./config/constants');
|
||||
const {parseRow, formatEpisodeText} = require('./utils')
|
||||
|
||||
module.exports = {
|
||||
@@ -16,4 +16,5 @@ module.exports = {
|
||||
ORCHESTRATION,
|
||||
parseRow,
|
||||
formatEpisodeText,
|
||||
SUMMARIES,
|
||||
};
|
||||
Reference in New Issue
Block a user