Files
nexusAI/packages/orchestration-service/src/services/summarization.js
2026-04-19 15:23:24 -07:00

132 lines
5.5 KiB
JavaScript

const { getEnv, SERVICES, SUMMARIES } = require('@nexusai/shared');
// Base URL and model name for the generation server that generateSummary calls.
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b');
// Base URL of the memory service that stores summaries.
const MEMORY_URL = getEnv('MEMORY_SERVICE_URL', SERVICES.MEMORY_URL);

/**
 * Read an integer setting via getEnv, always parsing as base 10.
 *
 * A value that does not parse (e.g. a typo in the environment) falls back to
 * the shared default instead of becoming NaN: every `<` / `>=` comparison
 * against NaN is false, which would silently disable the summarisation guards.
 *
 * NOTE(review): assumes getEnv(name, fallback) returns the configured value or
 * the supplied fallback — confirm against @nexusai/shared.
 *
 * @param {string} name - Setting name passed to getEnv.
 * @param {number|string} fallback - Default value from the shared SUMMARIES config.
 * @returns {number} The parsed integer, or the parsed fallback when the setting is not numeric.
 */
function readIntSetting(name, fallback) {
  const parsed = Number.parseInt(getEnv(name, fallback), 10);
  return Number.isNaN(parsed) ? Number.parseInt(fallback, 10) : parsed;
}

// Session token total below which maybeSummarize does nothing.
const THRESHOLD_TOKENS = readIntSetting('SUMMARY_THRESHOLD_TOKENS', SUMMARIES.THRESHOLD_TOKENS);
// Size limit for an existing summary; at or above it a new summary row is started.
const MAX_SUMMARY_TOKENS = readIntSetting('SUMMARY_MAX_TOKENS', SUMMARIES.MAX_SUMMARY_TOKENS);
// Minimum number of episodes newer than the last summary before re-summarising.
const MIN_EPISODES_SINCE = readIntSetting('SUMMARY_MIN_EPISODES', SUMMARIES.MIN_EPISODES_SINCE);
/**
 * Assemble the raw prompt text sent to the summarisation model.
 *
 * Each episode is rendered as a "User: … / Assistant: …" pair, pairs are
 * separated by a blank line, and the result is wrapped in `<|im_start|>` /
 * `<|im_end|>` turn markers with the assistant turn left open for the model.
 *
 * @param {Array<{user_message: *, ai_response: *}>} episodes - Exchanges to render, in order.
 * @param {?string} [existingSummary=null] - Earlier summary to update. Any falsy
 *   value selects the "summarise from scratch" instruction instead.
 * @returns {string} The newline-joined prompt.
 */
function buildSummaryPrompt(episodes, existingSummary = null) {
  const renderExchange = (episode) =>
    `User: ${episode.user_message}\nAssistant: ${episode.ai_response}`;
  const transcript = episodes.map(renderExchange).join('\n\n');

  // The template literals below are multi-line on purpose: their continuation
  // lines must stay at column 0 so no indentation leaks into the prompt.
  let instruction;
  if (existingSummary) {
    instruction = `You have a previous summary of this conversation. Update it to include the new exchanges below. Keep it concise — plain prose, no markdown, no headers, third person, max 200 words.
Previous summary:
${existingSummary}
New exchanges to incorporate:`;
  } else {
    instruction = `Summarise this conversation. Be concise — plain prose, no markdown, no headers, third person, max 200 words. Preserve key decisions, facts, named entities, and unresolved questions.
Conversation:`;
  }

  const systemTurn = [
    '<|im_start|>system',
    'You are a conversation summarisation assistant. You write concise, factual summaries for long-term memory storage. Output only the summary text — no preamble, no labels.<|im_end|>',
  ];
  // The empty entry produces the blank line between instruction and transcript.
  const userTurn = ['<|im_start|>user', instruction, '', transcript, '<|im_end|>'];

  return [...systemTurn, ...userTurn, '<|im_start|>assistant'].join('\n');
}
/**
 * Ask the generation server for a summary of the given episodes.
 *
 * Builds the prompt with buildSummaryPrompt, POSTs it to
 * `${EXTRACTION_URL}/api/generate` as a non-streaming request, and returns the
 * trimmed `response` field of the JSON reply.
 *
 * @param {Array<object>} episodes - Exchanges forwarded to buildSummaryPrompt.
 * @param {?string} [existingSummary=null] - Earlier summary forwarded to buildSummaryPrompt.
 * @returns {Promise<string>} The summary text, or '' when the reply has no `response` field.
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function generateSummary(episodes, existingSummary = null) {
  const prompt = buildSummaryPrompt(episodes, existingSummary);

  const requestPayload = {
    model: EXTRACTION_MODEL,
    prompt,
    stream: false,
    options: {
      // A little above the entity-extraction setting so the prose reads naturally.
      temperature: 0.3,
      // Hard cap on generated tokens so summaries cannot run long.
      num_predict: 300,
    },
  };

  const reply = await fetch(`${EXTRACTION_URL}/api/generate`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(requestPayload),
  });

  if (!reply.ok) {
    throw new Error(`Ollama responded ${reply.status}`);
  }

  const parsed = await reply.json();
  const generated = parsed.response;
  if (generated === null || generated === undefined) {
    return '';
  }
  return generated.trim() ?? '';
}
/**
 * Create or refresh the stored summary for a session once enough conversation
 * has accumulated.
 *
 * Flow:
 *  1. Sum `token_count` over all episodes; stop if below THRESHOLD_TOKENS.
 *  2. Fetch the session's existing summaries; stop if that request fails.
 *  3. If a summary exists, stop unless at least MIN_EPISODES_SINCE episodes
 *     have ids above the last id in its `episode_range`.
 *  4. Generate the summary text. When a usable previous summary exists it is
 *     updated with ONLY the episodes it does not cover yet; otherwise a fresh
 *     summary is generated from every episode.
 *  5. PATCH the latest row, or POST a new one when there is no row yet or the
 *     latest one has reached MAX_SUMMARY_TOKENS.
 *
 * NOTE(review): assumes `allEpisodes` is ordered oldest-first and that episode
 * ids are numeric and increasing — confirm against the caller.
 *
 * @param {{id: *}} session - Session whose `id` is used in the memory-service URLs.
 * @param {Array<{id: *, token_count?: number}>} allEpisodes - Every episode of the session.
 * @returns {Promise<void>}
 * @throws {Error} When the memory service rejects the summary write. Errors
 *   from fetch and generateSummary propagate unchanged.
 */
async function maybeSummarize(session, allEpisodes) {
  console.log('[summarization] MEMORY_URL:', MEMORY_URL);

  // 1. Sum total tokens for this session (episodes without a count add 0).
  const totalTokens = allEpisodes.reduce((sum, ep) => sum + (ep.token_count || 0), 0);
  console.log('[summarization] totalTokens:', totalTokens, 'threshold:', THRESHOLD_TOKENS);
  if (totalTokens < THRESHOLD_TOKENS) return; // under threshold — nothing to do

  // 2. Fetch existing summaries for the session.
  console.log('[summarization] fetching existing summaries...');
  const summariesRes = await fetch(`${MEMORY_URL}/sessions/${session.id}/summaries`);
  console.log('[summarization] summaries fetch status:', summariesRes.status);
  if (!summariesRes.ok) return;
  const summaries = await summariesRes.json();
  const latest = summaries.at(-1) ?? null;

  // 3. Guard — don't re-summarize until MIN_EPISODES_SINCE new episodes exist.
  let newEpisodes = allEpisodes;
  if (latest) {
    // A range looks like "1-42"; the last segment is the newest covered id.
    // String() keeps this working if the stored range is not a string.
    const lastEpisodeRange = String(latest.episode_range ?? '0');
    const lastCoveredId = Number.parseInt(lastEpisodeRange.split('-').at(-1), 10) || 0;
    newEpisodes = allEpisodes.filter((ep) => ep.id > lastCoveredId);
    if (newEpisodes.length < MIN_EPISODES_SINCE) return;
  }

  // 4. Episode range string, e.g. "1-42", spanning everything passed in.
  const episodeRange = `${allEpisodes.at(0)?.id}-${allEpisodes.at(-1)?.id}`;

  // 5. Generate the summary text.
  // NOTE(review): the limit is named in tokens but compared against the
  // character length of the stored content — confirm which unit is intended.
  const priorLength = latest?.content?.length ?? 0;
  // Extend the previous summary only while it is non-empty and under the limit.
  // An oversized one is treated as fresh rather than appended to a huge blob.
  const extendPrior = priorLength > 0 && priorLength < MAX_SUMMARY_TOKENS;
  // The update prompt presents the episodes as "new exchanges", so only the
  // uncovered ones are sent; a fresh summary needs the whole conversation.
  const content = extendPrior
    ? await generateSummary(newEpisodes, latest.content)
    : await generateSummary(allEpisodes, null);
  if (!content) return;

  // 6. Create a new row or update the existing one. A rejected write throws so
  //    it is reported instead of being logged as a success.
  const jsonHeaders = { 'Content-Type': 'application/json' };
  const createNewRow = !latest || priorLength >= MAX_SUMMARY_TOKENS;

  if (createNewRow) {
    const createRes = await fetch(`${MEMORY_URL}/summaries`, {
      method: 'POST',
      headers: jsonHeaders,
      body: JSON.stringify({
        sessionId: session.id,
        content,
        tokenCount: totalTokens,
        episodeRange,
      }),
    });
    if (!createRes.ok) {
      throw new Error(
        `Memory service responded ${createRes.status} creating summary for session ${session.id}`,
      );
    }
    console.log(`[summarization] Created new summary for session ${session.id}`);
    return;
  }

  const updateRes = await fetch(`${MEMORY_URL}/summaries/${latest.id}`, {
    method: 'PATCH',
    headers: jsonHeaders,
    body: JSON.stringify({
      content,
      tokenCount: totalTokens,
      episodeRange,
    }),
  });
  if (!updateRes.ok) {
    throw new Error(
      `Memory service responded ${updateRes.status} updating summary ${latest.id} for session ${session.id}`,
    );
  }
  console.log(`[summarization] Updated summary ${latest.id} for session ${session.id}`);
}
/**
 * Start summarisation in the background without making the caller wait.
 *
 * The maybeSummarize promise is deliberately not awaited; if it rejects, the
 * failure is logged as a warning and goes no further.
 *
 * @param {object} session - Forwarded to maybeSummarize.
 * @param {Array<object>} allEpisodes - Forwarded to maybeSummarize.
 * @returns {Promise<void>} Resolves as soon as the background work has been started.
 */
async function triggerSummary(session, allEpisodes) {
  const reportFailure = (err) => {
    console.warn('[summarization] Summary failed (non-critical):', err.message);
  };

  // Fire-and-forget by design: no await, only a rejection handler.
  maybeSummarize(session, allEpisodes).catch(reportFailure);
}
// Only triggerSummary is exported; the other functions in this file stay module-private.
module.exports = { triggerSummary };