diff --git a/packages/orchestration-service/src/services/summarization.js b/packages/orchestration-service/src/services/summarization.js index 5bea54b..7ae3b28 100644 --- a/packages/orchestration-service/src/services/summarization.js +++ b/packages/orchestration-service/src/services/summarization.js @@ -9,30 +9,34 @@ const MAX_SUMMARY_TOKENS = parseInt(getEnv('SUMMARY_MAX_TOKENS', SUMMARIES.MAX_S const MIN_EPISODES_SINCE = parseInt(getEnv('SUMMARY_MIN_EPISODES', SUMMARIES.MIN_EPISODES_SINCE)); function buildSummaryPrompt(episodes, existingSummary = null) { - const context = episodes + const MAX_CHARS = 3000; // truncate input to keep Phi3 focused + let context = episodes .map(ep => `User: ${ep.user_message}\nAssistant: ${ep.ai_response}`) .join('\n\n'); + // Truncate from the start if too long — keep the most recent exchanges + if (context.length > MAX_CHARS) { + context = context.slice(-MAX_CHARS); + } + const instruction = existingSummary - ? `Update this summary to include the new conversation exchanges below. Output only the updated summary — do not continue the conversation. + ? `Update the summary below to include the new exchanges. Write 3-5 sentences in third person. Output only the updated summary text, nothing else. Previous summary: ${existingSummary} -New exchanges to incorporate:` - : `Summarise this conversation. Output only the summary — do not continue the conversation or give recommendations. +New exchanges: +${context}` + : `Summarize the conversation below in 3-5 sentences. Write in third person. Output only the summary text, nothing else. -Conversation:`; +Conversation: +${context}`; return [ - '<|im_start|>system', - 'You are a conversation summarisation assistant. You write concise, factual summaries for long-term memory storage. Output only the summary text — no preamble, no labels.<|im_end|>', - '<|im_start|>user', + '<|user|>', instruction, - '', - context, - '<|im_end|>', - '<|im_start|>assistant', + '<|end|>', + '<|assistant|>', ].join('\n'); } @@ -47,8 +51,8 @@ async function generateSummary(episodes, existingSummary = null) { prompt, stream: false, options: { - temperature: 0.3, // slightly higher than entities — summaries benefit from some fluency - num_predict: 300, // generous but bounded — keeps summaries from running long + temperature: 0.2, // slightly higher than entities — summaries benefit from some fluency + num_predict: 200, // generous but bounded — keeps summaries from running long }, }), });