diff --git a/packages/orchestration-service/src/chat/index.js b/packages/orchestration-service/src/chat/index.js index 7b805fb..097d4c4 100644 --- a/packages/orchestration-service/src/chat/index.js +++ b/packages/orchestration-service/src/chat/index.js @@ -117,7 +117,7 @@ async function getRelevantEntities(userMessage, projectId=null) { ); return results.map((r) => r.payload).filter(Boolean); } catch (err) { - logger.warn( + logger.debug( "[orchestration] Entity search failed, continuing without:", err.message, ); @@ -125,215 +125,134 @@ async function getRelevantEntities(userMessage, projectId=null) { } } -async function chat(externalId, userMessage, options = {}) { - const { recentEpisodeLimit, semanticLimit, scoreThreshold, temperature, repeatPenalty, topP, topK, systemPrompt} = - appSettings.load(); - // 1. Resolve or create session - let session = await memory.getSessionByExternalId(externalId); - if (!session) session = await memory.createSession(externalId); +async function assembleContext(externalId, userMessage) { + const settings = appSettings.load(); + const { recentEpisodeLimit, semanticLimit, scoreThreshold, + temperature, repeatPenalty, topP, topK, systemPrompt } = settings; - let projectSessionIds = null; - let activeSystemPrompt = systemPrompt ?? ORCHESTRATION.SYSTEM_PROMPT; - if (session.project_id) { - try { - const project = await memory.getProject(session.project_id); - if (project) { - const projectSessions = await memory.getProjectSessions(session.project_id); - if (project?.system_prompt) activeSystemPrompt = project.system_prompt; - projectSessionIds = projectSessions.map((s) => s.id); - } - } catch (err) { - logger.warn( - "[orchestration] Failed to resolve project context:", - err.message, - ); - } - } - // 2. Fetch recent episodes for context - const recentEpisodes = await memory.getRecentEpisodes( - session.id, - recentEpisodeLimit, - ); - const isFirstMessage = recentEpisodes.length === 0; - const recentIds = new Set(recentEpisodes.map((e) => e.id)); - - // 3. Semantic Search - const semanticEpisodes = await getSemanticEpisodes( - userMessage, - session.id, - recentIds, - projectSessionIds, - { semanticLimit, scoreThreshold }, - ); - - // 3b. Entity Search - const entities = await getRelevantEntities(userMessage, session.project_id ?? null); - - // 4. Assemble prompt - const prompt = buildPrompt( - recentEpisodes, - semanticEpisodes, - entities, - userMessage, - activeSystemPrompt, - ); - - // 5. Run inference - const result = await inference.complete(prompt, {...options, temperature, repeatPenalty, topP, topK}); - - // 6. Write episode back to memory - try { - await memory.createEpisode( - session.id, userMessage, result.text, - (result.evalCount || 0) + (result.promptEvalCount || 0), - session.project_id ?? null, - ); - } catch (err) { - logger.error('[orchestration] Failed to save episode:', err.message); - } - const allEpisodes = await memory.getRecentEpisodes(session.id, 9999); - triggerSummary(session, allEpisodes); - - - // 8. Auto-name on first message - if (isFirstMessage && !session.name) { - autoNameSession(externalId, userMessage, result.text).catch(() => {}); // already logged inside autoNameSession - } - - // 9. Return response - return { - sessionId: externalId, - response: result.text, - model: result.model, - tokenCount: (result.evalCount || 0) + (result.promptEvalCount || 0), - }; -} - -async function chatStream(externalId, userMessage, onChunk, options = {}) { - - - try { - const { recentEpisodeLimit, semanticLimit, scoreThreshold, temperature, repeatPenalty, topP, topK, systemPrompt } = appSettings.load(); + // 1. Resolve or create session let session = await memory.getSessionByExternalId(externalId); if (!session) session = await memory.createSession(externalId); + // 2. Resolve project context let projectSessionIds = null; let activeSystemPrompt = systemPrompt ?? ORCHESTRATION.SYSTEM_PROMPT; if (session.project_id) { - try { - const project = await memory.getProject(session.project_id); - if (project) { - const projectSessions = await memory.getProjectSessions( - session.project_id, - ); - projectSessionIds = projectSessions.map((s) => s.id); - if (project?.system_prompt) activeSystemPrompt = project.system_prompt; + try { + const project = await memory.getProject(session.project_id); + if (project) { + const projectSessions = await memory.getProjectSessions(session.project_id); + if (project.system_prompt) activeSystemPrompt = project.system_prompt; + projectSessionIds = projectSessions.map(s => s.id); + } + } catch (err) { + logger.warn('[orchestration] Failed to resolve project context:', err.message); } - - } catch (err) { - logger.warn( - "[orchestration] Failed to resolve project context:", - err.message, - ); - } } - const recentEpisodes = await memory.getRecentEpisodes( - session.id, - recentEpisodeLimit, - ); + // 3. Fetch recent episodes + const recentEpisodes = await memory.getRecentEpisodes(session.id, recentEpisodeLimit); const isFirstMessage = recentEpisodes.length === 0; - const recentIds = new Set(recentEpisodes.map((e) => e.id)); - const semanticEpisodes = await getSemanticEpisodes( - userMessage, - session.id, - recentIds, - projectSessionIds, - {semanticLimit, scoreThreshold } - ); + const recentIds = new Set(recentEpisodes.map(e => e.id)); + // 4. Semantic + entity search + const semanticEpisodes = await getSemanticEpisodes( + userMessage, session.id, recentIds, projectSessionIds, { semanticLimit, scoreThreshold } + ); const entities = await getRelevantEntities(userMessage, session.project_id ?? null); - const prompt = buildPrompt( - recentEpisodes, - semanticEpisodes, - entities, - userMessage, - activeSystemPrompt, - ); - const res = await inference.completeStream(prompt, {...options, temperature, repeatPenalty, topP, topK}); + // 5. Assemble prompt + const prompt = buildPrompt(recentEpisodes, semanticEpisodes, entities, userMessage, activeSystemPrompt); - let fullText = ""; - let model = ""; - let tokenCount = 0; - let buffer = ""; + return { + session, + prompt, + isFirstMessage, + inferenceOptions: { temperature, repeatPenalty, topP, topK }, + }; +} - for await (const chunk of res.body) { - buffer += Buffer.from(chunk).toString("utf8"); +async function chat(externalId, userMessage, options = {}) { + const { session, prompt, isFirstMessage, inferenceOptions } = await assembleContext(externalId, userMessage); - const events = buffer.split("\n\n"); - buffer = events.pop() || ""; + const result = await inference.complete(prompt, { ...options, ...inferenceOptions }); - for (const event of events) { - const lines = event.split("\n"); - const dataLines = lines - .filter((line) => line.startsWith("data: ")) - .map((line) => line.slice(6)); - - if (dataLines.length === 0) continue; - - const raw = dataLines.join("\n").trim(); - if (raw === "[DONE]") continue; - - try { - const data = JSON.parse(raw); - - if (data.response) { - fullText += data.response; - onChunk(data.response); - } - - if (data.model) model = data.model; - if (data.done && data.tokenCount !== undefined) { - tokenCount = data.tokenCount; - } - - if (data.error) { - throw new Error(data.error); - } - } catch (err) { - logger.error( - "[orchestration] Failed to parse inference SSE event:", - raw, - err.message, - ); - } - } + try { + await memory.createEpisode( + session.id, userMessage, result.text, + (result.evalCount || 0) + (result.promptEvalCount || 0), + session.project_id ?? null, + ); + } catch (err) { + logger.error('[orchestration] Failed to save episode:', err.message); } - if (fullText.trim()) { - await memory.createEpisode(session.id, userMessage, fullText, tokenCount, session.project_id ?? null); - const allEpisodes = await memory.getRecentEpisodes(session.id, 9999); - triggerSummary(session, allEpisodes); - } else { - logger.warn( - "[orchestration] Stream finished with no assistant text; episode not saved", - ); - } + const allEpisodes = await memory.getRecentEpisodes(session.id, 9999); + triggerSummary(session, allEpisodes); if (isFirstMessage && !session.name) { - autoNameSession(externalId, userMessage, fullText).catch(() => {}); + autoNameSession(externalId, userMessage, result.text).catch(() => {}); } - return { model, tokenCount }; - } catch (err) { - logger.error( - "[orchestration] chatStream fatal error:", - err.message, - err.stack, - ); - throw err; - } + return { + sessionId: externalId, + response: result.text, + model: result.model, + tokenCount: (result.evalCount || 0) + (result.promptEvalCount || 0), + }; +} + +async function chatStream(externalId, userMessage, onChunk, options = {}) { + try { + const { session, prompt, isFirstMessage, inferenceOptions } = await assembleContext(externalId, userMessage); + + const res = await inference.completeStream(prompt, { ...options, ...inferenceOptions }); + + let fullText = '', model = '', tokenCount = 0, buffer = ''; + + for await (const chunk of res.body) { + buffer += Buffer.from(chunk).toString('utf8'); + const events = buffer.split('\n\n'); + buffer = events.pop() || ''; + + for (const event of events) { + const dataLines = event.split('\n') + .filter(line => line.startsWith('data: ')) + .map(line => line.slice(6)); + + if (!dataLines.length) continue; + const raw = dataLines.join('\n').trim(); + if (raw === '[DONE]') continue; + + try { + const data = JSON.parse(raw); + if (data.response) { fullText += data.response; onChunk(data.response); } + if (data.model) model = data.model; + if (data.done && data.tokenCount !== undefined) tokenCount = data.tokenCount; + if (data.error) throw new Error(data.error); + } catch (err) { + logger.error('[orchestration] Failed to parse SSE event:', raw, err.message); + } + } + } + + if (fullText.trim()) { + await memory.createEpisode(session.id, userMessage, fullText, tokenCount, session.project_id ?? null); + const allEpisodes = await memory.getRecentEpisodes(session.id, 9999); + triggerSummary(session, allEpisodes); + } else { + logger.warn('[orchestration] Stream finished with no assistant text; episode not saved'); + } + + if (isFirstMessage && !session.name) { + autoNameSession(externalId, userMessage, fullText).catch(() => {}); + } + + return { model, tokenCount }; + } catch (err) { + logger.error('[orchestration] chatStream fatal error:', err.message, err.stack); + throw err; + } } module.exports = { chat, chatStream };