diff --git a/packages/orchestration-service/src/chat/index.js b/packages/orchestration-service/src/chat/index.js
index a90661d..963fa87 100644
--- a/packages/orchestration-service/src/chat/index.js
+++ b/packages/orchestration-service/src/chat/index.js
@@ -7,9 +7,22 @@ const { ORCHESTRATION } = require("@nexusai/shared");
 
 const { RECENT_EPISODE_LIMIT, SEMANTIC_LIMIT, SCORE_THRESHOLD, SYSTEM_PROMPT } = ORCHESTRATION;
 
-function buildPrompt(recentEpisodes, semanticEpisodes, userMessage) {
+function buildPrompt(recentEpisodes, semanticEpisodes, entities, userMessage) {
   const parts = [SYSTEM_PROMPT];
 
+  // Entity context is optional: tolerate a missing list and sparse payloads so
+  // the literal text "undefined" is never interpolated into the prompt.
+  const known = Array.isArray(entities) ? entities.filter((e) => e && e.name) : [];
+  if (known.length > 0) {
+    parts.push('Here is what you know about entities relevant to this conversation:');
+    for (const e of known) {
+      const type = e.type ? ` (${e.type})` : '';
+      const notes = e.notes ? `: ${e.notes}` : '';
+      parts.push(`- ${e.name}${type}${notes}`);
+    }
+    parts.push('---');
+  }
+
   if (semanticEpisodes.length > 0) {
     parts.push("Here are some relevant memories from earlier conversations:");
     for (const ep of semanticEpisodes) {
@@ -97,6 +110,27 @@ async function getSemanticEpisodes(
   }
 }
 
+/**
+ * Look up stored entities that are semantically close to the user's message.
+ *
+ * Embeds the message, searches the entity collection with that vector, and
+ * returns the payload of every hit that has one. Any failure is logged and
+ * turned into an empty list so the chat can proceed without entity context.
+ *
+ * @param {string} userMessage - Text of the current user turn.
+ * @returns {Promise<object[]>} Entity payloads; empty when nothing matches or the lookup fails.
+ */
+async function getRelevantEntities(userMessage) {
+  try {
+    const vector = await embedding.embed(userMessage);
+    const results = await qdrant.searchEntities(vector);
+    return (results || []).map((r) => r.payload).filter(Boolean);
+  } catch (err) {
+    console.warn('[orchestration] Entity search failed, continuing without:', err.message);
+    return [];
+  }
+}
+
 async function chat(externalId, userMessage, options = {}) {
   // 1. Resolve or create session
   let session = await memory.getSessionByExternalId(externalId);
@@ -135,8 +169,11 @@ if (session.project_id) {
       projectSessionIds
     );
 
+  // 3b. Entity Search
+  const entities = await getRelevantEntities(userMessage);
+
   // 4. Assemble prompt
-  const prompt = buildPrompt(recentEpisodes, semanticEpisodes, userMessage);
+  const prompt = buildPrompt(recentEpisodes, semanticEpisodes, entities, userMessage);
 
   // 5. Run inference
   const result = await inference.complete(prompt, options);
@@ -210,7 +247,9 @@ if (session.project_id) {
       projectSessionIds
     );
 
-  const prompt = buildPrompt(recentEpisodes, semanticEpisodes, userMessage);
+  const entities = await getRelevantEntities(userMessage);
+
+  const prompt = buildPrompt(recentEpisodes, semanticEpisodes, entities, userMessage);
   const res = await inference.completeStream(prompt, options);
   let fullText = "";
 
diff --git a/packages/orchestration-service/src/services/qdrant.js b/packages/orchestration-service/src/services/qdrant.js
index e9e5838..c39217d 100644
--- a/packages/orchestration-service/src/services/qdrant.js
+++ b/packages/orchestration-service/src/services/qdrant.js
@@ -30,4 +30,31 @@ async function searchEpisodes( vector, {limit = ORCHESTRATION.RECENT_EPISODE_LIM
   return data.result;
 }
 
-module.exports = { searchEpisodes };
\ No newline at end of file
+/**
+ * Run a vector similarity search against the entities collection.
+ *
+ * @param {number[]} vector - Query embedding (presumably an array of numbers; confirm against embedding.embed).
+ * @param {object} [options]
+ * @param {number} [options.limit=5] - Maximum number of hits requested.
+ * @param {number} [options.scoreThreshold=0.6] - Minimum score, sent as `score_threshold`.
+ * @returns {Promise<object[]>} The `result` field of the Qdrant response, requested with payloads.
+ * @throws {Error} When the HTTP response status is not OK.
+ */
+async function searchEntities(vector, { limit = 5, scoreThreshold = 0.6 } = {}) {
+  const body = { vector, limit, score_threshold: scoreThreshold, with_payload: true };
+
+  const res = await fetch(
+    `${BASE_URL}/collections/${COLLECTIONS.ENTITIES}/points/search`,
+    {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(body),
+    }
+  );
+
+  if (!res.ok) throw new Error(`Qdrant error: ${res.status}`);
+  const data = await res.json();
+  return data.result;
+}
+
+module.exports = { searchEpisodes, searchEntities };
\ No newline at end of file