adding in entity extraction layer with semantic search enabled

2026-04-17 06:18:39 -07:00
parent 902725b7f7
commit 06d7031e44
2 changed files with 45 additions and 4 deletions
--- a/packages/orchestration-service/src/chat/index.js
+++ b/packages/orchestration-service/src/chat/index.js
@@ -7,9 +7,17 @@ const { ORCHESTRATION } = require("@nexusai/shared");
 const { RECENT_EPISODE_LIMIT, SEMANTIC_LIMIT, SCORE_THRESHOLD, SYSTEM_PROMPT } =
  ORCHESTRATION;

-function buildPrompt(recentEpisodes, semanticEpisodes, userMessage) {
+function buildPrompt(recentEpisodes, semanticEpisodes, entities, userMessage) {
  const parts = [SYSTEM_PROMPT];

+  if (entities.length > 0) {
+        parts.push('Here is what you know about entities relevant to this conversation:');
+        for (const e of entities) {
+            parts.push(`- ${e.name} (${e.type}): ${e.notes}`);
+        }
+        parts.push('---');
+    }
+
  if (semanticEpisodes.length > 0) {
    parts.push("Here are some relevant memories from earlier conversations:");
    for (const ep of semanticEpisodes) {
@@ -97,6 +105,17 @@ async function getSemanticEpisodes(
  }
 }

+// Best-effort: embed the message, search entities, return truthy payloads; warn and return [] on error.
+async function getRelevantEntities(userMessage) {
+    try {
+        const queryVector = await embedding.embed(userMessage);
+        const hits = await qdrant.searchEntities(queryVector);
+        return hits.flatMap(({ payload }) => (payload ? [payload] : []));
+    } catch (error) {
+        console.warn('[orchestration] Entity search failed, continuing without:', error.message);
+        return [];
+    }
+
 async function chat(externalId, userMessage, options = {}) {
  // 1. Resolve or create session
  let session = await memory.getSessionByExternalId(externalId);
@@ -135,8 +154,11 @@ if (session.project_id) {
    projectSessionIds
  );

+  // 3b. Entity Search
+    const entities = await getRelevantEntities(userMessage)
+
  // 4. Assemble prompt
-  const prompt = buildPrompt(recentEpisodes, semanticEpisodes, userMessage);
+  const prompt = buildPrompt(recentEpisodes, semanticEpisodes, entities, userMessage);

  // 5. Run inference
  const result = await inference.complete(prompt, options);
@@ -210,7 +232,9 @@ if (session.project_id) {
    projectSessionIds
  );

-  const prompt = buildPrompt(recentEpisodes, semanticEpisodes, userMessage);
+  const entities = await getRelevantEntities(userMessage);
+
+  const prompt = buildPrompt(recentEpisodes, semanticEpisodes, entities, userMessage);
  const res = await inference.completeStream(prompt, options);

  let fullText = "";
--- a/packages/orchestration-service/src/services/qdrant.js
+++ b/packages/orchestration-service/src/services/qdrant.js
@@ -30,4 +30,21 @@ async function searchEpisodes( vector, {limit = ORCHESTRATION.RECENT_EPISODE_LIM
    return data.result;
 }

-module.exports = { searchEpisodes };
+/**
+ * POSTs a vector search to the entities collection and returns the reply's `result` field.
+ * @param {*} vector - Query vector, forwarded unchanged in the request body.
+ * @param {{limit?: number, scoreThreshold?: number}} [options] - Defaults: limit 5, scoreThreshold 0.6.
+ * @returns {Promise<*>} The `result` property of the parsed JSON response (payloads are requested).
+ * @throws {Error} `Qdrant error: <status>` when the HTTP response is not OK.
+ */
+async function searchEntities(vector, { limit = 5, scoreThreshold = 0.6 } = {}) {
+    const url = `${BASE_URL}/collections/${COLLECTIONS.ENTITIES}/points/search`;
+    const query = { vector, limit, score_threshold: scoreThreshold, with_payload: true };
+    const headers = { 'Content-Type': 'application/json' };
+    const response = await fetch(url, { method: 'POST', headers, body: JSON.stringify(query) });
+    if (!response.ok) throw new Error(`Qdrant error: ${response.status}`);
+    const { result } = await response.json();
+    return result;
+}
+
+module.exports = { searchEpisodes, searchEntities };