From cef1803af6d9420552db8d0b8d2bd40033e25c68 Mon Sep 17 00:00:00 2001 From: Storme-bit Date: Fri, 17 Apr 2026 06:01:21 -0700 Subject: [PATCH] adding in entity extraction layer with semantic search enabled --- .../memory-service/src/entities/extraction.js | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/packages/memory-service/src/entities/extraction.js b/packages/memory-service/src/entities/extraction.js index 0bdf851..52ca042 100644 --- a/packages/memory-service/src/entities/extraction.js +++ b/packages/memory-service/src/entities/extraction.js @@ -1,8 +1,10 @@ -const { getEnv } = require('@nexusai/shared'); +const {semantic} = require('../semantic') +const { getEnv, SERVICES, formatEpisodeText } = require('@nexusai/shared'); const { upsertEntity } = require('./index'); const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434'); const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b'); +const EMBEDDING_SERVICE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING_URL); const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization']; @@ -38,12 +40,22 @@ function buildExtractionPrompt(userMessage, aiResponse) { ].join('\n'); } -async function extractAndStoreEntities(userMessage, aiResponse) { - console.log('[entities] Input:', { - userMessage: userMessage?.slice(0, 100), - aiResponse: aiResponse?.slice(0, 100) +async function embedEntity(entity) { + // Combine name, type and notes into a single descriptive string for embedding + const text = `${entity.name} (${entity.type}): ${entity.notes ?? 
entity.name}`; + + const res = await fetch(`${EMBEDDING_SERVICE_URL}/embed`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ text }), }); + if (!res.ok) throw new Error(`Embedding service error: ${res.status}`); + const data = await res.json(); + return data.embedding; +} + +async function extractAndStoreEntities(userMessage, aiResponse) { console.log('[entities] Extraction triggered') try { const res = await fetch(`${EXTRACTION_URL}/api/generate`, { @@ -60,26 +72,32 @@ async function extractAndStoreEntities(userMessage, aiResponse) { }), }); - console.log('[entities] Ollama responded:', res.status); - if (!res.ok) throw new Error(`Ollama responded ${res.status}`); const data = await res.json(); - console.log('[entities] Raw response:', data.response?.slice(0, 200)); const raw = data.response?.trim() ?? ''; // Strip markdown fences defensively — small models sometimes add them anyway const clean = raw.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim(); const entities = JSON.parse(clean); - console.log('[entities] Parsed entities:', entities); if (!Array.isArray(entities)) throw new Error('Response was not a JSON array'); let saved = 0; for (const { name, type, notes } of entities) { - // Skip anything malformed or with an unrecognised type if (!name || !type || !ENTITY_TYPES.includes(type)) continue; - upsertEntity(name, type, notes ?? null); + + const entity = upsertEntity(name, type, notes ?? null); + + // Embed and upsert to Qdrant fire-and-forget + embedEntity(entity) + .then(vector => semantic.upsertEntity(entity.id, vector, { + name: entity.name, + type: entity.type, + notes: entity.notes, + })) + .catch(err => console.warn(`[entities] Failed to embed entity "${entity.name}":`, err.message)); + saved++; }