From 7e50e82d8cf8cd351809c9cb28b7ce0e5b3cda9a Mon Sep 17 00:00:00 2001 From: Storme-bit Date: Fri, 17 Apr 2026 06:46:26 -0700 Subject: [PATCH] fix entity duplication glitch --- .../memory-service/src/entities/extraction.js | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/packages/memory-service/src/entities/extraction.js b/packages/memory-service/src/entities/extraction.js index 048b848..499dee8 100644 --- a/packages/memory-service/src/entities/extraction.js +++ b/packages/memory-service/src/entities/extraction.js @@ -8,7 +8,15 @@ const EMBEDDING_SERVICE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization']; -function buildExtractionPrompt(userMessage, aiResponse) { +function buildExtractionPrompt(userMessage, aiResponse, knownEntities = []) { + const knownBlock = knownEntities.length > 0 + ? [ + 'Already known entities (use these exact name and type values if the same entity appears):', + ...knownEntities.map(e => `- "${e.name}" (${e.type})`), + '', + ].join('\n') + : ''; + return [ '<|im_start|>system', 'You are an entity extraction assistant. You extract named entities from conversations and return them as a JSON array. You never return empty arrays when entities are present. You only output valid JSON.<|im_end|>', @@ -17,6 +25,7 @@ function buildExtractionPrompt(userMessage, aiResponse) { '', `Valid types: ${ENTITY_TYPES.join(', ')}`, '', + knownBlock, 'Examples of entities to extract:', '- People: names of individuals', '- Projects: software projects, systems, tools being built', @@ -25,8 +34,8 @@ function buildExtractionPrompt(userMessage, aiResponse) { '- Places: locations, servers, infrastructure', '', 'Return a JSON array where each item has:', - ' "name": the entity name', - ' "type": one of the valid types above', + ' "name": the entity name (match exactly if already known)', + ' "type": one of the valid types above (match exactly if already known)', ' "notes": one sentence describing this entity based on the conversation', '', '### Conversation:', @@ -58,15 +67,19 @@ async function embedEntity(entity) { async function extractAndStoreEntities(userMessage, aiResponse) { console.log('[entities] Extraction triggered') try { + // Fetch existing entities to guide the model toward consistent name/type pairs + const db = require('../db').getDB(); + const knownEntities = db.prepare(`SELECT name, type FROM entities ORDER BY name`).all(); + const res = await fetch(`${EXTRACTION_URL}/api/generate`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ model: EXTRACTION_MODEL, - prompt: buildExtractionPrompt(userMessage, aiResponse), - stream: false, // we want the complete response, not a stream + prompt: buildExtractionPrompt(userMessage, aiResponse, knownEntities), + stream: false, options: { - temperature: 0.1, // low temp for deterministic structured output + temperature: 0.1, num_predict: 512, }, }),