diff --git a/packages/memory-service/src/entities/extraction.js b/packages/memory-service/src/entities/extraction.js index 93b1e29..8a78d6d 100644 --- a/packages/memory-service/src/entities/extraction.js +++ b/packages/memory-service/src/entities/extraction.js @@ -3,7 +3,7 @@ const { getEnv, SERVICES, formatEpisodeText } = require('@nexusai/shared'); const { upsertEntity } = require('./index'); const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434'); -const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'phi3:3.8ab'); +const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b'); const EMBEDDING_SERVICE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING_URL); const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization']; @@ -18,25 +18,21 @@ function buildExtractionPrompt(userMessage, aiResponse, knownEntities = []) { : ''; return [ - '<|user|>', - 'Extract all named entities from this conversation as a JSON array. Output only valid JSON, nothing else.', - '', + '<|im_start|>system', + 'You are a named entity extractor. You output only valid JSON.', + '<|im_end|>', + '<|im_start|>user', + 'Extract all named entities from this conversation.', `Valid types: ${ENTITY_TYPES.join(', ')}`, '', knownBlock, - 'Each item must have:', - ' "name": the entity name (match exactly if already known)', - ' "type": one of the valid types above', - ' "notes": one sentence describing this entity based on the conversation', + 'Return a JSON object: { "entities": [ { "name": "...", "type": "...", "notes": "..." } ] }', + 'Only include items where type is one of the valid types.', '', - 'Extract all named entities from this conversation.', - 'Respond with ONLY a JSON array. Start your response with [ and end with ].', - 'Do not include any text before or after the array.', `User: ${userMessage}`, `Assistant: ${aiResponse}`, - '<|end|>', - '<|assistant|>', - '[', + '<|im_end|>', + '<|im_start|>assistant', ].join('\n'); } @@ -69,6 +65,7 @@ async function extractAndStoreEntities(userMessage, aiResponse, projectId=null) model: EXTRACTION_MODEL, prompt: buildExtractionPrompt(userMessage, aiResponse, knownEntities), stream: false, + format: 'json', options: { temperature: 0.1, num_predict: 1024, @@ -82,20 +79,9 @@ async function extractAndStoreEntities(userMessage, aiResponse, projectId=null) const raw = data.response?.trim() ?? ''; console.log('[entities] raw response:', JSON.stringify(raw.slice(0, 300))); - // Extract just the JSON array — everything from [ to the last ] - let normalized = raw.replace(/```(?:json)?/g, '').trim(); - if (!normalized.startsWith('[')) { - normalized = '[' + normalized; - } - if (!normalized.endsWith(']')) { - // Trim any trailing comma or incomplete object before closing - normalized = normalized.replace(/,?\s*\{[^}]*$/, '') + ']'; - } - const match = normalized.match(/\[[\s\S]*\]/); - - if (!match) throw new Error('No JSON array found in response'); - const clean = match[0]; - const entities = JSON.parse(clean); + const parsed = JSON.parse(raw); + const entities = Array.isArray(parsed.entities) ? parsed.entities : []; + if (entities.length === 0) throw new Error('No entities in response'); if (!Array.isArray(entities)) throw new Error('Response was not a JSON array');