const { getEnv } = require('@nexusai/shared');
const { upsertEntity } = require('./index');

// Base URL and model name for the extraction backend. The second argument to
// getEnv is presumably the fallback used when the variable is unset — confirm
// against the getEnv implementation in @nexusai/shared.
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b');

// Entity types accepted from the model; items with any other type are skipped.
const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization'];

/**
 * Builds the extraction prompt for one conversation turn.
 *
 * The prompt is written with ChatML-style markers and ends by opening the
 * assistant turn with a literal "[", nudging the model to continue a JSON
 * array rather than produce free text.
 *
 * @param {string} userMessage - The user's side of the exchange.
 * @param {string} aiResponse - The assistant's reply to that message.
 * @returns {string} The newline-joined prompt text.
 */
function buildExtractionPrompt(userMessage, aiResponse) {
  return [
    '<|im_start|>system',
    'You are an entity extraction assistant. You extract named entities from conversations and return them as a JSON array. You never return empty arrays when entities are present. You only output valid JSON.<|im_end|>',
    '<|im_start|>user',
    'Extract all named entities from this conversation.',
    '',
    `Valid types: ${ENTITY_TYPES.join(', ')}`,
    '',
    'Examples of entities to extract:',
    '- People: names of individuals',
    '- Projects: software projects, systems, tools being built',
    '- Technologies: databases, frameworks, languages, hardware',
    '- Organizations: companies, teams',
    '- Places: locations, servers, infrastructure',
    '',
    'Return a JSON array where each item has:',
    ' "name": the entity name',
    ' "type": one of the valid types above',
    ' "notes": one sentence describing this entity based on the conversation',
    '',
    '### Conversation:',
    `User: ${userMessage}`,
    `Assistant: ${aiResponse}`,
    '',
    '### Extracted entities as JSON array:',
    '<|im_end|>',
    '<|im_start|>assistant',
    '[',
  ].join('\n');
}

/**
 * Parses the model's text output as JSON.
 *
 * Markdown code fences are stripped first. If the remaining text is not valid
 * JSON and does not start with "[", one more attempt is made with "[" put
 * back in front: the prompt already opens the array, so a model that simply
 * continues from it replies with `{...}, {...}]`.
 *
 * @param {string} raw - Trimmed response text from the model.
 * @returns {*} The parsed JSON value (callers must still check it is an array).
 * @throws {SyntaxError} The error from the first parse attempt when neither
 *   form is valid JSON.
 */
function parseModelJson(raw) {
  // Strip markdown fences defensively — small models sometimes add them anyway
  const clean = raw
    .replace(/^```(?:json)?\n?/, '')
    .replace(/\n?```$/, '')
    .trim();

  try {
    return JSON.parse(clean);
  } catch (err) {
    if (clean.startsWith('[')) throw err;
    try {
      return JSON.parse(`[${clean}`);
    } catch {
      // Report the failure for the text the model actually produced.
      throw err;
    }
  }
}

/**
 * Asks the extraction model for the named entities in one conversation turn
 * and stores every well-formed result through upsertEntity.
 *
 * Best-effort: every failure (network, non-2xx status, unparseable output,
 * storage error) is logged with console.warn and swallowed, so the returned
 * promise does not reject because of them.
 *
 * @param {string} userMessage - The user's side of the exchange.
 * @param {string} aiResponse - The assistant's reply to that message.
 * @returns {Promise<void>}
 */
async function extractAndStoreEntities(userMessage, aiResponse) {
  console.log('[entities] Input:', {
    userMessage: userMessage?.slice(0, 100),
    aiResponse: aiResponse?.slice(0, 100),
  });
  console.log('[entities] Extraction triggered');

  try {
    // NOTE(review): the prompt already contains ChatML markers, but the request
    // does not set Ollama's `raw` option, so the model template may be applied
    // on top of it — confirm which behaviour is intended.
    const res = await fetch(`${EXTRACTION_URL}/api/generate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: EXTRACTION_MODEL,
        prompt: buildExtractionPrompt(userMessage, aiResponse),
        stream: false, // we want the complete response, not a stream
        options: {
          temperature: 0.1, // low temp for deterministic structured output
          num_predict: 512,
        },
      }),
    });

    console.log('[entities] Ollama responded:', res.status);
    if (!res.ok) throw new Error(`Ollama responded ${res.status}`);

    const data = await res.json();
    console.log('[entities] Raw response:', data.response?.slice(0, 200));

    const raw = data.response?.trim() ?? '';
    const entities = parseModelJson(raw);
    console.log('[entities] Parsed entities:', entities);

    if (!Array.isArray(entities)) throw new Error('Response was not a JSON array');

    let saved = 0;
    for (const item of entities) {
      // A null or primitive element must not abort the whole batch.
      if (item === null || typeof item !== 'object') continue;

      const { name, type, notes } = item;
      // Skip anything malformed or with an unrecognised type
      if (!name || !type || !ENTITY_TYPES.includes(type)) continue;

      // Awaiting is harmless if upsertEntity is synchronous; if it returns a
      // promise, this keeps a rejection inside the try/catch below.
      await upsertEntity(name, type, notes ?? null);
      saved++;
    }

    if (saved > 0) console.log(`[entities] Extracted and stored ${saved} entities`);
  } catch (err) {
    // Non-critical — log and move on, episode is already saved
    console.warn('[entities] Extraction failed:', err.message);
    console.warn('[entities] Stack:', err.stack);
  }
}

module.exports = { extractAndStoreEntities };