const semantic = require('../semantic');
const { getEnv, SERVICES, formatEpisodeText } = require('@nexusai/shared');
const { upsertEntity } = require('./index');

// Endpoint and model used for entity extraction (overridable via environment).
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b');
const EMBEDDING_SERVICE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING_URL);

// Only entities whose `type` is one of these values are stored.
const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization'];

// Text the prompt ends with. A model that continues the primer instead of
// restarting the object returns only the tail, so the parser needs it too.
const RESPONSE_PRIMER = '{"entities":';

/**
 * Builds the ChatML-style prompt asking the model to list the named entities
 * in one user/assistant exchange.
 *
 * @param {string} userMessage - Text of the user turn.
 * @param {string} aiResponse - Text of the assistant turn.
 * @param {Array<{name: string, type: string}>} [knownEntities=[]] - Entities
 *   already stored; listed in the prompt so the model reuses their exact
 *   name/type pairs.
 * @returns {string} The prompt, ending with the `{"entities":` primer.
 */
function buildExtractionPrompt(userMessage, aiResponse, knownEntities = []) {
  const knownBlock = knownEntities.length > 0
    ? [
      'Already known entities (use these exact name and type values if the same entity appears):',
      ...knownEntities.map((e) => `- "${e.name}" (${e.type})`),
      '',
    ].join('\n')
    : '';

  return [
    '<|im_start|>system',
    'You are a named entity extractor. You output only valid JSON.',
    '<|im_end|>',
    '<|im_start|>user',
    'Read the conversation below and extract every named entity mentioned.',
    `Entity types to extract: ${ENTITY_TYPES.join(', ')}`,
    'For each entity found, provide: name, type, and a one-sentence notes field.',
    'Return your answer as: { "entities": [ ... ] }',
    '',
    knownBlock,
    '--- CONVERSATION ---', // clear delimiter helps smaller models
    `User: ${userMessage}`,
    `Assistant: ${aiResponse}`,
    '--- END CONVERSATION ---',
    '<|im_end|>',
    '<|im_start|>assistant',
    RESPONSE_PRIMER, // primer nudges it to start filling the array
  ].join('\n');
}

/**
 * Requests an embedding vector for an entity from the embedding service.
 *
 * @param {{name: string, type: string, notes?: string|null}} entity
 * @returns {Promise<*>} The `embedding` field of the service's JSON reply.
 * @throws {Error} When the service answers with a non-2xx status.
 */
async function embedEntity(entity) {
  // Combine name, type and notes into a single descriptive string for embedding;
  // the name doubles as the description when there are no notes.
  const text = `${entity.name} (${entity.type}): ${entity.notes ?? entity.name}`;

  const res = await fetch(`${EMBEDDING_SERVICE_URL}/embed`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ text }),
  });
  if (!res.ok) throw new Error(`Embedding service error: ${res.status}`);

  const data = await res.json();
  return data.embedding;
}

/**
 * Turns the raw model reply into a list of candidate entities.
 *
 * Accepts `{ "entities": [...] }`, a bare top-level array, or a reply that
 * merely continues the prompt primer. Any other valid JSON yields `[]`.
 *
 * @param {string} raw - Trimmed response text from the model.
 * @returns {Array<*>} Candidate entities (unvalidated), possibly empty.
 * @throws {SyntaxError} When the text is not JSON even with the primer prepended.
 */
function parseEntitiesResponse(raw) {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    try {
      parsed = JSON.parse(`${RESPONSE_PRIMER}${raw}`);
    } catch {
      // Report the failure on the text the model actually produced.
      throw err;
    }
  }

  if (Array.isArray(parsed)) return parsed;
  // `parsed` may be null or a primitive, hence the optional chaining.
  if (Array.isArray(parsed?.entities)) return parsed.entities;
  return [];
}

/**
 * Embeds a stored entity and upserts it into the semantic store.
 * Never rejects: failures are logged, so callers may leave it un-awaited.
 *
 * @param {{id: *, name: string, type: string, notes: *}} entity - Value
 *   returned by `upsertEntity`.
 * @param {*} projectId - Project to tag the vector with, or null/undefined.
 * @returns {Promise<void>}
 */
async function indexEntity(entity, projectId) {
  try {
    const vector = await embedEntity(entity);
    await semantic.upsertEntity(entity.id, vector, {
      name: entity.name,
      type: entity.type,
      notes: entity.notes,
      projectId: projectId ?? null,
    });
  } catch (err) {
    console.warn(`[entities] Failed to embed entity "${entity.name}":`, err.message);
    console.warn(`[entities] Embed error stack:`, err.stack);
  }
}

/**
 * Extracts named entities from one conversation turn, stores each valid one
 * via `upsertEntity`, and starts a background embed + semantic upsert for it.
 *
 * Best-effort: every failure is logged and swallowed, so this never rejects.
 *
 * @param {string} userMessage - Text of the user turn.
 * @param {string} aiResponse - Text of the assistant turn.
 * @param {*} [projectId=null] - Optional project id attached to the vectors.
 * @returns {Promise<void>}
 */
async function extractAndStoreEntities(userMessage, aiResponse, projectId = null) {
  console.log('[entities] Extraction triggered');
  try {
    // Fetch existing entities to guide the model toward consistent name/type pairs.
    // NOTE(review): the db module is required lazily here — presumably to avoid
    // a circular import; confirm before hoisting it to the top of the file.
    const db = require('../db').getDB();
    console.log('[entities] fetching known entities...');
    const knownEntities = db.prepare('SELECT name, type FROM entities ORDER BY name').all();
    console.log('[entities] known entities count:', knownEntities.length);

    const prompt = buildExtractionPrompt(userMessage, aiResponse, knownEntities);
    console.log('[entities] prompt preview:', JSON.stringify(prompt.slice(0, 500)));

    const res = await fetch(`${EXTRACTION_URL}/api/generate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: EXTRACTION_MODEL,
        prompt,
        stream: false,
        format: 'json',
        options: {
          temperature: 0.1,
          num_predict: 1024,
        },
      }),
    });
    if (!res.ok) throw new Error(`Ollama responded ${res.status}`);

    const data = await res.json();
    const raw = data.response?.trim() ?? '';
    console.log('[entities] raw response:', JSON.stringify(raw.slice(0, 300)));

    const entities = parseEntitiesResponse(raw);
    if (entities.length === 0) {
      // A turn without named entities is a normal outcome, not a failure.
      console.log('[entities] No entities in response');
      return;
    }

    let saved = 0;
    for (const candidate of entities) {
      // Model output is untrusted: skip anything that is not a well-formed entity.
      if (candidate === null || typeof candidate !== 'object') continue;
      const { name, type, notes } = candidate;
      if (typeof name !== 'string' || name.trim() === '') continue;
      if (!ENTITY_TYPES.includes(type)) continue;

      try {
        const entity = upsertEntity(name, type, notes ?? null);
        console.log('[entities] Upserted entity:', entity);

        // Embed and upsert to the semantic store fire-and-forget;
        // indexEntity handles its own errors.
        void indexEntity(entity, projectId);
        saved++;
      } catch (err) {
        // One entity failing to store must not discard the rest of the batch.
        console.warn(`[entities] Failed to store entity "${name}":`, err.message);
      }
    }

    if (saved > 0) console.log(`[entities] Extracted and stored ${saved} entities`);
  } catch (err) {
    // Non-critical — log and move on, episode is already saved
    console.warn('[entities] Extraction failed:', err.message);
    console.warn('[entities] Stack:', err.stack);
  }
}

module.exports = { extractAndStoreEntities };