Files
nexusAI/packages/memory-service/src/entities/extraction.js
2026-04-20 23:50:15 -07:00

128 lines
5.1 KiB
JavaScript

const semantic = require('../semantic')
const { getEnv, SERVICES, formatEpisodeText } = require('@nexusai/shared');
const { upsertEntity } = require('./index');
// Base URL of the text-generation server; requests go to `${EXTRACTION_URL}/api/generate`.
// NOTE(review): getEnv presumably reads the named environment variable and falls back to
// the second argument — confirm against @nexusai/shared.
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');
// Model identifier sent as the `model` field of the generate request.
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b');
// Base URL of the embedding service; requests go to `${EMBEDDING_SERVICE_URL}/embed`.
const EMBEDDING_SERVICE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING_URL);
// Entity types listed in the extraction prompt; extracted entities with any other type are skipped.
const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization'];
/**
 * Assemble the ChatML-style prompt that asks the model to list the named
 * entities mentioned in a single user/assistant exchange.
 *
 * @param {string} userMessage - Text of the user's turn, interpolated verbatim.
 * @param {string} aiResponse - Text of the assistant's turn, interpolated verbatim.
 * @param {Array<{name: string, type: string}>} [knownEntities=[]] - Entities that
 *   already exist; when non-empty they are listed so the model can reuse the
 *   exact name/type pairs.
 * @returns {string} The prompt, one instruction per line, ending with the
 *   `{"entities":` primer.
 */
function buildExtractionPrompt(userMessage, aiResponse, knownEntities = []) {
  // Optional hint section; stays an empty string (one blank line in the
  // final prompt) when nothing is known yet.
  let knownSection = '';
  if (knownEntities.length > 0) {
    const bullets = knownEntities.map((known) => `- "${known.name}" (${known.type})`);
    knownSection = [
      'Already known entities (use these exact name and type values if the same entity appears):',
      ...bullets,
      '',
    ].join('\n');
  }

  const systemTurn = [
    '<|im_start|>system',
    'You are a named entity extractor. You output only valid JSON.',
    '<|im_end|>',
  ];

  const userTurn = [
    '<|im_start|>user',
    'Read the conversation below and extract every named entity mentioned.',
    `Entity types to extract: ${ENTITY_TYPES.join(', ')}`,
    'For each entity found, provide: name, type, and a one-sentence notes field.',
    'Return your answer as: { "entities": [ ... ] }',
    '',
    knownSection,
    // Explicit delimiters around the transcript help smaller models.
    '--- CONVERSATION ---',
    `User: ${userMessage}`,
    `Assistant: ${aiResponse}`,
    '--- END CONVERSATION ---',
    '<|im_end|>',
  ];

  // The trailing primer nudges the model to start filling the array.
  const assistantPrimer = ['<|im_start|>assistant', '{"entities":'];

  return [...systemTurn, ...userTurn, ...assistantPrimer].join('\n');
}
/**
 * Request an embedding for an entity from the embedding service.
 *
 * The text sent for embedding is `"<name> (<type>): <notes>"`, with the name
 * standing in for the notes when `entity.notes` is null or undefined.
 *
 * @param {{name: string, type: string, notes?: (string|null)}} entity - Entity to embed.
 * @returns {Promise<Array>} The `embedding` array from the service response
 *   (presumably a numeric vector — confirm against the embedding service).
 * @throws {Error} When the service answers with a non-2xx status, or when the
 *   response body carries no non-empty `embedding` array.
 */
async function embedEntity(entity) {
  // Combine name, type and notes into a single descriptive string for embedding.
  const text = `${entity.name} (${entity.type}): ${entity.notes ?? entity.name}`;

  const res = await fetch(`${EMBEDDING_SERVICE_URL}/embed`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ text }),
  });
  if (!res.ok) throw new Error(`Embedding service error: ${res.status}`);

  const data = await res.json();
  const embedding = data?.embedding;
  // Fail loudly here instead of handing `undefined` to the caller, which
  // would otherwise forward it to the vector store.
  if (!Array.isArray(embedding) || embedding.length === 0) {
    throw new Error('Embedding service returned no embedding vector');
  }
  return embedding;
}
async function extractAndStoreEntities(userMessage, aiResponse, projectId=null) {
console.log('[entities] Extraction triggered')
try {
// Fetch existing entities to guide the model toward consistent name/type pairs
const db = require('../db').getDB();
console.log('[entities] fetching known entities...'); // add this
const knownEntities = db.prepare(`SELECT name, type FROM entities ORDER BY name`).all();
console.log('[entities] known entities count:', knownEntities.length);
const prompt = buildExtractionPrompt(userMessage, aiResponse, knownEntities);
console.log('[entities] prompt preview:', JSON.stringify(prompt.slice(0, 500)));
const res = await fetch(`${EXTRACTION_URL}/api/generate`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model: EXTRACTION_MODEL,
prompt: prompt,
stream: false,
format: 'json',
options: {
temperature: 0.1,
num_predict: 1024,
},
}),
});
if (!res.ok) throw new Error(`Ollama responded ${res.status}`);
const data = await res.json();
const raw = data.response?.trim() ?? '';
console.log('[entities] raw response:', JSON.stringify(raw.slice(0, 300)));
const parsed = JSON.parse(raw);
const entities = Array.isArray(parsed.entities) ? parsed.entities : [];
if (entities.length === 0) throw new Error('No entities in response');
if (!Array.isArray(entities)) throw new Error('Response was not a JSON array');
let saved = 0;
for (const { name, type, notes } of entities) {
if (!name || !type || !ENTITY_TYPES.includes(type)) continue;
const entity = upsertEntity(name, type, notes ?? null);
console.log('[entities] Upserted entity:', entity);
// Embed and upsert to Qdrant fire-and-forget
embedEntity(entity)
.then(vector => semantic.upsertEntity(entity.id, vector, {
name: entity.name,
type: entity.type,
notes: entity.notes,
projectId: projectId ?? null,
}))
.catch(err => {
console.warn(`[entities] Failed to embed entity "${entity.name}":`, err.message);
console.warn(`[entities] Embed error stack:`, err.stack); // add this
});
saved++;
}
if (saved > 0) console.log(`[entities] Extracted and stored ${saved} entities`);
} catch (err) {
// Non-critical — log and move on, episode is already saved
console.warn('[entities] Extraction failed:', err.message);
console.warn('[entities] Stack:', err.stack);
}
}
// Only the entry point is exported; buildExtractionPrompt and embedEntity stay module-private.
module.exports = { extractAndStoreEntities };