Files
nexusAI/packages/memory-service/src/entities/extraction.js
2026-04-20 23:25:31 -07:00

124 lines
4.7 KiB
JavaScript

const semantic = require('../semantic')
const { getEnv, SERVICES, formatEpisodeText } = require('@nexusai/shared');
const { upsertEntity } = require('./index');
// Base URL of the text-generation server queried for entity extraction
// (its /api/generate endpoint is called below); overridable via EXTRACTION_URL.
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');
// Model tag sent with each extraction request; overridable via EXTRACTION_MODEL.
// NOTE(review): the default 'phi3:3.8ab' looks like a typo for 'phi3:3.8b' —
// confirm this tag actually exists on the target server.
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'phi3:3.8ab');
// Base URL of the embedding service; falls back to the shared SERVICES.EMBEDDING_URL.
const EMBEDDING_SERVICE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING_URL);
// Entity types advertised to the model in the prompt; extracted items whose
// type is not in this list are skipped by extractAndStoreEntities.
const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization'];
/**
 * Assemble the entity-extraction prompt for one conversation turn.
 *
 * The prompt is wrapped in <|user|> / <|end|> / <|assistant|> markers and ends
 * with a pre-filled "[" so the model is nudged to answer with a JSON array.
 *
 * @param {string} userMessage - The user's side of the exchange.
 * @param {string} aiResponse - The assistant's reply to that message.
 * @param {Array<{name: string, type: string}>} [knownEntities=[]] - Entities
 *   already stored; when non-empty they are listed so the model reuses the
 *   same name/type pairs.
 * @returns {string} The newline-joined prompt text.
 */
function buildExtractionPrompt(userMessage, aiResponse, knownEntities = []) {
  // Optional section; stays an empty string (one blank prompt line) when
  // nothing is known yet.
  let knownSection = '';
  if (knownEntities.length > 0) {
    const bullets = knownEntities.map((known) => `- "${known.name}" (${known.type})`);
    knownSection = [
      'Already known entities (use these exact name and type values if the same entity appears):',
      ...bullets,
      '',
    ].join('\n');
  }

  const instructions = [
    '<|user|>',
    'Extract all named entities from this conversation as a JSON array. Output only valid JSON, nothing else.',
    '',
    `Valid types: ${ENTITY_TYPES.join(', ')}`,
    '',
  ];

  const itemSchema = [
    'Each item must have:',
    ' "name": the entity name (match exactly if already known)',
    ' "type": one of the valid types above',
    ' "notes": one sentence describing this entity based on the conversation',
    '',
  ];

  const transcript = [
    `User: ${userMessage}`,
    `Assistant: ${aiResponse}`,
    '<|end|>',
    '<|assistant|>',
    '[',
  ];

  return [...instructions, knownSection, ...itemSchema, ...transcript].join('\n');
}
/**
 * Request an embedding for an entity from the embedding service.
 *
 * The text sent is "<name> (<type>): <notes>", with the name standing in for
 * the notes when they are null or undefined.
 *
 * @param {{name: string, type: string, notes?: (string|null)}} entity - Entity to embed.
 * @returns {Promise<*>} The `embedding` field of the service's JSON reply
 *   (presumably a numeric vector — the shape is not validated here).
 * @throws {Error} When the service answers with a non-2xx status.
 */
async function embedEntity(entity) {
  const label = `${entity.name} (${entity.type})`;
  const description = entity.notes ?? entity.name;
  const payload = JSON.stringify({ text: `${label}: ${description}` });

  const response = await fetch(`${EMBEDDING_SERVICE_URL}/embed`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: payload,
  });

  if (!response.ok) {
    throw new Error(`Embedding service error: ${response.status}`);
  }

  const reply = await response.json();
  return reply.embedding;
}
async function extractAndStoreEntities(userMessage, aiResponse, projectId=null) {
console.log('[entities] Extraction triggered')
try {
// Fetch existing entities to guide the model toward consistent name/type pairs
const db = require('../db').getDB();
const knownEntities = db.prepare(`SELECT name, type FROM entities ORDER BY name`).all();
const res = await fetch(`${EXTRACTION_URL}/api/generate`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model: EXTRACTION_MODEL,
prompt: buildExtractionPrompt(userMessage, aiResponse, knownEntities),
stream: false,
options: {
temperature: 0.1,
num_predict: 768,
},
}),
});
if (!res.ok) throw new Error(`Ollama responded ${res.status}`);
const data = await res.json();
const raw = data.response?.trim() ?? '';
console.log('[entities] raw response:', JSON.stringify(raw.slice(0, 300)));
// Extract just the JSON array — everything from [ to the last ]
const stripped = raw.replace(/```(?:json)?/g, '').trim();
const match = stripped.match(/\[[\s\S]*\]/);
if (!match) throw new Error('No JSON array found in response');
const clean = match[0];
const entities = JSON.parse(clean);
if (!Array.isArray(entities)) throw new Error('Response was not a JSON array');
let saved = 0;
for (const { name, type, notes } of entities) {
if (!name || !type || !ENTITY_TYPES.includes(type)) continue;
const entity = upsertEntity(name, type, notes ?? null);
console.log('[entities] Upserted entity:', entity);
// Embed and upsert to Qdrant fire-and-forget
embedEntity(entity)
.then(vector => semantic.upsertEntity(entity.id, vector, {
name: entity.name,
type: entity.type,
notes: entity.notes,
projectId: projectId ?? null,
}))
.catch(err => {
console.warn(`[entities] Failed to embed entity "${entity.name}":`, err.message);
console.warn(`[entities] Embed error stack:`, err.stack); // add this
});
saved++;
}
if (saved > 0) console.log(`[entities] Extracted and stored ${saved} entities`);
} catch (err) {
// Non-critical — log and move on, episode is already saved
console.warn('[entities] Extraction failed:', err.message);
console.warn('[entities] Stack:', err.stack);
}
}
// Only the extraction entry point is exported; the prompt builder and the
// embedding helper stay private to this module.
module.exports = { extractAndStoreEntities };