// Source: nexusAI/packages/memory-service/src/entities/extraction.js
// Snapshot: 2026-04-17 05:54:33 -07:00 (95 lines, 3.8 KiB, JavaScript)

const { getEnv } = require('@nexusai/shared');
const { upsertEntity } = require('./index');
// Base URL of the generation server; `/api/generate` is appended to it when
// extracting. The second getEnv argument is presumably the fallback used when
// the variable is unset — confirm against @nexusai/shared.
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');
// Model tag sent as `model` in the generate request body.
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b');
// Allow-list of entity types: listed in the prompt as the valid types and used
// to discard any extracted entity whose type is not in this list.
const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization'];
/**
 * Builds the ChatML-formatted prompt that asks the model to list the named
 * entities in one user/assistant exchange as a JSON array.
 *
 * The prompt ends inside the assistant turn with an opening "[" so the model is
 * steered towards emitting array items directly; a model that continues from
 * there will return the array WITHOUT its leading bracket.
 *
 * @param {string|null|undefined} userMessage - Text of the user's turn.
 * @param {string|null|undefined} aiResponse - Text of the assistant's reply.
 * @returns {string} The full prompt, lines joined with "\n".
 */
function buildExtractionPrompt(userMessage, aiResponse) {
  // Nullish turns become empty strings rather than the literal text
  // "undefined"/"null", and ChatML role markers are removed so conversation
  // text cannot close the user turn or open a new one inside the prompt.
  const toPromptText = (text) => String(text ?? '').replace(/<\|im_(?:start|end)\|>/g, '');

  return [
    '<|im_start|>system',
    'You are an entity extraction assistant. You extract named entities from conversations and return them as a JSON array. You never return empty arrays when entities are present. You only output valid JSON.<|im_end|>',
    '<|im_start|>user',
    'Extract all named entities from this conversation.',
    '',
    `Valid types: ${ENTITY_TYPES.join(', ')}`,
    '',
    'Examples of entities to extract:',
    '- People: names of individuals',
    '- Projects: software projects, systems, tools being built',
    '- Technologies: databases, frameworks, languages, hardware',
    '- Organizations: companies, teams',
    '- Places: locations, servers, infrastructure',
    '',
    'Return a JSON array where each item has:',
    ' "name": the entity name',
    ' "type": one of the valid types above',
    ' "notes": one sentence describing this entity based on the conversation',
    '',
    '### Conversation:',
    `User: ${toPromptText(userMessage)}`,
    `Assistant: ${toPromptText(aiResponse)}`,
    '',
    '### Extracted entities as JSON array:',
    '<|im_end|>',
    '<|im_start|>assistant',
    '[',
  ].join('\n');
}
/**
 * Parses the model's completion text into an array of entity candidates.
 *
 * @param {string|null|undefined} raw - The `response` field of the generate reply.
 * @returns {Array<*>} The parsed JSON array (items are not validated here).
 * @throws {SyntaxError} If the text is not JSON even after restoring a leading "[".
 * @throws {Error} If the text parses to something other than an array.
 */
function parseEntityArray(raw) {
  // Strip markdown fences defensively — small models sometimes add them anyway
  const text = String(raw ?? '')
    .trim()
    .replace(/^```(?:json)?\n?/, '')
    .replace(/\n?```$/, '')
    .trim();

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch (parseErr) {
    // The prompt pre-fills the assistant turn with "[", so a model that simply
    // continues returns the array without its opening bracket. Retry with the
    // bracket restored; if that fails too, report the original parse error.
    try {
      parsed = JSON.parse(`[${text}`);
    } catch {
      throw parseErr;
    }
  }

  if (!Array.isArray(parsed)) throw new Error('Response was not a JSON array');
  return parsed;
}

/**
 * Asks the extraction model for the named entities in one conversation turn
 * and stores each valid one through `upsertEntity`.
 *
 * Best-effort: every failure (network, non-2xx status, unparsable output,
 * storage error) is logged with console.warn and not propagated.
 *
 * @param {string} userMessage - Text of the user's turn.
 * @param {string} aiResponse - Text of the assistant's reply.
 * @returns {Promise<void>} Resolves once extraction has finished or failed.
 */
async function extractAndStoreEntities(userMessage, aiResponse) {
  try {
    // Logged inside the try so that an unexpected argument type cannot make
    // this non-critical step throw into the caller.
    console.log('[entities] Input:', {
      userMessage: userMessage?.slice(0, 100),
      aiResponse: aiResponse?.slice(0, 100),
    });
    console.log('[entities] Extraction triggered');

    const res = await fetch(`${EXTRACTION_URL}/api/generate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: EXTRACTION_MODEL,
        prompt: buildExtractionPrompt(userMessage, aiResponse),
        stream: false, // we want the complete response, not a stream
        options: {
          temperature: 0.1, // low temp for deterministic structured output
          num_predict: 512,
        },
      }),
    });
    console.log('[entities] Ollama responded:', res.status);
    if (!res.ok) throw new Error(`Ollama responded ${res.status}`);

    const data = await res.json();
    console.log('[entities] Raw response:', data.response?.slice(0, 200));

    const entities = parseEntityArray(data.response);
    console.log('[entities] Parsed entities:', entities);

    let saved = 0;
    for (const candidate of entities) {
      // Skip anything malformed: a null or primitive item would otherwise throw
      // on destructuring and abort every remaining entity.
      if (candidate === null || typeof candidate !== 'object') continue;
      const { name, type, notes } = candidate;
      if (typeof name !== 'string' || typeof type !== 'string') continue;

      // Models often vary casing/whitespace ("Technology", " person"); normalise
      // before checking the allow-list so those are not dropped needlessly.
      const entityName = name.trim();
      const entityType = type.trim().toLowerCase();
      if (!entityName || !ENTITY_TYPES.includes(entityType)) continue;

      // `await` has no effect if upsertEntity is synchronous; if it returns a
      // promise, this keeps its rejection inside this try/catch.
      await upsertEntity(entityName, entityType, notes ?? null);
      saved++;
    }
    if (saved > 0) console.log(`[entities] Extracted and stored ${saved} entities`);
  } catch (err) {
    // Non-critical — log and move on, episode is already saved
    console.warn('[entities] Extraction failed:', err.message);
    console.warn('[entities] Stack:', err.stack);
  }
}
// Only the entry point is exported; buildExtractionPrompt stays module-private.
module.exports = { extractAndStoreEntities };