// Listing metadata from the original file viewer: JavaScript, 95 lines, 3.8 KiB.
const { getEnv } = require('@nexusai/shared');
const { upsertEntity } = require('./index');

// Base URL of the generation server that receives the extraction request
// (POSTed to `${EXTRACTION_URL}/api/generate`).
// NOTE(review): the second getEnv argument is presumably the fallback used when
// the variable is unset — confirm against @nexusai/shared.
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');

// Model name sent in the `model` field of the extraction request.
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b');

// Entity types listed in the prompt and accepted when storing results; anything
// else returned by the model is skipped. Frozen because it is only ever read.
const ENTITY_TYPES = Object.freeze([
  'person',
  'place',
  'project',
  'technology',
  'concept',
  'organization',
]);

// --- Prompt construction ---
/**
 * Builds the entity-extraction prompt for one conversation exchange.
 *
 * The prompt is hand-formatted with `<|im_start|>` / `<|im_end|>` turn markers:
 * a system turn, a user turn carrying the instructions and the conversation,
 * and an opened assistant turn pre-filled with "[" so the completion is nudged
 * to continue a JSON array.
 *
 * @param {string} [userMessage] - User side of the exchange; null/undefined is
 *   rendered as an empty string instead of the literal text "undefined"/"null".
 * @param {string} [aiResponse] - Assistant side of the exchange; same handling.
 * @returns {string} The prompt lines joined with "\n".
 */
function buildExtractionPrompt(userMessage, aiResponse) {
  return [
    '<|im_start|>system',
    'You are an entity extraction assistant. You extract named entities from conversations and return them as a JSON array. You never return empty arrays when entities are present. You only output valid JSON.<|im_end|>',
    '<|im_start|>user',
    'Extract all named entities from this conversation.',
    '',
    `Valid types: ${ENTITY_TYPES.join(', ')}`,
    '',
    'Examples of entities to extract:',
    '- People: names of individuals',
    '- Projects: software projects, systems, tools being built',
    '- Technologies: databases, frameworks, languages, hardware',
    '- Organizations: companies, teams',
    '- Places: locations, servers, infrastructure',
    '',
    'Return a JSON array where each item has:',
    ' "name": the entity name',
    ' "type": one of the valid types above',
    ' "notes": one sentence describing this entity based on the conversation',
    '',
    '### Conversation:',
    // Nullish sides become empty text so the model is not handed "undefined" as content.
    `User: ${userMessage ?? ''}`,
    `Assistant: ${aiResponse ?? ''}`,
    '',
    '### Extracted entities as JSON array:',
    '<|im_end|>',
    '<|im_start|>assistant',
    // Pre-fill: the completion is expected to continue from this opening bracket.
    '[',
  ].join('\n');
}

// --- Extraction and storage ---
/**
 * Parses a raw model completion into an array of entity candidates.
 *
 * Tries, in order: the text as-is (after stripping markdown fences), the text
 * with a leading "[" restored, and the outermost "[ ... ]" span found in the
 * text. The first candidate that parses to a JSON array wins.
 *
 * @param {string} raw - Trimmed completion text.
 * @returns {Array<*>} The parsed array (items are not validated here).
 * @throws {Error} When no candidate parses to a JSON array; the first
 *   JSON.parse failure, if any, is attached as `cause`.
 */
function parseEntityArray(raw) {
  // Strip markdown fences defensively — small models sometimes add them anyway
  const clean = raw.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();

  const candidates = [clean];

  // The prompt pre-fills the assistant turn with "[", so a completion may
  // continue from there and omit the opening bracket.
  if (!clean.startsWith('[')) candidates.push(`[${clean}`);

  // Tolerate prose before/after the array ("Here are the entities: [...]").
  const open = clean.indexOf('[');
  const close = clean.lastIndexOf(']');
  if (open !== -1 && close > open) candidates.push(clean.slice(open, close + 1));

  let firstError = null;
  for (const candidate of candidates) {
    try {
      const parsed = JSON.parse(candidate);
      if (Array.isArray(parsed)) return parsed;
    } catch (err) {
      firstError ??= err;
    }
  }

  throw new Error('Response was not a JSON array', { cause: firstError ?? undefined });
}

/**
 * Asks the extraction model for the named entities in one conversation
 * exchange and stores every valid one via `upsertEntity`.
 *
 * Best-effort: any failure in the request, parsing, or storage step is caught
 * and logged as a warning rather than propagated.
 *
 * @param {string} [userMessage] - User side of the exchange.
 * @param {string} [aiResponse] - Assistant side of the exchange.
 * @returns {Promise<void>}
 */
async function extractAndStoreEntities(userMessage, aiResponse) {
  console.log('[entities] Input:', {
    userMessage: userMessage?.slice(0, 100),
    aiResponse: aiResponse?.slice(0, 100),
  });

  console.log('[entities] Extraction triggered');
  try {
    // NOTE(review): the prompt already contains chat-template markers; Ollama's
    // `raw: true` option exists for fully templated prompts — confirm whether
    // it should be set here. Parsing below copes with either behaviour.
    const res = await fetch(`${EXTRACTION_URL}/api/generate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: EXTRACTION_MODEL,
        prompt: buildExtractionPrompt(userMessage, aiResponse),
        stream: false, // we want the complete response, not a stream
        options: {
          temperature: 0.1, // low temp for deterministic structured output
          num_predict: 512,
        },
      }),
    });

    console.log('[entities] Ollama responded:', res.status);

    if (!res.ok) throw new Error(`Ollama responded ${res.status}`);

    const data = await res.json();
    console.log('[entities] Raw response:', data.response?.slice(0, 200));
    const raw = data.response?.trim() ?? '';

    const entities = parseEntityArray(raw);
    console.log('[entities] Parsed entities:', entities);

    let saved = 0;
    for (const entity of entities) {
      // Skip anything malformed or with an unrecognised type. Non-object items
      // (null, strings, numbers) are checked first: destructuring null would
      // throw and abort the remaining entities.
      if (entity === null || typeof entity !== 'object') continue;

      const { name, type, notes } = entity;
      if (typeof name !== 'string' || !name || !type || !ENTITY_TYPES.includes(type)) continue;

      // NOTE(review): called without await — if upsertEntity returns a promise,
      // its rejection would escape this try/catch. Confirm it is synchronous.
      upsertEntity(name, type, notes ?? null);
      saved++;
    }

    if (saved > 0) console.log(`[entities] Extracted and stored ${saved} entities`);
  } catch (err) {
    // Non-critical — log and move on, episode is already saved
    console.warn('[entities] Extraction failed:', err.message);
    console.warn('[entities] Stack:', err.stack);
  }
}

// --- Public API ---
// Only the extraction entry point is exported; everything else in this module stays internal.
module.exports = { extractAndStoreEntities };