adding in entity extraction layer

This commit is contained in:
Storme-bit
2026-04-17 05:37:24 -07:00
parent bb05d1508d
commit ec44b935d1
2 changed files with 72 additions and 0 deletions

View File

@@ -0,0 +1,67 @@
const { getEnv } = require('@nexusai/shared');
const { upsertEntity } = require('./index');
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b');
const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization'];
function buildExtractionPrompt(userMessage, aiResponse) {
return [
'Extract named entities from the conversation below.',
`Valid entity types: ${ENTITY_TYPES.join(', ')}.`,
'Return ONLY a JSON array. No explanation, no markdown, no code fences.',
'Each item must have exactly these fields: "name" (string), "type" (one of the valid types), "notes" (one sentence fact about this entity from the conversation).',
'If no entities are found, return: []',
'',
`User: ${userMessage}`,
`Assistant: ${aiResponse}`,
'',
'JSON array:',
].join('\n');
}
async function extractAndStoreEntities(userMessage, aiResponse) {
try {
const res = await fetch(`${EXTRACTION_URL}/api/generate`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model: EXTRACTION_MODEL,
prompt: buildExtractionPrompt(userMessage, aiResponse),
stream: false, // we want the complete response, not a stream
options: {
temperature: 0.1, // low temp for deterministic structured output
num_predict: 512,
},
}),
});
if (!res.ok) throw new Error(`Ollama responded ${res.status}`);
const data = await res.json();
const raw = data.response?.trim() ?? '';
// Strip markdown fences defensively — small models sometimes add them anyway
const clean = raw.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
const entities = JSON.parse(clean);
if (!Array.isArray(entities)) throw new Error('Response was not a JSON array');
let saved = 0;
for (const { name, type, notes } of entities) {
// Skip anything malformed or with an unrecognised type
if (!name || !type || !ENTITY_TYPES.includes(type)) continue;
upsertEntity(name, type, notes ?? null);
saved++;
}
if (saved > 0) console.log(`[entities] Extracted and stored ${saved} entities`);
} catch (err) {
// Non-critical — log and move on, episode is already saved
console.warn('[entities] Extraction failed:', err.message);
}
}
module.exports = { extractAndStoreEntities };

View File

@@ -1,6 +1,7 @@
const {getDB} = require('../db');
const { EPISODIC, getEnv, SERVICES, parseRow, formatEpisodeText } = require('@nexusai/shared');
const semantic = require('../semantic');
const { extractAndStoreEntities } = require('../entities/extraction')
// --Sessions --------------------------------------------------
@@ -127,6 +128,10 @@ async function createEpisode(sessionId, userMessage, aiResponse, tokenCount = nu
}))
.catch(err => console.error(`Failed to embed episode ${episode.id}:`, err.message));
extractAndStoreEntities(userMessage, aiResponse)
.catch(err => console.error(`Failed to extract entities for episode ${episode.id}:`, err.message));
return episode;
}