extraction error logging

This commit is contained in:
Storme-bit
2026-04-20 23:40:20 -07:00
parent 405676edb5
commit 5de64ba68e

View File

@@ -3,7 +3,7 @@ const { getEnv, SERVICES, formatEpisodeText } = require('@nexusai/shared');
// NOTE(review): this span reads like a diff rendered without +/- markers, so
// pre-change and post-change lines may appear side by side — confirm against
// the real file before relying on it.
const { upsertEntity } = require('./index');
// Address of the extraction model server. 'http://localhost:11434' is passed as
// the second argument to getEnv — presumably the fallback when the variable is
// unset; verify against getEnv in @nexusai/shared.
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');
// NOTE(review): EXTRACTION_MODEL is declared twice with `const`, which is a
// SyntaxError in a real module. The first line looks like the old value
// ('phi3:3.8ab') and the second like the new value ('qwen2.5:3b') — confirm
// and keep only one.
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'phi3:3.8ab');
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b');
// Address of the embedding service; SERVICES.EMBEDDING_URL is supplied as the
// second getEnv argument.
const EMBEDDING_SERVICE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING_URL);
// Entity type names; joined with ', ' into the "Valid types:" line of the
// extraction prompt.
const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization'];
@@ -18,25 +18,21 @@ function buildExtractionPrompt(userMessage, aiResponse, knownEntities = []) {
: '';
return [
'<|user|>',
'Extract all named entities from this conversation as a JSON array. Output only valid JSON, nothing else.',
'',
'<|im_start|>system',
'You are a named entity extractor. You output only valid JSON.',
'<|im_end|>',
'<|im_start|>user',
'Extract all named entities from this conversation.',
`Valid types: ${ENTITY_TYPES.join(', ')}`,
'',
knownBlock,
'Each item must have:',
' "name": the entity name (match exactly if already known)',
' "type": one of the valid types above',
' "notes": one sentence describing this entity based on the conversation',
'Return a JSON object: { "entities": [ { "name": "...", "type": "...", "notes": "..." } ] }',
'Only include items where type is one of the valid types.',
'',
'Extract all named entities from this conversation.',
'Respond with ONLY a JSON array. Start your response with [ and end with ].',
'Do not include any text before or after the array.',
`User: ${userMessage}`,
`Assistant: ${aiResponse}`,
'<|end|>',
'<|assistant|>',
'[',
'<|im_end|>',
'<|im_start|>assistant',
].join('\n');
}
@@ -69,6 +65,7 @@ async function extractAndStoreEntities(userMessage, aiResponse, projectId=null)
model: EXTRACTION_MODEL,
prompt: buildExtractionPrompt(userMessage, aiResponse, knownEntities),
stream: false,
format: 'json',
options: {
temperature: 0.1,
num_predict: 1024,
@@ -82,20 +79,9 @@ async function extractAndStoreEntities(userMessage, aiResponse, projectId=null)
const raw = data.response?.trim() ?? '';
console.log('[entities] raw response:', JSON.stringify(raw.slice(0, 300)));
// Extract just the JSON array — everything from [ to the last ]
let normalized = raw.replace(/```(?:json)?/g, '').trim();
if (!normalized.startsWith('[')) {
normalized = '[' + normalized;
}
if (!normalized.endsWith(']')) {
// Trim any trailing comma or incomplete object before closing
normalized = normalized.replace(/,?\s*\{[^}]*$/, '') + ']';
}
const match = normalized.match(/\[[\s\S]*\]/);
if (!match) throw new Error('No JSON array found in response');
const clean = match[0];
const entities = JSON.parse(clean);
const parsed = JSON.parse(raw);
const entities = Array.isArray(parsed.entities) ? parsed.entities : [];
if (entities.length === 0) throw new Error('No entities in response');
if (!Array.isArray(entities)) throw new Error('Response was not a JSON array');