extraction error logging

This commit is contained in:
Storme-bit
2026-04-20 23:40:20 -07:00
parent 405676edb5
commit 5de64ba68e

View File

@@ -3,7 +3,7 @@ const { getEnv, SERVICES, formatEpisodeText } = require('@nexusai/shared');
const { upsertEntity } = require('./index'); const { upsertEntity } = require('./index');
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434'); const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'phi3:3.8ab'); const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b');
const EMBEDDING_SERVICE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING_URL); const EMBEDDING_SERVICE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING_URL);
const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization']; const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization'];
@@ -18,25 +18,21 @@ function buildExtractionPrompt(userMessage, aiResponse, knownEntities = []) {
: ''; : '';
return [ return [
'<|user|>', '<|im_start|>system',
'Extract all named entities from this conversation as a JSON array. Output only valid JSON, nothing else.', 'You are a named entity extractor. You output only valid JSON.',
'', '<|im_end|>',
'<|im_start|>user',
'Extract all named entities from this conversation.',
`Valid types: ${ENTITY_TYPES.join(', ')}`, `Valid types: ${ENTITY_TYPES.join(', ')}`,
'', '',
knownBlock, knownBlock,
'Each item must have:', 'Return a JSON object: { "entities": [ { "name": "...", "type": "...", "notes": "..." } ] }',
' "name": the entity name (match exactly if already known)', 'Only include items where type is one of the valid types.',
' "type": one of the valid types above',
' "notes": one sentence describing this entity based on the conversation',
'', '',
'Extract all named entities from this conversation.',
'Respond with ONLY a JSON array. Start your response with [ and end with ].',
'Do not include any text before or after the array.',
`User: ${userMessage}`, `User: ${userMessage}`,
`Assistant: ${aiResponse}`, `Assistant: ${aiResponse}`,
'<|end|>', '<|im_end|>',
'<|assistant|>', '<|im_start|>assistant',
'[',
].join('\n'); ].join('\n');
} }
@@ -69,6 +65,7 @@ async function extractAndStoreEntities(userMessage, aiResponse, projectId=null)
model: EXTRACTION_MODEL, model: EXTRACTION_MODEL,
prompt: buildExtractionPrompt(userMessage, aiResponse, knownEntities), prompt: buildExtractionPrompt(userMessage, aiResponse, knownEntities),
stream: false, stream: false,
format: 'json',
options: { options: {
temperature: 0.1, temperature: 0.1,
num_predict: 1024, num_predict: 1024,
@@ -82,20 +79,9 @@ async function extractAndStoreEntities(userMessage, aiResponse, projectId=null)
const raw = data.response?.trim() ?? ''; const raw = data.response?.trim() ?? '';
console.log('[entities] raw response:', JSON.stringify(raw.slice(0, 300))); console.log('[entities] raw response:', JSON.stringify(raw.slice(0, 300)));
// Extract just the JSON array — everything from [ to the last ] const parsed = JSON.parse(raw);
let normalized = raw.replace(/```(?:json)?/g, '').trim(); const entities = Array.isArray(parsed.entities) ? parsed.entities : [];
if (!normalized.startsWith('[')) { if (entities.length === 0) throw new Error('No entities in response');
normalized = '[' + normalized;
}
if (!normalized.endsWith(']')) {
// Trim any trailing comma or incomplete object before closing
normalized = normalized.replace(/,?\s*\{[^}]*$/, '') + ']';
}
const match = normalized.match(/\[[\s\S]*\]/);
if (!match) throw new Error('No JSON array found in response');
const clean = match[0];
const entities = JSON.parse(clean);
if (!Array.isArray(entities)) throw new Error('Response was not a JSON array'); if (!Array.isArray(entities)) throw new Error('Response was not a JSON array');