extraction error logging
This commit is contained in:
@@ -3,7 +3,7 @@ const { getEnv, SERVICES, formatEpisodeText } = require('@nexusai/shared');
|
||||
const { upsertEntity } = require('./index');
|
||||
|
||||
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');
|
||||
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'phi3:3.8ab');
|
||||
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b');
|
||||
const EMBEDDING_SERVICE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING_URL);
|
||||
|
||||
const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization'];
|
||||
@@ -18,25 +18,21 @@ function buildExtractionPrompt(userMessage, aiResponse, knownEntities = []) {
|
||||
: '';
|
||||
|
||||
return [
|
||||
'<|user|>',
|
||||
'Extract all named entities from this conversation as a JSON array. Output only valid JSON, nothing else.',
|
||||
'',
|
||||
'<|im_start|>system',
|
||||
'You are a named entity extractor. You output only valid JSON.',
|
||||
'<|im_end|>',
|
||||
'<|im_start|>user',
|
||||
'Extract all named entities from this conversation.',
|
||||
`Valid types: ${ENTITY_TYPES.join(', ')}`,
|
||||
'',
|
||||
knownBlock,
|
||||
'Each item must have:',
|
||||
' "name": the entity name (match exactly if already known)',
|
||||
' "type": one of the valid types above',
|
||||
' "notes": one sentence describing this entity based on the conversation',
|
||||
'Return a JSON object: { "entities": [ { "name": "...", "type": "...", "notes": "..." } ] }',
|
||||
'Only include items where type is one of the valid types.',
|
||||
'',
|
||||
'Extract all named entities from this conversation.',
|
||||
'Respond with ONLY a JSON array. Start your response with [ and end with ].',
|
||||
'Do not include any text before or after the array.',
|
||||
`User: ${userMessage}`,
|
||||
`Assistant: ${aiResponse}`,
|
||||
'<|end|>',
|
||||
'<|assistant|>',
|
||||
'[',
|
||||
'<|im_end|>',
|
||||
'<|im_start|>assistant',
|
||||
].join('\n');
|
||||
}
|
||||
|
||||
@@ -69,6 +65,7 @@ async function extractAndStoreEntities(userMessage, aiResponse, projectId=null)
|
||||
model: EXTRACTION_MODEL,
|
||||
prompt: buildExtractionPrompt(userMessage, aiResponse, knownEntities),
|
||||
stream: false,
|
||||
format: 'json',
|
||||
options: {
|
||||
temperature: 0.1,
|
||||
num_predict: 1024,
|
||||
@@ -82,20 +79,9 @@ async function extractAndStoreEntities(userMessage, aiResponse, projectId=null)
|
||||
const raw = data.response?.trim() ?? '';
|
||||
console.log('[entities] raw response:', JSON.stringify(raw.slice(0, 300)));
|
||||
|
||||
// Extract just the JSON array — everything from [ to the last ]
|
||||
let normalized = raw.replace(/```(?:json)?/g, '').trim();
|
||||
if (!normalized.startsWith('[')) {
|
||||
normalized = '[' + normalized;
|
||||
}
|
||||
if (!normalized.endsWith(']')) {
|
||||
// Trim any trailing comma or incomplete object before closing
|
||||
normalized = normalized.replace(/,?\s*\{[^}]*$/, '') + ']';
|
||||
}
|
||||
const match = normalized.match(/\[[\s\S]*\]/);
|
||||
|
||||
if (!match) throw new Error('No JSON array found in response');
|
||||
const clean = match[0];
|
||||
const entities = JSON.parse(clean);
|
||||
const parsed = JSON.parse(raw);
|
||||
const entities = Array.isArray(parsed.entities) ? parsed.entities : [];
|
||||
if (entities.length === 0) throw new Error('No entities in response');
|
||||
|
||||
if (!Array.isArray(entities)) throw new Error('Response was not a JSON array');
|
||||
|
||||
|
||||
Reference in New Issue
Block a user