fix entity duplication glitch

This commit is contained in:
Storme-bit
2026-04-17 06:46:26 -07:00
parent cfa1358174
commit 7e50e82d8c

View File

@@ -8,7 +8,15 @@ const EMBEDDING_SERVICE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING
const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization']; const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization'];
function buildExtractionPrompt(userMessage, aiResponse) { function buildExtractionPrompt(userMessage, aiResponse, knownEntities = []) {
const knownBlock = knownEntities.length > 0
? [
'Already known entities (use these exact name and type values if the same entity appears):',
...knownEntities.map(e => `- "${e.name}" (${e.type})`),
'',
].join('\n')
: '';
return [ return [
'<|im_start|>system', '<|im_start|>system',
'You are an entity extraction assistant. You extract named entities from conversations and return them as a JSON array. You never return empty arrays when entities are present. You only output valid JSON.<|im_end|>', 'You are an entity extraction assistant. You extract named entities from conversations and return them as a JSON array. You never return empty arrays when entities are present. You only output valid JSON.<|im_end|>',
@@ -17,6 +25,7 @@ function buildExtractionPrompt(userMessage, aiResponse) {
'', '',
`Valid types: ${ENTITY_TYPES.join(', ')}`, `Valid types: ${ENTITY_TYPES.join(', ')}`,
'', '',
knownBlock,
'Examples of entities to extract:', 'Examples of entities to extract:',
'- People: names of individuals', '- People: names of individuals',
'- Projects: software projects, systems, tools being built', '- Projects: software projects, systems, tools being built',
@@ -25,8 +34,8 @@ function buildExtractionPrompt(userMessage, aiResponse) {
'- Places: locations, servers, infrastructure', '- Places: locations, servers, infrastructure',
'', '',
'Return a JSON array where each item has:', 'Return a JSON array where each item has:',
' "name": the entity name', ' "name": the entity name (match exactly if already known)',
' "type": one of the valid types above', ' "type": one of the valid types above (match exactly if already known)',
' "notes": one sentence describing this entity based on the conversation', ' "notes": one sentence describing this entity based on the conversation',
'', '',
'### Conversation:', '### Conversation:',
@@ -58,15 +67,19 @@ async function embedEntity(entity) {
async function extractAndStoreEntities(userMessage, aiResponse) { async function extractAndStoreEntities(userMessage, aiResponse) {
console.log('[entities] Extraction triggered') console.log('[entities] Extraction triggered')
try { try {
// Fetch existing entities to guide the model toward consistent name/type pairs
const db = require('../db').getDB();
const knownEntities = db.prepare(`SELECT name, type FROM entities ORDER BY name`).all();
const res = await fetch(`${EXTRACTION_URL}/api/generate`, { const res = await fetch(`${EXTRACTION_URL}/api/generate`, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ body: JSON.stringify({
model: EXTRACTION_MODEL, model: EXTRACTION_MODEL,
prompt: buildExtractionPrompt(userMessage, aiResponse), prompt: buildExtractionPrompt(userMessage, aiResponse, knownEntities),
stream: false, // we want the complete response, not a stream stream: false,
options: { options: {
temperature: 0.1, // low temp for deterministic structured output temperature: 0.1,
num_predict: 512, num_predict: 512,
}, },
}), }),