Fix entity duplication glitch by passing already-known entities to the extraction prompt

This commit is contained in:
Storme-bit
2026-04-17 06:46:26 -07:00
parent cfa1358174
commit 7e50e82d8c

View File

@@ -8,7 +8,15 @@ const EMBEDDING_SERVICE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING
// Entity type labels; joined into the extraction prompt as the "Valid types" list.
const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization'];
function buildExtractionPrompt(userMessage, aiResponse) {
function buildExtractionPrompt(userMessage, aiResponse, knownEntities = []) {
const knownBlock = knownEntities.length > 0
? [
'Already known entities (use these exact name and type values if the same entity appears):',
...knownEntities.map(e => `- "${e.name}" (${e.type})`),
'',
].join('\n')
: '';
return [
'<|im_start|>system',
'You are an entity extraction assistant. You extract named entities from conversations and return them as a JSON array. You never return empty arrays when entities are present. You only output valid JSON.<|im_end|>',
@@ -17,6 +25,7 @@ function buildExtractionPrompt(userMessage, aiResponse) {
'',
`Valid types: ${ENTITY_TYPES.join(', ')}`,
'',
knownBlock,
'Examples of entities to extract:',
'- People: names of individuals',
'- Projects: software projects, systems, tools being built',
@@ -25,8 +34,8 @@ function buildExtractionPrompt(userMessage, aiResponse) {
'- Places: locations, servers, infrastructure',
'',
'Return a JSON array where each item has:',
' "name": the entity name',
' "type": one of the valid types above',
' "name": the entity name (match exactly if already known)',
' "type": one of the valid types above (match exactly if already known)',
' "notes": one sentence describing this entity based on the conversation',
'',
'### Conversation:',
@@ -58,15 +67,19 @@ async function embedEntity(entity) {
async function extractAndStoreEntities(userMessage, aiResponse) {
console.log('[entities] Extraction triggered')
try {
// Fetch existing entities to guide the model toward consistent name/type pairs
const db = require('../db').getDB();
const knownEntities = db.prepare(`SELECT name, type FROM entities ORDER BY name`).all();
const res = await fetch(`${EXTRACTION_URL}/api/generate`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
model: EXTRACTION_MODEL,
prompt: buildExtractionPrompt(userMessage, aiResponse),
stream: false, // we want the complete response, not a stream
prompt: buildExtractionPrompt(userMessage, aiResponse, knownEntities),
stream: false,
options: {
temperature: 0.1, // low temp for deterministic structured output
temperature: 0.1,
num_predict: 512,
},
}),