fix entity duplication glitch
This commit is contained in:
@@ -8,7 +8,15 @@ const EMBEDDING_SERVICE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING
|
|||||||
|
|
||||||
const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization'];
|
const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization'];
|
||||||
|
|
||||||
function buildExtractionPrompt(userMessage, aiResponse) {
|
function buildExtractionPrompt(userMessage, aiResponse, knownEntities = []) {
|
||||||
|
const knownBlock = knownEntities.length > 0
|
||||||
|
? [
|
||||||
|
'Already known entities (use these exact name and type values if the same entity appears):',
|
||||||
|
...knownEntities.map(e => `- "${e.name}" (${e.type})`),
|
||||||
|
'',
|
||||||
|
].join('\n')
|
||||||
|
: '';
|
||||||
|
|
||||||
return [
|
return [
|
||||||
'<|im_start|>system',
|
'<|im_start|>system',
|
||||||
'You are an entity extraction assistant. You extract named entities from conversations and return them as a JSON array. You never return empty arrays when entities are present. You only output valid JSON.<|im_end|>',
|
'You are an entity extraction assistant. You extract named entities from conversations and return them as a JSON array. You never return empty arrays when entities are present. You only output valid JSON.<|im_end|>',
|
||||||
@@ -17,6 +25,7 @@ function buildExtractionPrompt(userMessage, aiResponse) {
|
|||||||
'',
|
'',
|
||||||
`Valid types: ${ENTITY_TYPES.join(', ')}`,
|
`Valid types: ${ENTITY_TYPES.join(', ')}`,
|
||||||
'',
|
'',
|
||||||
|
knownBlock,
|
||||||
'Examples of entities to extract:',
|
'Examples of entities to extract:',
|
||||||
'- People: names of individuals',
|
'- People: names of individuals',
|
||||||
'- Projects: software projects, systems, tools being built',
|
'- Projects: software projects, systems, tools being built',
|
||||||
@@ -25,8 +34,8 @@ function buildExtractionPrompt(userMessage, aiResponse) {
|
|||||||
'- Places: locations, servers, infrastructure',
|
'- Places: locations, servers, infrastructure',
|
||||||
'',
|
'',
|
||||||
'Return a JSON array where each item has:',
|
'Return a JSON array where each item has:',
|
||||||
' "name": the entity name',
|
' "name": the entity name (match exactly if already known)',
|
||||||
' "type": one of the valid types above',
|
' "type": one of the valid types above (match exactly if already known)',
|
||||||
' "notes": one sentence describing this entity based on the conversation',
|
' "notes": one sentence describing this entity based on the conversation',
|
||||||
'',
|
'',
|
||||||
'### Conversation:',
|
'### Conversation:',
|
||||||
@@ -58,15 +67,19 @@ async function embedEntity(entity) {
|
|||||||
async function extractAndStoreEntities(userMessage, aiResponse) {
|
async function extractAndStoreEntities(userMessage, aiResponse) {
|
||||||
console.log('[entities] Extraction triggered')
|
console.log('[entities] Extraction triggered')
|
||||||
try {
|
try {
|
||||||
|
// Fetch existing entities to guide the model toward consistent name/type pairs
|
||||||
|
const db = require('../db').getDB();
|
||||||
|
const knownEntities = db.prepare(`SELECT name, type FROM entities ORDER BY name`).all();
|
||||||
|
|
||||||
const res = await fetch(`${EXTRACTION_URL}/api/generate`, {
|
const res = await fetch(`${EXTRACTION_URL}/api/generate`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
model: EXTRACTION_MODEL,
|
model: EXTRACTION_MODEL,
|
||||||
prompt: buildExtractionPrompt(userMessage, aiResponse),
|
prompt: buildExtractionPrompt(userMessage, aiResponse, knownEntities),
|
||||||
stream: false, // we want the complete response, not a stream
|
stream: false,
|
||||||
options: {
|
options: {
|
||||||
temperature: 0.1, // low temp for deterministic structured output
|
temperature: 0.1,
|
||||||
num_predict: 512,
|
num_predict: 512,
|
||||||
},
|
},
|
||||||
}),
|
}),
|
||||||
|
|||||||
Reference in New Issue
Block a user