adding in entity extraction layer with semantic search enabled
This commit is contained in:
@@ -1,8 +1,10 @@
|
|||||||
const { getEnv } = require('@nexusai/shared');
|
const {semantic} = require('../semantic')
|
||||||
|
const { getEnv, SERVICES, formatEpisodeText } = require('@nexusai/shared');
|
||||||
const { upsertEntity } = require('./index');
|
const { upsertEntity } = require('./index');
|
||||||
|
|
||||||
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');
|
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');
|
||||||
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b');
|
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b');
|
||||||
|
const EMBEDDING_SERVICE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING_URL);
|
||||||
|
|
||||||
const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization'];
|
const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization'];
|
||||||
|
|
||||||
@@ -38,12 +40,22 @@ function buildExtractionPrompt(userMessage, aiResponse) {
|
|||||||
].join('\n');
|
].join('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
async function extractAndStoreEntities(userMessage, aiResponse) {
|
async function embedEntity(entity) {
|
||||||
console.log('[entities] Input:', {
|
// Combine name, type and notes into a single descriptive string for embedding
|
||||||
userMessage: userMessage?.slice(0, 100),
|
const text = `${entity.name} (${entity.type}): ${entity.notes ?? entity.name}`;
|
||||||
aiResponse: aiResponse?.slice(0, 100)
|
|
||||||
|
const res = await fetch(`${EMBEDDING_SERVICE_URL}/embed`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json' },
|
||||||
|
body: JSON.stringify({ text }),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (!res.ok) throw new Error(`Embedding service error: ${res.status}`);
|
||||||
|
const data = await res.json();
|
||||||
|
return data.embedding;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function extractAndStoreEntities(userMessage, aiResponse) {
|
||||||
console.log('[entities] Extraction triggered')
|
console.log('[entities] Extraction triggered')
|
||||||
try {
|
try {
|
||||||
const res = await fetch(`${EXTRACTION_URL}/api/generate`, {
|
const res = await fetch(`${EXTRACTION_URL}/api/generate`, {
|
||||||
@@ -60,26 +72,32 @@ async function extractAndStoreEntities(userMessage, aiResponse) {
|
|||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
|
|
||||||
console.log('[entities] Ollama responded:', res.status);
|
|
||||||
|
|
||||||
if (!res.ok) throw new Error(`Ollama responded ${res.status}`);
|
if (!res.ok) throw new Error(`Ollama responded ${res.status}`);
|
||||||
|
|
||||||
const data = await res.json();
|
const data = await res.json();
|
||||||
console.log('[entities] Raw response:', data.response?.slice(0, 200));
|
|
||||||
const raw = data.response?.trim() ?? '';
|
const raw = data.response?.trim() ?? '';
|
||||||
|
|
||||||
// Strip markdown fences defensively — small models sometimes add them anyway
|
// Strip markdown fences defensively — small models sometimes add them anyway
|
||||||
const clean = raw.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
|
const clean = raw.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
|
||||||
const entities = JSON.parse(clean);
|
const entities = JSON.parse(clean);
|
||||||
console.log('[entities] Parsed entities:', entities);
|
|
||||||
|
|
||||||
if (!Array.isArray(entities)) throw new Error('Response was not a JSON array');
|
if (!Array.isArray(entities)) throw new Error('Response was not a JSON array');
|
||||||
|
|
||||||
let saved = 0;
|
let saved = 0;
|
||||||
for (const { name, type, notes } of entities) {
|
for (const { name, type, notes } of entities) {
|
||||||
// Skip anything malformed or with an unrecognised type
|
|
||||||
if (!name || !type || !ENTITY_TYPES.includes(type)) continue;
|
if (!name || !type || !ENTITY_TYPES.includes(type)) continue;
|
||||||
upsertEntity(name, type, notes ?? null);
|
|
||||||
|
const entity = upsertEntity(name, type, notes ?? null);
|
||||||
|
|
||||||
|
// Embed and upsert to Qdrant fire-and-forget
|
||||||
|
embedEntity(entity)
|
||||||
|
.then(vector => semantic.upsertEntity(entity.id, vector, {
|
||||||
|
name: entity.name,
|
||||||
|
type: entity.type,
|
||||||
|
notes: entity.notes,
|
||||||
|
}))
|
||||||
|
.catch(err => console.warn(`[entities] Failed to embed entity "${entity.name}":`, err.message));
|
||||||
|
|
||||||
saved++;
|
saved++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user