adding in entity extraction layer with semantic search enabled

This commit is contained in:
Storme-bit
2026-04-17 06:01:21 -07:00
parent 0cad85d4a7
commit cef1803af6

View File

@@ -1,8 +1,10 @@
// Dependencies: the semantic (vector) store, shared helpers, and the local
// entity persistence function.
const { semantic } = require('../semantic');
const { getEnv, SERVICES, formatEpisodeText } = require('@nexusai/shared');
const { upsertEntity } = require('./index');

// Endpoint and model used for entity extraction. The URL is called at
// `/api/generate`, and errors from it are reported as coming from Ollama.
// NOTE(review): the second argument to getEnv is presumably the fallback used
// when the variable is unset — confirm against @nexusai/shared.
const EXTRACTION_URL = getEnv('EXTRACTION_URL', 'http://localhost:11434');
const EXTRACTION_MODEL = getEnv('EXTRACTION_MODEL', 'qwen2.5:3b');

// Base URL of the embedding service; `/embed` is appended when embedding entities.
const EMBEDDING_SERVICE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING_URL);

// Entity types accepted from the extraction model; entities with any other
// type are skipped before being stored.
const ENTITY_TYPES = ['person', 'place', 'project', 'technology', 'concept', 'organization'];
@@ -38,12 +40,22 @@ function buildExtractionPrompt(userMessage, aiResponse) {
].join('\n'); ].join('\n');
} }
/**
 * Requests an embedding for an entity from the embedding service.
 *
 * The entity's name, type and notes are combined into one descriptive string
 * and POSTed as JSON (`{ text }`) to `${EMBEDDING_SERVICE_URL}/embed`.
 *
 * @param {{ name: string, type: string, notes?: (string|null) }} entity
 *   Entity to embed; only `name`, `type` and `notes` are read.
 * @returns {Promise<Array>} The `embedding` array from the service response
 *   (presumably an array of numbers — confirm against the embedding service).
 * @throws {Error} If the service answers with a non-OK status, or if the
 *   response body does not contain a non-empty `embedding` array.
 */
async function embedEntity(entity) {
  // Combine name, type and notes into a single descriptive string for embedding;
  // when notes are null/undefined the name is repeated in their place.
  const text = `${entity.name} (${entity.type}): ${entity.notes ?? entity.name}`;

  const res = await fetch(`${EMBEDDING_SERVICE_URL}/embed`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ text }),
  });
  if (!res.ok) throw new Error(`Embedding service error: ${res.status}`);

  const data = await res.json();
  // Reject here instead of resolving to undefined: otherwise a malformed
  // response is passed on to the vector store as if it were a valid vector.
  if (!Array.isArray(data?.embedding) || data.embedding.length === 0) {
    throw new Error('Embedding service returned no embedding');
  }
  return data.embedding;
}
async function extractAndStoreEntities(userMessage, aiResponse) {
console.log('[entities] Extraction triggered') console.log('[entities] Extraction triggered')
try { try {
const res = await fetch(`${EXTRACTION_URL}/api/generate`, { const res = await fetch(`${EXTRACTION_URL}/api/generate`, {
@@ -60,26 +72,32 @@ async function extractAndStoreEntities(userMessage, aiResponse) {
}), }),
}); });
console.log('[entities] Ollama responded:', res.status);
if (!res.ok) throw new Error(`Ollama responded ${res.status}`); if (!res.ok) throw new Error(`Ollama responded ${res.status}`);
const data = await res.json(); const data = await res.json();
console.log('[entities] Raw response:', data.response?.slice(0, 200));
const raw = data.response?.trim() ?? ''; const raw = data.response?.trim() ?? '';
// Strip markdown fences defensively — small models sometimes add them anyway // Strip markdown fences defensively — small models sometimes add them anyway
const clean = raw.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim(); const clean = raw.replace(/^```(?:json)?\n?/, '').replace(/\n?```$/, '').trim();
const entities = JSON.parse(clean); const entities = JSON.parse(clean);
console.log('[entities] Parsed entities:', entities);
if (!Array.isArray(entities)) throw new Error('Response was not a JSON array'); if (!Array.isArray(entities)) throw new Error('Response was not a JSON array');
let saved = 0; let saved = 0;
for (const { name, type, notes } of entities) { for (const { name, type, notes } of entities) {
// Skip anything malformed or with an unrecognised type
if (!name || !type || !ENTITY_TYPES.includes(type)) continue; if (!name || !type || !ENTITY_TYPES.includes(type)) continue;
upsertEntity(name, type, notes ?? null);
const entity = upsertEntity(name, type, notes ?? null);
// Embed and upsert to Qdrant fire-and-forget
embedEntity(entity)
.then(vector => semantic.upsertEntity(entity.id, vector, {
name: entity.name,
type: entity.type,
notes: entity.notes,
}))
.catch(err => console.warn(`[entities] Failed to embed entity "${entity.name}":`, err.message));
saved++; saved++;
} }