Added semantic episode searching
This commit is contained in:
@@ -1,15 +1,28 @@
|
|||||||
const memory = require('../services/memory');
|
const memory = require('../services/memory');
|
||||||
const inference = require('../services/inference');
|
const inference = require('../services/inference');
|
||||||
|
const embedding = require('../services/embedding');
|
||||||
|
const qdrant = require('../services/qdrant');
|
||||||
|
|
||||||
const SYSTEM_PROMPT = `You are a helpful, context-aware AI assistant.
|
const SYSTEM_PROMPT = `You are a helpful, context-aware AI assistant.
|
||||||
You have access to memories of past conversations with the user.
|
You have access to memories of past conversations with the user.
|
||||||
Use them to provide consistent, personalised responses.`;
|
Use them to provide consistent, personalised responses.`;
|
||||||
|
|
||||||
const RECENT_EPISODE_LIMIT = 10; // Number of recent episodes to retrieve for context
|
const RECENT_EPISODE_LIMIT = 10; // Number of recent episodes to retrieve for context
|
||||||
|
const SEMANTIC_LIMIT = 5;
|
||||||
|
const SCORE_THRESHOLD = 0.75;
|
||||||
|
|
||||||
function buildPrompt(recentEpisodes, userMessage) {
|
function buildPrompt(recentEpisodes, semanticEpisodes, userMessage) {
|
||||||
const parts = [SYSTEM_PROMPT];
|
const parts = [SYSTEM_PROMPT];
|
||||||
|
|
||||||
|
if (semanticEpisodes.length > 0 )
|
||||||
|
{
|
||||||
|
parts.push('Here are some relevant memories from earlier conversations:')
|
||||||
|
for (const ep of semanticEpisodes) {
|
||||||
|
parts.push(`User: ${ep.user_message}\nAssistant: ${ep.ai_response}`);
|
||||||
|
}
|
||||||
|
parts.push('---')
|
||||||
|
}
|
||||||
|
|
||||||
if (recentEpisodes.length > 0) {
|
if (recentEpisodes.length > 0) {
|
||||||
parts.push(`Here are some relevant memories from your past conversations:`);
|
parts.push(`Here are some relevant memories from your past conversations:`);
|
||||||
for (const ep of recentEpisodes) {
|
for (const ep of recentEpisodes) {
|
||||||
@@ -24,26 +37,46 @@ function buildPrompt(recentEpisodes, userMessage) {
|
|||||||
return parts.join('\n');
|
return parts.join('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function getSemanticEpisodes(userMessage, sessionId, recentIds) {
|
||||||
|
try {
|
||||||
|
const vector = await embedding.embed(userMessage);
|
||||||
|
const results = await qdrant.searchEpisodes( vector, {
|
||||||
|
limit: SEMANTIC_LIMIT,
|
||||||
|
scoreThreshold: SCORE_THRESHOLD,
|
||||||
|
sessionId,
|
||||||
|
});
|
||||||
|
|
||||||
|
const fetched = await Promise.all(
|
||||||
|
results
|
||||||
|
.filter(r => !recentIds.has(r.id))
|
||||||
|
.map(r => memory.getEpisodeById(r.id))
|
||||||
|
);
|
||||||
|
return fetched.filter(Boolean);
|
||||||
|
} catch (err) {
|
||||||
|
console.warn(`[orchestration] Semantic search failed, continuing without: `, err.message);
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async function chat(externalId, userMessage, options = {}) {
|
async function chat(externalId, userMessage, options = {}) {
|
||||||
// 1. Resolve or create session
|
// 1. Resolve or create session
|
||||||
let session = await memory.getSessionByExternalId(externalId);
|
let session = await memory.getSessionByExternalId(externalId);
|
||||||
if (!session) {
|
if (!session) session = await memory.createSession(externalId);
|
||||||
session = await memory.createSession(externalId);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2. Fetch recent episodes for context
|
// 2. Fetch recent episodes for context
|
||||||
const recentEpisodes = await memory.getRecentEpisodes(
|
const recentEpisodes = await memory.getRecentEpisodes(session.id, RECENT_EPISODE_LIMIT );
|
||||||
session.id,
|
const recentIds = new Set(recentEpisodes.map(e => e.id));
|
||||||
RECENT_EPISODE_LIMIT
|
|
||||||
);
|
|
||||||
|
|
||||||
// 3. Assemble prompt
|
// 3. Semantic Search
|
||||||
const prompt = buildPrompt(recentEpisodes, userMessage);
|
const semanticEpisodes = await getSemanticEpisodes(userMessage, session.id, recentIds);
|
||||||
|
|
||||||
// 4. Run inference
|
// 4. Assemble prompt
|
||||||
|
const prompt = buildPrompt(recentEpisodes, semanticEpisodes, userMessage);
|
||||||
|
|
||||||
|
// 5. Run inference
|
||||||
const result = await inference.complete(prompt, options);
|
const result = await inference.complete(prompt, options);
|
||||||
|
|
||||||
// 5. Write episode back to memory
|
// 6. Write episode back to memory
|
||||||
memory.createEpisode(
|
memory.createEpisode(
|
||||||
session.id,
|
session.id,
|
||||||
userMessage,
|
userMessage,
|
||||||
@@ -51,7 +84,7 @@ async function chat(externalId, userMessage, options = {}) {
|
|||||||
(result.evalCount || 0) + (result.promptEvalCount || 0 )
|
(result.evalCount || 0) + (result.promptEvalCount || 0 )
|
||||||
).catch(err => console.error(`[orchestration] Failed to save episode`, err.message));
|
).catch(err => console.error(`[orchestration] Failed to save episode`, err.message));
|
||||||
|
|
||||||
// 6. Return response
|
// 7. Return response
|
||||||
return {
|
return {
|
||||||
sessionId: externalId,
|
sessionId: externalId,
|
||||||
response: result.text,
|
response: result.text,
|
||||||
|
|||||||
18
packages/orchestration-service/src/services/embedding.js
Normal file
18
packages/orchestration-service/src/services/embedding.js
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
const {getEnv, SERVICES } = require('@nexusai/shared')
|
||||||
|
|
||||||
|
const BASE_URL = getEnv('EMBEDDING_SERVICE_URL', SERVICES.EMBEDDING_URL);
|
||||||
|
|
||||||
|
async function embed(text) {
|
||||||
|
const res = await fetch(`${BASE_URL}/embed`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: { 'Content-Type': 'application/json'},
|
||||||
|
body: JSON.stringify({text}),
|
||||||
|
})
|
||||||
|
|
||||||
|
if (!res.ok) throw new Error(`Embedding service error: ${res.status}`);
|
||||||
|
|
||||||
|
const data = await res.json();
|
||||||
|
return data.embedding;
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = { embed };
|
||||||
@@ -40,9 +40,17 @@ async function createEpisode(sessionId, userMessage, aiResponse, tokenCount) {
|
|||||||
return res.json();
|
return res.json();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function getEpisodeById(episodeId) {
|
||||||
|
const res = await (`${BASE_URL}/episodes/${episodeId}`);
|
||||||
|
if (res.status === 404) return null;
|
||||||
|
if (!res.ok) throw new Error(`Failed to fetch episode: ${res.status}`);
|
||||||
|
return res.json();
|
||||||
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
getSessionByExternalId,
|
getSessionByExternalId,
|
||||||
createSession,
|
createSession,
|
||||||
getRecentEpisodes,
|
getRecentEpisodes,
|
||||||
createEpisode
|
createEpisode,
|
||||||
|
getEpisodeById
|
||||||
}
|
}
|
||||||
27
packages/orchestration-service/src/services/qdrant.js
Normal file
27
packages/orchestration-service/src/services/qdrant.js
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
const {getEnv, QDRANT, COLLECTIONS } = require('@nexusai/shared')
|
||||||
|
|
||||||
|
const BASE_URL = getEnv('QDrant_URL', QDRANT.DEFAULT_URL);
|
||||||
|
|
||||||
|
async function searchEpisodes( vector, {limit = 5, scoreThreshold = 0.75, sessionId } = {}) {
|
||||||
|
const body = {vector, limit, score_threshold: scoreThreshold, with_payload: true};
|
||||||
|
|
||||||
|
if (sessionId) {
|
||||||
|
body.filter = { must: [{key: 'sessionId', match: {value: sessionId} }] };
|
||||||
|
}
|
||||||
|
|
||||||
|
const res = await fetch (
|
||||||
|
`${BASE_URL}/collections/${COLLECTIONS.EPISODES}/points/search`,
|
||||||
|
{
|
||||||
|
method: 'POST',
|
||||||
|
headers: {'Content-Type': 'application/json'},
|
||||||
|
body: JSON.stringify(body)
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!res.ok) throw new Error(`QDrant error: ${res.status}`);
|
||||||
|
|
||||||
|
const data = await res.json();
|
||||||
|
return data.result;
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = { searchEpisodes };
|
||||||
Reference in New Issue
Block a user