added auto-naming on first message

2026-04-13 20:04:36 -07:00
parent 0b9fedcd6e
commit 4e0f7d33aa
1 changed files with 41 additions and 62 deletions
--- a/packages/orchestration-service/src/chat/index.js
+++ b/packages/orchestration-service/src/chat/index.js
@@ -32,6 +32,36 @@ function buildPrompt(recentEpisodes, semanticEpisodes, userMessage) {
    return parts.join('\n');
 }

+// Assembles the title-generation prompt for a conversation: the fixed
+// instructions, a blank line, the first user/assistant exchange, another
+// blank line, and a trailing 'Title:' cue, all joined with newlines.
+function buildNamingPrompt(userMessage, aiResponse) {
+    const instructions = [
+        'Your task is to generate a short title for a conversation based on its first exchange.',
+        'Rules: maximum 5 words, no punctuation, no quotes, plain text only.',
+        'Examples: "Setting up a Raspberry Pi", "Help with Python list comprehension", "Planning a trip to Japan"',
+    ];
+    const exchange = [`User: ${userMessage}`, `Assistant: ${aiResponse}`];
+    return [...instructions, '', ...exchange, '', 'Title:'].join('\n');
+}
+
+async function autoNameSession(externalId, userMessage, aiResponse) {
+    // Best-effort: ask the model for a short title and store it on the session; failures are logged, never thrown.
+    try {
+        const prompt = buildNamingPrompt(userMessage, aiResponse);
+        // A title needs only a handful of tokens; a low temperature keeps naming consistent.
+        const result = await inference.complete(prompt, { maxTokens: 20, temperature: 0.3 });
+        const firstLine = (result.text ?? '').split('\n').map((line) => line.trim()).find(Boolean) ?? ''; // model may run past the title line
+        const name = firstLine.replace(/^["']+|["']+$/g, '').trim(); // strip any quotes the model wraps around the title
+        if (name) {
+            await memory.updateSession(externalId, { name });
+            console.log(`[orchestration] Auto-named session "${externalId}": "${name}"`);
+        }
+    } catch (err) {
+        console.warn('[orchestration] Auto-naming failed (non-critical):', err?.message ?? err);
+    }
+}
+
 async function getSemanticEpisodes(userMessage, sessionId, recentIds) {
    try {
        const vector = await embedding.embed(userMessage);
@@ -79,7 +109,13 @@ async function chat(externalId, userMessage, options = {}) {
        (result.evalCount || 0) + (result.promptEvalCount || 0 )
    ).catch(err => console.error(`[orchestration] Failed to save episode`, err.message));

-    // 7. Return response
+    // 7. Auto-name on first message
+    if (isFirstMessage && !session.name) {
+        autoNameSession(externalId, userMessage, result.text)
+            .catch(() => {}); // already logged inside autoNameSession
+    }
+
+    // 8. Return response
    return {
        sessionId: externalId,
        response: result.text,
@@ -87,68 +123,7 @@ async function chat(externalId, userMessage, options = {}) {
        tokenCount: (result.evalCount || 0 ) + (result.promptEvalCount || 0 ),
    };
 }
-/*
-async function chatStream(externalId, userMessage, onChunk, options = {} ) {
-    // 1. Resolve or create session
-    let session = await memory.getSessionByExternalId(externalId);
-    if (!session) session = await memory.createSession(externalId);

-    // 2. Context assembly
-    const recentEpisodes = await memory.getRecentEpisodes(session.id, RECENT_EPISODE_LIMIT);
-    const recentIds = new Set(recentEpisodes.map(e => e.id));
-    const semanticEpisodes = await getSemanticEpisodes(userMessage, session.id, recentIds)
-
-    // 3. Assemble Prompt
-    const prompt = buildPrompt(recentEpisodes, semanticEpisodes, userMessage);
-
-    // 4. Open stream to inference service
-    const res = await inference.completeStream(prompt, options);
-
-    let fullText = '';
-    let model = '';
-    let tokenCount = 0;
-
-    // 5. Parse SSE chunks
-    // Replace the current SSE parsing block in chatStream:
-    for await (const chunk of res.body) {
-        const lines = chunk.toString().split('\n');
-
-        for (const line of lines) {
-            if (!line.startsWith('data: ')) continue;
-            const raw = line.slice(6).trim();
-            if (raw === '[DONE]') continue;
-
-            try {
-                const data = JSON.parse(raw);
-
-                // llama.cpp provider shape: { response, done }
-                if (data.response) {
-                    fullText += data.response;
-                    onChunk(data.response);
-                }
-
-                // model comes through on done chunk from inference route
-                if (data.model) model = data.model;
-
-                // token count — inference.js route sends this on the done chunk
-                if (data.done && data.tokenCount !== undefined) {
-                    tokenCount = data.tokenCount;
-                }
-
-            } catch {
-                // partial chunk — skip
-            }
-        }
-    }
-
-    // 6. Write Complete episode to memory
-    if(fullText && fullText.trim()){
-        memory.createEpisode(session.id, userMessage, fullText, tokenCount)
-            .catch(err => console.error('[orchestration] Failed to save streamed episode:', err.message))
-    }
-    return {model, tokenCount};        
-}
-*/
 async function chatStream(externalId, userMessage, onChunk, options = {}) {
  let session = await memory.getSessionByExternalId(externalId);
  if (!session) session = await memory.createSession(externalId);
@@ -212,6 +187,10 @@ async function chatStream(externalId, userMessage, onChunk, options = {}) {
    console.warn('[orchestration] Stream finished with no assistant text; episode not saved');
  }

+  if (isFirstMessage && !session.name) {
+    autoNameSession(externalId, userMessage, fullText).catch(() => {});
+  }
+
  return { model, tokenCount };
 }
 module.exports = { chat, chatStream };