fixed token count reading

2026-04-19 07:59:31 -07:00
parent 0619c4c7f3
commit 7824404319
3 changed files with 7 additions and 0 deletions
--- a/packages/inference-service/src/providers/llamacpp.js
+++ b/packages/inference-service/src/providers/llamacpp.js
@@ -88,6 +88,11 @@ async function* completeStream(prompt, options = {}) {
        if (delta) yield { response: delta, done: false };
    }
  }
+
+  console.log('[llamacpp] finalTokenCount:', finalTokenCount);
+yield { response: '', done: true, model: finalModel, tokenCount: finalTokenCount };
+
+
  yield { response: '', done: true, model: finalModel, tokenCount: finalTokenCount };
 }

--- a/packages/inference-service/src/routes/inference.js
+++ b/packages/inference-service/src/routes/inference.js
@@ -42,6 +42,7 @@ router.post('/complete/stream', async (req, res) => {
                // capture final metadata from the done signal
                lastModel  = chunk.model      ?? lastModel;
                tokenCount = chunk.tokenCount ?? tokenCount;
+                console.log('[inference router] tokenCount from chunk:', chunk.tokenCount, '→', tokenCount);
            }
        }

--- a/packages/orchestration-service/src/chat/index.js
+++ b/packages/orchestration-service/src/chat/index.js
@@ -317,6 +317,7 @@ async function chatStream(externalId, userMessage, onChunk, options = {}) {
    console.log("[orchestration] final streamed text length:", fullText.length);

    if (fullText.trim()) {
+      console.log('[chat] tokenCount before save:', tokenCount);
      await memory.createEpisode(session.id, userMessage, fullText, tokenCount, session.project_id ?? null);
      const allEpisodes = await memory.getRecentEpisodes(session.id, 9999);
      triggerSummary(session, allEpisodes);