From 782440431942eea037cb80939caa7727418e7d12 Mon Sep 17 00:00:00 2001 From: Storme-bit Date: Sun, 19 Apr 2026 07:59:31 -0700 Subject: [PATCH] fixed token count reading --- packages/inference-service/src/providers/llamacpp.js | 5 +++++ packages/inference-service/src/routes/inference.js | 1 + packages/orchestration-service/src/chat/index.js | 1 + 3 files changed, 7 insertions(+) diff --git a/packages/inference-service/src/providers/llamacpp.js b/packages/inference-service/src/providers/llamacpp.js index 69eb6ac..46db8e1 100644 --- a/packages/inference-service/src/providers/llamacpp.js +++ b/packages/inference-service/src/providers/llamacpp.js @@ -88,6 +88,11 @@ async function* completeStream(prompt, options = {}) { if (delta) yield { response: delta, done: false }; } } + + console.log('[llamacpp] finalTokenCount:', finalTokenCount); +yield { response: '', done: true, model: finalModel, tokenCount: finalTokenCount }; + + yield { response: '', done: true, model: finalModel, tokenCount: finalTokenCount }; } diff --git a/packages/inference-service/src/routes/inference.js b/packages/inference-service/src/routes/inference.js index 4b23c52..d850750 100644 --- a/packages/inference-service/src/routes/inference.js +++ b/packages/inference-service/src/routes/inference.js @@ -42,6 +42,7 @@ router.post('/complete/stream', async (req, res) => { // capture final metadata from the done signal lastModel = chunk.model ?? lastModel; tokenCount = chunk.tokenCount ?? tokenCount; + console.log('[inference router] tokenCount from chunk:', chunk.tokenCount, '→', tokenCount); } } diff --git a/packages/orchestration-service/src/chat/index.js b/packages/orchestration-service/src/chat/index.js index e525315..7707124 100644 --- a/packages/orchestration-service/src/chat/index.js +++ b/packages/orchestration-service/src/chat/index.js @@ -317,6 +317,7 @@ async function chatStream(externalId, userMessage, onChunk, options = {}) { console.log("[orchestration] final streamed text length:", fullText.length); if (fullText.trim()) { + console.log('[chat] tokenCount before save:', tokenCount); await memory.createEpisode(session.id, userMessage, fullText, tokenCount, session.project_id ?? null); const allEpisodes = await memory.getRecentEpisodes(session.id, 9999); triggerSummary(session, allEpisodes);