fixed token count reading

2026-04-19 07:59:31 -07:00
parent 0619c4c7f3
commit 7824404319
3 changed files with 7 additions and 0 deletions
--- a/packages/inference-service/src/providers/llamacpp.js
+++ b/packages/inference-service/src/providers/llamacpp.js
@@ -88,6 +88,11 @@ async function* completeStream(prompt, options = {}) {
        if (delta) yield { response: delta, done: false };
    }
  }
+
+  console.log('[llamacpp] finalTokenCount:', finalTokenCount);
+yield { response: '', done: true, model: finalModel, tokenCount: finalTokenCount };
+
+
  yield { response: '', done: true, model: finalModel, tokenCount: finalTokenCount };
 }

--- a/packages/inference-service/src/routes/inference.js
+++ b/packages/inference-service/src/routes/inference.js
@@ -42,6 +42,7 @@ router.post('/complete/stream', async (req, res) => {
                // capture final metadata from the done signal
                lastModel  = chunk.model      ?? lastModel;
                tokenCount = chunk.tokenCount ?? tokenCount;
+                console.log('[inference router] tokenCount from chunk:', chunk.tokenCount, '→', tokenCount);
            }
        }