Fix token count reading: take usage from the separate final stream chunk

2026-04-19 07:50:10 -07:00
parent 225728e531
commit 0619c4c7f3
1 changed file with 11 additions and 8 deletions
--- a/packages/inference-service/src/providers/llamacpp.js
+++ b/packages/inference-service/src/providers/llamacpp.js
@@ -73,16 +73,19 @@ async function* completeStream(prompt, options = {}) {
      .filter((l) => l.startsWith("data: ") && l !== "data: [DONE]");
    for (const line of lines) {
-      const json = JSON.parse(line.slice(6));
+        const json = JSON.parse(line.slice(6));
-      const delta = json.choices?.[0]?.delta?.content;
+        const delta = json.choices?.[0]?.delta?.content;
-      // Capture final metadata from the stop chunk
+        if (json.choices?.[0]?.finish_reason === 'stop') {
-      if (json.choices?.[0]?.finish_reason === "stop") {
+            finalModel = json.model ?? finalModel;
-        finalModel = json.model ?? finalModel;
+        }
        finalTokenCount = json.usage?.completion_tokens ?? finalTokenCount;
      }
-      if (delta) yield { response: delta, done: false };
+        // usage arrives in a separate final chunk with empty choices array
        if (json.usage) {
            finalTokenCount = (json.usage.completion_tokens ?? 0) + (json.usage.prompt_tokens ?? 0);
        }
        if (delta) yield { response: delta, done: false };
    }
  }
  yield { response: '', done: true, model: finalModel, tokenCount: finalTokenCount };