diff --git a/packages/inference-service/src/providers/llamacpp.js b/packages/inference-service/src/providers/llamacpp.js
index 9503f24..69eb6ac 100644
--- a/packages/inference-service/src/providers/llamacpp.js
+++ b/packages/inference-service/src/providers/llamacpp.js
@@ -73,16 +73,19 @@ async function* completeStream(prompt, options = {}) {
       .filter((l) => l.startsWith("data: ") && l !== "data: [DONE]");
 
     for (const line of lines) {
-      const json = JSON.parse(line.slice(6));
-      const delta = json.choices?.[0]?.delta?.content;
+        const json = JSON.parse(line.slice(6));
+        const delta = json.choices?.[0]?.delta?.content;
 
-      // Capture final metadata from the stop chunk
-      if (json.choices?.[0]?.finish_reason === "stop") {
-        finalModel = json.model ?? finalModel;
-        finalTokenCount = json.usage?.completion_tokens ?? finalTokenCount;
-      }
+        if (json.choices?.[0]?.finish_reason === 'stop') {
+            finalModel = json.model ?? finalModel;
+        }
 
-      if (delta) yield { response: delta, done: false };
+        // usage arrives in a separate final chunk with an empty choices array; sum prompt + completion tokens
+        if (json.usage) {
+            finalTokenCount = (json.usage.completion_tokens ?? 0) + (json.usage.prompt_tokens ?? 0);
+        }
+
+        if (delta) yield { response: delta, done: false };
     }
   }
   yield { response: '', done: true, model: finalModel, tokenCount: finalTokenCount };