fixed token count reading
This commit is contained in:
@@ -88,6 +88,11 @@ async function* completeStream(prompt, options = {}) {
|
||||
if (delta) yield { response: delta, done: false };
|
||||
}
|
||||
}
|
||||
|
||||
console.log('[llamacpp] finalTokenCount:', finalTokenCount);
|
||||
yield { response: '', done: true, model: finalModel, tokenCount: finalTokenCount };
|
||||
|
||||
|
||||
yield { response: '', done: true, model: finalModel, tokenCount: finalTokenCount };
|
||||
}
|
||||
|
||||
|
||||
@@ -42,6 +42,7 @@ router.post('/complete/stream', async (req, res) => {
|
||||
// capture final metadata from the done signal
|
||||
lastModel = chunk.model ?? lastModel;
|
||||
tokenCount = chunk.tokenCount ?? tokenCount;
|
||||
console.log('[inference router] tokenCount from chunk:', chunk.tokenCount, '→', tokenCount);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user