Chat client cleanup and switch to llama.cpp, with network sharing of the models folder
This commit is contained in:
@@ -109,31 +109,35 @@ async function chatStream(externalId, userMessage, onChunk, options = {} ) {
|
||||
let tokenCount = 0;
|
||||
|
||||
// 5. Parse SSE chunks
|
||||
for await (const chunk of res.body){
|
||||
// Replace the current SSE parsing block in chatStream:
|
||||
for await (const chunk of res.body) {
|
||||
const lines = chunk.toString().split('\n');
|
||||
|
||||
for (const line of lines) {
|
||||
if (!line.startsWith('data: ')) continue;
|
||||
const raw = line.slice(6).trim();
|
||||
if (raw === '[DONE]') continue //stream closed sentinel
|
||||
if (raw === '[DONE]') continue;
|
||||
|
||||
try {
|
||||
const data = JSON.parse(raw);
|
||||
if (data.model) model = data.model
|
||||
|
||||
// llama.cpp provider shape: { response, done }
|
||||
if (data.response) {
|
||||
fullText += data.response;
|
||||
onChunk(data.response);
|
||||
}
|
||||
|
||||
if (data.done && data.eval_count !== undefined) {
|
||||
tokenCount = (data.eval_count || 0) + (data.prompt_eval_count || 0)
|
||||
}
|
||||
} catch {
|
||||
//partial chunk
|
||||
//skip and wait for next
|
||||
}
|
||||
// model comes through on done chunk from inference route
|
||||
if (data.model) model = data.model;
|
||||
|
||||
// token count — inference.js route sends this on the done chunk
|
||||
if (data.done && data.tokenCount !== undefined) {
|
||||
tokenCount = data.tokenCount;
|
||||
}
|
||||
|
||||
} catch {
|
||||
// partial chunk — skip
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user