Chat client cleanup and switch to llama.cpp, with network sharing of the models folder

2026-04-09 04:13:21 -07:00
parent 541e664da1
commit 5c6e027fc1
15 changed files with 305 additions and 305 deletions
--- a/packages/inference-service/src/routes/inference.js
+++ b/packages/inference-service/src/routes/inference.js
@@ -24,22 +24,34 @@ router.post('/complete', async (req, res) => {
 router.post('/complete/stream', async (req, res) => {
    const { prompt, model, temperature } = req.body;

-    if (!prompt) {
-        return res.status(400).json({error: 'prompt is required'});
-    }
+    if (!prompt) return res.status(400).json({ error: 'prompt is required' });

    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');

    try {
-        for await (const chunk of completeStream(prompt, {model, temperature})) {
-            res.write(`data: ${JSON.stringify(chunk)}\n\n`);
+        let lastModel = model;
+        let tokenCount = 0;
+
+        for await (const chunk of completeStream(prompt, { model, temperature })) {
+            if (chunk.response) {
+                res.write(`data: ${JSON.stringify({ response: chunk.response })}\n\n`);
+            }
+            if (chunk.done) {
+                // capture final metadata from the done signal
+                lastModel  = chunk.model      ?? lastModel;
+                tokenCount = chunk.tokenCount ?? tokenCount;
+            }
        }
+
+        // Send a single done event with metadata after stream closes
+        res.write(`data: ${JSON.stringify({ done: true, model: lastModel, tokenCount })}\n\n`);
        res.write('data: [DONE]\n\n');
-    } catch (error) {
-        console.error('[Inference] Streaming error:', error.message);
-        res.write(`data: ${JSON.stringify({ error: error.message })}\n\n`);
+
+    } catch (err) {
+        console.error('[Inference] Streaming error:', err.message);
+        res.write(`data: ${JSON.stringify({ error: err.message })}\n\n`);
    } finally {
        res.end();
    }