chat client clean up and switch to llama.cpp with models folder network sharing

2026-04-09 04:13:21 -07:00
parent 541e664da1
commit 5c6e027fc1
15 changed files with 305 additions and 305 deletions
--- a/packages/orchestration-service/src/chat/index.js
+++ b/packages/orchestration-service/src/chat/index.js
@@ -109,31 +109,35 @@ async function chatStream(externalId, userMessage, onChunk, options = {} ) {
    let tokenCount = 0;

    // 5. Parse SSE chunks
-    for await (const chunk of res.body){
+    // Each body chunk may hold several newline-separated SSE lines; only `data: ` lines are parsed.
+    for await (const chunk of res.body) {
        const lines = chunk.toString().split('\n');

        for (const line of lines) {
            if (!line.startsWith('data: ')) continue;
            const raw = line.slice(6).trim();
-            if (raw === '[DONE]') continue //stream closed sentinel
+            if (raw === '[DONE]') continue;

            try {
                const data = JSON.parse(raw);
-                if (data.model) model = data.model

+                // llama.cpp provider shape: { response, done }
                if (data.response) {
                    fullText += data.response;
                    onChunk(data.response);
                }

-                if (data.done && data.eval_count !== undefined) {
-                    tokenCount = (data.eval_count || 0) + (data.prompt_eval_count || 0)
-                }
-            } catch {
-                //partial chunk
-                //skip and wait for next
-            }
+                // model comes through on done chunk from inference route
+                if (data.model) model = data.model;

+                // token count — inference.js route sends this on the done chunk
+                if (data.done && data.tokenCount !== undefined) {
+                    tokenCount = data.tokenCount;
+                }
+
+            } catch {
+                // partial chunk — skip
+            }
        }
    }

--- a/packages/orchestration-service/src/index.js
+++ b/packages/orchestration-service/src/index.js
@@ -3,6 +3,7 @@ const express = require('express');
 const {getEnv, PORTS, SERVICES, ORCHESTRATION} = require('@nexusai/shared');
 const chatRouter = require('./routes/chat');
 const sessionsRouter = require('./routes/sessions');
+const modelsRouter = require('./routes/models')
 const cors = require('cors');

 const app = express();
@@ -35,6 +36,7 @@ app.get('/health', (req, res) => {

 app.use('/chat', chatRouter);
 app.use('/sessions', sessionsRouter);
+app.use('/models', modelsRouter);

 /******* Start the server ************/
 app.listen(PORT, () => {
--- a/packages/orchestration-service/src/routes/chat.js
+++ b/packages/orchestration-service/src/routes/chat.js
@@ -36,10 +36,14 @@ router.post('/stream', async (req, res) => {
    res.flushHeaders();

    try {
-        await chatStream(sessionId, message, (delta) => {
-            res.write(`data: ${JSON.stringify({ text: delta})}\n\n`)
-        })
-        res.write(`data: ${JSON.stringify({done: true})}\n\n`);
+        const { model, tokenCount } = await chatStream(
+            sessionId, 
+            message, 
+            (delta) => { res.write(`data: ${JSON.stringify({ text: delta })}\n\n`) },
+            { model: req.body.model, temperature: req.body.temperature }
+        );
+
+        res.write(`data: ${JSON.stringify({ done: true, model, tokenCount })}\n\n`);
    } catch (err) {
        res.write(`data: ${JSON.stringify({error: err.message})}\n\n`);
    } finally {
--- a/packages/orchestration-service/src/routes/models.js
+++ b/packages/orchestration-service/src/routes/models.js
@@ -0,0 +1,21 @@
+// routes/models.js
+const express = require('express');
+const router = express.Router();
+const fs = require('fs');
+const path = require('path');
+const {getEnv} = require('@nexusai/shared');
+
+const MODELS_PATH = getEnv('MODELS_MANIFEST_PATH', path.join(__dirname, '../models.json'));
+
+// GET /models — responds with the parsed JSON manifest at MODELS_PATH, or a 500 JSON error.
+router.get('/', async (req, res) => {
+  try {
+    const raw = await fs.promises.readFile(MODELS_PATH, 'utf8'); // async read: a sync read would stall other requests
+    res.json(JSON.parse(raw));
+  } catch (err) { // a failed read and invalid JSON both land here
+    console.error('[models] Failed to read manifest:', err.message);
+    res.status(500).json({ error: 'Could not load models manifest' });
+  }
+});
+
+module.exports = router;