Chat client cleanup and switch to llama.cpp, with network sharing of the models folder
This commit is contained in:
@@ -109,31 +109,35 @@ async function chatStream(externalId, userMessage, onChunk, options = {} ) {
|
||||
let tokenCount = 0;
|
||||
|
||||
// 5. Parse SSE chunks
|
||||
for await (const chunk of res.body){
|
||||
// Replace the current SSE parsing block in chatStream:
|
||||
for await (const chunk of res.body) {
|
||||
const lines = chunk.toString().split('\n');
|
||||
|
||||
for (const line of lines) {
|
||||
if (!line.startsWith('data: ')) continue;
|
||||
const raw = line.slice(6).trim();
|
||||
if (raw === '[DONE]') continue //stream closed sentinel
|
||||
if (raw === '[DONE]') continue;
|
||||
|
||||
try {
|
||||
const data = JSON.parse(raw);
|
||||
if (data.model) model = data.model
|
||||
|
||||
// llama.cpp provider shape: { response, done }
|
||||
if (data.response) {
|
||||
fullText += data.response;
|
||||
onChunk(data.response);
|
||||
}
|
||||
|
||||
if (data.done && data.eval_count !== undefined) {
|
||||
tokenCount = (data.eval_count || 0) + (data.prompt_eval_count || 0)
|
||||
}
|
||||
} catch {
|
||||
//partial chunk
|
||||
//skip and wait for next
|
||||
}
|
||||
// model comes through on done chunk from inference route
|
||||
if (data.model) model = data.model;
|
||||
|
||||
// token count — inference.js route sends this on the done chunk
|
||||
if (data.done && data.tokenCount !== undefined) {
|
||||
tokenCount = data.tokenCount;
|
||||
}
|
||||
|
||||
} catch {
|
||||
// partial chunk — skip
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ const express = require('express');
|
||||
const {getEnv, PORTS, SERVICES, ORCHESTRATION} = require('@nexusai/shared');
|
||||
const chatRouter = require('./routes/chat');
|
||||
const sessionsRouter = require('./routes/sessions');
|
||||
const modelsRouter = require('./routes/models')
|
||||
const cors = require('cors');
|
||||
|
||||
const app = express();
|
||||
@@ -35,6 +36,7 @@ app.get('/health', (req, res) => {
|
||||
|
||||
app.use('/chat', chatRouter);
|
||||
app.use('/sessions', sessionsRouter);
|
||||
app.use('/models', modelsRouter);
|
||||
|
||||
/******* Start the server ************/
|
||||
app.listen(PORT, () => {
|
||||
|
||||
@@ -36,10 +36,14 @@ router.post('/stream', async (req, res) => {
|
||||
res.flushHeaders();
|
||||
|
||||
try {
|
||||
await chatStream(sessionId, message, (delta) => {
|
||||
res.write(`data: ${JSON.stringify({ text: delta})}\n\n`)
|
||||
})
|
||||
res.write(`data: ${JSON.stringify({done: true})}\n\n`);
|
||||
const { model, tokenCount } = await chatStream(
|
||||
sessionId,
|
||||
message,
|
||||
(delta) => { res.write(`data: ${JSON.stringify({ text: delta })}\n\n`) },
|
||||
{ model: req.body.model, temperature: req.body.temperature }
|
||||
);
|
||||
|
||||
res.write(`data: ${JSON.stringify({ done: true, model, tokenCount })}\n\n`);
|
||||
} catch (err) {
|
||||
res.write(`data: ${JSON.stringify({error: err.message})}\n\n`);
|
||||
} finally {
|
||||
|
||||
21
packages/orchestration-service/src/routes/models.js
Normal file
21
packages/orchestration-service/src/routes/models.js
Normal file
@@ -0,0 +1,21 @@
|
||||
// routes/models.js
|
||||
const express = require('express');
|
||||
const router = express.Router();
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const {getEnv} = require('@nexusai/shared');
|
||||
|
||||
const MODELS_PATH = getEnv('MODELS_MANIFEST_PATH', path.join(__dirname, '../models.json'));
|
||||
|
||||
// GET / — respond with the models manifest as JSON.
//
// Reads the file at MODELS_PATH on every request, parses it as JSON and sends
// the parsed value back unchanged. The read is asynchronous so file I/O does
// not block the event loop while other requests are in flight.
//
// On any read or parse failure the error message is logged and the client
// receives a 500 with a generic error body (the underlying error is not
// exposed to the caller).
router.get('/', async (req, res) => {
  try {
    const raw = await fs.promises.readFile(MODELS_PATH, 'utf8');
    const models = JSON.parse(raw);
    res.json(models);
  } catch (err) {
    // Everything is handled here, so no rejection escapes the async handler.
    console.error('[models] Failed to read manifest:', err.message);
    res.status(500).json({ error: 'Could not load models manifest' });
  }
});
|
||||
|
||||
module.exports = router;
|
||||
Reference in New Issue
Block a user