From 2d1f7176ff89287cbe239fbff5f8b1d9aaa6a116 Mon Sep 17 00:00:00 2001
From: Storme-bit
Date: Sat, 18 Apr 2026 06:23:50 -0700
Subject: [PATCH] model inference settings

---
Review notes (this commentary section is ignored by git am):

- GET /models/props now queries llama-server directly through LLAMA_URL
  (env LLAMA_SERVER_URL, default LLAMACPP.DEFAULT_URL) instead of going
  through INFERENCE_SERVICE_URL, and the error messages name llama-server.
- The new LLAMA_URL constant reads LLAMACPP.DEFAULT_URL, but the
  '@nexusai/shared' destructure in the hunk context only bound getEnv and
  SERVICES. LLAMACPP is added to that destructure so the module does not
  throw a ReferenceError at load time.
  Assumes '@nexusai/shared' exports LLAMACPP -- TODO confirm.
- NOTE(review): the handler still reads data.n_ctx from the /props payload.
  Upstream llama-server documents n_ctx under default_generation_settings;
  confirm contextWindow is still populated when bypassing the inference
  service.
- This patch was reflowed from a single collapsed line. Leading indentation
  inside the hunks is reconstructed (2 spaces assumed), and the index blob
  hash below predates the import change.

 packages/orchestration-service/src/routes/models.js | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/packages/orchestration-service/src/routes/models.js b/packages/orchestration-service/src/routes/models.js
index c9e831d..c6e90a4 100644
--- a/packages/orchestration-service/src/routes/models.js
+++ b/packages/orchestration-service/src/routes/models.js
@@ -6,6 +6,7 @@ const appSettings = require('../config/settings');
-const { getEnv, SERVICES } = require('@nexusai/shared');
+const { getEnv, SERVICES, LLAMACPP } = require('@nexusai/shared');
 
 const INFERENCE_SERVICE_URL = getEnv('INFERENCE_SERVICE_URL', SERVICES.INFERENCE_URL);
+const LLAMA_URL = getEnv('LLAMA_SERVER_URL', LLAMACPP.DEFAULT_URL);
 
 router.get('/', (req, res) => {
   const { modelsFolderPath } = appSettings.load();
@@ -44,10 +45,9 @@ router.get('/', (req, res) => {
 });
 
 router.get('/props', async (req, res) => {
-  console.log('[models/props] fetching from:', `${INFERENCE_SERVICE_URL}/props`);
   try {
-    const response = await fetch(`${INFERENCE_SERVICE_URL}/props`);
-    if (!response.ok) throw new Error(`Inference service error: ${response.status}`);
+    const response = await fetch(`${LLAMA_URL}/props`);
+    if (!response.ok) throw new Error(`llama-server error: ${response.status}`);
     const data = await response.json();
     res.json({
       contextWindow: data.n_ctx,
@@ -55,7 +55,7 @@ router.get('/props', async (req, res) => {
     });
   } catch (err) {
     console.error('[models/props]', err.message);
-    res.status(503).json({ error: 'Could not reach inference service' });
+    res.status(503).json({ error: 'Could not reach llama-server' });
   }
 });
