model inference settings

This commit is contained in:
Storme-bit
2026-04-18 06:23:50 -07:00
parent 6935459428
commit 2d1f7176ff

View File

@@ -6,6 +6,7 @@ const appSettings = require('../config/settings');
const { getEnv, SERVICES } = require('@nexusai/shared'); const { getEnv, SERVICES } = require('@nexusai/shared');
const INFERENCE_SERVICE_URL = getEnv('INFERENCE_SERVICE_URL', SERVICES.INFERENCE_URL); const INFERENCE_SERVICE_URL = getEnv('INFERENCE_SERVICE_URL', SERVICES.INFERENCE_URL);
const LLAMA_URL = getEnv('LLAMA_SERVER_URL', LLAMACPP.DEFAULT_URL);
router.get('/', (req, res) => { router.get('/', (req, res) => {
const { modelsFolderPath } = appSettings.load(); const { modelsFolderPath } = appSettings.load();
@@ -44,10 +45,9 @@ router.get('/', (req, res) => {
}); });
router.get('/props', async (req, res) => { router.get('/props', async (req, res) => {
console.log('[models/props] fetching from:', `${INFERENCE_SERVICE_URL}/props`);
try { try {
const response = await fetch(`${INFERENCE_SERVICE_URL}/props`); const response = await fetch(`${LLAMA_URL}/props`);
if (!response.ok) throw new Error(`Inference service error: ${response.status}`); if (!response.ok) throw new Error(`llama-server error: ${response.status}`);
const data = await response.json(); const data = await response.json();
res.json({ res.json({
contextWindow: data.n_ctx, contextWindow: data.n_ctx,
@@ -55,7 +55,7 @@ router.get('/props', async (req, res) => {
}); });
} catch (err) { } catch (err) {
console.error('[models/props]', err.message); console.error('[models/props]', err.message);
res.status(503).json({ error: 'Could not reach inference service' }); res.status(503).json({ error: 'Could not reach llama-server' });
} }
}); });