model inference settings
This commit is contained in:
@@ -6,6 +6,7 @@ const appSettings = require('../config/settings');
|
|||||||
|
|
||||||
const { getEnv, SERVICES } = require('@nexusai/shared');
|
const { getEnv, SERVICES } = require('@nexusai/shared');
|
||||||
const INFERENCE_SERVICE_URL = getEnv('INFERENCE_SERVICE_URL', SERVICES.INFERENCE_URL);
|
const INFERENCE_SERVICE_URL = getEnv('INFERENCE_SERVICE_URL', SERVICES.INFERENCE_URL);
|
||||||
|
const LLAMA_URL = getEnv('LLAMA_SERVER_URL', LLAMACPP.DEFAULT_URL);
|
||||||
|
|
||||||
router.get('/', (req, res) => {
|
router.get('/', (req, res) => {
|
||||||
const { modelsFolderPath } = appSettings.load();
|
const { modelsFolderPath } = appSettings.load();
|
||||||
@@ -44,10 +45,9 @@ router.get('/', (req, res) => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
router.get('/props', async (req, res) => {
|
router.get('/props', async (req, res) => {
|
||||||
console.log('[models/props] fetching from:', `${INFERENCE_SERVICE_URL}/props`);
|
|
||||||
try {
|
try {
|
||||||
const response = await fetch(`${INFERENCE_SERVICE_URL}/props`);
|
const response = await fetch(`${LLAMA_URL}/props`);
|
||||||
if (!response.ok) throw new Error(`Inference service error: ${response.status}`);
|
if (!response.ok) throw new Error(`llama-server error: ${response.status}`);
|
||||||
const data = await response.json();
|
const data = await response.json();
|
||||||
res.json({
|
res.json({
|
||||||
contextWindow: data.n_ctx,
|
contextWindow: data.n_ctx,
|
||||||
@@ -55,7 +55,7 @@ router.get('/props', async (req, res) => {
|
|||||||
});
|
});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error('[models/props]', err.message);
|
console.error('[models/props]', err.message);
|
||||||
res.status(503).json({ error: 'Could not reach inference service' });
|
res.status(503).json({ error: 'Could not reach llama-server' });
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user