model inference settings
This commit is contained in:
@@ -126,7 +126,7 @@ async function getRelevantEntities(userMessage) {
|
||||
}
|
||||
|
||||
async function chat(externalId, userMessage, options = {}) {
|
||||
-const { recentEpisodeLimit, semanticLimit, scoreThreshold, temperature} =
+const { recentEpisodeLimit, semanticLimit, scoreThreshold, temperature, repeatPenalty, topP, topK} =
|
||||
appSettings.load();
|
||||
// 1. Resolve or create session
|
||||
let session = await memory.getSessionByExternalId(externalId);
|
||||
@@ -187,7 +187,7 @@ async function chat(externalId, userMessage, options = {}) {
|
||||
);
|
||||
|
||||
// 5. Run inference
|
||||
-const result = await inference.complete(prompt, {...options, temperature});
+const result = await inference.complete(prompt, {...options, temperature, repeatPenalty, topP, topK});
|
||||
|
||||
// 6. Write episode back to memory
|
||||
memory
|
||||
@@ -217,7 +217,7 @@ async function chat(externalId, userMessage, options = {}) {
|
||||
|
||||
async function chatStream(externalId, userMessage, onChunk, options = {}) {
|
||||
try {
|
||||
-const { recentEpisodeLimit, semanticLimit, scoreThreshold, temperature } = appSettings.load();
+const { recentEpisodeLimit, semanticLimit, scoreThreshold, temperature, repeatPenalty, topP, topK } = appSettings.load();
|
||||
let session = await memory.getSessionByExternalId(externalId);
|
||||
if (!session) session = await memory.createSession(externalId);
|
||||
|
||||
@@ -270,7 +270,7 @@ async function chatStream(externalId, userMessage, onChunk, options = {}) {
|
||||
entities,
|
||||
userMessage,
|
||||
);
|
||||
-const res = await inference.completeStream(prompt, {...options, temperature});
+const res = await inference.completeStream(prompt, {...options, temperature, repeatPenalty, topP, topK});
|
||||
|
||||
let fullText = "";
|
||||
let model = "";
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
-const { getEnv, ORCHESTRATION } = require('@nexusai/shared');
+const { getEnv, ORCHESTRATION, INFERENCE_DEFAULTS } = require('@nexusai/shared');
|
||||
|
||||
const SETTINGS_PATH = path.join(__dirname, '../../data/settings.json');
|
||||
|
||||
@@ -9,7 +9,10 @@ const DEFAULTS = {
|
||||
semanticLimit: ORCHESTRATION.SEMANTIC_LIMIT,
|
||||
scoreThreshold: ORCHESTRATION.SCORE_THRESHOLD,
|
||||
modelsFolderPath: getEnv('MODELS_MANIFEST_PATH', '/mnt/nexus-models'),
|
||||
-temperature: ORCHESTRATION.TEMPERATURE
+temperature: INFERENCE_DEFAULTS.TEMPERATURE,
+repeatPenalty: INFERENCE_DEFAULTS.REPEAT_PENALTY,
+topP: INFERENCE_DEFAULTS.TOP_P,
+topK: INFERENCE_DEFAULTS.TOP_K
|
||||
};
|
||||
|
||||
function load() {
|
||||
|
||||
@@ -52,6 +52,27 @@ router.patch('/', (req, res) => {
|
||||
updates.temperature = val;
|
||||
}
|
||||
|
||||
+if (req.body.repeatPenalty !== undefined) {
+const val = Number(req.body.repeatPenalty);
+if (isNaN(val) || val < 1 || val > 2)
+return res.status(400).json({ error: 'repeatPenalty must be 1–2' });
+updates.repeatPenalty = val;
+}
+
+if (req.body.topP !== undefined) {
+const val = Number(req.body.topP);
+if (isNaN(val) || val < 0 || val > 1)
+return res.status(400).json({ error: 'topP must be 0–1' });
+updates.topP = val;
+}
+
+if (req.body.topK !== undefined) {
+const val = Number(req.body.topK);
+if (!Number.isInteger(val) || val < 1 || val > 100)
+return res.status(400).json({ error: 'topK must be 1–100' });
+updates.topK = val;
+}
|
||||
|
||||
res.json(settings.save(updates));
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user