diff --git a/packages/chat-client/src/components/SettingsView.jsx b/packages/chat-client/src/components/SettingsView.jsx index 34f29bc..187c015 100644 --- a/packages/chat-client/src/components/SettingsView.jsx +++ b/packages/chat-client/src/components/SettingsView.jsx @@ -264,6 +264,31 @@ function ModelsSection({ onNavigate }) { onSave={val => saveSetting('temperature', val)} saving={saving} /> + + saveSetting('repeatPenalty', val)} + saving={saving} + /> + saveSetting('topP', val)} + saving={saving} + /> + saveSetting('topK', val)} + saving={saving} + /> { - const { prompt, model, temperature, maxTokens } = req.body; + const { prompt, model, temperature, maxTokens, topP, topK, repeatPenalty } = req.body; if (!prompt) { return res.status(400).json({ error: 'prompt is required'}); } try { - const result = await complete (prompt, {model, temperature, maxTokens}); + const result = await complete (prompt, {model, temperature, maxTokens, topP, topK, repeatPenalty}); res.json(result); } catch (error) { console.error('[Inference] Completion error:', error.message); @@ -22,7 +22,7 @@ router.post('/complete', async (req, res) => { // Streaming completion endpoint - sends partial responses as they arrive router.post('/complete/stream', async (req, res) => { - const { prompt, model, temperature } = req.body; + const { prompt, model, temperature, topP, topK, repeatPenalty } = req.body; if (!prompt) return res.status(400).json({ error: 'prompt is required' }); @@ -34,7 +34,7 @@ router.post('/complete/stream', async (req, res) => { let lastModel = model; let tokenCount = 0; - for await (const chunk of completeStream(prompt, { model, temperature })) { + for await (const chunk of completeStream(prompt, { model, temperature, topP, topK, repeatPenalty })) { if (chunk.response) { res.write(`data: ${JSON.stringify({ response: chunk.response })}\n\n`); } diff --git a/packages/orchestration-service/src/chat/index.js b/packages/orchestration-service/src/chat/index.js index ddd8b36..14df81b 100644 --- a/packages/orchestration-service/src/chat/index.js +++ b/packages/orchestration-service/src/chat/index.js @@ -126,7 +126,7 @@ async function getRelevantEntities(userMessage) { } async function chat(externalId, userMessage, options = {}) { - const { recentEpisodeLimit, semanticLimit, scoreThreshold, temperature} = + const { recentEpisodeLimit, semanticLimit, scoreThreshold, temperature, repeatPenalty, topP, topK} = appSettings.load(); // 1. Resolve or create session let session = await memory.getSessionByExternalId(externalId); @@ -187,7 +187,7 @@ async function chat(externalId, userMessage, options = {}) { ); // 5. Run inference - const result = await inference.complete(prompt, {...options, temperature}); + const result = await inference.complete(prompt, {...options, temperature, repeatPenalty, topP, topK}); // 6. Write episode back to memory memory @@ -217,7 +217,7 @@ async function chat(externalId, userMessage, options = {}) { async function chatStream(externalId, userMessage, onChunk, options = {}) { try { - const { recentEpisodeLimit, semanticLimit, scoreThreshold, temperature } = appSettings.load(); + const { recentEpisodeLimit, semanticLimit, scoreThreshold, temperature, repeatPenalty, topP, topK } = appSettings.load(); let session = await memory.getSessionByExternalId(externalId); if (!session) session = await memory.createSession(externalId); @@ -270,7 +270,7 @@ async function chatStream(externalId, userMessage, onChunk, options = {}) { entities, userMessage, ); - const res = await inference.completeStream(prompt, {...options, temperature}); + const res = await inference.completeStream(prompt, {...options, temperature, repeatPenalty, topP, topK}); let fullText = ""; let model = ""; diff --git a/packages/orchestration-service/src/config/settings.js b/packages/orchestration-service/src/config/settings.js index 7f56336..045297d 100644 --- a/packages/orchestration-service/src/config/settings.js +++ b/packages/orchestration-service/src/config/settings.js @@ -1,6 +1,6 @@ const fs = require('fs'); const path = require('path'); -const { getEnv, ORCHESTRATION } = require('@nexusai/shared'); +const { getEnv, ORCHESTRATION, INFERENCE_DEFAULTS } = require('@nexusai/shared'); const SETTINGS_PATH = path.join(__dirname, '../../data/settings.json'); @@ -9,7 +9,10 @@ const DEFAULTS = { semanticLimit: ORCHESTRATION.SEMANTIC_LIMIT, scoreThreshold: ORCHESTRATION.SCORE_THRESHOLD, modelsFolderPath: getEnv('MODELS_MANIFEST_PATH', '/mnt/nexus-models'), - temperature: ORCHESTRATION.TEMPERATURE + temperature: INFERENCE_DEFAULTS.TEMPERATURE, + repeatPenalty: INFERENCE_DEFAULTS.REPEAT_PENALTY, + topP: INFERENCE_DEFAULTS.TOP_P, + topK: INFERENCE_DEFAULTS.TOP_K }; function load() { diff --git a/packages/orchestration-service/src/routes/settings.js b/packages/orchestration-service/src/routes/settings.js index 3a1055c..963a0a9 100644 --- a/packages/orchestration-service/src/routes/settings.js +++ b/packages/orchestration-service/src/routes/settings.js @@ -52,6 +52,27 @@ router.patch('/', (req, res) => { updates.temperature = val; } + if (req.body.repeatPenalty !== undefined) { + const val = Number(req.body.repeatPenalty); + if (isNaN(val) || val < 1 || val > 2) + return res.status(400).json({ error: 'repeatPenalty must be 1–2' }); + updates.repeatPenalty = val; +} + +if (req.body.topP !== undefined) { + const val = Number(req.body.topP); + if (isNaN(val) || val < 0 || val > 1) + return res.status(400).json({ error: 'topP must be 0–1' }); + updates.topP = val; +} + +if (req.body.topK !== undefined) { + const val = Number(req.body.topK); + if (!Number.isInteger(val) || val < 1 || val > 100) + return res.status(400).json({ error: 'topK must be 1–100' }); + updates.topK = val; +} + res.json(settings.save(updates)); });