model temperature settings

This commit is contained in:
Storme-bit
2026-04-18 02:40:31 -07:00
parent 9950ea3b62
commit 8bd4836cd7
4 changed files with 21 additions and 5 deletions

View File

@@ -241,6 +241,7 @@ function ServiceHealth() {
function ModelsSection({ onNavigate }) { function ModelsSection({ onNavigate }) {
const { models, selectedModel, setSelectedModel } = useModels(); const { models, selectedModel, setSelectedModel } = useModels();
const [selectedInfo, setSelectedInfo] = useState(null); const [selectedInfo, setSelectedInfo] = useState(null);
const {settings, saveSetting, saving} = useSettings();
// Sync info panel when selection changes // Sync info panel when selection changes
useEffect(() => { useEffect(() => {
@@ -255,6 +256,19 @@ function ModelsSection({ onNavigate }) {
description="Path to folder containing .gguf files" description="Path to folder containing .gguf files"
action={<ModelsFolderSetting />} action={<ModelsFolderSetting />}
/> />
<SettingsRow
label="Temperature"
description="Response randomness — lower is more focused, higher is more creative (0–2)"
action={
<NumberSetting
label=""
value={settings?.temperature}
min={0} max={2} step={0.05}
onSave={val => saveSetting('temperature', val)}
saving={saving}
/>
}
/>
<SettingsRow <SettingsRow
label="Active Model" label="Active Model"
description="Model used for inference" description="Model used for inference"

View File

@@ -126,7 +126,7 @@ async function getRelevantEntities(userMessage) {
} }
async function chat(externalId, userMessage, options = {}) { async function chat(externalId, userMessage, options = {}) {
const { recentEpisodeLimit, semanticLimit, scoreThreshold } = const { recentEpisodeLimit, semanticLimit, scoreThreshold, temperature} =
appSettings.load(); appSettings.load();
// 1. Resolve or create session // 1. Resolve or create session
let session = await memory.getSessionByExternalId(externalId); let session = await memory.getSessionByExternalId(externalId);
@@ -187,7 +187,7 @@ async function chat(externalId, userMessage, options = {}) {
); );
// 5. Run inference // 5. Run inference
const result = await inference.complete(prompt, options); const result = await inference.complete(prompt, {...options, temperature});
// 6. Write episode back to memory // 6. Write episode back to memory
memory memory
@@ -217,7 +217,7 @@ async function chat(externalId, userMessage, options = {}) {
async function chatStream(externalId, userMessage, onChunk, options = {}) { async function chatStream(externalId, userMessage, onChunk, options = {}) {
try { try {
const { recentEpisodeLimit, semanticLimit, scoreThreshold } = appSettings.load(); const { recentEpisodeLimit, semanticLimit, scoreThreshold, temperature } = appSettings.load();
let session = await memory.getSessionByExternalId(externalId); let session = await memory.getSessionByExternalId(externalId);
if (!session) session = await memory.createSession(externalId); if (!session) session = await memory.createSession(externalId);
@@ -270,7 +270,7 @@ async function chatStream(externalId, userMessage, onChunk, options = {}) {
entities, entities,
userMessage, userMessage,
); );
const res = await inference.completeStream(prompt, options); const res = await inference.completeStream(prompt, {...options, temperature});
let fullText = ""; let fullText = "";
let model = ""; let model = "";

View File

@@ -8,7 +8,8 @@ const DEFAULTS = {
recentEpisodeLimit: ORCHESTRATION.RECENT_EPISODE_LIMIT, recentEpisodeLimit: ORCHESTRATION.RECENT_EPISODE_LIMIT,
semanticLimit: ORCHESTRATION.SEMANTIC_LIMIT, semanticLimit: ORCHESTRATION.SEMANTIC_LIMIT,
scoreThreshold: ORCHESTRATION.SCORE_THRESHOLD, scoreThreshold: ORCHESTRATION.SCORE_THRESHOLD,
modelsFolderPath: getEnv('MODELS_MANIFEST_PATH', '/mnt/nexus-models') modelsFolderPath: getEnv('MODELS_MANIFEST_PATH', '/mnt/nexus-models'),
temperature: ORCHESTRATION.TEMPERATURE
}; };
function load() { function load() {

View File

@@ -25,6 +25,7 @@ const ORCHESTRATION = {
RECENT_EPISODE_LIMIT: 5, RECENT_EPISODE_LIMIT: 5,
SEMANTIC_LIMIT: 5, SEMANTIC_LIMIT: 5,
SCORE_THRESHOLD: 0.75, SCORE_THRESHOLD: 0.75,
TEMPERATURE: 0.7,
CORS_ORIGIN: 'http://localhost:5173', CORS_ORIGIN: 'http://localhost:5173',
SYSTEM_PROMPT: `You are a helpful, context-aware AI assistant. You have access to memories of past conversations with the user. Use them to provide consistent, personalised responses.` SYSTEM_PROMPT: `You are a helpful, context-aware AI assistant. You have access to memories of past conversations with the user. Use them to provide consistent, personalised responses.`
} }