Add model inference settings (topP, topK, repeatPenalty) to the completion endpoints

This commit is contained in:
Storme-bit
2026-04-18 03:25:22 -07:00
parent 2b47b06563
commit daf5b9a8ae
5 changed files with 59 additions and 10 deletions

View File

@@ -5,14 +5,14 @@ const router = Router();
// Standard completion endpoint - returns full response when done
router.post('/complete', async (req, res) => {
const { prompt, model, temperature, maxTokens } = req.body;
const { prompt, model, temperature, maxTokens, topP, topK, repeatPenalty } = req.body;
if (!prompt) {
return res.status(400).json({ error: 'prompt is required'});
}
try {
const result = await complete (prompt, {model, temperature, maxTokens});
const result = await complete (prompt, {model, temperature, maxTokens, topP, topK, repeatPenalty});
res.json(result);
} catch (error) {
console.error('[Inference] Completion error:', error.message);
@@ -22,7 +22,7 @@ router.post('/complete', async (req, res) => {
// Streaming completion endpoint - sends partial responses as they arrive
router.post('/complete/stream', async (req, res) => {
const { prompt, model, temperature } = req.body;
const { prompt, model, temperature, topP, topK, repeatPenalty } = req.body;
if (!prompt) return res.status(400).json({ error: 'prompt is required' });
@@ -34,7 +34,7 @@ router.post('/complete/stream', async (req, res) => {
let lastModel = model;
let tokenCount = 0;
for await (const chunk of completeStream(prompt, { model, temperature })) {
for await (const chunk of completeStream(prompt, { model, temperature, topP, topK, repeatPenalty })) {
if (chunk.response) {
res.write(`data: ${JSON.stringify({ response: chunk.response })}\n\n`);
}