model inference settings
This commit is contained in:
@@ -5,14 +5,14 @@ const router = Router();
|
||||
|
||||
// Standard completion endpoint - returns full response when done
|
||||
router.post('/complete', async (req, res) => {
|
||||
const { prompt, model, temperature, maxTokens } = req.body;
|
||||
const { prompt, model, temperature, maxTokens, topP, topK, repeatPenalty } = req.body;
|
||||
|
||||
if (!prompt) {
|
||||
return res.status(400).json({ error: 'prompt is required'});
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await complete (prompt, {model, temperature, maxTokens});
|
||||
const result = await complete (prompt, {model, temperature, maxTokens, topP, topK, repeatPenalty});
|
||||
res.json(result);
|
||||
} catch (error) {
|
||||
console.error('[Inference] Completion error:', error.message);
|
||||
@@ -22,7 +22,7 @@ router.post('/complete', async (req, res) => {
|
||||
|
||||
// Streaming completion endpoint - sends partial responses as they arrive
|
||||
router.post('/complete/stream', async (req, res) => {
|
||||
const { prompt, model, temperature } = req.body;
|
||||
const { prompt, model, temperature, topP, topK, repeatPenalty } = req.body;
|
||||
|
||||
if (!prompt) return res.status(400).json({ error: 'prompt is required' });
|
||||
|
||||
@@ -34,7 +34,7 @@ router.post('/complete/stream', async (req, res) => {
|
||||
let lastModel = model;
|
||||
let tokenCount = 0;
|
||||
|
||||
for await (const chunk of completeStream(prompt, { model, temperature })) {
|
||||
for await (const chunk of completeStream(prompt, { model, temperature, topP, topK, repeatPenalty })) {
|
||||
if (chunk.response) {
|
||||
res.write(`data: ${JSON.stringify({ response: chunk.response })}\n\n`);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user