62 lines
2.3 KiB
JavaScript
62 lines
2.3 KiB
JavaScript
const { Router } = require('express');
|
|
const { complete, completeStream } = require('../infer');
|
|
const { logger } = require('@nexusai/shared');
|
|
|
|
// Express router that the inference endpoints below are registered on; it is
// this module's export.
const router = Router();
|
|
|
|
/**
 * Standard completion endpoint - returns the full response when done.
 *
 * POST /complete
 *
 * Request body fields: `prompt` (required) plus the optional `model`,
 * `temperature`, `maxTokens`, `topP`, `topK` and `repeatPenalty`, which are
 * forwarded unchanged to `complete()`.
 *
 * Responses:
 *   - 400 `{ error: 'prompt is required' }` when `prompt` is missing or falsy,
 *     including when the request arrived without a parsed body.
 *   - JSON of whatever `complete()` resolved to on success.
 *   - 500 `{ error: <message> }` when `complete()` throws.
 */
router.post('/complete', async (req, res) => {
  // req.body is undefined when no body parser handled the request. Fall back
  // to an empty object so the caller gets the 400 below instead of the
  // destructuring throwing inside this async handler.
  const { prompt, model, temperature, maxTokens, topP, topK, repeatPenalty } = req.body ?? {};

  if (!prompt) {
    return res.status(400).json({ error: 'prompt is required' });
  }

  try {
    const options = { model, temperature, maxTokens, topP, topK, repeatPenalty };
    const result = await complete(prompt, options);
    res.json(result);
  } catch (error) {
    logger.error('[Inference] Completion error:', error.message);
    res.status(500).json({ error: error.message });
  }
});
|
|
|
|
/**
 * Streaming completion endpoint - sends partial responses as they arrive.
 *
 * POST /complete/stream
 *
 * Request body fields: `prompt` (required) plus the optional `model`,
 * `temperature`, `maxTokens`, `topP`, `topK` and `repeatPenalty`, which are
 * forwarded to `completeStream()`.
 *
 * The response is written as server-sent events:
 *   - `data: {"response": ...}` for every chunk that carries a truthy `response`;
 *   - one `data: {"done": true, "model": ..., "tokenCount": ...}` event once the
 *     stream is exhausted, followed by a literal `data: [DONE]`;
 *   - `data: {"error": ...}` if the stream throws. The SSE headers have already
 *     been set by then, so the failure is reported in-band.
 *
 * A missing or falsy `prompt` is rejected with 400 `{ error: 'prompt is required' }`
 * before any SSE header is set.
 */
router.post('/complete/stream', async (req, res) => {
  // req.body is undefined when no body parser handled the request. Fall back
  // to an empty object so the caller gets the 400 below instead of the
  // destructuring throwing inside this async handler.
  const { prompt, model, temperature, maxTokens, topP, topK, repeatPenalty } = req.body ?? {};

  if (!prompt) return res.status(400).json({ error: 'prompt is required' });

  res.setHeader('Content-Type', 'text/event-stream');
  res.setHeader('Cache-Control', 'no-cache');
  res.setHeader('Connection', 'keep-alive');

  // A 'close' event that fires before we call res.end() means the client went
  // away; remember it so we stop pulling chunks nobody will read.
  let clientGone = false;
  res.on('close', () => {
    clientGone = true;
  });

  try {
    let lastModel = model;
    let tokenCount = 0;

    // maxTokens is forwarded here just as /complete forwards it to complete().
    // NOTE(review): assumes completeStream() accepts the same options - confirm in ../infer.
    const options = { model, temperature, maxTokens, topP, topK, repeatPenalty };

    for await (const chunk of completeStream(prompt, options)) {
      // Leaving the loop early asks the async iterator to finish, so the
      // producer is not driven any further for a disconnected client.
      if (clientGone) break;

      if (chunk.response) {
        res.write(`data: ${JSON.stringify({ response: chunk.response })}\n\n`);
      }

      if (chunk.done) {
        // capture final metadata from the done signal
        lastModel = chunk.model ?? lastModel;
        tokenCount = chunk.tokenCount ?? tokenCount;
        logger.info('[inference router] tokenCount from chunk:', chunk.tokenCount, '→', tokenCount);
      }
    }

    if (!clientGone) {
      // Send a single done event with metadata after stream closes
      res.write(`data: ${JSON.stringify({ done: true, model: lastModel, tokenCount })}\n\n`);
      res.write('data: [DONE]\n\n');
    }
  } catch (err) {
    logger.error('[Inference] Streaming error:', err.message);
    if (!clientGone) {
      res.write(`data: ${JSON.stringify({ error: err.message })}\n\n`);
    }
  } finally {
    // Always terminate the response, whether the stream finished, failed, or
    // the client disconnected.
    res.end();
  }
});
|
|
|
|
// Export the configured router as this module's value.
module.exports = router;