minor clean up
This commit is contained in:
@@ -4,7 +4,7 @@ const {getEnv, PORTS, OLLAMA, logger} = require('@nexusai/shared');
|
||||
const inferenceRouter = require('./routes/inference');
|
||||
|
||||
const app = express();
|
||||
app.use(express.json());
|
||||
app.use(express.json({ limit: '8mb' })); // prompts include full context window
|
||||
|
||||
const PORT = getEnv('PORT', PORTS.INFERENCE);
|
||||
const PROVIDER = getEnv('INFERENCE_PROVIDER', 'ollama');
|
||||
|
||||
@@ -57,7 +57,16 @@ async function* completeStream(prompt, options = {} ) {
|
||||
});
|
||||
|
||||
for await (const chunk of stream) {
|
||||
yield chunk;
|
||||
if (chunk.done) {
|
||||
yield {
|
||||
response: '',
|
||||
done: true,
|
||||
model: chunk.model,
|
||||
tokenCount: (chunk.eval_count ?? 0) + (chunk.prompt_eval_count ?? 0),
|
||||
};
|
||||
} else {
|
||||
yield chunk;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ router.post('/complete', async (req, res) => {
|
||||
|
||||
// Streaming completion endpoint - sends partial responses as they arrive
|
||||
router.post('/complete/stream', async (req, res) => {
|
||||
const { prompt, model, temperature, topP, topK, repeatPenalty } = req.body;
|
||||
const { prompt, model, temperature, maxTokens, topP, topK, repeatPenalty } = req.body;
|
||||
|
||||
if (!prompt) return res.status(400).json({ error: 'prompt is required' });
|
||||
|
||||
@@ -35,7 +35,7 @@ router.post('/complete/stream', async (req, res) => {
|
||||
let lastModel = model;
|
||||
let tokenCount = 0;
|
||||
|
||||
for await (const chunk of completeStream(prompt, { model, temperature, topP, topK, repeatPenalty })) {
|
||||
for await (const chunk of completeStream(prompt, { model, temperature, maxTokens,topP, topK, repeatPenalty })) {
|
||||
if (chunk.response) {
|
||||
res.write(`data: ${JSON.stringify({ response: chunk.response })}\n\n`);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user