implemented inference service
This commit is contained in:
48
packages/inference-service/src/routes/inference.js
Normal file
48
packages/inference-service/src/routes/inference.js
Normal file
@@ -0,0 +1,48 @@
|
||||
const { Router } = require('express');
|
||||
const { complete, completeStream } = require('../infer');
|
||||
|
||||
// Router instance that the inference endpoints in this file are registered on.
const router = Router();
|
||||
|
||||
/**
 * POST /complete - standard completion endpoint; replies once the full
 * result is available.
 *
 * Request body fields read: `prompt` (required), `model`, `temperature`,
 * `maxTokens`. The last three are passed through to `complete()` untouched.
 *
 * Responses:
 *   - 200: the value resolved by `complete()`, sent as JSON.
 *   - 400: `{ error: 'prompt is required' }` when `prompt` is missing or falsy.
 *   - 500: `{ error: <message> }` when `complete()` rejects.
 */
router.post('/complete', async (req, res) => {
  // req.body can be undefined (no body-parsing middleware ran, or the request
  // carried no parsable body). Falling back to {} turns that case into the
  // 400 below instead of a TypeError thrown outside the try block, which in an
  // async handler would surface as an unhandled rejection.
  const { prompt, model, temperature, maxTokens } = req.body || {};

  if (!prompt) {
    return res.status(400).json({ error: 'prompt is required' });
  }

  try {
    const result = await complete(prompt, { model, temperature, maxTokens });
    return res.json(result);
  } catch (error) {
    console.error('[Inference] Completion error:', error.message);

    // Once headers are out the status can no longer be changed; just make
    // sure the response is terminated.
    if (res.headersSent) {
      return res.end();
    }
    return res.status(500).json({ error: error.message });
  }
});
|
||||
|
||||
/**
 * POST /complete/stream - streaming completion endpoint; sends partial
 * responses as they arrive using the Server-Sent Events wire format.
 *
 * Request body fields read: `prompt` (required), `model`, `temperature`,
 * `maxTokens`. The last three are passed through to `completeStream()`.
 *
 * Responses:
 *   - 400: `{ error: 'prompt is required' }` (JSON) when `prompt` is missing
 *     or falsy.
 *   - Otherwise a `text/event-stream` response where every chunk yielded by
 *     `completeStream()` is written as `data: <JSON>\n\n`, followed by
 *     `data: [DONE]\n\n` on success or `data: {"error": <message>}\n\n` if the
 *     stream throws. The response is always ended.
 */
router.post('/complete/stream', async (req, res) => {
  // req.body can be undefined (no body-parsing middleware ran, or the request
  // carried no parsable body); fall back to {} so that case yields a 400
  // rather than a TypeError outside the try block.
  // maxTokens is read here to match the non-streaming /complete endpoint.
  // NOTE(review): assumes completeStream() accepts the same options as
  // complete() - confirm against ../infer.
  const { prompt, model, temperature, maxTokens } = req.body || {};

  if (!prompt) {
    return res.status(400).json({ error: 'prompt is required' });
  }

  res.setHeader('Content-Type', 'text/event-stream');
  res.setHeader('Cache-Control', 'no-cache');
  res.setHeader('Connection', 'keep-alive');

  // 'close' firing before we call res.end() means the client went away.
  // Remember that so we stop pulling chunks nobody will read.
  let clientGone = false;
  res.on('close', () => {
    clientGone = true;
  });

  try {
    const stream = completeStream(prompt, { model, temperature, maxTokens });

    for await (const chunk of stream) {
      if (clientGone) {
        // Leaving the loop early invokes the iterator's return() (when it has
        // one), giving the producer a chance to clean up. The check runs when
        // the next chunk arrives, so at most one extra chunk is produced.
        break;
      }
      res.write(`data: ${JSON.stringify(chunk)}\n\n`);
    }

    if (!clientGone) {
      res.write('data: [DONE]\n\n');
    }
  } catch (error) {
    console.error('[Inference] Streaming error:', error.message);

    // Headers were already set for an event stream, so the failure is
    // reported in-band as a final event instead of an HTTP status.
    if (!clientGone) {
      res.write(`data: ${JSON.stringify({ error: error.message })}\n\n`);
    }
  } finally {
    res.end();
  }
});
|
||||
|
||||
// Expose the configured router as this module's export.
module.exports = router;
|
||||
Reference in New Issue
Block a user