refactoring and clean up
This commit is contained in:
@@ -1,15 +1,32 @@
|
||||
const { getEnv } = require('@nexusai/shared');
|
||||
const { getEnv, LLAMACPP, INFERENCE_DEFAULTS } = require('@nexusai/shared');
|
||||
|
||||
const BASE_URL = getEnv('INFERENCE_URL', 'http://localhost:8080');
|
||||
const DEFAULT_MODEL = getEnv('DEFAULT_MODEL', 'local-model');
|
||||
const BASE_URL = getEnv('INFERENCE_URL', LLAMACPP.DEFAULT_URL);
|
||||
const DEFAULT_MODEL = getEnv('DEFAULT_MODEL', LLAMACPP.DEFAULT_MODEL);
|
||||
|
||||
/**
 * Merge caller-supplied sampling options with the shared inference defaults.
 *
 * Each field falls back to the matching INFERENCE_DEFAULTS entry only when the
 * caller's value is null or undefined, so explicit falsy values such as
 * `temperature: 0` or `seed: 0` are kept as given.
 *
 * @param {object} [options] - Caller overrides; may be omitted or null.
 * @returns {{temperature: *, maxTokens: *, topP: *, topK: *, repeatPenalty: *, seed: *}}
 *   The resolved option set, keyed in camelCase.
 */
function resolveOptions(options) {
  // Treat a missing/null bag as "no overrides" instead of throwing a TypeError
  // on the first property access.
  const overrides = options ?? {};

  return {
    temperature: overrides.temperature ?? INFERENCE_DEFAULTS.TEMPERATURE,
    maxTokens: overrides.maxTokens ?? INFERENCE_DEFAULTS.MAX_TOKENS,
    topP: overrides.topP ?? INFERENCE_DEFAULTS.TOP_P,
    topK: overrides.topK ?? INFERENCE_DEFAULTS.TOP_K,
    repeatPenalty: overrides.repeatPenalty ?? INFERENCE_DEFAULTS.REPEAT_PENALTY,
    seed: overrides.seed ?? INFERENCE_DEFAULTS.SEED,
  };
}
|
||||
|
||||
/**
 * Build the request body for a single-turn completion request.
 *
 * The prompt is wrapped as one `user` message, and the sampling parameters
 * returned by `resolveOptions` are mapped onto snake_case request keys.
 *
 * @param {string} prompt - Text placed in the single user message.
 * @param {object} [options] - Optional overrides; `model` is read here, the
 *   remaining fields are resolved by `resolveOptions`. May be omitted or null.
 * @param {boolean} [stream=false] - Value copied to the payload's `stream` key.
 * @returns {object} The payload object; `seed` is present only when a seed
 *   value was resolved.
 */
function buildPayload(prompt, options, stream = false) {
  // Normalise once so both the model lookup and resolveOptions see an object.
  const source = options ?? {};
  const opts = resolveOptions(source);

  return {
    model: source.model || DEFAULT_MODEL,
    messages: [{ role: 'user', content: prompt }],
    // Sampling values come only from the resolved options; the previously
    // duplicated hard-coded fallbacks were overridden by these keys anyway.
    temperature: opts.temperature,
    max_tokens: opts.maxTokens,
    top_p: opts.topP,
    top_k: opts.topK,
    repeat_penalty: opts.repeatPenalty,
    stream,
    // `!= null` skips both null and undefined so no `seed: undefined` key leaks in.
    ...(opts.seed != null && { seed: opts.seed }),
  };
}
|
||||
|
||||
|
||||
@@ -1,17 +1,33 @@
|
||||
const { Ollama } = require('ollama');
|
||||
const { getEnv } = require('@nexusai/shared');
|
||||
const { getEnv, OLLAMA, INFERENCE_DEFAULTS } = require('@nexusai/shared');
|
||||
|
||||
const client = new Ollama({ host: getEnv('INFERENCE_URL', 'http://localhost:11434') });
|
||||
const DEFAULT_MODEL = getEnv('DEFAULT_MODEL', 'companion:latest');
|
||||
const client = new Ollama({ host: getEnv('INFERENCE_URL', OLLAMA.DEFAULT_URL) });
|
||||
const DEFAULT_MODEL = getEnv('DEFAULT_MODEL', OLLAMA.OLLAMA_MODEL);
|
||||
|
||||
/**
 * Resolve generation options against the shared inference defaults.
 *
 * A caller-supplied value wins whenever it is neither null nor undefined
 * (so `0` is a valid explicit value); otherwise the corresponding
 * INFERENCE_DEFAULTS entry is used.
 *
 * @param {object} [options] - Caller overrides; may be omitted or null.
 * @returns {{temperature: *, maxTokens: *, topP: *, topK: *, repeatPenalty: *, seed: *}}
 *   The resolved option set, keyed in camelCase.
 */
function resolveOptions(options) {
  // Guard against an undefined/null argument so property reads cannot throw.
  const given = options ?? {};

  return {
    temperature: given.temperature ?? INFERENCE_DEFAULTS.TEMPERATURE,
    maxTokens: given.maxTokens ?? INFERENCE_DEFAULTS.MAX_TOKENS,
    topP: given.topP ?? INFERENCE_DEFAULTS.TOP_P,
    topK: given.topK ?? INFERENCE_DEFAULTS.TOP_K,
    repeatPenalty: given.repeatPenalty ?? INFERENCE_DEFAULTS.REPEAT_PENALTY,
    seed: given.seed ?? INFERENCE_DEFAULTS.SEED,
  };
}
|
||||
|
||||
async function complete(prompt, options = {} ) {
|
||||
const opts = resolveOptions(options);
|
||||
const response = await client.generate({
|
||||
model: options.model || DEFAULT_MODEL,
|
||||
prompt,
|
||||
stream: false,
|
||||
options: {
|
||||
temperature: options.temperature ?? 0.7,
|
||||
num_predict: options.maxTokens ?? 1024,
|
||||
temperature: opts.temperature,
|
||||
num_predict: opts.maxTokens,
|
||||
top_p: opts.topP,
|
||||
top_k: opts.topK,
|
||||
repeat_penalty: opts.repeatPenalty,
|
||||
...(opts.seed !== null && { seed: opts.seed }),
|
||||
}
|
||||
});
|
||||
|
||||
@@ -25,12 +41,18 @@ async function complete(prompt, options = {} ) {
|
||||
}
|
||||
|
||||
async function* completeStream(prompt, options = {} ) {
|
||||
const opts = resolveOptions(options);
|
||||
const stream = await client.generate({
|
||||
model: options.model || DEFAULT_MODEL,
|
||||
prompt,
|
||||
stream: true,
|
||||
options:{
|
||||
temperature: options.temperature ?? 0.7,
|
||||
temperature: opts.temperature,
|
||||
num_predict: opts.maxTokens,
|
||||
top_p: opts.topP,
|
||||
top_k: opts.topK,
|
||||
repeat_penalty: opts.repeatPenalty,
|
||||
...(opts.seed !== null && { seed: opts.seed }),
|
||||
},
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user