// A store for tunables and constants used across the codebase, to avoid magic numbers and hardcoded values.

/**
 * Settings for the Qdrant vector store.
 * NOTE(review): how DEFAULT_URL gets overridden (env var, config file) is not
 * visible in this file — confirm at the call site.
 */
const QDRANT = {
  DEFAULT_URL: 'http://localhost:6333',
  VECTOR_SIZE: 768, // Must match the output dimension of the embedding model (e.g. nomic-embed-text)
  DISTANCE_METRIC: 'Cosine', // Best for normalized embeddings like text vectors
  DEFAULT_LIMIT: 10, // Default top-k for vector searches
};
/**
 * Canonical collection names, kept in one place so callers never spell them
 * out by hand.
 * NOTE(review): presumably these are the vector-store collection names —
 * verify against the code that creates them.
 */
const COLLECTIONS = {
  EPISODES: 'episodes',
  ENTITIES: 'entities',
  SUMMARIES: 'summaries',
};
/**
 * Default paging and limit values for episodic-memory queries.
 */
const EPISODIC = {
  DEFAULT_RECENT_LIMIT: 10, // Default number of recent episodes to retrieve
  DEFAULT_PAGE_SIZE: 20, // Default number of episodes per page for pagination
  DEFAULT_SEARCH_LIMIT: 10, // Default number of search results to return
  DEFAULT_OFFSET: 0, // Presumably the starting offset for paginated queries — verify against caller
  DEFAULT_SESSIONS_LIMIT: 20, // Presumably the default number of sessions to list — verify against caller
};
/**
 * Tunables for the orchestration layer: retrieval limits, score cut-offs,
 * sampling temperature, the allowed CORS origin and the base system prompt.
 * NOTE(review): ENTITIES_THRESHOLD (0.55) and TEMPERATURE (0.7) repeat values
 * that also appear in ENTITIES.THRESHOLD and INFERENCE_DEFAULTS.TEMPERATURE —
 * confirm whether they are meant to stay in sync.
 */
const ORCHESTRATION = {
  RECENT_EPISODE_LIMIT: 5, // Presumably how many recent episodes are pulled into context — verify against caller
  SEMANTIC_LIMIT: 5, // Presumably the max number of semantic-search hits — verify against caller
  SCORE_THRESHOLD: 0.5, // Presumably the minimum score for a semantic hit to be kept — verify against caller
  ENTITIES_LIMIT: 5, // Presumably the max number of entities pulled into context — verify against caller
  ENTITIES_THRESHOLD: 0.55, // Presumably the minimum score for an entity hit to be kept — verify against caller
  TEMPERATURE: 0.7,
  CORS_ORIGIN: 'http://localhost:5173',
  SYSTEM_PROMPT: `You are a helpful, context-aware AI assistant. You have access to memories of past conversations with the user. Use them to provide consistent, personalised responses.`,
};
/**
 * Defaults for talking to an Ollama server: base URL, embedding model name
 * and chat model name.
 */
const OLLAMA = {
  DEFAULT_URL: 'http://localhost:11434',
  EMBED_MODEL: 'nomic-embed-text', // Same model named in the QDRANT.VECTOR_SIZE comment
  OLLAMA_MODEL: 'companion:latest',
};
/**
 * Defaults for a llama.cpp inference server: base URL and model identifier.
 */
const LLAMACPP = {
  DEFAULT_URL: 'http://localhost:8080',
  DEFAULT_MODEL: 'qwen/qwen3.6-35b-a3b',
};
/**
 * Port for each service.
 * Values are strings, not numbers; they are interpolated into URLs in
 * SERVICES. Kept as strings so existing callers see the same type.
 */
const PORTS = {
  INFERENCE: '3001',
  MEMORY: '3002',
  EMBEDDING: '3003',
  ORCHESTRATION: '4000',
};
/**
 * Base URLs of the embedding, memory and inference services, built from the
 * matching PORTS entries on localhost. PORTS must be declared before this
 * object because the template literals are evaluated immediately.
 */
const SERVICES = {
  EMBEDDING_URL: `http://localhost:${PORTS.EMBEDDING}`,
  MEMORY_URL: `http://localhost:${PORTS.MEMORY}`,
  INFERENCE_URL: `http://localhost:${PORTS.INFERENCE}`,
};
/**
 * Default sampling parameters for text generation.
 */
const INFERENCE_DEFAULTS = {
  TEMPERATURE: 0.7, // Controls randomness. 0 = deterministic, 1 = creative
  MAX_TOKENS: 1024, // Max tokens to generate in a response
  TOP_P: 0.9, // Nucleus sampling — considers tokens comprising top 90% probability mass
  TOP_K: 40, // Limits token selection to top K candidates at each step
  REPEAT_PENALTY: 1.1, // Penalizes recently used tokens to reduce repetition
  SEED: null, // null = random. Set to an integer for reproducible outputs
};
/**
 * SQLite settings.
 * NOTE(review): DEFAULT_PATH is relative; what it resolves against (process
 * cwd vs. this module's directory) depends on the caller — confirm.
 */
const SQLITE = {
  DEFAULT_PATH: './data/nexusai.db',
};
/**
 * Thresholds that control when session summaries are generated and how much
 * material goes into the summary prompt.
 */
const SUMMARIES = {
  THRESHOLD_TOKENS: 200, // Trigger summary when session hits this many tokens
  MAX_SUMMARY_TOKENS: 800, // If existing summary exceeds this, create new instead of update
  MIN_EPISODES_SINCE: 5, // Don't resummarize until N new episodes since last summary
  MAX_SUMMARY_CHARS: 8000, // Max chars to include from recent episodes when generating summary (to control prompt size)
  MAX_PROJECT_EPISODE_LIMIT: 200, // Max number of episodes to consider from the entire project when generating summary (to control prompt size)
};
/**
 * Tunables for entity extraction and the entity graph.
 */
const ENTITIES = {
  TEMPERATURE: 0.1, // Low temperature, more precise extraction, less creative
  // NOTE(review): the name matches Ollama's `num_predict` option, which caps the
  // number of tokens GENERATED (not the amount of input considered) — confirm
  // how the extraction call uses it.
  NUM_PREDICT: 1500,
  THRESHOLD: 0.55, // Minimum confidence score for an extracted entity to be included in the results
  PROMOTION_THRESHOLD: 3, // mention_count threshold before entity is considered well-established
  GRAPH_HOP_DEPTH: 1, // Default traversal depth for neighborhood queries
};
module.exports = {
|
|
QDRANT,
|
|
COLLECTIONS,
|
|
EPISODIC,
|
|
SERVICES,
|
|
OLLAMA,
|
|
PORTS,
|
|
LLAMACPP,
|
|
INFERENCE_DEFAULTS,
|
|
SQLITE,
|
|
ORCHESTRATION,
|
|
SUMMARIES,
|
|
ENTITIES
|
|
}; |