// nexusAI/packages/shared/src/config/constants.js

// Central store for tunables and constants used across the codebase, to avoid magic numbers and hard-coded values.

/**
 * Qdrant vector-store settings.
 * Frozen so that no consumer of this module can accidentally mutate the shared values.
 */
const QDRANT = Object.freeze({
    DEFAULT_URL:     'http://localhost:6333',
    VECTOR_SIZE:     768,       // Must match the output dimension of the embedding model (e.g. nomic-embed-text)
    DISTANCE_METRIC: 'Cosine',  // Best for normalized embeddings like text vectors
    DEFAULT_LIMIT:   10,        // Default top-k for vector searches
});

/**
 * Collection name identifiers.
 * NOTE(review): presumably these are Qdrant collection names — confirm against callers.
 * Frozen so the shared names cannot be reassigned at runtime.
 */
const COLLECTIONS = Object.freeze({
    EPISODES:  'episodes',
    ENTITIES:  'entities',
    SUMMARIES: 'summaries',
});

/**
 * Default limits and paging values for episode retrieval.
 * Frozen so the shared defaults cannot be mutated by a consumer.
 */
const EPISODIC = Object.freeze({
    DEFAULT_RECENT_LIMIT:   10,   // Default number of recent episodes to retrieve
    DEFAULT_PAGE_SIZE:      20,   // Default number of episodes per page for pagination
    DEFAULT_SEARCH_LIMIT:   10,   // Default number of search results to return
    DEFAULT_OFFSET:         0,    // NOTE(review): presumably the starting offset for paginated queries — confirm
    DEFAULT_SESSIONS_LIMIT: 20,   // NOTE(review): presumably the default number of sessions to list — confirm
});

/**
 * Settings for the orchestration layer: retrieval limits, score thresholds,
 * sampling temperature, allowed CORS origin and the system prompt text.
 * NOTE(review): the per-key meanings below are inferred from the key names — confirm against callers.
 * Frozen so the shared values cannot be mutated by a consumer.
 */
const ORCHESTRATION = Object.freeze({
    RECENT_EPISODE_LIMIT:   5,
    SEMANTIC_LIMIT:         5,
    SCORE_THRESHOLD:        0.5,
    ENTITIES_LIMIT:         5,
    ENTITIES_THRESHOLD:     0.55,   // Same value as ENTITIES.THRESHOLD in this file; keep the two in sync if they are meant to match
    TEMPERATURE:            0.7,
    CORS_ORIGIN:            'http://localhost:5173',
    SYSTEM_PROMPT:          `You are a helpful, context-aware AI assistant. You have access to memories of past conversations with the user. Use them to provide consistent, personalised responses.`,
});

/**
 * Ollama endpoint and model names.
 * Frozen so the shared values cannot be mutated by a consumer.
 */
const OLLAMA = Object.freeze({
    DEFAULT_URL:  'http://localhost:11434',
    EMBED_MODEL:  'nomic-embed-text',
    OLLAMA_MODEL: 'companion:latest',
});

/**
 * llama.cpp endpoint and default model name.
 * Frozen so the shared values cannot be mutated by a consumer.
 */
const LLAMACPP = Object.freeze({
    DEFAULT_URL:   'http://localhost:8080',
    DEFAULT_MODEL: 'qwen/qwen3.6-35b-a3b',
});

/**
 * Port numbers for each service, stored as strings.
 * They are interpolated into the SERVICES URLs defined in this file.
 * Frozen so the shared values cannot be mutated by a consumer.
 */
const PORTS = Object.freeze({
    INFERENCE:     '3001',
    MEMORY:        '3002',
    EMBEDDING:     '3003',
    ORCHESTRATION: '4000',
});

/**
 * Localhost base URLs for the embedding, memory and inference services,
 * each built from the matching PORTS entry.
 * Frozen so the shared values cannot be mutated by a consumer.
 */
const SERVICES = Object.freeze({
    EMBEDDING_URL: `http://localhost:${PORTS.EMBEDDING}`,
    MEMORY_URL:    `http://localhost:${PORTS.MEMORY}`,
    INFERENCE_URL: `http://localhost:${PORTS.INFERENCE}`,
});

/**
 * Default sampling parameters for text generation.
 * Frozen so the shared defaults cannot be mutated by a consumer;
 * copy them (e.g. with object spread) before applying overrides.
 */
const INFERENCE_DEFAULTS = Object.freeze({
    TEMPERATURE:    0.7,    // Controls randomness. 0 = deterministic, 1 = creative
    MAX_TOKENS:     1024,   // Max tokens to generate in a response
    TOP_P:          0.9,    // Nucleus sampling — considers tokens comprising top 90% probability mass
    TOP_K:          40,     // Limits token selection to top K candidates at each step
    REPEAT_PENALTY: 1.1,    // Penalizes recently used tokens to reduce repetition
    SEED:           null,   // null = random. Set to an integer for reproducible outputs
});

/**
 * SQLite settings: the default database file path.
 * Frozen so the shared value cannot be mutated by a consumer.
 */
const SQLITE = Object.freeze({
    DEFAULT_PATH: './data/nexusai.db',
});

/**
 * Thresholds and size caps that govern summary generation.
 * Frozen so the shared values cannot be mutated by a consumer.
 */
const SUMMARIES = Object.freeze({
    THRESHOLD_TOKENS:          200,    // Trigger a summary when the session hits this many tokens
    MAX_SUMMARY_TOKENS:        800,    // If the existing summary exceeds this, create a new one instead of updating it
    MIN_EPISODES_SINCE:        5,      // Don't re-summarize until N new episodes have arrived since the last summary
    MAX_SUMMARY_CHARS:         8000,   // Max chars to include from recent episodes when generating a summary (controls prompt size)
    MAX_PROJECT_EPISODE_LIMIT: 200,    // Max episodes to consider from the entire project when generating a summary (controls prompt size)
});

/**
 * Settings for entity extraction and entity-graph queries.
 * Frozen so the shared values cannot be mutated by a consumer.
 */
const ENTITIES = Object.freeze({
    TEMPERATURE:         0.1,    // Low temperature: more precise extraction, less creative
    // NOTE(review): the key name matches Ollama's `num_predict` option, which caps the number of
    // *generated* tokens; the description below reads like an input-size limit — confirm which is intended.
    NUM_PREDICT:         1500,   // Max tokens to consider for entity extraction (e.g. recent conversation)
    THRESHOLD:           0.55,   // Minimum confidence score for an extracted entity to be included in the results
    PROMOTION_THRESHOLD: 3,      // mention_count threshold before an entity is considered well-established
    GRAPH_HOP_DEPTH:     1,      // Default traversal depth for neighborhood queries
});

// Public surface of this module (CommonJS): each constant group declared above is
// exported under its own name, so consumers can destructure just the groups they need.
module.exports = {
    QDRANT,
    COLLECTIONS,
    EPISODIC,
    SERVICES,
    OLLAMA,
    PORTS,
    LLAMACPP,
    INFERENCE_DEFAULTS,
    SQLITE,
    ORCHESTRATION,
    SUMMARIES,
    ENTITIES
};