minor clean up

2026-04-27 20:17:05 -07:00
parent 055683424d
commit b58a4e4692
13 changed files with 171 additions and 18 deletions
--- a/packages/embedding-service/CLAUDE.md
+++ b/packages/embedding-service/CLAUDE.md
@@ -0,0 +1,64 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+See the root [CLAUDE.md](../../CLAUDE.md) for overall architecture, service roles, and deployment layout.
+
+## Running This Service
+
+```bash
+npm run embedding                          # From repo root
+npm -w packages/embedding-service run dev  # With --watch
+```
+
+Default port: **3003**. Requires Ollama to be reachable at `OLLAMA_URL`.
+
+## Single-File Service
+
+The entire service is `src/index.js` — no subdirectory structure. All routes, the Ollama helper, and startup are in one file.
+
+## Environment Variables
+
+| Variable | Default | Description |
+|---|---|---|
+| `PORT` | `3003` | Port to listen on |
+| `OLLAMA_URL` | `http://localhost:11434` | Ollama instance URL |
+| `EMBEDDING_MODEL` | `nomic-embed-text` | Model passed to Ollama `/api/embed` |
+
+Note: the env var name is `EMBEDDING_MODEL`, not `EMBED_MODEL` — the internal constant is `EMBED_MODEL` but the lookup key is different.
+
+## Ollama API Details
+
+Uses Ollama's `/api/embed` endpoint (not `/api/embeddings`). Request shape:
+
+```json
+{ "model": "nomic-embed-text", "input": "text to embed" }
+```
+
+Ollama returns `{ "embeddings": [[...]] }` — an array of arrays even for a single input. The helper takes `data.embeddings[0]` to return the single vector.
+
+The `ollama` npm package is **not used** and is not listed as a dependency in `package.json` — all calls are raw `fetch`. Do not refactor to use the package without checking the API shape matches.
+
+## Batch Endpoint
+
+`POST /embed/batch` embeds items **sequentially** in a for-loop, not in parallel. This is deliberate: Ollama doesn't parallelise embedding calls, so parallel requests would queue internally anyway. Do not change to `Promise.all` without verifying Ollama behaviour.
+
+## Error Responses
+
+| Condition | Status | Notes |
+|---|---|---|
+| Missing/empty `text` | 400 | |
+| Ollama call fails | 502 | Upstream failure — correct status |
+| Empty `texts` array | 400 | |
+
+## Known Issue (Fixed)
+
+The 400 error message for `/embed` previously read `"text is required and must be empty"` — the word "not" was missing. It now reads `"text is required and must not be empty"`.
+
+## API Endpoints
+
+| Method | Path | Notes |
+|---|---|---|
+| GET | `/health` | Static response — does not verify Ollama is reachable |
+| POST | `/embed` | Body: `{ text: string }`. Returns `{ embedding, model, dimensions }` |
+| POST | `/embed/batch` | Body: `{ texts: string[] }`. Returns `{ embeddings, model, dimensions, count }` |
--- a/packages/embedding-service/package.json
+++ b/packages/embedding-service/package.json
@@ -9,7 +9,6 @@
  "dependencies": {
    "@nexusai/shared": "^1.0.0",
    "dotenv": "^17.4.0",
-    "express": "^5.2.1",
-    "ollama": "^0.6.3"
+    "express": "^5.2.1"
  }
 }
--- a/packages/embedding-service/src/index.js
+++ b/packages/embedding-service/src/index.js
@@ -3,7 +3,7 @@ const express = require('express');
 const {getEnv, OLLAMA, PORTS, logger} = require('@nexusai/shared');

 const app = express();
-app.use(express.json());
+app.use(express.json({ limit: '1mb' }));    // cap request bodies at 1mb (Express's default is 100kb) - embedding requests should be small

 const PORT          = getEnv('PORT',            PORTS.EMBEDDING);  
 const OLLAMA_URL    = getEnv('OLLAMA_URL',      OLLAMA.DEFAULT_URL); 
@@ -14,7 +14,8 @@ async function embedText(text) {
    const res = await fetch(`${OLLAMA_URL}/api/embed`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify({ model: EMBED_MODEL, input: text })
+        body: JSON.stringify({ model: EMBED_MODEL, input: text }),
+        signal: AbortSignal.timeout(30_000),
    });

    if (!res.ok) {
@@ -37,7 +38,7 @@ app.get('/health', (req,res) => {
 app.post('/embed', async (req, res) => {
    const { text } = req.body;
    if (!text || typeof text !== 'string' || text.trim() === '') {
-        return res.status(400).json({ error: 'text is required and must be empty' });
+        return res.status(400).json({ error: 'text is required and must not be empty' });
    }

    try {
@@ -60,7 +61,10 @@ app.post('/embed/batch', async (req, res) => {
    }

    try {
-        //sequential embedding for now, Ollama doesn't natively parallize embeddings
+        const invalid = texts.findIndex(t => !t || typeof t !== 'string' || t.trim() === '');
+        if (invalid !== -1)
+            return res.status(400).json({ error: `texts[${invalid}] is empty or not a string` });
+
        const embeddings = [];
        for (const text of texts) {
            embeddings.push(await embedText(text.trim()));