68 lines
3.3 KiB
JavaScript
68 lines
3.3 KiB
JavaScript
// test-fusion.js
|
|
const { RETRIEVAL } = require('./packages/shared/src/config/constants');
|
|
|
|
/**
 * Fuse two ranked episode lists into a single ranking using weighted
 * Reciprocal Rank Fusion.
 *
 * An episode at zero-based position `i` of a list contributes
 * `weight / (RETRIEVAL.RRF_K + i + 1)` to its score; an episode whose `id`
 * appears in both lists receives the sum of both contributions.
 *
 * A list whose weight is exactly 0 is skipped entirely. Without this, its
 * episodes would still be inserted with a score of 0 and could leak into the
 * output whenever `limit` leaves room (e.g. `keywordWeight: 0` must behave as
 * a pure semantic passthrough).
 *
 * Assumes each list contains a given `id` at most once.
 *
 * @param {Array<{id: *}>} semanticEps - Semantic-search hits, best match first.
 * @param {Array<{id: *}>} keywordEps - Keyword-search hits, best match first.
 * @param {{semanticWeight: number, keywordWeight: number, limit: number}} options
 *   Per-list weights and the maximum number of episodes to return.
 * @returns {Array<{id: *}>} Episode objects ordered by descending fused score.
 *   When an id is present in both lists, the object from `semanticEps` is the
 *   one returned (unless the semantic list was skipped).
 */
function fuseEpisodeResults(semanticEps, keywordEps, { semanticWeight, keywordWeight, limit }) {
  const k = RETRIEVAL.RRF_K;
  const scores = new Map();

  if (semanticWeight !== 0) {
    semanticEps.forEach((ep, i) => {
      scores.set(ep.id, { episode: ep, score: semanticWeight / (k + i + 1) });
    });
  }

  if (keywordWeight !== 0) {
    keywordEps.forEach((ep, i) => {
      const contrib = keywordWeight / (k + i + 1);
      const entry = scores.get(ep.id);
      if (entry) {
        entry.score += contrib;
      } else {
        scores.set(ep.id, { episode: ep, score: contrib });
      }
    });
  }

  // Array.prototype.sort is stable, so equal scores keep Map insertion order:
  // semantic entries first, then keyword-only entries.
  return [...scores.values()]
    .sort((a, b) => b.score - a.score)
    .slice(0, limit)
    .map(({ episode }) => episode);
}
|
|
|
|
// --- Minimal harness ---
// console.assert only prints a message when its condition is falsy; it never
// throws and never affects the exit status. Failed checks are collected here
// instead, so each test prints a truthful PASS/FAIL and the process exits
// non-zero when anything failed.
let failedTests = 0;

/**
 * Run one named test.
 *
 * @param {string} title - Heading printed before the test runs.
 * @param {(check: (condition: *, message: string) => void) => void} body
 *   Test body; it calls `check` once per expectation.
 */
function runTest(title, body) {
  console.log(title);
  const failures = [];
  const check = (condition, message) => {
    if (!condition) {
      failures.push(message);
    }
  };

  body(check);

  failures.forEach((message) => console.error(message));
  if (failures.length === 0) {
    console.log(' PASS\n');
  } else {
    failedTests += 1;
    console.log(' FAIL\n');
  }
}

// --- Shared fixtures: ep2 and ep3 appear in both lists ---
const semantic = [
  { id: 1, user_message: 'ep1 — semantic only, rank 1' },
  { id: 2, user_message: 'ep2 — in both lists, rank 2 semantic' },
  { id: 3, user_message: 'ep3 — in both lists, rank 3 semantic' },
];

const keyword = [
  { id: 3, user_message: 'ep3 — rank 1 FTS' },
  { id: 2, user_message: 'ep2 — rank 2 FTS' },
  { id: 4, user_message: 'ep4 — FTS only, rank 3' },
];

// --- Test 1: episodes in both lists rank highest ---
runTest('Test 1 — equal weights, episodes in both lists should rank highest:', (check) => {
  const result = fuseEpisodeResults(semantic, keyword, { semanticWeight: 1, keywordWeight: 1, limit: 5 });
  result.forEach((ep, i) => console.log(` ${i + 1}. id=${ep.id} "${ep.user_message}"`));

  check(result[0].id === 2 || result[0].id === 3, 'FAIL: ep2 or ep3 should be rank 1');

  const ep1Index = result.findIndex((e) => e.id === 1);
  const ep2Index = result.findIndex((e) => e.id === 2);
  check(ep1Index === -1 || ep1Index > ep2Index, 'FAIL: ep1 (semantic only) should rank below ep2');
});

// --- Test 2: keywordWeight:0 → pure semantic passthrough ---
runTest('Test 2 — keywordWeight:0 should return only semantic results in original order:', (check) => {
  const result2 = fuseEpisodeResults(semantic, keyword, { semanticWeight: 1, keywordWeight: 0, limit: 5 });
  result2.forEach((ep, i) => console.log(` ${i + 1}. id=${ep.id}`));

  check(result2.length === 3, `FAIL: expected 3, got ${result2.length}`);
  check(result2[0].id === 1, 'FAIL: ep1 should be rank 1');
  check(result2[1].id === 2, 'FAIL: ep2 should be rank 2');
  check(result2[2] !== undefined && result2[2].id === 3, 'FAIL: ep3 should be rank 3');
});

// --- Test 3: limit is respected ---
runTest('Test 3 — limit:2 should return exactly 2 results:', (check) => {
  const result3 = fuseEpisodeResults(semantic, keyword, { semanticWeight: 1, keywordWeight: 1, limit: 2 });

  check(result3.length === 2, `FAIL: expected 2, got ${result3.length}`);
});

// --- Test 4: no overlap → all unique episodes, ordered by individual contribution ---
runTest('Test 4 — no overlap, both should appear:', (check) => {
  const semOnly = [{ id: 10, user_message: 'sem' }];
  const ftsOnly = [{ id: 20, user_message: 'fts' }];
  const result4 = fuseEpisodeResults(semOnly, ftsOnly, { semanticWeight: 1, keywordWeight: 1, limit: 5 });

  check(result4.length === 2, `FAIL: expected 2, got ${result4.length}`);
  check(result4[0].id === 10, 'FAIL: semantic rank-1 should beat fts rank-1 (same weight, both rank 1, but semantic inserted first — tie goes to semantic)');
});

// Only claim overall success when every test passed; otherwise signal failure
// to the caller (shell, CI) through the exit code.
if (failedTests === 0) {
  console.log('All tests passed.');
} else {
  console.error(`${failedTests} test(s) failed.`);
  process.exitCode = 1;
}
|