-
Notifications
You must be signed in to change notification settings - Fork 275
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Run embeddings and symf retrieval in parallel and implement basic fusion (#2804)
This PR changes the simple chat context engine to run the symf search in parallel with any dense retrieval strategies (local/remote embeddings) and to fuse the results. The fusion is currently done naively by allocating up to 80% of the context window to the symf results and the remaining 20% to embeddings. We will follow this up with more advanced fusion logic that will also address some issues for Autocomplete. However, we first need to adjust our RRF logic to work at a finer granularity before we can do this. ## Test plan - Added a unit test for the fusion code - Ask questions and look at the context items being picked up - Most importantly, it passes the squirrel test.
- Loading branch information
1 parent
80caa98
commit 646d0b7
Showing
4 changed files
with
277 additions
and
85 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
import { describe, expect, it } from 'vitest' | ||
import { fuseContext } from './context' | ||
import { testFileUri } from '@sourcegraph/cody-shared' | ||
import type { ContextItem } from './SimpleChatModel' | ||
|
||
// Test suite for fuseContext(keywordItems, embeddingsItems, maxChars): checks which items from the two | ||
// lists end up in the fused result, and in what order, for a given character budget. | ||
describe('fuseContext', () => { | ||
const uri = testFileUri('test.ts') | ||
// Shared fixture: ten keyword items whose text is a single character ('0'..'9'). | ||
const keywordItems = [ | ||
{ text: '0', uri }, | ||
{ text: '1', uri }, | ||
{ text: '2', uri }, | ||
{ text: '3', uri }, | ||
{ text: '4', uri }, | ||
{ text: '5', uri }, | ||
{ text: '6', uri }, | ||
{ text: '7', uri }, | ||
{ text: '8', uri }, | ||
{ text: '9', uri }, | ||
] | ||
// Shared fixture: three embeddings items whose text is a single character ('A'..'C'). | ||
const embeddingsItems = [ | ||
{ text: 'A', uri }, | ||
{ text: 'B', uri }, | ||
{ text: 'C', uri }, | ||
] | ||
|
||
/** | ||
 * Concatenates the `text` of every item, in order, into one string so that a fused result | ||
 * can be compared against a compact expected value such as '01234567AB'. | ||
 */ | ||
function joined(items: ContextItem[]): string { | ||
return items.map(r => r.text).join('') | ||
} | ||
|
||
// With maxChars = 10, the expected result is the first 8 keyword items followed by the | ||
// first 2 embeddings items. | ||
it('includes the right 80-20 split', () => { | ||
const maxChars = 10 | ||
const result = fuseContext(keywordItems, embeddingsItems, maxChars) | ||
expect(joined(result)).toEqual('01234567AB') | ||
}) | ||
|
||
// An item that is too long for the budget is expected to be skipped without stopping the | ||
// selection: the items listed after it ('6', '7' and 'B') still appear in the result. | ||
it('skips over large items in an attempt to optimize utilization', () => { | ||
// These local arrays shadow the suite-level fixtures of the same names. | ||
const keywordItems = [ | ||
{ text: '0', uri }, | ||
{ text: '1', uri }, | ||
{ text: '2', uri }, | ||
{ text: '3', uri }, | ||
{ text: '4', uri }, | ||
{ text: '5', uri }, | ||
{ text: 'very large keyword item', uri }, | ||
{ text: '6', uri }, | ||
{ text: '7', uri }, | ||
{ text: '8', uri }, | ||
{ text: '9', uri }, | ||
] | ||
const embeddingsItems = [ | ||
{ text: 'A', uri }, | ||
{ text: 'very large embeddings item', uri }, | ||
{ text: 'B', uri }, | ||
{ text: 'C', uri }, | ||
] | ||
const maxChars = 10 | ||
const result = fuseContext(keywordItems, embeddingsItems, maxChars) | ||
expect(joined(result)).toEqual('01234567AB') | ||
}) | ||
|
||
// A budget of 0 is expected to yield no items at all. | ||
it('returns an empty array when maxChars is 0', () => { | ||
const result = fuseContext(keywordItems, embeddingsItems, 0) | ||
expect(result).toEqual([]) | ||
}) | ||
|
||
// With an empty embeddings list, the keyword items are expected to fill the whole budget: | ||
// all ten appear, not just the eight seen in the split test above. | ||
it('includes all keyword items if there are no embeddings items', () => { | ||
const maxChars = 10 | ||
const result = fuseContext(keywordItems, [], maxChars) | ||
expect(joined(result)).toEqual('0123456789') | ||
}) | ||
}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.