Skip to content

Commit eeb05d4

Browse files
authored
refactor joins to use direct implementation for each type rather than composition of inner+anti joins (#571)
* first refactor * second pass * further optimisations * changeset * format * Address review * additional comments
1 parent 60d5dc4 commit eeb05d4

5 files changed

Lines changed: 315 additions & 147 deletions

File tree

.changeset/fast-joins-redesign.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@tanstack/db-ivm": patch
3+
---
4+
5+
Redesign the join operators to use direct algorithms: the composition-based joins (inner + anti) are replaced with per-join-type implementations based on mass tracking. This delivers significant performance gains while maintaining full correctness for all join types (inner, left, right, full, anti).

packages/db-ivm/src/indexes.ts

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,11 +150,30 @@ export class Index<TKey, TValue, TPrefix = any> {
150150
* hash to identify identical values, storing them in a third level value map.
151151
*/
152152
#inner: IndexMap<TKey, TValue, TPrefix>
153+
#consolidatedMultiplicity: Map<TKey, number> = new Map() // sum of multiplicities per key
153154

154155
constructor() {
155156
this.#inner = new Map()
156157
}
157158

159+
/**
160+
* Create an Index from multiple MultiSet messages.
161+
* @param messages - Array of MultiSet messages to build the index from.
162+
* @returns A new Index containing all the data from the messages.
163+
*/
164+
static fromMultiSets<K, V>(messages: Array<MultiSet<[K, V]>>): Index<K, V> {
165+
const index = new Index<K, V>()
166+
167+
for (const message of messages) {
168+
for (const [item, multiplicity] of message.getInner()) {
169+
const [key, value] = item
170+
index.addValue(key, [value, multiplicity])
171+
}
172+
}
173+
174+
return index
175+
}
176+
158177
/**
159178
* This method returns a string representation of the index.
160179
* @param indent - Whether to indent the string representation.
@@ -184,6 +203,32 @@ export class Index<TKey, TValue, TPrefix = any> {
184203
return this.#inner.has(key)
185204
}
186205

206+
/**
207+
* Check if a key has presence (non-zero consolidated multiplicity).
208+
* @param key - The key to check.
209+
* @returns True if the key has non-zero consolidated multiplicity, false otherwise.
210+
*/
211+
hasPresence(key: TKey): boolean {
212+
return (this.#consolidatedMultiplicity.get(key) || 0) !== 0
213+
}
214+
215+
/**
216+
* Get the consolidated multiplicity (sum of multiplicities) for a key.
217+
* @param key - The key to get the consolidated multiplicity for.
218+
* @returns The consolidated multiplicity for the key.
219+
*/
220+
getConsolidatedMultiplicity(key: TKey): number {
221+
return this.#consolidatedMultiplicity.get(key) || 0
222+
}
223+
224+
/**
225+
* Get all keys that have presence (non-zero consolidated multiplicity).
226+
* @returns An iterator of keys with non-zero consolidated multiplicity.
227+
*/
228+
getPresenceKeys(): Iterable<TKey> {
229+
return this.#consolidatedMultiplicity.keys()
230+
}
231+
187232
/**
188233
* This method returns all values for a given key.
189234
* @param key - The key to get the values for.
@@ -257,6 +302,15 @@ export class Index<TKey, TValue, TPrefix = any> {
257302
// If the multiplicity is 0, do nothing
258303
if (multiplicity === 0) return
259304

305+
// Update consolidated multiplicity tracking
306+
const newConsolidatedMultiplicity =
307+
(this.#consolidatedMultiplicity.get(key) || 0) + multiplicity
308+
if (newConsolidatedMultiplicity === 0) {
309+
this.#consolidatedMultiplicity.delete(key)
310+
} else {
311+
this.#consolidatedMultiplicity.set(key, newConsolidatedMultiplicity)
312+
}
313+
260314
const mapOrSingleValue = this.#inner.get(key)
261315

262316
if (mapOrSingleValue === undefined) {

packages/db-ivm/src/multiset.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,12 @@ export class MultiSet<T> {
209209
chunkedArrayPush(this.#inner, otherArray)
210210
}
211211

212+
add(item: T, multiplicity: number): void {
213+
if (multiplicity !== 0) {
214+
this.#inner.push([item, multiplicity])
215+
}
216+
}
217+
212218
getInner(): MultiSetArray<T> {
213219
return this.#inner
214220
}

0 commit comments

Comments
 (0)