Skip to content

Commit 97e068d

Browse files
committed
Fix surrogate PK ordering
When performing LQ on a table without PKs, we need to ensure that the surrogate PKs that we use (the string representation of the entire row) is properly ordered. Otherwise we will end up with some objects being wrongly categorised as singletons as opposed to being grouped together, which will lead to wrong query results.
1 parent 77b3fc9 commit 97e068d

3 files changed

Lines changed: 158 additions & 61 deletions

File tree

splitgraph/core/fragment_manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1367,8 +1367,8 @@ def generate_surrogate_pk(
13671367
return_shape=ResultShape.MANY_ONE,
13681368
)
13691369
)
1370-
object_pks = list(zip(result[::2], result[1::2]))
1371-
return object_pks
1370+
object_pks = [tuple(sorted(t)) for t in zip(result[::2], result[1::2])]
1371+
return cast(List[Tuple[Any, Any]], object_pks)
13721372

13731373
def _add_overlapping_objects(
13741374
self, table: "Table", all_objects: List[str], filtered_objects: List[str]

splitgraph/core/image.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,14 +129,15 @@ def get_table(self, table_name: str) -> Table:
129129
)
130130

131131
@manage_audit
132-
def checkout(self, force: bool = False, layered: bool = False) -> None:
132+
def checkout(self, force: bool = False, layered: bool = False, ddn_layout: bool = True) -> None:
133133
"""
134134
Checks the image out, changing the current HEAD pointer. Raises an error
135135
if there are pending changes to its checkout.
136136
137137
:param force: Discards all pending changes to the schema.
138138
:param layered: If True, uses layered querying to check out the image (doesn't materialize tables
139139
inside of it).
140+
:param ddn_layout: Determines whether to rotate the name prefix of lower and overlay table/view
140141
"""
141142
target_schema = self.repository.to_schema()
142143
if len(target_schema) > POSTGRES_MAX_IDENTIFIER:
@@ -166,7 +167,7 @@ def checkout(self, force: bool = False, layered: bool = False) -> None:
166167
self.object_engine.delete_table(target_schema, table)
167168

168169
if layered:
169-
self.lq_checkout()
170+
self.lq_checkout(ddn_layout=ddn_layout)
170171
else:
171172
for table in self.get_tables():
172173
self.get_table(table).materialize(table)

test/splitgraph/commands/test_writable_lq.py

Lines changed: 153 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,73 @@
22
from decimal import Decimal
33
from test.splitgraph.conftest import prepare_lq_repo
44

5+
import pytest
56
from psycopg2.sql import SQL, Identifier
67

78
from splitgraph.config import SPLITGRAPH_META_SCHEMA
8-
from splitgraph.core.overlay import WRITE_LOWER_PREFIX, WRITE_UPPER_PREFIX
9+
from splitgraph.core.overlay import (
10+
WRITE_LOWER_PREFIX,
11+
WRITE_MERGED_PREFIX,
12+
WRITE_UPPER_PREFIX,
13+
)
914

1015

11-
def test_basic_writes_no_pks(pg_repo_local):
16+
@pytest.mark.parametrize("ddn_layout", [False, True])
17+
def test_surrogate_pks_proper_ordering(pg_repo_local, ddn_layout):
1218
table_name = "fruits"
1319
head = pg_repo_local.head
14-
head.checkout(layered=True)
20+
head.checkout(layered=True, ddn_layout=ddn_layout)
21+
22+
lower_table = table_name if ddn_layout else WRITE_LOWER_PREFIX + table_name
23+
overlay_table = WRITE_MERGED_PREFIX + table_name if ddn_layout else table_name
24+
25+
original_fruits = [
26+
(1, "apple"),
27+
(2, "orange"),
28+
]
29+
30+
# Add rows such that actual min and max of the surrogate PKs are reversed compared to the non-surrogate PKs.
31+
# In other words, while the proper tuple ordering is (5, 'kiwi') < (10, 'mango'), when surrogate PKs are cast
32+
# as strings the proper ordering is reversed to '(10, mango)' <= '(5, kiwi)'.
33+
pg_repo_local.run_sql(
34+
SQL("INSERT INTO {} VALUES (5, 'kiwi'), (10, 'mango')").format(Identifier(overlay_table))
35+
)
36+
pg_repo_local.commit()
37+
38+
new_fruits = [
39+
(5, "kiwi"),
40+
(10, "mango"),
41+
]
42+
43+
assert (
44+
pg_repo_local.run_sql(SQL("SELECT * FROM {}").format(Identifier(lower_table)))
45+
== original_fruits + new_fruits
46+
)
47+
48+
pg_repo_local.run_sql(
49+
SQL("DELETE FROM {} WHERE name = 'kiwi' OR name = 'mango'").format(
50+
Identifier(overlay_table)
51+
)
52+
)
53+
pg_repo_local.commit()
54+
55+
# Assert that surrogate PKs were properly ordered such that the insertion and deletion object got bundled into
56+
# non-singleton groups and canceled each other out.
57+
assert (
58+
pg_repo_local.run_sql(SQL("SELECT * FROM {}").format(Identifier(lower_table)))
59+
== original_fruits
60+
)
61+
62+
63+
@pytest.mark.parametrize("ddn_layout", [False, True])
64+
def test_basic_writes_no_pks(pg_repo_local, ddn_layout):
65+
table_name = "fruits"
66+
head = pg_repo_local.head
67+
head.checkout(layered=True, ddn_layout=ddn_layout)
68+
69+
lower_table = table_name if ddn_layout else WRITE_LOWER_PREFIX + table_name
70+
upper_table = WRITE_UPPER_PREFIX + table_name
71+
overlay_table = WRITE_MERGED_PREFIX + table_name if ddn_layout else table_name
1572

1673
# Ensure that the table is now in the overlay/LQ mode
1774
assert pg_repo_local.is_overlay_view(table_name)
@@ -24,36 +81,52 @@ def test_basic_writes_no_pks(pg_repo_local):
2481
#
2582

2683
# Insert a couple of new rows
27-
pg_repo_local.run_sql("INSERT INTO fruits VALUES (3, 'banana')")
28-
pg_repo_local.run_sql("INSERT INTO fruits VALUES (4, 'pear')")
84+
pg_repo_local.run_sql(
85+
SQL("INSERT INTO {} VALUES (3, 'banana')").format(Identifier(overlay_table))
86+
)
87+
pg_repo_local.run_sql(
88+
SQL("INSERT INTO {} VALUES (4, 'pear')").format(Identifier(overlay_table))
89+
)
2990

3091
# Update a row existing in the lower table, and one existing only in the upper table (due to insert above)
31-
pg_repo_local.run_sql("UPDATE fruits SET name = 'mango' WHERE name = 'orange'")
32-
pg_repo_local.run_sql("UPDATE fruits SET name = 'watermelon' WHERE name = 'pear'")
92+
pg_repo_local.run_sql(
93+
SQL("UPDATE {} SET name = 'mango' WHERE name = 'orange'").format(Identifier(overlay_table))
94+
)
95+
pg_repo_local.run_sql(
96+
SQL("UPDATE {} SET name = 'watermelon' WHERE name = 'pear'").format(
97+
Identifier(overlay_table)
98+
)
99+
)
33100

34101
# Delete a row existing in the lower table, and one existing only in the upper (due to insert/update above)
35-
pg_repo_local.run_sql("DELETE FROM fruits WHERE fruit_id = 1")
36-
pg_repo_local.run_sql("DELETE FROM fruits WHERE name = 'watermelon'")
102+
pg_repo_local.run_sql(
103+
SQL("DELETE FROM {} WHERE fruit_id = 1").format(Identifier(overlay_table))
104+
)
105+
pg_repo_local.run_sql(
106+
SQL("DELETE FROM {} WHERE name = 'watermelon'").format(Identifier(overlay_table))
107+
)
37108

38109
# Won't affect the output of the view due to no PKs on the table
39-
pg_repo_local.run_sql("INSERT INTO fruits VALUES (3, 'banana')")
110+
pg_repo_local.run_sql(
111+
SQL("INSERT INTO {} VALUES (3, 'banana')").format(Identifier(overlay_table))
112+
)
40113

41114
# No-OPs: update/delete rows that are neither in upper, nor in lower table
42-
pg_repo_local.run_sql("UPDATE fruits SET name = 'kumquat' WHERE fruit_id = 10")
43-
pg_repo_local.run_sql("DELETE FROM fruits WHERE fruit_id = 11")
115+
pg_repo_local.run_sql(
116+
SQL("UPDATE {} SET name = 'kumquat' WHERE fruit_id = 10").format(Identifier(overlay_table))
117+
)
118+
pg_repo_local.run_sql(
119+
SQL("DELETE FROM {} WHERE fruit_id = 11").format(Identifier(overlay_table))
120+
)
44121

45122
# Assert that the lower table has the old values intact
46-
assert pg_repo_local.run_sql(
47-
SQL("SELECT * FROM {}").format(Identifier(WRITE_LOWER_PREFIX + table_name))
48-
) == [
123+
assert pg_repo_local.run_sql(SQL("SELECT * FROM {}").format(Identifier(lower_table))) == [
49124
(1, "apple"),
50125
(2, "orange"),
51126
]
52127

53128
# Assert the upper table stores the pending writes as expected
54-
assert pg_repo_local.run_sql(
55-
SQL("SELECT * FROM {}").format(Identifier(WRITE_UPPER_PREFIX + table_name))
56-
) == [
129+
assert pg_repo_local.run_sql(SQL("SELECT * FROM {}").format(Identifier(upper_table))) == [
57130
(3, "banana", True, 1),
58131
(4, "pear", True, 2),
59132
(2, "orange", False, 3),
@@ -66,7 +139,7 @@ def test_basic_writes_no_pks(pg_repo_local):
66139
]
67140

68141
# Assert the correct result from the overlay view
69-
assert pg_repo_local.run_sql("SELECT * FROM fruits") == [
142+
assert pg_repo_local.run_sql(SQL("SELECT * FROM {}").format(Identifier(overlay_table))) == [
70143
(2, "mango"),
71144
(3, "banana"),
72145
]
@@ -114,23 +187,16 @@ def test_basic_writes_no_pks(pg_repo_local):
114187
]
115188

116189
# Assert that the lower table now has the new values
117-
assert pg_repo_local.run_sql(
118-
SQL("SELECT * FROM {}").format(Identifier(WRITE_LOWER_PREFIX + table_name))
119-
) == [
190+
assert pg_repo_local.run_sql(SQL("SELECT * FROM {}").format(Identifier(lower_table))) == [
120191
(2, "mango"),
121192
(3, "banana"),
122193
]
123194

124195
# Assert the upper table is now empty, since we have just committed all pending changes
125-
assert (
126-
pg_repo_local.run_sql(
127-
SQL("SELECT * FROM {}").format(Identifier(WRITE_UPPER_PREFIX + table_name))
128-
)
129-
== []
130-
)
196+
assert pg_repo_local.run_sql(SQL("SELECT * FROM {}").format(Identifier(upper_table))) == []
131197

132198
# Assert the overlay view also shows the latest data
133-
assert pg_repo_local.run_sql("SELECT * FROM fruits") == [
199+
assert pg_repo_local.run_sql(SQL("SELECT * FROM {}").format(Identifier(overlay_table))) == [
134200
(2, "mango"),
135201
(3, "banana"),
136202
]
@@ -170,11 +236,16 @@ def test_basic_writes_no_pks(pg_repo_local):
170236
) == [(1, "potato", True), (2, "carrot", True)]
171237

172238

173-
def test_basic_writes_with_pks(pg_repo_local):
239+
@pytest.mark.parametrize("ddn_layout", [False, True])
240+
def test_basic_writes_with_pks(pg_repo_local, ddn_layout):
174241
table_name = "fruits"
175242
prepare_lq_repo(pg_repo_local, commit_after_every=False, include_pk=True)
176243
head = pg_repo_local.head
177-
head.checkout(layered=True)
244+
head.checkout(layered=True, ddn_layout=ddn_layout)
245+
246+
lower_table = table_name if ddn_layout else WRITE_LOWER_PREFIX + table_name
247+
upper_table = WRITE_UPPER_PREFIX + table_name
248+
overlay_table = WRITE_MERGED_PREFIX + table_name if ddn_layout else table_name
178249

179250
# Ensure that the table is now in the overlay/LQ mode
180251
assert pg_repo_local.is_overlay_view(table_name)
@@ -192,37 +263,69 @@ def test_basic_writes_with_pks(pg_repo_local):
192263
#
193264
# Also, note that one has a PK conflict, but due to the overlay mechanism
194265
# it will simply result in an update without throwing an error.
195-
pg_repo_local.run_sql("INSERT INTO fruits VALUES (3, 'banana', 1, '2022-01-01T12:00:00')")
196-
pg_repo_local.run_sql("INSERT INTO fruits VALUES (4, 'pear', 2, '2022-01-01T12:00:00')")
197-
pg_repo_local.run_sql("INSERT INTO fruits VALUES (5, 'orange', 3, '2022-01-01T12:00:00')")
198-
pg_repo_local.run_sql("INSERT INTO fruits VALUES (6, 'kiwi', 4, '2022-01-01T12:00:00')")
266+
pg_repo_local.run_sql(
267+
SQL("INSERT INTO {} VALUES (3, 'banana', 1, '2022-01-01T12:00:00')").format(
268+
Identifier(overlay_table)
269+
)
270+
)
271+
pg_repo_local.run_sql(
272+
SQL("INSERT INTO {} VALUES (4, 'pear', 2, '2022-01-01T12:00:00')").format(
273+
Identifier(overlay_table)
274+
)
275+
)
276+
pg_repo_local.run_sql(
277+
SQL("INSERT INTO {} VALUES (5, 'orange', 3, '2022-01-01T12:00:00')").format(
278+
Identifier(overlay_table)
279+
)
280+
)
281+
pg_repo_local.run_sql(
282+
SQL("INSERT INTO {} VALUES (6, 'kiwi', 4, '2022-01-01T12:00:00')").format(
283+
Identifier(overlay_table)
284+
)
285+
)
199286

200287
# Update a row existing in the lower table, and one existing only in the upper table (due to insert above)
201-
pg_repo_local.run_sql("UPDATE fruits SET name = 'mango' WHERE name = 'guitar'")
202-
pg_repo_local.run_sql("UPDATE fruits SET name = 'watermelon' WHERE name = 'kiwi'")
288+
pg_repo_local.run_sql(
289+
SQL("UPDATE {} SET name = 'mango' WHERE name = 'guitar'").format(Identifier(overlay_table))
290+
)
291+
pg_repo_local.run_sql(
292+
SQL("UPDATE {} SET name = 'watermelon' WHERE name = 'kiwi'").format(
293+
Identifier(overlay_table)
294+
)
295+
)
203296
# Updating a PK will result in overwriting of the previous row with that PK, instead of an error
204-
pg_repo_local.run_sql("UPDATE fruits SET fruit_id = 5, number = 100 WHERE fruit_id = 4")
297+
pg_repo_local.run_sql(
298+
SQL("UPDATE {} SET fruit_id = 5, number = 100 WHERE fruit_id = 4").format(
299+
Identifier(overlay_table)
300+
)
301+
)
205302

206303
# Delete a row existing in the lower table, and one existing only in the upper (due to insert/update above)
207-
pg_repo_local.run_sql("DELETE FROM fruits WHERE fruit_id = 2")
208-
pg_repo_local.run_sql("DELETE FROM fruits WHERE name = 'watermelon'")
304+
pg_repo_local.run_sql(
305+
SQL("DELETE FROM {} WHERE fruit_id = 2").format(Identifier(overlay_table))
306+
)
307+
pg_repo_local.run_sql(
308+
SQL("DELETE FROM {} WHERE name = 'watermelon'").format(Identifier(overlay_table))
309+
)
209310

210311
# No-OPs: update/delete rows that are neither in upper, nor in lower table
211-
pg_repo_local.run_sql("UPDATE fruits SET name = 'kumquat' WHERE fruit_id = 10")
212-
pg_repo_local.run_sql("DELETE FROM fruits WHERE fruit_id = 11")
312+
pg_repo_local.run_sql(
313+
SQL("UPDATE {} SET name = 'kumquat' WHERE fruit_id = 10").format(Identifier(overlay_table))
314+
)
315+
pg_repo_local.run_sql(
316+
SQL("DELETE FROM {} WHERE fruit_id = 11").format(Identifier(overlay_table))
317+
)
213318

214319
# Assert that the lower table has the old values intact
215320
assert pg_repo_local.run_sql(
216-
SQL("SELECT fruit_id, name FROM {}").format(Identifier(WRITE_LOWER_PREFIX + table_name))
321+
SQL("SELECT fruit_id, name FROM {}").format(Identifier(lower_table))
217322
) == [
218323
(3, "mayonnaise"),
219324
(2, "guitar"),
220325
]
221326

222327
# Assert the upper table stores the pending writes as expected
223-
assert pg_repo_local.run_sql(
224-
SQL("SELECT * FROM {}").format(Identifier(WRITE_UPPER_PREFIX + table_name))
225-
) == [
328+
assert pg_repo_local.run_sql(SQL("SELECT * FROM {}").format(Identifier(upper_table))) == [
226329
(3, "banana", Decimal("1"), datetime.datetime(2022, 1, 1, 12, 0), True, 1),
227330
(4, "pear", Decimal("2"), datetime.datetime(2022, 1, 1, 12, 0), True, 2),
228331
(5, "orange", Decimal("3"), datetime.datetime(2022, 1, 1, 12, 0), True, 3),
@@ -238,7 +341,7 @@ def test_basic_writes_with_pks(pg_repo_local):
238341
]
239342

240343
# Assert the correct result from the overlay view
241-
assert pg_repo_local.run_sql("SELECT * FROM fruits") == [
344+
assert pg_repo_local.run_sql(SQL("SELECT * FROM {}").format(Identifier(overlay_table))) == [
242345
(3, "banana", Decimal("1"), datetime.datetime(2022, 1, 1, 12, 0)),
243346
(5, "pear", Decimal("100"), datetime.datetime(2022, 1, 1, 12, 0)),
244347
]
@@ -285,23 +388,16 @@ def test_basic_writes_with_pks(pg_repo_local):
285388
]
286389

287390
# Assert that the lower table now has the new values
288-
assert pg_repo_local.run_sql(
289-
SQL("SELECT * FROM {}").format(Identifier(WRITE_LOWER_PREFIX + table_name))
290-
) == [
391+
assert pg_repo_local.run_sql(SQL("SELECT * FROM {}").format(Identifier(lower_table))) == [
291392
(3, "banana", Decimal("1"), datetime.datetime(2022, 1, 1, 12, 0)),
292393
(5, "pear", Decimal("100"), datetime.datetime(2022, 1, 1, 12, 0)),
293394
]
294395

295396
# Assert the upper table is now empty, since we have just committed all pending changes
296-
assert (
297-
pg_repo_local.run_sql(
298-
SQL("SELECT * FROM {}").format(Identifier(WRITE_UPPER_PREFIX + table_name))
299-
)
300-
== []
301-
)
397+
assert pg_repo_local.run_sql(SQL("SELECT * FROM {}").format(Identifier(upper_table))) == []
302398

303399
# Assert the overlay view also shows the latest data
304-
assert pg_repo_local.run_sql("SELECT * FROM fruits") == [
400+
assert pg_repo_local.run_sql(SQL("SELECT * FROM {}").format(Identifier(overlay_table))) == [
305401
(3, "banana", Decimal("1"), datetime.datetime(2022, 1, 1, 12, 0)),
306402
(5, "pear", Decimal("100"), datetime.datetime(2022, 1, 1, 12, 0)),
307403
]

0 commit comments

Comments
 (0)