From f9f2de0295fd483f90c87822412fdcaf2a7a15d9 Mon Sep 17 00:00:00 2001 From: Minh Vu <38443830+fallintoplace@users.noreply.github.com> Date: Sat, 23 May 2026 01:47:59 +0200 Subject: [PATCH 1/3] fix: preserve precision for large integral string conversions --- pyiceberg/conversions.py | 2 +- pyiceberg/expressions/literals.py | 10 +++++----- tests/expressions/test_literals.py | 8 ++++++++ tests/test_conversions.py | 3 +++ 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/pyiceberg/conversions.py b/pyiceberg/conversions.py index 42d996f756..268cbb93ec 100644 --- a/pyiceberg/conversions.py +++ b/pyiceberg/conversions.py @@ -143,7 +143,7 @@ def _(primitive_type: PrimitiveType, value_str: str) -> int: _, _, exponent = Decimal(value_str).as_tuple() if exponent != 0: # Raise if there are digits to the right of the decimal raise ValueError(f"Cannot convert partition value, value cannot have fractional digits for {primitive_type} partition") - return int(float(value_str)) + return int(value_str) @partition_to_py.register(FloatType) diff --git a/pyiceberg/expressions/literals.py b/pyiceberg/expressions/literals.py index 5bf70990b9..25ca76d570 100644 --- a/pyiceberg/expressions/literals.py +++ b/pyiceberg/expressions/literals.py @@ -24,7 +24,7 @@ import struct from abc import ABC, abstractmethod from datetime import date, datetime, time -from decimal import ROUND_HALF_UP, Decimal +from decimal import ROUND_DOWN, ROUND_HALF_UP, Decimal from functools import singledispatchmethod from math import isnan from typing import Any, Generic @@ -555,27 +555,27 @@ def _(self, _: StringType) -> Literal[str]: @to.register(IntegerType) def _(self, type_var: IntegerType) -> Literal[int]: try: - number = int(float(self.value)) + number = int(Decimal(self.value).to_integral_value(rounding=ROUND_DOWN)) if IntegerType.max < number: return IntAboveMax() elif IntegerType.min > number: return IntBelowMin() return LongLiteral(number) - except ValueError as e: + except (ArithmeticError, OverflowError, ValueError) as e: raise ValueError(f"Could not convert {self.value} into a {type_var}") from e @to.register(LongType) def _(self, type_var: LongType) -> Literal[int]: try: - long_value = int(float(self.value)) + long_value = int(Decimal(self.value).to_integral_value(rounding=ROUND_DOWN)) if LongType.max < long_value: return LongAboveMax() elif LongType.min > long_value: return LongBelowMin() else: return LongLiteral(long_value) - except (TypeError, ValueError) as e: + except (ArithmeticError, OverflowError, TypeError, ValueError) as e: raise ValueError(f"Could not convert {self.value} into a {type_var}") from e @to.register(DateType) diff --git a/tests/expressions/test_literals.py b/tests/expressions/test_literals.py index c3ace5d368..e9e4e453c1 100644 --- a/tests/expressions/test_literals.py +++ b/tests/expressions/test_literals.py @@ -845,6 +845,14 @@ def test_string_to_int_min_value() -> None: assert isinstance(literal(str(IntegerType.min - 1)).to(IntegerType()), IntBelowMin) +def test_string_to_long_max_value_without_precision_loss() -> None: + assert literal(str(LongType.max)).to(LongType()) == literal(LongType.max) + + +def test_string_to_long_large_integer_without_precision_loss() -> None: + assert literal("9007199254740993").to(LongType()) == literal(9007199254740993) + + def test_string_to_integer_type_invalid_value() -> None: with pytest.raises(ValueError) as e: _ = literal("abc").to(IntegerType()) diff --git a/tests/test_conversions.py b/tests/test_conversions.py index e38bdbd6f2..9b73b2db8c 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -160,8 +160,11 @@ def test_unscaled_to_decimal(unscaled: int, scale: int, expected_result: Decimal (IntegerType(), "1", 1), (IntegerType(), "9999", 9999), (LongType(), "123456789", 123456789), + (LongType(), "9007199254740993", 9007199254740993), + (LongType(), str(LongType.max), LongType.max), (FloatType(), "1.1", 1.1), (DoubleType(), "99999.9", 99999.9), + (TimestampNanoType(), "9007199254740993", 9007199254740993), (DecimalType(5, 2), "123.45", Decimal("123.45")), (StringType(), "foo", "foo"), (UUIDType(), "f79c3e09-677c-4bbd-a479-3f349cb785e7", uuid.UUID("f79c3e09-677c-4bbd-a479-3f349cb785e7")), From 6bdb20c1d481d89b820b58803c2f561f3ba5866c Mon Sep 17 00:00:00 2001 From: Minh Vu <38443830+fallintoplace@users.noreply.github.com> Date: Sat, 23 May 2026 02:06:24 +0200 Subject: [PATCH 2/3] refine integral string parsing behavior --- pyiceberg/expressions/literals.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pyiceberg/expressions/literals.py b/pyiceberg/expressions/literals.py index 25ca76d570..af1fbceec0 100644 --- a/pyiceberg/expressions/literals.py +++ b/pyiceberg/expressions/literals.py @@ -24,7 +24,7 @@ import struct from abc import ABC, abstractmethod from datetime import date, datetime, time -from decimal import ROUND_DOWN, ROUND_HALF_UP, Decimal +from decimal import ROUND_HALF_UP, Decimal from functools import singledispatchmethod from math import isnan from typing import Any, Generic @@ -555,7 +555,8 @@ def _(self, _: StringType) -> Literal[str]: @to.register(IntegerType) def _(self, type_var: IntegerType) -> Literal[int]: try: - number = int(Decimal(self.value).to_integral_value(rounding=ROUND_DOWN)) + dec = Decimal(self.value) + number = int(self.value) if dec.as_tuple().exponent == 0 else int(float(self.value)) if IntegerType.max < number: return IntAboveMax() @@ -568,7 +569,8 @@ def _(self, type_var: IntegerType) -> Literal[int]: @to.register(LongType) def _(self, type_var: LongType) -> Literal[int]: try: - long_value = int(Decimal(self.value).to_integral_value(rounding=ROUND_DOWN)) + dec = Decimal(self.value) + long_value = int(self.value) if dec.as_tuple().exponent == 0 else int(float(self.value)) if LongType.max < long_value: return LongAboveMax() elif LongType.min > long_value: From d443f3796bcedcece4da9706b0262e16897219bb Mon Sep 17 00:00:00 2001 From: Minh Vu Date: Sun, 24 May 2026 04:24:00 +0200 Subject: [PATCH 3/3] fix: preserve exact numeric string literal conversions --- pyiceberg/expressions/literals.py | 10 ++++++---- tests/expressions/test_literals.py | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/pyiceberg/expressions/literals.py b/pyiceberg/expressions/literals.py index af1fbceec0..64e94a8ba7 100644 --- a/pyiceberg/expressions/literals.py +++ b/pyiceberg/expressions/literals.py @@ -68,6 +68,10 @@ UUID_BYTES_LENGTH = 16 +def _truncate_numeric_string_to_int(value: str) -> int: + return int(Decimal(value)) + + class Literal(IcebergRootModel[L], Generic[L], ABC): # type: ignore """Literal which has a value and can be converted between types.""" @@ -555,8 +559,7 @@ def _(self, _: StringType) -> Literal[str]: @to.register(IntegerType) def _(self, type_var: IntegerType) -> Literal[int]: try: - dec = Decimal(self.value) - number = int(self.value) if dec.as_tuple().exponent == 0 else int(float(self.value)) + number = _truncate_numeric_string_to_int(self.value) if IntegerType.max < number: return IntAboveMax() @@ -569,8 +572,7 @@ def _(self, type_var: IntegerType) -> Literal[int]: @to.register(LongType) def _(self, type_var: LongType) -> Literal[int]: try: - dec = Decimal(self.value) - long_value = int(self.value) if dec.as_tuple().exponent == 0 else int(float(self.value)) + long_value = _truncate_numeric_string_to_int(self.value) if LongType.max < long_value: return LongAboveMax() elif LongType.min > long_value: diff --git a/tests/expressions/test_literals.py b/tests/expressions/test_literals.py index e9e4e453c1..5e2d3d24bf 100644 --- a/tests/expressions/test_literals.py +++ b/tests/expressions/test_literals.py @@ -853,6 +853,22 @@ def test_string_to_long_large_integer_without_precision_loss() -> None: assert literal("9007199254740993").to(LongType()) == literal(9007199254740993) +def test_string_to_long_decimal_like_integer_without_precision_loss() -> None: + assert literal("9007199254740993.0").to(LongType()) == literal(9007199254740993) + + +def test_string_to_long_scientific_notation_integer_without_precision_loss() -> None: + assert literal("9007199254740993e0").to(LongType()) == literal(9007199254740993) + + +def test_string_to_long_max_decimal_like_integer_without_precision_loss() -> None: + assert literal(f"{LongType.max}.0").to(LongType()) == literal(LongType.max) + + +def test_string_to_integer_scientific_notation_without_regression() -> None: + assert literal("1e3").to(IntegerType()) == literal(1000) + + def test_string_to_integer_type_invalid_value() -> None: with pytest.raises(ValueError) as e: _ = literal("abc").to(IntegerType())