diff --git a/pyiceberg/conversions.py b/pyiceberg/conversions.py index 42d996f756..268cbb93ec 100644 --- a/pyiceberg/conversions.py +++ b/pyiceberg/conversions.py @@ -143,7 +143,7 @@ def _(primitive_type: PrimitiveType, value_str: str) -> int: _, _, exponent = Decimal(value_str).as_tuple() if exponent != 0: # Raise if there are digits to the right of the decimal raise ValueError(f"Cannot convert partition value, value cannot have fractional digits for {primitive_type} partition") - return int(float(value_str)) + return int(value_str) @partition_to_py.register(FloatType) diff --git a/pyiceberg/expressions/literals.py b/pyiceberg/expressions/literals.py index 5bf70990b9..64e94a8ba7 100644 --- a/pyiceberg/expressions/literals.py +++ b/pyiceberg/expressions/literals.py @@ -68,6 +68,10 @@ UUID_BYTES_LENGTH = 16 +def _truncate_numeric_string_to_int(value: str) -> int: + return int(Decimal(value)) + + class Literal(IcebergRootModel[L], Generic[L], ABC): # type: ignore """Literal which has a value and can be converted between types.""" @@ -555,27 +559,27 @@ def _(self, _: StringType) -> Literal[str]: @to.register(IntegerType) def _(self, type_var: IntegerType) -> Literal[int]: try: - number = int(float(self.value)) + number = _truncate_numeric_string_to_int(self.value) if IntegerType.max < number: return IntAboveMax() elif IntegerType.min > number: return IntBelowMin() return LongLiteral(number) - except ValueError as e: + except (ArithmeticError, OverflowError, ValueError) as e: raise ValueError(f"Could not convert {self.value} into a {type_var}") from e @to.register(LongType) def _(self, type_var: LongType) -> Literal[int]: try: - long_value = int(float(self.value)) + long_value = _truncate_numeric_string_to_int(self.value) if LongType.max < long_value: return LongAboveMax() elif LongType.min > long_value: return LongBelowMin() else: return LongLiteral(long_value) - except (TypeError, ValueError) as e: + except (ArithmeticError, OverflowError, TypeError, ValueError) as e: raise ValueError(f"Could not convert {self.value} into a {type_var}") from e @to.register(DateType) diff --git a/tests/expressions/test_literals.py b/tests/expressions/test_literals.py index c3ace5d368..5e2d3d24bf 100644 --- a/tests/expressions/test_literals.py +++ b/tests/expressions/test_literals.py @@ -845,6 +845,30 @@ def test_string_to_int_min_value() -> None: assert isinstance(literal(str(IntegerType.min - 1)).to(IntegerType()), IntBelowMin) +def test_string_to_long_max_value_without_precision_loss() -> None: + assert literal(str(LongType.max)).to(LongType()) == literal(LongType.max) + + +def test_string_to_long_large_integer_without_precision_loss() -> None: + assert literal("9007199254740993").to(LongType()) == literal(9007199254740993) + + +def test_string_to_long_decimal_like_integer_without_precision_loss() -> None: + assert literal("9007199254740993.0").to(LongType()) == literal(9007199254740993) + + +def test_string_to_long_scientific_notation_integer_without_precision_loss() -> None: + assert literal("9007199254740993e0").to(LongType()) == literal(9007199254740993) + + +def test_string_to_long_max_decimal_like_integer_without_precision_loss() -> None: + assert literal(f"{LongType.max}.0").to(LongType()) == literal(LongType.max) + + +def test_string_to_integer_scientific_notation_without_regression() -> None: + assert literal("1e3").to(IntegerType()) == literal(1000) + + def test_string_to_integer_type_invalid_value() -> None: with pytest.raises(ValueError) as e: _ = literal("abc").to(IntegerType()) diff --git a/tests/test_conversions.py b/tests/test_conversions.py index e38bdbd6f2..9b73b2db8c 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -160,8 +160,11 @@ def test_unscaled_to_decimal(unscaled: int, scale: int, expected_result: Decimal (IntegerType(), "1", 1), (IntegerType(), "9999", 9999), (LongType(), "123456789", 123456789), + (LongType(), "9007199254740993", 9007199254740993), + (LongType(), str(LongType.max), LongType.max), (FloatType(), "1.1", 1.1), (DoubleType(), "99999.9", 99999.9), + (TimestampNanoType(), "9007199254740993", 9007199254740993), (DecimalType(5, 2), "123.45", Decimal("123.45")), (StringType(), "foo", "foo"), (UUIDType(), "f79c3e09-677c-4bbd-a479-3f349cb785e7", uuid.UUID("f79c3e09-677c-4bbd-a479-3f349cb785e7")),