Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions dataframely/columns/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def __init__(
check: Check | None = None,
alias: str | None = None,
metadata: dict[str, Any] | None = None,
description: str | None = None,
):
"""
Args:
Expand Down Expand Up @@ -79,6 +80,7 @@ def __init__(
this option does _not_ allow to refer to the column with two different
names, the specified alias is the only valid name.
metadata: A dictionary of metadata to attach to the column.
description: A human-readable description of the column.
"""
Comment thread
borchero marked this conversation as resolved.
if nullable and primary_key:
raise ValueError("Nullable primary key columns are not supported.")
Expand All @@ -89,6 +91,7 @@ def __init__(
self.check = check
self.alias = alias
self.metadata = metadata
self.description = description
# The name may be overridden by the schema on column access.
self._name = ""

Expand Down Expand Up @@ -277,7 +280,10 @@ def _pydantic_field_kwargs(self) -> dict[str, Any]:
Returns:
A dictionary of kwargs to pass to pydantic.Field.
"""
return {}
kwargs: dict[str, Any] = {}
if self.description is not None:
kwargs["description"] = self.description
return kwargs

# ------------------------------------ HELPER ------------------------------------ #

Expand Down Expand Up @@ -362,6 +368,17 @@ def with_metadata(self, metadata: dict[str, Any]) -> Self:
"""
return self.with_properties(metadata=metadata)

def with_description(self, description: str) -> Self:
"""Return a new column definition with the specified description.

Args:
description: A human-readable description of the column.

Returns:
A new column instance with the specified description.
"""
return self.with_properties(description=description)

# ----------------------------------- SAMPLING ----------------------------------- #

def sample(self, generator: Generator, n: int = 1) -> pl.Series:
Expand Down Expand Up @@ -436,7 +453,7 @@ def as_dict(self, expr: pl.Expr) -> dict[str, Any]:
else getattr(self, param)
)
for param in inspect.signature(self.__class__.__init__).parameters
if param not in ("self", "alias")
if param not in ("self", "alias", "description")
},
}

Expand Down Expand Up @@ -485,8 +502,9 @@ def matches(self, other: Column, expr: pl.Expr) -> bool:
for attr in attributes.parameters
# NOTE: We do not want to compare the `alias` here as the comparison should
# only evaluate the type and its constraints. Names are checked in
# :meth:`Schema.matches`.
if attr not in ("self", "alias")
# :meth:`Schema.matches`. The `description` is also excluded as it is
# human-readable documentation rather than a semantic constraint.
if attr not in ("self", "alias", "description")
)

def _attributes_match(
Expand All @@ -506,7 +524,9 @@ def __repr__(self) -> str:
self.__class__.__init__
).parameters.items()
if attribute
not in ["self", "alias"] # alias is always equal to the column name here
# alias is always equal to the column name here; description is
# human-readable documentation rather than a semantic constraint
not in ["self", "alias", "description"]
and not (
# Do not include attributes that are set to their default value
getattr(self, attribute) == param_details.default
Expand Down
3 changes: 3 additions & 0 deletions dataframely/columns/any.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def __init__(
check: Check | None = None,
alias: str | None = None,
metadata: dict[str, Any] | None = None,
description: str | None = None,
):
"""
Args:
Expand All @@ -53,13 +54,15 @@ def __init__(
this option does _not_ allow to refer to the column with two different
names, the specified alias is the only valid name.
metadata: A dictionary of metadata to attach to the column.
description: A human-readable description of the column.
"""
super().__init__(
nullable=True,
primary_key=False,
check=check,
alias=alias,
metadata=metadata,
description=description,
)

@property
Expand Down
3 changes: 3 additions & 0 deletions dataframely/columns/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def __init__(
check: Check | None = None,
alias: str | None = None,
metadata: dict[str, Any] | None = None,
description: str | None = None,
):
"""
Args:
Expand Down Expand Up @@ -69,6 +70,7 @@ def __init__(
this option does _not_ allow to refer to the column with two different
names, the specified alias is the only valid name.
metadata: A dictionary of metadata to attach to the column.
description: A human-readable description of the column.
"""
super().__init__(
nullable=nullable,
Expand All @@ -77,6 +79,7 @@ def __init__(
check=check,
alias=alias,
metadata=metadata,
description=description,
)
self.inner = inner
self.shape = shape if isinstance(shape, tuple) else (shape,)
Expand Down
3 changes: 3 additions & 0 deletions dataframely/columns/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def __init__(
check: Check | None = None,
alias: str | None = None,
metadata: dict[str, Any] | None = None,
description: str | None = None,
):
"""
Args:
Expand Down Expand Up @@ -59,6 +60,7 @@ def __init__(
this option does _not_ allow to refer to the column with two different
names, the specified alias is the only valid name.
metadata: A dictionary of metadata to attach to the column.
description: A human-readable description of the column.
"""
super().__init__(
nullable=nullable,
Expand All @@ -67,6 +69,7 @@ def __init__(
check=check,
alias=alias,
metadata=metadata,
description=description,
)

@property
Expand Down
12 changes: 12 additions & 0 deletions dataframely/columns/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def __init__(
check: Check | None = None,
alias: str | None = None,
metadata: dict[str, Any] | None = None,
description: str | None = None,
):
"""
Args:
Expand Down Expand Up @@ -88,6 +89,7 @@ def __init__(
this option does _not_ allow to refer to the column with two different
names, the specified alias is the only valid name.
metadata: A dictionary of metadata to attach to the column.
description: A human-readable description of the column.
"""
if resolution is not None:
offset_time = pl.Series([EPOCH_DATETIME]).dt.offset_by(resolution).dt.time()
Expand Down Expand Up @@ -117,6 +119,7 @@ def __init__(
check=check,
alias=alias,
metadata=metadata,
description=description,
)
self.resolution = resolution

Expand Down Expand Up @@ -188,6 +191,7 @@ def __init__(
check: Check | None = None,
alias: str | None = None,
metadata: dict[str, Any] | None = None,
description: str | None = None,
):
"""
Args:
Expand Down Expand Up @@ -230,6 +234,7 @@ def __init__(
this option does _not_ allow to refer to the column with two different
names, the specified alias is the only valid name.
metadata: A dictionary of metadata to attach to the column.
description: A human-readable description of the column.
"""
if resolution is not None:
offset_date = pl.Series([EPOCH_DATETIME]).dt.offset_by(resolution).dt.date()
Expand Down Expand Up @@ -259,6 +264,7 @@ def __init__(
check=check,
alias=alias,
metadata=metadata,
description=description,
)
self.resolution = resolution

Expand Down Expand Up @@ -338,6 +344,7 @@ def __init__(
check: Check | None = None,
alias: str | None = None,
metadata: dict[str, Any] | None = None,
description: str | None = None,
):
"""
Args:
Expand Down Expand Up @@ -384,6 +391,7 @@ def __init__(
this option does _not_ allow to refer to the column with two different
names, the specified alias is the only valid name.
metadata: A dictionary of metadata to attach to the column.
description: A human-readable description of the column.
"""
if resolution is not None and min is not None:
if not datetime_matches_resolution(min, resolution):
Expand All @@ -409,6 +417,7 @@ def __init__(
check=check,
alias=alias,
metadata=metadata,
description=description,
)
self.resolution = resolution
self.time_zone = time_zone
Expand Down Expand Up @@ -509,6 +518,7 @@ def __init__(
check: Check | None = None,
alias: str | None = None,
metadata: dict[str, Any] | None = None,
description: str | None = None,
):
"""
Args:
Expand Down Expand Up @@ -552,6 +562,7 @@ def __init__(
this option does _not_ allow to refer to the column with two different
names, the specified alias is the only valid name.
metadata: A dictionary of metadata to attach to the column.
description: A human-readable description of the column.
"""
if resolution is not None and min is not None:
if not timedelta_matches_resolution(min, resolution):
Expand All @@ -577,6 +588,7 @@ def __init__(
check=check,
alias=alias,
metadata=metadata,
description=description,
)
self.resolution = resolution
self.time_unit = time_unit
Expand Down
3 changes: 3 additions & 0 deletions dataframely/columns/decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def __init__(
check: Check | None = None,
alias: str | None = None,
metadata: dict[str, Any] | None = None,
description: str | None = None,
):
"""
Args:
Expand Down Expand Up @@ -77,6 +78,7 @@ def __init__(
this option does _not_ allow to refer to the column with two different
names, the specified alias is the only valid name.
metadata: A dictionary of metadata to attach to the column.
description: A human-readable description of the column.
"""
if isinstance(min, int):
min = decimal.Decimal(min)
Expand Down Expand Up @@ -107,6 +109,7 @@ def __init__(
check=check,
alias=alias,
metadata=metadata,
description=description,
)
self.precision = precision
self.scale = scale
Expand Down
3 changes: 3 additions & 0 deletions dataframely/columns/enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def __init__(
check: Check | None = None,
alias: str | None = None,
metadata: dict[str, Any] | None = None,
description: str | None = None,
):
"""
Args:
Expand Down Expand Up @@ -66,6 +67,7 @@ def __init__(
this option does _not_ allow to refer to the column with two different
names, the specified alias is the only valid name.
metadata: A dictionary of metadata to attach to the column.
description: A human-readable description of the column.
"""
super().__init__(
nullable=nullable,
Expand All @@ -74,6 +76,7 @@ def __init__(
check=check,
alias=alias,
metadata=metadata,
description=description,
)
if isclass(categories) and issubclass(categories, enum.Enum):
categories = (item.value for item in categories)
Expand Down
3 changes: 3 additions & 0 deletions dataframely/columns/float.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def __init__(
check: Check | None = None,
alias: str | None = None,
metadata: dict[str, Any] | None = None,
description: str | None = None,
):
"""
Args:
Expand Down Expand Up @@ -79,6 +80,7 @@ def __init__(
this option does _not_ allow to refer to the column with two different
names, the specified alias is the only valid name.
metadata: A dictionary of metadata to attach to the column.
description: A human-readable description of the column.
"""
if min is not None and min < self.min_value:
raise ValueError("Minimum value is too small for the data type.")
Expand All @@ -99,6 +101,7 @@ def __init__(
check=check,
alias=alias,
metadata=metadata,
description=description,
)

@classproperty
Expand Down
3 changes: 3 additions & 0 deletions dataframely/columns/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def __init__(
check: Check | None = None,
alias: str | None = None,
metadata: dict[str, Any] | None = None,
description: str | None = None,
):
"""
Args:
Expand Down Expand Up @@ -75,6 +76,7 @@ def __init__(
this option does _not_ allow to refer to the column with two different
names, the specified alias is the only valid name.
metadata: A dictionary of metadata to attach to the column.
description: A human-readable description of the column.
"""
if min is not None and min < self.min_value:
raise ValueError("`min` is too small for the data type.")
Expand All @@ -97,6 +99,7 @@ def __init__(
check=check,
alias=alias,
metadata=metadata,
description=description,
)

@classproperty
Expand Down
3 changes: 3 additions & 0 deletions dataframely/columns/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def __init__(
min_length: int | None = None,
max_length: int | None = None,
metadata: dict[str, Any] | None = None,
description: str | None = None,
):
"""
Args:
Expand Down Expand Up @@ -77,6 +78,7 @@ def __init__(
this option does _not_ allow to refer to the column with two different
names, the specified alias is the only valid name.
metadata: A dictionary of metadata to attach to the column.
description: A human-readable description of the column.
"""
super().__init__(
nullable=nullable,
Expand All @@ -85,6 +87,7 @@ def __init__(
check=check,
alias=alias,
metadata=metadata,
description=description,
)
self.inner = inner
self.min_length = min_length
Expand Down
3 changes: 3 additions & 0 deletions dataframely/columns/object.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def __init__(
check: Check | None = None,
alias: str | None = None,
metadata: dict[str, Any] | None = None,
description: str | None = None,
):
"""
Args:
Expand All @@ -49,12 +50,14 @@ def __init__(
this option does _not_ allow to refer to the column with two different
names, the specified alias is the only valid name.
metadata: A dictionary of metadata to attach to the column.
description: A human-readable description of the column.
"""
super().__init__(
nullable=nullable,
check=check,
alias=alias,
metadata=metadata,
description=description,
)

@property
Expand Down
Loading
Loading