pydantable.pyspark¶

PySpark-shaped façade. See PySpark UI.

pydantable.pyspark ¶

PySpark-shaped :class:`DataFrame` and :class:`DataFrameModel` (same engine as core).

This is a facade for familiar names (``withColumn``, ``orderBy``, …), not a Spark cluster client. See :mod:`pydantable.pyspark.sql` for functions, types, and :class:`~pydantable.window_spec.Window`.

Expr ¶

Column expression: operators/methods build a Rust AST with static dtypes.

Source code in python/pydantable/expressions.py

class Expr:  # type: ignore[override]
    """Column expression: operators/methods build a Rust AST with static dtypes.

    Builder methods hand the wrapped Rust handle to the expression runtime
    (``get_expression_runtime()``) and wrap the handle it returns in an
    expression object; they do not modify the receiver.

    Note: ``__eq__`` / ``__ne__`` are overridden to *build comparison
    expressions* instead of returning ``bool``. Because ``__eq__`` is defined
    without ``__hash__``, Python makes instances unhashable, so they cannot be
    used as dict keys or set members.
    """

    def __init__(self, *, rust_expr: Any):
        """Wrap ``rust_expr``, the runtime handle this expression delegates to."""
        self._rust_expr = rust_expr

    @property
    def dtype(self) -> Any:
        """Static dtype reported by the wrapped Rust expression."""
        return self._rust_expr.dtype

    def referenced_columns(self) -> set[str]:
        """Return the column names the wrapped expression refers to, as a set."""
        return set(self._rust_expr.referenced_columns())

    def alias(self, name: str) -> AliasedExpr:
        """Attach an output column name for use in `select` / `with_columns`.

        Raises:
            TypeError: if ``name`` is not a ``str`` or is empty.
        """
        if not isinstance(name, str) or not name:
            raise TypeError("alias(name) expects a non-empty string.")
        return AliasedExpr(name=str(name), expr=self)

    def __repr__(self) -> str:
        """Debug form: class name, dtype, sorted referenced columns, AST snippet."""
        cls = type(self).__name__
        refs = sorted(self.referenced_columns())
        # The ``refs=`` part is omitted when no columns are referenced.
        ref_s = f" refs={refs!r}" if refs else ""
        ast_s = _rust_expr_ast_snippet(self._rust_expr)
        return f"{cls}(dtype={self.dtype!r}{ref_s} ast={ast_s})"

    def _coerce_other(self, other: Any) -> Expr:
        """Return ``other`` unchanged if it is an ``Expr``, else wrap it in ``Literal``."""
        if isinstance(other, Expr):
            return other
        return Literal(value=other)

    def _binary(self, op_symbol: str, other: Any) -> Expr:
        """Build ``self <op_symbol> other`` as a ``BinaryOp``."""
        other_expr = self._coerce_other(other)
        rust_expr = get_expression_runtime().binary_op(
            op_symbol, self._rust_expr, other_expr._rust_expr
        )
        return BinaryOp(rust_expr=rust_expr)

    def _binary_reflected(self, op_symbol: str, other: Any) -> Expr:
        """Build ``other <op_symbol> self`` (reflected operand order) as a ``BinaryOp``."""
        # `other <op> self`
        left_expr = self._coerce_other(other)
        rust_expr = get_expression_runtime().binary_op(
            op_symbol, left_expr._rust_expr, self._rust_expr
        )
        return BinaryOp(rust_expr=rust_expr)

    def _compare(self, op_symbol: str, other: Any) -> Expr:
        """Build ``self <op_symbol> other`` as a ``CompareOp``."""
        other_expr = self._coerce_other(other)
        rust_expr = get_expression_runtime().compare_op(
            op_symbol, self._rust_expr, other_expr._rust_expr
        )
        return CompareOp(rust_expr=rust_expr)

    def cast(self, dtype: Any) -> Expr:
        """Cast to ``dtype`` via the runtime's ``cast_expr``."""
        rust_expr = get_expression_runtime().cast_expr(self._rust_expr, dtype)
        return Expr(rust_expr=rust_expr)

    def is_null(self) -> Expr:
        """Null-test expression (runtime ``is_null_expr``)."""
        rust_expr = get_expression_runtime().is_null_expr(self._rust_expr)
        return Expr(rust_expr=rust_expr)

    def is_not_null(self) -> Expr:
        """Not-null-test expression (runtime ``is_not_null_expr``)."""
        rust_expr = get_expression_runtime().is_not_null_expr(self._rust_expr)
        return Expr(rust_expr=rust_expr)

    def over(
        self,
        partition_by: str | list[str] | tuple[str, ...] | None = None,
        order_by: str | list[str] | tuple[str, ...] | None = None,
    ) -> Expr:
        """Placeholder for windowing on a plain expression.

        With both arguments left as ``None`` this returns ``self`` unchanged.

        Raises:
            TypeError: if ``partition_by`` or ``order_by`` is given; the
                message points at the supported window-function API.
        """
        if partition_by is None and order_by is None:
            return self
        raise TypeError(
            "Expr.over(partition_by=..., order_by=...) is not supported. "
            "Use window functions such as row_number().over(WindowSpec(...)) "
            "or pydantable.window_spec.Window.partitionBy(...).orderBy(...)."
        )

    # Arithmetic
    def __add__(self, other: Any) -> Expr:
        """Build ``self + other``."""
        return self._binary("+", other)

    def __sub__(self, other: Any) -> Expr:
        """Build ``self - other``."""
        return self._binary("-", other)

    def __mul__(self, other: Any) -> Expr:
        """Build ``self * other``."""
        return self._binary("*", other)

    def __truediv__(self, other: Any) -> Expr:
        """Build ``self / other``."""
        return self._binary("/", other)

    def __radd__(self, other: Any) -> Expr:
        """Build ``other + self``."""
        return self._binary_reflected("+", other)

    def __rsub__(self, other: Any) -> Expr:
        """Build ``other - self``."""
        return self._binary_reflected("-", other)

    def __rmul__(self, other: Any) -> Expr:
        """Build ``other * self``."""
        return self._binary_reflected("*", other)

    def __rtruediv__(self, other: Any) -> Expr:
        """Build ``other / self``."""
        return self._binary_reflected("/", other)

    # Comparisons
    def __eq__(self, other: Any) -> Expr:  # type: ignore[override]
        """Build ``self == other`` (returns an expression, not ``bool``)."""
        return self._compare("==", other)

    def __ne__(self, other: Any) -> Expr:  # type: ignore[override]
        """Build ``self != other`` (returns an expression, not ``bool``)."""
        return self._compare("!=", other)

    def __lt__(self, other: Any) -> Expr:
        """Build ``self < other``."""
        return self._compare("<", other)

    def __le__(self, other: Any) -> Expr:
        """Build ``self <= other``."""
        return self._compare("<=", other)

    def __gt__(self, other: Any) -> Expr:
        """Build ``self > other``."""
        return self._compare(">", other)

    def __ge__(self, other: Any) -> Expr:
        """Build ``self >= other``."""
        return self._compare(">=", other)

    def isin(self, *values: Any) -> Expr:
        """Membership test against a list of values.

        Accepts either several positional values or a single ``list`` /
        ``tuple`` holding them; both forms are passed to the runtime's
        ``expr_in_list`` as a plain list.
        """
        if len(values) == 1 and isinstance(values[0], (list, tuple)):
            vals = list(values[0])
        else:
            vals = list(values)
        rust_expr = get_expression_runtime().expr_in_list(self._rust_expr, vals)
        return Expr(rust_expr=rust_expr)

    def is_in(self, *values: Any) -> Expr:
        """Alias of :meth:`isin` (Polars naming parity)."""
        return self.isin(*values)

    def len(self) -> Expr:
        """String length alias (typed-safe): only valid for ``str`` columns.

        The dtype must be ``str`` itself or a union of ``str`` with ``None``
        (``Optional[str]`` or ``str | None``); the result is
        :meth:`char_length`.

        Raises:
            TypeError: for any other dtype.
        """
        # Function-scope imports: only this method needs these two modules.
        import types
        import typing

        dt = self.dtype
        origin = get_origin(dt)
        # ``Optional[X]`` reports ``typing.Union`` as its origin, while the
        # ``X | None`` spelling can report ``types.UnionType``. Compare by
        # identity: the textual form of that class is
        # "<class 'types.UnionType'>", so a string-suffix test never matches.
        pep604_union = getattr(types, "UnionType", None)
        is_union = origin is typing.Union or (
            pep604_union is not None and origin is pep604_union
        )
        base = dt
        if is_union:
            members = [a for a in get_args(dt) if a is not type(None)]
            # Unwrap only unions with exactly one non-None member.
            if members and not members[1:]:
                base = members[0]
        if base is not str:
            raise TypeError("len() is only supported for string columns.")
        return self.char_length()

    def between(self, low: Any, high: Any) -> Expr:
        """Range test against ``low`` and ``high`` (runtime ``expr_between``).

        Both bounds may be expressions or plain values (wrapped as literals).
        NOTE(review): bound inclusivity is decided by the runtime — confirm.
        """
        lo = self._coerce_other(low)
        hi = self._coerce_other(high)
        rust_expr = get_expression_runtime().expr_between(
            self._rust_expr, lo._rust_expr, hi._rust_expr
        )
        return Expr(rust_expr=rust_expr)

    def substr(self, start: Any, length: Any | None = None) -> Expr:
        """Substring from ``start`` with optional ``length``.

        ``start`` and ``length`` may be expressions or plain values. When
        ``length`` is ``None`` the runtime receives ``None`` for it.
        NOTE(review): index base (0 vs 1) is decided by the runtime — confirm.
        """
        st = self._coerce_other(start)
        rust = get_expression_runtime()
        if length is None:
            rust_expr = rust.expr_substring(self._rust_expr, st._rust_expr, None)
        else:
            ln = self._coerce_other(length)
            rust_expr = rust.expr_substring(
                self._rust_expr, st._rust_expr, ln._rust_expr
            )
        return Expr(rust_expr=rust_expr)

    def char_length(self) -> Expr:
        """String length (runtime ``expr_string_length``)."""
        rust_expr = get_expression_runtime().expr_string_length(self._rust_expr)
        return Expr(rust_expr=rust_expr)

    def struct_field(self, name: str) -> Expr:
        """Select struct subfield ``name`` (runtime ``expr_struct_field``)."""
        rust_expr = get_expression_runtime().expr_struct_field(self._rust_expr, name)
        return Expr(rust_expr=rust_expr)

    def struct_json_encode(self) -> Expr:
        """Encode struct cells as JSON text (Polars ``struct.json_encode``)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_struct_json_encode(self._rust_expr))

    def struct_json_path_match(self, path: str) -> Expr:
        """JSONPath against struct cells (JSON-encode then ``str.json_path_match``).

        Same null/match semantics as :meth:`str_json_path_match` on strings.
        Empty ``path`` raises ``ValueError``.
        """
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_struct_json_path_match(self._rust_expr, str(path)),
        )

    def struct_rename_fields(self, names: Sequence[str]) -> Expr:
        """Rename struct subfields in order (one new name per existing field)."""
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_struct_rename_fields(
                self._rust_expr, [str(x) for x in names]
            ),
        )

    def struct_with_fields(self, **fields: Any) -> Expr:
        """Add or replace struct subfields (Polars ``struct.with_fields``).

        Each keyword must be a field name; each value must be an :class:`Expr`.

        Raises:
            TypeError: if no keyword is given or a value is not an ``Expr``.
        """
        if not fields:
            raise TypeError(
                "struct_with_fields() requires at least one keyword field=Expr."
            )
        rust = get_expression_runtime()
        updates: list[tuple[str, Any]] = []
        for k, v in fields.items():
            if not isinstance(v, Expr):
                raise TypeError(
                    f"struct_with_fields({k}=...) expects Expr, got {type(v).__name__}."
                )
            updates.append((str(k), v._rust_expr))
        return Expr(
            rust_expr=rust.expr_struct_with_fields(self._rust_expr, updates),
        )

    # Numeric
    def abs(self) -> Expr:
        """Absolute value (runtime ``expr_abs``)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_abs(self._rust_expr))

    def round(self, decimals: int = 0) -> Expr:
        """Round (runtime ``expr_round``); ``decimals`` is coerced with ``int()``."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_round(self._rust_expr, int(decimals)))

    def floor(self) -> Expr:
        """Floor (runtime ``expr_floor``)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_floor(self._rust_expr))

    def ceil(self) -> Expr:
        """Ceiling (runtime ``expr_ceil``)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_ceil(self._rust_expr))

    def cumsum(self) -> Expr:
        """Cumulative sum over rows (runtime ``expr_row_accum_cum_sum``)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_row_accum_cum_sum(self._rust_expr))

    def cumprod(self) -> Expr:
        """Cumulative product over rows (runtime ``expr_row_accum_cum_prod``)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_row_accum_cum_prod(self._rust_expr))

    def cummin(self) -> Expr:
        """Cumulative minimum over rows (runtime ``expr_row_accum_cum_min``)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_row_accum_cum_min(self._rust_expr))

    def cummax(self) -> Expr:
        """Cumulative maximum over rows (runtime ``expr_row_accum_cum_max``)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_row_accum_cum_max(self._rust_expr))

    def diff(self, periods: int = 1) -> Expr:
        """Row difference (runtime ``expr_row_accum_diff``); ``periods`` is ``int()``-coerced."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_row_accum_diff(self._rust_expr, int(periods)))

    def pct_change(self, periods: int = 1) -> Expr:
        """Row percent change (runtime ``expr_row_accum_pct_change``)."""
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_row_accum_pct_change(self._rust_expr, int(periods))
        )

    def clip(self, lower: Any = None, upper: Any = None) -> Expr:
        """Clamp values to ``[lower, upper]`` using ``when``/``otherwise`` chains.

        A bound left as ``None`` is not applied; with both ``None`` the
        receiver is returned unchanged.
        """
        e: Expr = self
        if lower is not None:
            lo = self._coerce_other(lower)
            e = when(self < lo, lo).otherwise(e)
        if upper is not None:
            hi = self._coerce_other(upper)
            # The upper bound is applied to the already lower-clamped result.
            e = when(e > hi, hi).otherwise(e)
        return e

    def replace(self, to_replace: dict[Any, Any]) -> Expr:
        """Map values through ``to_replace`` as a ``when``/``otherwise`` chain.

        Values without a mapping keep their original value. An empty mapping
        returns the receiver unchanged.

        Raises:
            ValueError: if more than 64 mappings are given.
        """
        items = list(to_replace.items())
        if not items:
            return self
        if len(items) > 64:
            raise ValueError("replace() supports at most 64 mappings.")
        chain = when(self == Literal(value=items[0][0]), Literal(value=items[0][1]))
        for old, new in items[1:]:
            chain = chain.when(self == Literal(value=old), Literal(value=new))
        return chain.otherwise(self)

    # Strings
    def strip(self) -> Expr:
        """String ``strip`` (runtime ``expr_string_unary``)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_string_unary(self._rust_expr, "strip"))

    def upper(self) -> Expr:
        """String ``upper`` (runtime ``expr_string_unary``)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_string_unary(self._rust_expr, "upper"))

    def lower(self) -> Expr:
        """String ``lower`` (runtime ``expr_string_unary``)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_string_unary(self._rust_expr, "lower"))

    def str_replace(
        self, pattern: str, replacement: str, *, literal: bool = True
    ) -> Expr:
        """Replace matches.

        Default ``literal=True`` is substring replace.
        Use ``literal=False`` for Rust regex (syntax differs from Python ``re``;
        see docs).

        Invalid regex patterns may yield null cells at execution (Polars) rather
        than raise.
        """
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_string_replace(
                self._rust_expr,
                str(pattern),
                str(replacement),
                literal=bool(literal),
            )
        )

    def starts_with(self, prefix: str) -> Expr:
        """Predicate ``starts_with`` for ``prefix`` (runtime ``expr_string_predicate``)."""
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_string_predicate(
                self._rust_expr, "starts_with", str(prefix)
            )
        )

    def ends_with(self, suffix: str) -> Expr:
        """Predicate ``ends_with`` for ``suffix`` (runtime ``expr_string_predicate``)."""
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_string_predicate(
                self._rust_expr, "ends_with", str(suffix)
            )
        )

    def str_contains(self, substring: str) -> Expr:
        """True where the string contains ``substring`` (literal, not regex).

        The empty substring matches every non-null string (Polars substring
        ``contains`` semantics).
        """
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_string_predicate(
                self._rust_expr, "contains", str(substring), literal=True
            )
        )

    def str_contains_pat(self, pattern: str, *, literal: bool = False) -> Expr:
        """Substring or Rust-regex match.

        ``literal=False`` uses the Rust ``regex`` dialect (not Python ``re``).
        Raises ``ValueError`` if ``pattern`` is empty in regex mode.
        Malformed regex may yield null per row at execution; see
        ``SUPPORTED_TYPES`` docs.
        """
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_string_predicate(
                self._rust_expr, "contains", str(pattern), literal=bool(literal)
            )
        )

    def matches(self, pattern: str) -> Expr:
        """Regex match predicate (Rust regex dialect).

        Raises:
            TypeError: if ``pattern`` is not a ``str`` or is empty.
        """
        if not isinstance(pattern, str) or not pattern:
            raise TypeError("matches(pattern) expects a non-empty string.")
        return self.str_contains_pat(pattern, literal=False)

    def is_empty_str(self) -> Expr:
        """True where string cell is exactly the empty string."""
        return self == ""

    def is_blank_str(self) -> Expr:
        """True where string cell is empty after stripping whitespace."""
        return self.strip().char_length() == 0

    def is_null_or_empty_str(self) -> Expr:
        """``is_null() | is_empty_str()``."""
        return self.is_null() | self.is_empty_str()

    def is_not_null_and_not_empty_str(self) -> Expr:
        """``is_not_null() & ~is_empty_str()``."""
        return self.is_not_null() & ~(self.is_empty_str())

    def str_split(self, delimiter: str) -> Expr:
        """Split string column into ``list[str]`` (per-row).

        Delimiter is literal (not regex). Empty ``delimiter`` follows Polars UTF-8
        split rules.
        Null string cells stay null. Edge cases are documented in
        ``SUPPORTED_TYPES``.
        """
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_string_split(self._rust_expr, str(delimiter)))

    def str_reverse(self) -> Expr:
        """Reverse each string (Polars ``str.reverse``).

        Unicode edge cases (e.g. combining marks) follow Polars, not naive
        codepoint reversal. See ``SUPPORTED_TYPES``.
        """
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_str_reverse(self._rust_expr))

    def str_pad_start(self, length: int, fill_char: str = " ") -> Expr:
        """Pad start to at least ``length`` characters (character count).

        ``fill_char`` must be exactly one non-empty character; otherwise
        ``ValueError`` at build time.
        """
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_str_pad_start(
                self._rust_expr, int(length), str(fill_char)
            )
        )

    def str_pad_end(self, length: int, fill_char: str = " ") -> Expr:
        """Pad end to at least ``length`` characters.

        Same ``fill_char`` rules as :meth:`str_pad_start`.
        """
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_str_pad_end(
                self._rust_expr, int(length), str(fill_char)
            )
        )

    def str_zfill(self, length: int) -> Expr:
        """Zero-pad strings to ``length`` (sign handled like Polars ``str.zfill``)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_str_zfill(self._rust_expr, int(length)))

    def str_extract_regex(self, pattern: str, group_index: int = 1) -> Expr:
        """Extract a regex capture group per row (Rust ``regex`` dialect).

        ``group_index`` 0 is the full match; 1+ are capture groups. Empty
        ``pattern`` raises ``ValueError``. No match or invalid regex may yield
        null; see ``SUPPORTED_TYPES``.
        """
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_str_extract_regex(
                self._rust_expr, str(pattern), int(group_index)
            )
        )

    def str_json_path_match(self, path: str) -> Expr:
        """JSONPath against JSON text cells (Polars ``str.json_path_match``).

        Returns a **string** column (serialized match). Malformed JSON or no
        match often yields null at execution time. Empty ``path`` raises
        ``ValueError``.
        """
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_str_json_path_match(self._rust_expr, str(path)))

    def str_json_decode(self, dtype: Any) -> Expr:
        """Parse JSON text per row into struct or map (Polars ``str.json_decode``).

        ``dtype`` is a nested model or ``dict[str, T]`` annotation, same style as
        :meth:`cast`. Null string cells yield null. With Polars 0.53, **any
        invalid JSON in the column typically fails execution** at
        :meth:`~pydantable.dataframe.DataFrame.collect` (not a per-row null).
        Map targets use the physical list-of-``{key,value}`` entries; JSON must
        be an **array** such as ``[{"key":"a","value":1}]``, not a bare JSON
        object. Polars execution only; see ``INTERFACE_CONTRACT``.
        """
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_str_json_decode(self._rust_expr, dtype))

    def strip_prefix(self, prefix: str) -> Expr:
        """String ``strip_prefix`` with ``prefix`` (runtime ``expr_string_unary``)."""
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_string_unary(
                self._rust_expr, "strip_prefix", str(prefix)
            )
        )

    def strip_suffix(self, suffix: str) -> Expr:
        """String ``strip_suffix`` with ``suffix`` (runtime ``expr_string_unary``)."""
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_string_unary(
                self._rust_expr, "strip_suffix", str(suffix)
            )
        )

    def strip_chars(self, chars: str) -> Expr:
        """String ``strip_chars`` with ``chars`` (runtime ``expr_string_unary``)."""
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_string_unary(self._rust_expr, "strip_chars", str(chars))
        )

    # Boolean logic (typed; operands must be boolean expressions)
    def __and__(self, other: Any) -> Expr:
        """Build logical ``self & other``."""
        # _coerce_other already passes Expr operands through untouched.
        right = self._coerce_other(other)
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_logical_and(self._rust_expr, right._rust_expr))

    def __rand__(self, other: Any) -> Expr:
        """Build logical ``other & self``."""
        left = self._coerce_other(other)
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_logical_and(left._rust_expr, self._rust_expr))

    def __or__(self, other: Any) -> Expr:
        """Build logical ``self | other``."""
        # _coerce_other already passes Expr operands through untouched.
        right = self._coerce_other(other)
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_logical_or(self._rust_expr, right._rust_expr))

    def __ror__(self, other: Any) -> Expr:
        """Build logical ``other | self``."""
        left = self._coerce_other(other)
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_logical_or(left._rust_expr, self._rust_expr))

    def __invert__(self) -> Expr:
        """Build logical ``~self``."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_logical_not(self._rust_expr))

    # Datetime / date parts (Rust validates column type)
    def dt_year(self) -> Expr:
        """Temporal part ``year``."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_temporal_part(self._rust_expr, "year"))

    def dt_month(self) -> Expr:
        """Temporal part ``month``."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_temporal_part(self._rust_expr, "month"))

    def dt_day(self) -> Expr:
        """Temporal part ``day``."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_temporal_part(self._rust_expr, "day"))

    def dt_hour(self) -> Expr:
        """Temporal part ``hour``."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_temporal_part(self._rust_expr, "hour"))

    def dt_minute(self) -> Expr:
        """Temporal part ``minute``."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_temporal_part(self._rust_expr, "minute"))

    def dt_second(self) -> Expr:
        """Temporal part ``second``."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_temporal_part(self._rust_expr, "second"))

    def dt_nanosecond(self) -> Expr:
        """Sub-second nanoseconds component (``datetime`` or ``time`` columns)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_temporal_part(self._rust_expr, "nanosecond"))

    def dt_weekday(self) -> Expr:
        """ISO weekday on ``date`` / ``datetime`` (Mon=1 ... Sun=7, same as Polars).

        Not valid on ``time`` columns (``TypeError`` at build time).
        """
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_temporal_part(self._rust_expr, "weekday"))

    def dt_quarter(self) -> Expr:
        """Calendar quarter 1-4 on ``date`` / ``datetime``.

        Not valid on ``time`` columns (``TypeError`` at build time).
        """
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_temporal_part(self._rust_expr, "quarter"))

    def dt_week(self) -> Expr:
        """ISO 8601 week number 1-53 (``date`` / ``datetime``; Polars ``dt.week``).

        Same definition as Python ``datetime.date.isocalendar().week`` /
        Polars ``dt.week()`` (weeks start Monday; week 1 contains the first
        Thursday of the year). Not valid on ``time`` columns.
        """
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_temporal_part(self._rust_expr, "week"))

    def dt_dayofyear(self) -> Expr:
        """Day of year 1-366 on ``date`` / ``datetime`` (Spark ``dayofyear``).

        Matches Polars ``dt.ordinal_day()``. Not valid on ``time`` columns.
        """
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_temporal_part(self._rust_expr, "dayofyear"))

    def dt_date(self) -> Expr:
        """Datetime-to-date conversion (runtime ``expr_datetime_to_date``)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_datetime_to_date(self._rust_expr))

    def strptime(self, format: str, *, to_datetime: bool = False) -> Expr:
        """Parse strings to ``date`` or ``datetime`` (``strftime`` format string)."""
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_strptime(
                self._rust_expr, str(format), bool(to_datetime)
            ),
        )

    def unix_timestamp(self, unit: str = "seconds") -> Expr:
        """Unix epoch from ``date``/``datetime``; ``unit`` is ``seconds`` or ``ms``."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_unix_timestamp(self._rust_expr, str(unit)))

    def from_unix_time(self, unit: str = "seconds") -> Expr:
        """UTC-naive ``datetime`` from numeric epoch; ``unit`` is ``seconds`` or ``ms``.

        Inverse of :meth:`unix_timestamp` for typical non-null numeric input.
        """
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_from_unix_time(self._rust_expr, str(unit)))

    def binary_len(self) -> Expr:
        """Byte length of a ``bytes`` column."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_binary_length(self._rust_expr))

    def map_len(self) -> Expr:
        """Number of entries in a ``dict[str, T]`` map column."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_map_len(self._rust_expr))

    def map_get(self, key: str) -> Expr:
        """Value for a string key (missing key → null)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_map_get(self._rust_expr, str(key)))

    def map_contains_key(self, key: str) -> Expr:
        """Whether the map contains the given string key."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_map_contains_key(self._rust_expr, str(key)))

    def map_keys(self) -> Expr:
        """List of keys for each map cell."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_map_keys(self._rust_expr))

    def map_values(self) -> Expr:
        """List of values for each map cell."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_map_values(self._rust_expr))

    def map_entries(self) -> Expr:
        """List of ``{key, value}`` entry structs for each map cell."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_map_entries(self._rust_expr))

    def map_from_entries(self) -> Expr:
        """Build ``dict[str, T]`` map cells from ``list[{key, value}]`` entries."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_map_from_entries(self._rust_expr))

    def element_at(self, key: str) -> Expr:
        """Alias of :meth:`map_get` for map columns."""
        return self.map_get(key)

    # List columns
    def list_len(self) -> Expr:
        """Length of each list cell (runtime ``expr_list_len``)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_list_len(self._rust_expr))

    def list_get(self, index: Any) -> Expr:
        """Element at ``index`` of each list cell; ``index`` may be an ``Expr`` or a literal."""
        rust = get_expression_runtime()
        idx = self._coerce_other(index)
        return Expr(
            rust_expr=rust.expr_list_get(self._rust_expr, idx._rust_expr),
        )

    def list_contains(self, value: Any) -> Expr:
        """Whether each list cell contains ``value`` (an ``Expr`` or a literal)."""
        rust = get_expression_runtime()
        v = self._coerce_other(value)
        return Expr(
            rust_expr=rust.expr_list_contains(self._rust_expr, v._rust_expr),
        )

    def contains_any(self, values: Any) -> Expr:
        """Any of the provided values is contained in each list cell.

        ``values`` is a ``list`` / ``tuple`` / ``set`` of literals, or a single
        literal. The result is the ``|`` of one :meth:`list_contains` per value.

        Raises:
            TypeError: if ``values`` is an ``Expr`` or holds no values.
        """
        if isinstance(values, Expr):
            raise TypeError("contains_any(values) expects literal values, not Expr.")
        vals = values if isinstance(values, (list, tuple, set)) else [values]
        expr: Expr | None = None
        for v in vals:
            term = self.list_contains(v)
            expr = term if expr is None else (expr | term)
        if expr is None:
            raise TypeError("contains_any(values) expects at least one value.")
        return expr

    def contains_all(self, values: Any) -> Expr:
        """All of the provided values are contained in each list cell.

        ``values`` is a ``list`` / ``tuple`` / ``set`` of literals, or a single
        literal. The result is the ``&`` of one :meth:`list_contains` per value.

        Raises:
            TypeError: if ``values`` is an ``Expr`` or holds no values.
        """
        if isinstance(values, Expr):
            raise TypeError("contains_all(values) expects literal values, not Expr.")
        vals = values if isinstance(values, (list, tuple, set)) else [values]
        expr: Expr | None = None
        for v in vals:
            term = self.list_contains(v)
            expr = term if expr is None else (expr & term)
        if expr is None:
            raise TypeError("contains_all(values) expects at least one value.")
        return expr

    def list_is_empty(self) -> Expr:
        """``list_len() == 0``."""
        return self.list_len() == 0

    def list_any(self) -> Expr:
        """Any True in a boolean list."""
        return self.list_contains(True)

    def list_all(self) -> Expr:
        """All True in a boolean list."""
        # Expressed as "the list does not contain False".
        return ~self.list_contains(False)

    def map_is_empty(self) -> Expr:
        """``map_len() == 0``."""
        return self.map_len() == 0

    def map_has_any_key(self, keys: Any) -> Expr:
        """Whether the map contains at least one of ``keys``.

        ``keys`` is a ``list`` / ``tuple`` / ``set`` of keys, or a single key;
        each key is converted with ``str()``. The result is the ``|`` of one
        :meth:`map_contains_key` per key.

        Raises:
            TypeError: if ``keys`` is an ``Expr`` or holds no keys.
        """
        if isinstance(keys, Expr):
            raise TypeError("map_has_any_key(keys) expects literal keys, not Expr.")
        ks = keys if isinstance(keys, (list, tuple, set)) else [keys]
        expr: Expr | None = None
        for k in ks:
            term = self.map_contains_key(str(k))
            expr = term if expr is None else (expr | term)
        if expr is None:
            raise TypeError("map_has_any_key(keys) expects at least one key.")
        return expr

    def list_min(self) -> Expr:
        """Minimum of each list cell (runtime ``expr_list_min``)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_list_min(self._rust_expr))

    def list_max(self) -> Expr:
        """Maximum of each list cell (runtime ``expr_list_max``)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_list_max(self._rust_expr))

    def list_sum(self) -> Expr:
        """Sum of each list cell (runtime ``expr_list_sum``)."""
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_list_sum(self._rust_expr))

    def list_mean(self) -> Expr:
        """Mean of each numeric list cell as ``float``.

        Requires ``list[int]`` or ``list[float]``. Empty lists and null list cells
        yield null.
        """
        rust = get_expression_runtime()
        return Expr(rust_expr=rust.expr_list_mean(self._rust_expr))

    def list_join(self, separator: str, *, ignore_nulls: bool = False) -> Expr:
        """Join each ``list[str]`` cell (Polars ``list.join``).

        Empty lists yield empty strings. ``ignore_nulls`` skips null list
        elements when ``True``. See ``SUPPORTED_TYPES``.
        """
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_list_join(
                self._rust_expr, str(separator), ignore_nulls=bool(ignore_nulls)
            )
        )

    def list_sort(
        self,
        *,
        descending: bool = False,
        nulls_last: bool = False,
        maintain_order: bool = False,
    ) -> Expr:
        """Sort each list cell in place (``list[int]``, ``list[float]``, etc.).

        ``descending``, ``nulls_last``, and ``maintain_order`` map to Polars
        ``list.sort`` options. Element-type rules are in ``SUPPORTED_TYPES``.
        """
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_list_sort(
                self._rust_expr,
                descending=bool(descending),
                nulls_last=bool(nulls_last),
                maintain_order=bool(maintain_order),
            )
        )

    def list_unique(self, *, stable: bool = False) -> Expr:
        """Deduplicate list elements per row.

        With ``stable=True``, first-seen order is preserved (Polars
        ``unique_stable``).
        """
        rust = get_expression_runtime()
        return Expr(
            rust_expr=rust.expr_list_unique(self._rust_expr, stable=bool(stable))
        )

alias ¶

alias(name)

Attach an output column name for use in select / with_columns.

Source code in python/pydantable/expressions.py

def alias(self, name: str) -> AliasedExpr:
    """Name the output column produced by this expression.

    The result is meant for `select` / `with_columns`. ``name`` has to be a
    non-empty ``str``; anything else raises ``TypeError``.
    """
    # bool(name) is evaluated only once name is known to be a str.
    usable = isinstance(name, str) and bool(name)
    if usable:
        return AliasedExpr(name=str(name), expr=self)
    raise TypeError("alias(name) expects a non-empty string.")

is_in ¶

is_in(*values)

Alias of :meth:`isin` (Polars naming parity).

Source code in python/pydantable/expressions.py

def is_in(self, *values: Any) -> Expr:
    """Polars-style spelling of :meth:`isin`; forwards every argument as-is."""
    forward = self.isin
    return forward(*values)

len ¶

len()

String length alias (typed-safe): only valid for str columns.

Source code in python/pydantable/expressions.py

def len(self) -> Expr:
    """String length alias (typed-safe): only valid for ``str`` columns."""
    dt = self.dtype
    origin = get_origin(dt)
    args = get_args(dt)
    if origin is None:
        base = dt
    elif origin is getattr(__import__("typing"), "Union", object()) or str(
        origin
    ).endswith("types.UnionType"):
        non_none = [a for a in args if a is not type(None)]
        base = non_none[0] if len(non_none) == 1 else dt
    else:
        base = dt
    if base is not str:
        raise TypeError("len() is only supported for string columns.")
    return self.char_length()

struct_json_encode ¶

struct_json_encode()

Encode struct cells as JSON text (Polars struct.json_encode).

Source code in python/pydantable/expressions.py

def struct_json_encode(self) -> Expr:
    """Build an expression that encodes each struct cell as JSON text.

    Delegates to the runtime's ``expr_struct_json_encode`` (Polars
    ``struct.json_encode``).
    """
    runtime = get_expression_runtime()
    encoded = runtime.expr_struct_json_encode(self._rust_expr)
    return Expr(rust_expr=encoded)

struct_json_path_match ¶

struct_json_path_match(path)

JSONPath against struct cells (JSON-encode then str.json_path_match).

Same null/match semantics as :meth:str_json_path_match on strings. Empty path raises ValueError.

Source code in python/pydantable/expressions.py

def struct_json_path_match(self, path: str) -> Expr:
    """Evaluate a JSONPath against struct cells.

    Conceptually the struct is JSON-encoded and then matched like
    ``str.json_path_match``; null/match semantics are the same as
    :meth:`str_json_path_match` on strings. An empty ``path`` raises
    ``ValueError``. ``path`` is passed through ``str`` before being handed
    to the runtime.
    """
    runtime = get_expression_runtime()
    matched = runtime.expr_struct_json_path_match(self._rust_expr, str(path))
    return Expr(rust_expr=matched)

struct_rename_fields ¶

struct_rename_fields(names)

Rename struct subfields in order (one new name per existing field).

Source code in python/pydantable/expressions.py

def struct_rename_fields(self, names: Sequence[str]) -> Expr:
    """Rename struct subfields positionally.

    Supply one new name per existing field, in field order. Every entry of
    ``names`` is converted with ``str`` before reaching the runtime.
    """
    runtime = get_expression_runtime()
    build_rename = runtime.expr_struct_rename_fields
    renamed = build_rename(self._rust_expr, list(map(str, names)))
    return Expr(rust_expr=renamed)

struct_with_fields ¶

struct_with_fields(**fields)

Add or replace struct subfields (Polars struct.with_fields).

Each keyword must be a field name; each value must be an :class:Expr.

Source code in python/pydantable/expressions.py

def struct_with_fields(self, **fields: Any) -> Expr:
    """Add or replace struct subfields (Polars ``struct.with_fields``).

    Keywords are the field names and each value has to be an
    :class:`Expr`. Calling with no keywords, or with a non-``Expr`` value,
    raises ``TypeError``.
    """
    if not fields:
        raise TypeError(
            "struct_with_fields() requires at least one keyword field=Expr."
        )
    runtime = get_expression_runtime()
    pairs: list[tuple[str, Any]] = []
    for field_name, field_expr in fields.items():
        if isinstance(field_expr, Expr):
            # Hand the runtime the underlying Rust node, not the wrapper.
            pairs.append((str(field_name), field_expr._rust_expr))
            continue
        raise TypeError(
            f"struct_with_fields({field_name}=...) expects Expr, "
            f"got {type(field_expr).__name__}."
        )
    combined = runtime.expr_struct_with_fields(self._rust_expr, pairs)
    return Expr(rust_expr=combined)

str_replace ¶

str_replace(pattern, replacement, *, literal=True)

Replace matches.

Default literal=True is substring replace. Use literal=False for Rust regex (syntax differs from Python re; see docs).

Invalid regex patterns may yield null cells at execution (Polars) rather than raise.

Source code in python/pydantable/expressions.py

def str_replace(
    self, pattern: str, replacement: str, *, literal: bool = True
) -> Expr:
    """Replace matches of ``pattern`` with ``replacement``.

    With the default ``literal=True`` this is a plain substring replace.
    Pass ``literal=False`` to treat ``pattern`` as a Rust regex, whose
    syntax differs from Python ``re`` (see docs).

    An invalid regex may produce null cells at execution time (Polars)
    instead of raising.
    """
    runtime = get_expression_runtime()
    build_replace = runtime.expr_string_replace
    replaced = build_replace(
        self._rust_expr,
        str(pattern),
        str(replacement),
        literal=bool(literal),
    )
    return Expr(rust_expr=replaced)

str_contains ¶

str_contains(substring)

True where the string contains substring (literal, not regex).

The empty substring matches every non-null string (Polars substring contains semantics).

Source code in python/pydantable/expressions.py

def str_contains(self, substring: str) -> Expr:
    """Build a predicate that is true where the string contains ``substring``.

    The match is always literal (never regex). An empty ``substring``
    matches every non-null string, following Polars substring ``contains``
    semantics.
    """
    runtime = get_expression_runtime()
    build_predicate = runtime.expr_string_predicate
    predicate = build_predicate(
        self._rust_expr, "contains", str(substring), literal=True
    )
    return Expr(rust_expr=predicate)

str_contains_pat ¶

str_contains_pat(pattern, *, literal=False)

Substring or Rust-regex match.

literal=False uses the Rust regex dialect (not Python re). Raises ValueError if pattern is empty in regex mode. Malformed regex may yield null per row at execution; see SUPPORTED_TYPES docs.

Source code in python/pydantable/expressions.py

def str_contains_pat(self, pattern: str, *, literal: bool = False) -> Expr:
    """Build a "contains" predicate using a substring or a Rust regex.

    With ``literal=False`` (the default) ``pattern`` is interpreted in the
    Rust ``regex`` dialect, not Python ``re``. An empty ``pattern`` in regex
    mode raises ``ValueError``. A malformed regex may yield null per row at
    execution; see the ``SUPPORTED_TYPES`` docs.
    """
    runtime = get_expression_runtime()
    build_predicate = runtime.expr_string_predicate
    predicate = build_predicate(
        self._rust_expr, "contains", str(pattern), literal=bool(literal)
    )
    return Expr(rust_expr=predicate)

matches ¶

matches(pattern)

Regex match predicate (Rust regex dialect).

Source code in python/pydantable/expressions.py

def matches(self, pattern: str) -> Expr:
    """Regex match predicate (Rust regex dialect).

    ``pattern`` must be a non-empty ``str`` (otherwise ``TypeError``); the
    work is delegated to :meth:`str_contains_pat` with ``literal=False``.
    """
    if isinstance(pattern, str) and pattern:
        return self.str_contains_pat(pattern, literal=False)
    raise TypeError("matches(pattern) expects a non-empty string.")

is_empty_str ¶

is_empty_str()

True where the string cell is exactly the empty string "".

Source code in python/pydantable/expressions.py

def is_empty_str(self) -> Expr:
    """Compare this expression to the empty string with ``==``.

    The result is true where the string cell is exactly empty.
    """
    empty_literal = ""
    return self == empty_literal

is_blank_str ¶

is_blank_str()

True where string cell is empty after stripping whitespace.

Source code in python/pydantable/expressions.py

def is_blank_str(self) -> Expr:
    """Build a predicate that is true where the cell is empty once stripped.

    Chains :meth:`strip` and :meth:`char_length`, then compares the length
    to ``0``.
    """
    stripped = self.strip()
    remaining_chars = stripped.char_length()
    return remaining_chars == 0

str_split ¶

str_split(delimiter)

Split string column into list[str] (per-row).

Delimiter is literal (not regex). Empty delimiter follows Polars UTF-8 split rules. Null string cells stay null. Edge cases are documented in SUPPORTED_TYPES.

Source code in python/pydantable/expressions.py

def str_split(self, delimiter: str) -> Expr:
    """Split each string cell into a ``list[str]``.

    ``delimiter`` is treated literally (not as a regex); an empty
    ``delimiter`` follows Polars UTF-8 split rules. Null string cells stay
    null. Further edge cases are documented in ``SUPPORTED_TYPES``.
    """
    runtime = get_expression_runtime()
    pieces = runtime.expr_string_split(self._rust_expr, str(delimiter))
    return Expr(rust_expr=pieces)

str_reverse ¶

str_reverse()

Reverse each string (Polars str.reverse).

Unicode edge cases (e.g. combining marks) follow Polars, not naive codepoint reversal. See SUPPORTED_TYPES.

Source code in python/pydantable/expressions.py

def str_reverse(self) -> Expr:
    """Build an expression that reverses each string (Polars ``str.reverse``).

    Unicode edge cases such as combining marks follow Polars rather than a
    naive codepoint reversal. See ``SUPPORTED_TYPES``.
    """
    runtime = get_expression_runtime()
    flipped = runtime.expr_str_reverse(self._rust_expr)
    return Expr(rust_expr=flipped)

str_pad_start ¶

str_pad_start(length, fill_char=' ')

Pad start to at least length characters (character count).

fill_char must be exactly one character; otherwise a ValueError is raised at build time.

Source code in python/pydantable/expressions.py

def str_pad_start(self, length: int, fill_char: str = " ") -> Expr:
    """Left-pad each string to at least ``length`` characters.

    ``length`` is a character count and is coerced with ``int``;
    ``fill_char`` is coerced with ``str`` and must be exactly one
    character, otherwise ``ValueError`` at build time.
    """
    runtime = get_expression_runtime()
    build_pad = runtime.expr_str_pad_start
    padded = build_pad(self._rust_expr, int(length), str(fill_char))
    return Expr(rust_expr=padded)

str_pad_end ¶

str_pad_end(length, fill_char=' ')

Pad end to at least length characters.

Same fill_char rules as :meth:str_pad_start.

Source code in python/pydantable/expressions.py

def str_pad_end(self, length: int, fill_char: str = " ") -> Expr:
    """Right-pad each string to at least ``length`` characters.

    ``fill_char`` obeys the same rules as in :meth:`str_pad_start`. Both
    arguments are coerced (``int`` / ``str``) before reaching the runtime.
    """
    runtime = get_expression_runtime()
    build_pad = runtime.expr_str_pad_end
    padded = build_pad(self._rust_expr, int(length), str(fill_char))
    return Expr(rust_expr=padded)

str_zfill ¶

str_zfill(length)

Zero-pad strings to length (sign handled like Polars str.zfill).

Source code in python/pydantable/expressions.py

def str_zfill(self, length: int) -> Expr:
    """Zero-pad each string to ``length``.

    Sign handling matches Polars ``str.zfill``. ``length`` is coerced with
    ``int`` before it is passed to the runtime.
    """
    runtime = get_expression_runtime()
    zero_filled = runtime.expr_str_zfill(self._rust_expr, int(length))
    return Expr(rust_expr=zero_filled)

str_extract_regex ¶

str_extract_regex(pattern, group_index=1)

Extract a regex capture group per row (Rust regex dialect).

group_index 0 is the full match; 1+ are capture groups. Empty pattern raises ValueError. No match or invalid regex may yield null; see SUPPORTED_TYPES.

Source code in python/pydantable/expressions.py

def str_extract_regex(self, pattern: str, group_index: int = 1) -> Expr:
    """Extract one regex capture group per row (Rust ``regex`` dialect).

    ``group_index`` 0 selects the full match and 1+ select capture groups.
    An empty ``pattern`` raises ``ValueError``. Rows with no match, or an
    invalid regex, may yield null; see ``SUPPORTED_TYPES``.
    """
    runtime = get_expression_runtime()
    build_extract = runtime.expr_str_extract_regex
    extracted = build_extract(self._rust_expr, str(pattern), int(group_index))
    return Expr(rust_expr=extracted)

str_json_path_match ¶

str_json_path_match(path)

JSONPath against JSON text cells (Polars str.json_path_match).

Returns a string column (serialized match). Malformed JSON or no match often yields null at execution time. Empty path raises ValueError.

Source code in python/pydantable/expressions.py

def str_json_path_match(self, path: str) -> Expr:
    """Evaluate a JSONPath against JSON text cells.

    Mirrors Polars ``str.json_path_match``. The result is a **string**
    column holding the serialized match. Malformed JSON or a path with no
    match often yields null at execution time. An empty ``path`` raises
    ``ValueError``.
    """
    runtime = get_expression_runtime()
    matched = runtime.expr_str_json_path_match(self._rust_expr, str(path))
    return Expr(rust_expr=matched)

str_json_decode ¶

str_json_decode(dtype)

Parse JSON text per row into struct or map (Polars str.json_decode).

dtype is a nested model or dict[str, T] annotation, same style as :meth:cast. Null string cells yield null. With Polars 0.53, any invalid JSON in the column typically fails execution at :meth:~pydantable.dataframe.DataFrame.collect (not a per-row null). Map targets use the physical list-of-{key,value} entries; JSON must be an array such as [{"key":"a","value":1}], not a bare JSON object. Polars execution only; see INTERFACE_CONTRACT.

Source code in python/pydantable/expressions.py

def str_json_decode(self, dtype: Any) -> Expr:
    """Parse JSON text per row into a struct or map (Polars ``str.json_decode``).

    ``dtype`` is a nested model or a ``dict[str, T]`` annotation, written
    the same way as for :meth:`cast`; it is passed to the runtime unchanged.

    Null string cells yield null. With Polars 0.53, **any invalid JSON in
    the column typically fails execution** at
    :meth:`~pydantable.dataframe.DataFrame.collect` instead of producing a
    per-row null.

    Map targets use the physical list-of-``{key,value}`` entries, so the
    JSON must be an **array** such as ``[{"key":"a","value":1}]`` and not a
    bare JSON object. Polars execution only; see ``INTERFACE_CONTRACT``.
    """
    runtime = get_expression_runtime()
    decoded = runtime.expr_str_json_decode(self._rust_expr, dtype)
    return Expr(rust_expr=decoded)

dt_nanosecond ¶

dt_nanosecond()

Sub-second nanoseconds component (datetime or time columns).

Source code in python/pydantable/expressions.py

def dt_nanosecond(self) -> Expr:
    """Extract the sub-second nanoseconds part (``datetime`` or ``time`` columns)."""
    part_name = "nanosecond"
    runtime = get_expression_runtime()
    component = runtime.expr_temporal_part(self._rust_expr, part_name)
    return Expr(rust_expr=component)

dt_weekday ¶

dt_weekday()

ISO weekday on date / datetime (Mon=1 ... Sun=7, same as Polars).

Not valid on time columns (TypeError at build time).

Source code in python/pydantable/expressions.py

def dt_weekday(self) -> Expr:
    """Extract the ISO weekday from ``date`` / ``datetime`` columns.

    Numbering is Mon=1 ... Sun=7, the same as Polars. ``time`` columns are
    rejected with ``TypeError`` at build time.
    """
    part_name = "weekday"
    runtime = get_expression_runtime()
    component = runtime.expr_temporal_part(self._rust_expr, part_name)
    return Expr(rust_expr=component)

dt_quarter ¶

dt_quarter()

Calendar quarter 1-4 on date / datetime.

Not valid on time columns (TypeError at build time).

Source code in python/pydantable/expressions.py

def dt_quarter(self) -> Expr:
    """Extract the calendar quarter (1-4) from ``date`` / ``datetime`` columns.

    ``time`` columns are rejected with ``TypeError`` at build time.
    """
    part_name = "quarter"
    runtime = get_expression_runtime()
    component = runtime.expr_temporal_part(self._rust_expr, part_name)
    return Expr(rust_expr=component)

dt_week ¶

dt_week()

ISO 8601 week number 1-53 (date / datetime; Polars dt.week).

Same definition as Python datetime.date.isocalendar().week / Polars dt.week() (weeks start Monday; week 1 contains the first Thursday of the year). Not valid on time columns.

Source code in python/pydantable/expressions.py

def dt_week(self) -> Expr:
    """Extract the ISO 8601 week number (1-53) from ``date`` / ``datetime``.

    The definition matches Python ``datetime.date.isocalendar().week`` and
    Polars ``dt.week()``: weeks start on Monday and week 1 is the one that
    contains the first Thursday of the year. Not valid on ``time`` columns.
    """
    part_name = "week"
    runtime = get_expression_runtime()
    component = runtime.expr_temporal_part(self._rust_expr, part_name)
    return Expr(rust_expr=component)

dt_dayofyear ¶

dt_dayofyear()

Day of year 1-366 on date / datetime (Spark dayofyear).

Matches Polars dt.ordinal_day(). Not valid on time columns.

Source code in python/pydantable/expressions.py

def dt_dayofyear(self) -> Expr:
    """Extract the day of year (1-366) from ``date`` / ``datetime`` columns.

    Named after Spark ``dayofyear``; matches Polars ``dt.ordinal_day()``.
    Not valid on ``time`` columns.
    """
    part_name = "dayofyear"
    runtime = get_expression_runtime()
    component = runtime.expr_temporal_part(self._rust_expr, part_name)
    return Expr(rust_expr=component)

strptime ¶

strptime(format, *, to_datetime=False)

Parse strings to date or datetime (strftime format string).

Source code in python/pydantable/expressions.py

def strptime(self, format: str, *, to_datetime: bool = False) -> Expr:
    """Parse string cells into ``date`` or ``datetime`` values.

    ``format`` is a ``strftime``-style format string. ``to_datetime`` is
    coerced with ``bool`` and passed positionally to the runtime.
    """
    runtime = get_expression_runtime()
    build_parse = runtime.expr_strptime
    parsed = build_parse(self._rust_expr, str(format), bool(to_datetime))
    return Expr(rust_expr=parsed)

unix_timestamp ¶

unix_timestamp(unit='seconds')

Unix epoch from date/datetime; unit is seconds or ms.

Source code in python/pydantable/expressions.py

def unix_timestamp(self, unit: str = "seconds") -> Expr:
    """Convert ``date``/``datetime`` cells to a Unix epoch number.

    ``unit`` is ``seconds`` or ``ms``; it is coerced with ``str``.
    """
    runtime = get_expression_runtime()
    epoch = runtime.expr_unix_timestamp(self._rust_expr, str(unit))
    return Expr(rust_expr=epoch)

from_unix_time ¶

from_unix_time(unit='seconds')

UTC-naive datetime from numeric epoch; unit is seconds or ms.

Inverse of :meth:unix_timestamp for typical non-null numeric input.

Source code in python/pydantable/expressions.py

def from_unix_time(self, unit: str = "seconds") -> Expr:
    """Convert a numeric epoch into a UTC-naive ``datetime``.

    ``unit`` is ``seconds`` or ``ms``. For typical non-null numeric input
    this is the inverse of :meth:`unix_timestamp`.
    """
    runtime = get_expression_runtime()
    moment = runtime.expr_from_unix_time(self._rust_expr, str(unit))
    return Expr(rust_expr=moment)

binary_len ¶

binary_len()

Byte length of a bytes column.

Source code in python/pydantable/expressions.py

def binary_len(self) -> Expr:
    """Build an expression for the byte length of a ``bytes`` column."""
    runtime = get_expression_runtime()
    byte_count = runtime.expr_binary_length(self._rust_expr)
    return Expr(rust_expr=byte_count)

map_len ¶

map_len()

Number of entries in a dict[str, T] map column.

Source code in python/pydantable/expressions.py

def map_len(self) -> Expr:
    """Build an expression counting the entries of a ``dict[str, T]`` map column."""
    runtime = get_expression_runtime()
    entry_count = runtime.expr_map_len(self._rust_expr)
    return Expr(rust_expr=entry_count)

map_get ¶

map_get(key)

Value for a string key (missing key → null).

Source code in python/pydantable/expressions.py

def map_get(self, key: str) -> Expr:
    """Look up the value stored under a string key.

    A missing key yields null. ``key`` is coerced with ``str``.
    """
    runtime = get_expression_runtime()
    looked_up = runtime.expr_map_get(self._rust_expr, str(key))
    return Expr(rust_expr=looked_up)

map_contains_key ¶

map_contains_key(key)

Whether the map contains the given string key.

Source code in python/pydantable/expressions.py

def map_contains_key(self, key: str) -> Expr:
    """Build a predicate telling whether the map holds the given string key.

    ``key`` is coerced with ``str`` before reaching the runtime.
    """
    runtime = get_expression_runtime()
    has_key = runtime.expr_map_contains_key(self._rust_expr, str(key))
    return Expr(rust_expr=has_key)

map_keys ¶

map_keys()

List of keys for each map cell.

Source code in python/pydantable/expressions.py

def map_keys(self) -> Expr:
    """Build an expression yielding the list of keys of each map cell."""
    runtime = get_expression_runtime()
    key_list = runtime.expr_map_keys(self._rust_expr)
    return Expr(rust_expr=key_list)

map_values ¶

map_values()

List of values for each map cell.

Source code in python/pydantable/expressions.py

def map_values(self) -> Expr:
    """Build an expression yielding the list of values of each map cell."""
    runtime = get_expression_runtime()
    value_list = runtime.expr_map_values(self._rust_expr)
    return Expr(rust_expr=value_list)

map_entries ¶

map_entries()

List of {key, value} entry structs for each map cell.

Source code in python/pydantable/expressions.py

def map_entries(self) -> Expr:
    """Build an expression yielding ``{key, value}`` entry structs per map cell."""
    runtime = get_expression_runtime()
    entry_list = runtime.expr_map_entries(self._rust_expr)
    return Expr(rust_expr=entry_list)

map_from_entries ¶

map_from_entries()

Build dict[str, T] map cells from list[{key, value}] entries.

Source code in python/pydantable/expressions.py

def map_from_entries(self) -> Expr:
    """Assemble ``dict[str, T]`` map cells out of ``list[{key, value}]`` entries."""
    runtime = get_expression_runtime()
    rebuilt_map = runtime.expr_map_from_entries(self._rust_expr)
    return Expr(rust_expr=rebuilt_map)

element_at ¶

element_at(key)

Alias of :meth:map_get for map columns.

Source code in python/pydantable/expressions.py

def element_at(self, key: str) -> Expr:
    """Forward ``key`` to :meth:`map_get` (alias for map columns)."""
    looked_up = self.map_get(key)
    return looked_up

contains_any ¶

contains_any(values)

Any of the provided values is contained in each list cell.

Source code in python/pydantable/expressions.py

def contains_any(self, values: Any) -> Expr:
    """Build a predicate: does each list cell contain at least one of ``values``?

    ``values`` may be a ``list``, ``tuple`` or ``set`` of literals, or a
    single literal (treated as a one-element list). One
    :meth:`list_contains` term is built per value and the terms are OR-ed
    together with ``|``.

    Raises ``TypeError`` when ``values`` is an ``Expr`` or is empty.
    """
    if isinstance(values, Expr):
        raise TypeError("contains_any(values) expects literal values, not Expr.")
    candidates = values if isinstance(values, (list, tuple, set)) else [values]
    combined: Expr | None = None
    for candidate in tuple(candidates):
        membership = self.list_contains(candidate)
        if combined is None:
            combined = membership
        else:
            combined = combined | membership
    if combined is None:
        raise TypeError("contains_any(values) expects at least one value.")
    return combined

contains_all ¶

contains_all(values)

All of the provided values are contained in each list cell.

Source code in python/pydantable/expressions.py

def contains_all(self, values: Any) -> Expr:
    """Build a predicate: does each list cell contain every one of ``values``?

    ``values`` may be a ``list``, ``tuple`` or ``set`` of literals, or a
    single literal (treated as a one-element list). One
    :meth:`list_contains` term is built per value and the terms are AND-ed
    together with ``&``.

    Raises ``TypeError`` when ``values`` is an ``Expr`` or is empty.
    """
    if isinstance(values, Expr):
        raise TypeError("contains_all(values) expects literal values, not Expr.")
    candidates = values if isinstance(values, (list, tuple, set)) else [values]
    combined: Expr | None = None
    for candidate in tuple(candidates):
        membership = self.list_contains(candidate)
        if combined is None:
            combined = membership
        else:
            combined = combined & membership
    if combined is None:
        raise TypeError("contains_all(values) expects at least one value.")
    return combined

list_any ¶

list_any()

Any True in a boolean list.

Source code in python/pydantable/expressions.py

def list_any(self) -> Expr:
    """Build a predicate for "any True in a boolean list".

    Implemented as :meth:`list_contains` called with ``True``.
    """
    has_true = self.list_contains(True)
    return has_true

list_all ¶

list_all()

All True in a boolean list.

Source code in python/pydantable/expressions.py

def list_all(self) -> Expr:
    """Build a predicate for "all True in a boolean list".

    Implemented as the ``~`` negation of :meth:`list_contains` called with
    ``False``.
    """
    has_false = self.list_contains(False)
    return ~has_false

list_mean ¶

list_mean()

Mean of each numeric list cell as float.

Requires list[int] or list[float]. Empty lists and null list cells yield null.

Source code in python/pydantable/expressions.py

def list_mean(self) -> Expr:
    """Build an expression for the mean of each numeric list cell, as ``float``.

    The column must be ``list[int]`` or ``list[float]``. Empty lists and
    null list cells yield null.
    """
    runtime = get_expression_runtime()
    averaged = runtime.expr_list_mean(self._rust_expr)
    return Expr(rust_expr=averaged)

list_join ¶

list_join(separator, *, ignore_nulls=False)

Join each list[str] cell (Polars list.join).

Empty lists yield empty strings. ignore_nulls skips null list elements when True. See SUPPORTED_TYPES.

Source code in python/pydantable/expressions.py

def list_join(self, separator: str, *, ignore_nulls: bool = False) -> Expr:
    """Join the elements of each ``list[str]`` cell (Polars ``list.join``).

    Empty lists yield empty strings. When ``ignore_nulls`` is ``True``,
    null list elements are skipped. See ``SUPPORTED_TYPES``.
    """
    runtime = get_expression_runtime()
    build_join = runtime.expr_list_join
    joined = build_join(
        self._rust_expr, str(separator), ignore_nulls=bool(ignore_nulls)
    )
    return Expr(rust_expr=joined)

list_sort ¶

list_sort(*, descending=False, nulls_last=False, maintain_order=False)

Sort the elements within each list cell (list[int], list[float], etc.); the result is a new expression, not an in-place mutation.

descending, nulls_last, and maintain_order map to Polars list.sort options. Element-type rules are in SUPPORTED_TYPES.

Source code in python/pydantable/expressions.py

def list_sort(
    self,
    *,
    descending: bool = False,
    nulls_last: bool = False,
    maintain_order: bool = False,
) -> Expr:
    """Build an expression that sorts the elements inside each list cell.

    Applies to ``list[int]``, ``list[float]``, etc. The keyword flags
    ``descending``, ``nulls_last`` and ``maintain_order`` correspond to the
    Polars ``list.sort`` options and are each coerced with ``bool``.
    Element-type rules are in ``SUPPORTED_TYPES``.
    """
    runtime = get_expression_runtime()
    build_sort = runtime.expr_list_sort
    sorted_node = build_sort(
        self._rust_expr,
        descending=bool(descending),
        nulls_last=bool(nulls_last),
        maintain_order=bool(maintain_order),
    )
    return Expr(rust_expr=sorted_node)

list_unique ¶

list_unique(*, stable=False)

Deduplicate list elements per row.

With stable=True, first-seen order is preserved (Polars unique_stable).

Source code in python/pydantable/expressions.py

def list_unique(self, *, stable: bool = False) -> Expr:
    """Build an expression that drops duplicate elements in each list cell.

    Passing ``stable=True`` keeps first-seen order (Polars
    ``unique_stable``). The flag is coerced with ``bool``.
    """
    runtime = get_expression_runtime()
    deduped = runtime.expr_list_unique(self._rust_expr, stable=bool(stable))
    return Expr(rust_expr=deduped)

Schema ¶

Bases: BaseModel

Base model for DataFrame[YourSchema] column definitions.

Uses extra="forbid" so unexpected fields fail validation at construction.

Source code in python/pydantable/schema/_impl.py

class Schema(BaseModel):
    """Base model for ``DataFrame[YourSchema]`` column definitions.

    Uses ``extra="forbid"`` so unexpected fields fail validation at construction.
    """

    # Model configuration: extra (undeclared) fields are forbidden.
    model_config = ConfigDict(extra="forbid")