Source code for fugue.column.functions

from typing import Any, Optional

import pyarrow as pa
from fugue.column.expressions import (
    ColumnExpr,
    _FuncExpr,
    _to_col,
    function,
)
from triad import Schema


def coalesce(*args: Any) -> ColumnExpr:
    """Build a SQL ``COALESCE`` expression from the given arguments

    :param args: the candidates, in order. Any value that is not a
      :class:`~fugue.column.expressions.ColumnExpr` is converted to a
      literal column by :func:`~fugue.column.expressions.col`
    :return: the ``COALESCE`` function expression

    .. note::

        this function can infer neither type nor alias

    .. admonition:: New Since
        :class: hint

        **0.6.0**

    .. admonition:: Examples

        .. code-block:: python

            import fugue.column.functions as f

            f.coalesce(col("a"), col("b")+col("c"), 1)
    """
    # Normalize every argument to a column expression before building the call
    operands = tuple(map(_to_col, args))
    return function("COALESCE", *operands)
def min(col: ColumnExpr) -> ColumnExpr:  # pylint: disable=redefined-builtin
    """Aggregation expression for SQL ``MIN``

    :param col: the column whose minimum is wanted
    :return: the ``MIN`` aggregation expression

    .. note::

        * the result type can be inferred from the type of ``col``
        * the result alias can be inferred from the inferred alias of ``col``

    .. admonition:: New Since
        :class: hint

        **0.6.0**

    .. admonition:: Examples

        .. code-block:: python

            import fugue.column.functions as f

            # assume col a has type double
            f.min(col("a"))  # CAST(MIN(a) AS double) AS a
            f.min(-col("a"))  # CAST(MIN(-a) AS double) AS a

            # neither type nor alias can be inferred in the following cases
            f.min(col("a")+1)
            f.min(col("a")+col("b"))

            # you can specify explicitly
            # CAST(MIN(a+b) AS int) AS x
            f.min(col("a")+col("b")).cast(int).alias("x")
    """
    assert isinstance(col, ColumnExpr)
    aggregated = _SameTypeUnaryAggFuncExpr("MIN", col)
    return aggregated
def max(col: ColumnExpr) -> ColumnExpr:  # pylint: disable=redefined-builtin
    """Aggregation expression for SQL ``MAX``

    :param col: the column whose maximum is wanted
    :return: the ``MAX`` aggregation expression

    .. note::

        * the result type can be inferred from the type of ``col``
        * the result alias can be inferred from the inferred alias of ``col``

    .. admonition:: New Since
        :class: hint

        **0.6.0**

    .. admonition:: Examples

        .. code-block:: python

            import fugue.column.functions as f

            # assume col a has type double
            f.max(col("a"))  # CAST(MAX(a) AS double) AS a
            f.max(-col("a"))  # CAST(MAX(-a) AS double) AS a

            # neither type nor alias can be inferred in the following cases
            f.max(col("a")+1)
            f.max(col("a")+col("b"))

            # you can specify explicitly
            # CAST(MAX(a+b) AS int) AS x
            f.max(col("a")+col("b")).cast(int).alias("x")
    """
    assert isinstance(col, ColumnExpr)
    aggregated = _SameTypeUnaryAggFuncExpr("MAX", col)
    return aggregated
def count(col: ColumnExpr) -> ColumnExpr:
    """Aggregation expression for SQL ``COUNT``

    :param col: the column to count
    :return: the ``COUNT`` aggregation expression

    .. note::

        * the result type cannot be inferred from the type of ``col``
        * the result alias can be inferred from the inferred alias of ``col``

    .. admonition:: New Since
        :class: hint

        **0.6.0**

    .. admonition:: Examples

        .. code-block:: python

            import fugue.column.functions as f

            f.count(all_cols())  # COUNT(*)
            f.count(col("a"))  # COUNT(a) AS a

            # you can specify explicitly
            # CAST(COUNT(a) AS double) AS a
            f.count(col("a")).cast(float)
    """
    assert isinstance(col, ColumnExpr)
    aggregated = _UnaryAggFuncExpr("COUNT", col)
    return aggregated
def count_distinct(col: ColumnExpr) -> ColumnExpr:
    """Aggregation expression for SQL ``COUNT DISTINCT``

    :param col: the column whose distinct elements are counted
    :return: the ``COUNT`` aggregation expression with the distinct flag set

    .. note::

        * the result type cannot be inferred from the type of ``col``
        * the result alias can be inferred from the inferred alias of ``col``

    .. admonition:: New Since
        :class: hint

        **0.6.0**

    .. admonition:: Examples

        .. code-block:: python

            import fugue.column.functions as f

            f.count_distinct(all_cols())  # COUNT(DISTINCT *)
            f.count_distinct(col("a"))  # COUNT(DISTINCT a) AS a

            # you can specify explicitly
            # CAST(COUNT(DISTINCT a) AS double) AS a
            f.count_distinct(col("a")).cast(float)
    """
    assert isinstance(col, ColumnExpr)
    # Same SQL function as ``count``; only the distinct flag differs
    aggregated = _UnaryAggFuncExpr("COUNT", col, arg_distinct=True)
    return aggregated
def avg(col: ColumnExpr) -> ColumnExpr:
    """Aggregation expression for SQL ``AVG``

    :param col: the column to average
    :return: the ``AVG`` aggregation expression

    .. note::

        * the result type cannot be inferred from the type of ``col``
        * the result alias can be inferred from the inferred alias of ``col``

    .. admonition:: New Since
        :class: hint

        **0.6.0**

    .. admonition:: Examples

        .. code-block:: python

            import fugue.column.functions as f

            f.avg(col("a"))  # AVG(a) AS a

            # you can specify explicitly
            # CAST(AVG(a) AS double) AS a
            f.avg(col("a")).cast(float)
    """
    assert isinstance(col, ColumnExpr)
    aggregated = _UnaryAggFuncExpr("AVG", col)
    return aggregated
def sum(col: ColumnExpr) -> ColumnExpr:  # pylint: disable=redefined-builtin
    """Aggregation expression for SQL ``SUM``

    :param col: the column to add up
    :return: the ``SUM`` aggregation expression

    .. note::

        * the result type cannot be inferred from the type of ``col``
        * the result alias can be inferred from the inferred alias of ``col``

    .. admonition:: New Since
        :class: hint

        **0.6.0**

    .. admonition:: Examples

        .. code-block:: python

            import fugue.column.functions as f

            f.sum(col("a"))  # SUM(a) AS a

            # you can specify explicitly
            # CAST(SUM(a) AS double) AS a
            f.sum(col("a")).cast(float)
    """
    assert isinstance(col, ColumnExpr)
    aggregated = _UnaryAggFuncExpr("SUM", col)
    return aggregated
def first(col: ColumnExpr) -> ColumnExpr:
    """Aggregation expression for SQL ``FIRST``

    :param col: the column whose first value is wanted
    :return: the ``FIRST`` aggregation expression

    .. note::

        * the result type can be inferred from the type of ``col``
        * the result alias can be inferred from the inferred alias of ``col``

    .. admonition:: New Since
        :class: hint

        **0.6.0**

    .. admonition:: Examples

        .. code-block:: python

            import fugue.column.functions as f

            # assume col a has type double
            f.first(col("a"))  # CAST(FIRST(a) AS double) AS a
            f.first(-col("a"))  # CAST(FIRST(-a) AS double) AS a

            # neither type nor alias can be inferred in the following cases
            f.first(col("a")+1)
            f.first(col("a")+col("b"))

            # you can specify explicitly
            # CAST(FIRST(a+b) AS int) AS x
            f.first(col("a")+col("b")).cast(int).alias("x")
    """
    assert isinstance(col, ColumnExpr)
    aggregated = _SameTypeUnaryAggFuncExpr("FIRST", col)
    return aggregated
def last(col: ColumnExpr) -> ColumnExpr:
    """Aggregation expression for SQL ``LAST``

    :param col: the column whose last value is wanted
    :return: the ``LAST`` aggregation expression

    .. note::

        * the result type can be inferred from the type of ``col``
        * the result alias can be inferred from the inferred alias of ``col``

    .. admonition:: New Since
        :class: hint

        **0.6.0**

    .. admonition:: Examples

        .. code-block:: python

            import fugue.column.functions as f

            # assume col a has type double
            f.last(col("a"))  # CAST(LAST(a) AS double) AS a
            f.last(-col("a"))  # CAST(LAST(-a) AS double) AS a

            # neither type nor alias can be inferred in the following cases
            f.last(col("a")+1)
            f.last(col("a")+col("b"))

            # you can specify explicitly
            # CAST(LAST(a+b) AS int) AS x
            f.last(col("a")+col("b")).cast(int).alias("x")
    """
    assert isinstance(col, ColumnExpr)
    aggregated = _SameTypeUnaryAggFuncExpr("LAST", col)
    return aggregated
def is_agg(column: Any) -> bool:
    """Tell whether an object is a column expression involving aggregation

    :param column: the object to check
    :return: whether ``column`` is a
      :class:`~fugue.column.expressions.ColumnExpr` that contains
      aggregation operations

    .. admonition:: New Since
        :class: hint

        **0.6.0**

    .. admonition:: Examples

        .. code-block:: python

            import fugue.column.functions as f

            assert not f.is_agg(1)
            assert not f.is_agg(col("a"))
            assert not f.is_agg(col("a")+lit(1))

            assert f.is_agg(f.max(col("a")))
            assert f.is_agg(-f.max(col("a")))
            assert f.is_agg(f.max(col("a")+1))
            assert f.is_agg(f.max(col("a"))+f.min(col("a")))
    """
    # An aggregation node is itself the answer
    if isinstance(column, _UnaryAggFuncExpr):
        return True
    # Anything that is not a function expression has no operands to inspect
    if not isinstance(column, _FuncExpr):
        return False
    # Otherwise look for an aggregation among the positional operands first,
    # then among the keyword operands
    for operand in column.args:
        if is_agg(operand):
            return True
    for operand in column.kwargs.values():
        if is_agg(operand):
            return True
    return False
class _UnaryAggFuncExpr(_FuncExpr):
    """Function expression for an aggregation over a single column

    :param func: the SQL function name
    :param col: the column expression being aggregated
    :param arg_distinct: forwarded to the base class as ``arg_distinct``,
      defaults to False
    """

    def __init__(self, func: str, col: ColumnExpr, arg_distinct: bool = False):
        super().__init__(func, col, arg_distinct=arg_distinct)

    def infer_alias(self) -> ColumnExpr:
        """Return this expression, aliased after its argument if unnamed

        :return: ``self`` when ``output_name`` is not empty, otherwise a
          version aliased with the output name of the argument's inferred
          alias
        """
        if self.output_name != "":
            return self
        inferred_name = self.args[0].infer_alias().output_name
        return self.alias(inferred_name)

    def _copy(self) -> _FuncExpr:
        """Construct a new ``_UnaryAggFuncExpr`` from this one's parts"""
        # NOTE(review): ``arg_distinct`` survives the copy only if the base
        # class returns it through ``kwargs`` -- confirm against _FuncExpr
        return _UnaryAggFuncExpr(self.func, *self.args, **self.kwargs)


class _SameTypeUnaryAggFuncExpr(_UnaryAggFuncExpr):
    """Unary aggregation whose type can be inferred from its argument"""

    def _copy(self) -> _FuncExpr:
        """Construct a new ``_SameTypeUnaryAggFuncExpr`` from this one's parts"""
        return _SameTypeUnaryAggFuncExpr(self.func, *self.args, **self.kwargs)

    def infer_type(self, schema: Schema) -> Optional[pa.DataType]:
        """Infer the output type of this expression

        :param schema: the schema passed on to the argument's ``infer_type``
        :return: ``as_type`` when it is set (truthy), otherwise whatever the
          argument infers from ``schema``
        """
        explicit_type = self.as_type
        if explicit_type:
            return explicit_type
        return self.args[0].infer_type(schema)