from typing import Any, Dict, List, Union, Callable
from fugue.dataframe.dataframe import DataFrame
from triad.collections.dict import IndexedOrderedDict
from triad.exceptions import InvalidOperationError
from triad.utils.assertion import assert_or_throw
[docs]
class DataFrames(IndexedOrderedDict[str, DataFrame]):
"""Ordered dictionary of DataFrames. There are two modes: with keys
and without keys. If without key ``_<n>`` will be used as the key
for each dataframe, and it will be treated as an array in Fugue framework.
It's a subclass of dict, so it supports all dict operations. It's
also ordered, so you can trust the order of keys and values.
The initialization is flexible
>>> df1 = ArrayDataFrame([[0]],"a:int")
>>> df2 = ArrayDataFrame([[1]],"a:int")
>>> dfs = DataFrames(df1,df2) # init as [df1, df2]
>>> assert not dfs.has_key
>>> assert df1 is dfs[0] and df2 is dfs[1]
>>> dfs_array = list(dfs.values())
>>> dfs = DataFrames(a=df1,b=df2) # init as {a:df1, b:df2}
>>> assert dfs.has_key
>>> assert df1 is dfs[0] and df2 is dfs[1] # order is guaranteed
>>> df3 = ArrayDataFrame([[1]],"b:int")
>>> dfs2 = DataFrames(dfs, c=df3) # {a:df1, b:df2, c:df3}
>>> dfs2 = DataFrames(dfs, df3) # invalid, because dfs has key, df3 doesn't
>>> dfs2 = DataFrames(dict(a=df1,b=df2)) # init as {a:df1, b:df2}
>>> dfs2 = DataFrames([df1,df2],df3) # init as [df1, df2, df3]
"""
def __init__(self, *args: Any, **kwargs: Any): # noqa: C901
super().__init__()
self._has_key: bool = False
for d in args:
if isinstance(d, DataFrames):
if d.has_key:
for k, v in d.items():
self[k] = v
else:
for v in d.values():
self._append(v)
elif isinstance(d, DataFrame):
self._append(d)
elif isinstance(d, List):
for o in d:
if isinstance(o, tuple):
self[o[0]] = o[1]
else:
self._append(o)
elif isinstance(d, Dict):
self.update(d)
else:
raise ValueError(f"{d} is not valid to initialize DataFrames")
self.update(kwargs)
self.set_readonly()
@property
def has_key(self):
"""If this collection has key (dict-like) or not (list-like)"""
return self._has_key
def __setitem__( # type: ignore
self, key: str, value: DataFrame, *args: Any, **kwds: Any
) -> None:
assert isinstance(key, str)
assert_or_throw(
len(self) == 0 or self.has_key,
InvalidOperationError("this DataFrames can's have key"),
)
assert_or_throw(
isinstance(value, DataFrame),
lambda: ValueError(f"{key} has non DataFrame value"),
)
super().__setitem__(key, value, *args, **kwds) # type: ignore
self._has_key = True
def __getitem__(self, key: Union[str, int]) -> DataFrame: # type: ignore
if isinstance(key, int):
key = self.get_key_by_index(key)
return super().__getitem__(key) # type: ignore
[docs]
def convert(self, func: Callable[["DataFrame"], DataFrame]) -> "DataFrames":
"""Create another DataFrames with the same structure,
but all converted by ``func``
:return: the new DataFrames
.. admonition:: Examples
>>> dfs2 = dfs.convert(lambda df: df.as_local()) # convert all to local
"""
if self.has_key:
return DataFrames([(k, func(v)) for k, v in self.items()])
else:
return DataFrames([func(v) for v in self.values()])
def _append(self, value: Any):
assert_or_throw(
not self.has_key, InvalidOperationError("this DataFrames must have key")
)
assert_or_throw(
isinstance(value, DataFrame),
lambda: ValueError(f"{value} is not a DataFrame"),
)
super().__setitem__("_" + str(len(self)), value)