class Dataset(ABC):
    """The base class of Fugue :class:`~.fugue.dataframe.dataframe.DataFrame`
    and :class:`~.fugue.bag.bag.Bag`.

    .. note::

        This is for internal use only.
    """

    def __init__(self):
        # The metadata container is allocated lazily by the ``metadata``
        # property; ``None`` means it has not been requested yet.
        self._metadata: Optional[ParamDict] = None

    @property
    def metadata(self) -> ParamDict:
        """Metadata of the dataset, created on first access"""
        if self._metadata is not None:
            return self._metadata
        self._metadata = ParamDict()
        return self._metadata

    @property
    def has_metadata(self) -> bool:
        """Whether the metadata container exists and holds at least one entry"""
        current = self._metadata
        if current is None:
            return False
        return len(current) > 0

    @property
    @abstractmethod
    def native(self) -> Any:  # pragma: no cover
        """The native object wrapped by this Dataset class"""
        raise NotImplementedError

    @property
    @abstractmethod
    def is_local(self) -> bool:  # pragma: no cover
        """Whether this is a local Dataset"""
        raise NotImplementedError

    @property
    @abstractmethod
    def is_bounded(self) -> bool:  # pragma: no cover
        """Whether this Dataset is bounded"""
        raise NotImplementedError

    @property
    @abstractmethod
    def num_partitions(self) -> int:  # pragma: no cover
        """Number of physical partitions of this Dataset.
        Please read |PartitionTutorial|
        """
        raise NotImplementedError

    @property
    @abstractmethod
    def empty(self) -> bool:  # pragma: no cover
        """Whether this Dataset is empty"""
        raise NotImplementedError

    @abstractmethod
    def count(self) -> int:  # pragma: no cover
        """Get the number of rows of this Dataset"""
        raise NotImplementedError

    def assert_not_empty(self) -> None:
        """Assert this Dataset is not empty

        :raises FugueDatasetEmptyError: if it is empty
        """
        # Evaluate the condition first, then build the error object, so the
        # order of side effects matches a single inline call.
        is_populated = not self.empty
        failure = FugueDatasetEmptyError("dataframe is empty")
        assert_or_throw(is_populated, failure)

    def show(
        self, n: int = 10, with_count: bool = False, title: Optional[str] = None
    ) -> None:
        """Display the Dataset

        :param n: number of rows to print, defaults to 10
        :param with_count: whether to show dataset count, defaults to False
        :param title: title of the dataset, defaults to None

        .. note::

            When ``with_count`` is True, it can trigger expensive calculation
            for a distributed dataframe. So if you call this function directly,
            you may need to
            :func:`fugue.execution.execution_engine.ExecutionEngine.persist`
            the dataset.
        """
        display = get_dataset_display(self)
        # The handler's result is forwarded unchanged.
        return display.show(n=n, with_count=with_count, title=title)

    def __repr__(self):
        """String representation of the Dataset, delegated to its display handler"""
        display = get_dataset_display(self)
        return display.repr()

    def _repr_html_(self):
        """HTML representation of the Dataset, delegated to its display handler"""
        display = get_dataset_display(self)
        return display.repr_html()
class DatasetDisplay(ABC):
    """The base class for display handlers of :class:`~.Dataset`

    :param ds: the Dataset
    """

    # Class-level lock shared by all display handlers; it is only declared
    # here and not used by the methods of this base class.
    _SHOW_LOCK = SerializableRLock()

    def __init__(self, ds: Dataset):
        self._ds = ds

    @abstractmethod
    def show(
        self, n: int = 10, with_count: bool = False, title: Optional[str] = None
    ) -> None:  # pragma: no cover
        """Show the :class:`~.Dataset`

        :param n: top n items to display, defaults to 10
        :param with_count: whether to display the total count, defaults to False
        :param title: title to display, defaults to None
        """
        raise NotImplementedError

    def repr(self) -> str:
        """The string representation of the :class:`~.Dataset`

        :return: the class name of the wrapped dataset
        """
        dataset_type = type(self._ds)
        return str(dataset_type.__name__)

    def repr_html(self) -> str:
        """The HTML representation of the :class:`~.Dataset`

        :return: the result of :meth:`repr`, HTML-escaped
        """
        plain_text = self.repr()
        return html.escape(plain_text)
@fugue_plugin
def get_dataset_display(ds: "Dataset") -> DatasetDisplay:  # pragma: no cover
    """Get the display class to display a :class:`~.Dataset`

    This body is the fallback: it always raises, reporting the type of ``ds``.

    :param ds: the Dataset to be displayed
    :raises NotImplementedError: always, when this fallback body runs
    """
    message = f"no matching DatasetDisplay registered for {type(ds)}"
    raise NotImplementedError(message)