fugue
- fugue.bag
- fugue.collections
- fugue.collections.partition
BagPartitionCursor
DatasetPartitionCursor
PartitionCursor
PartitionSpec
PartitionSpec.algo
PartitionSpec.empty
PartitionSpec.get_cursor()
PartitionSpec.get_key_schema()
PartitionSpec.get_num_partitions()
PartitionSpec.get_partitioner()
PartitionSpec.get_sorts()
PartitionSpec.jsondict
PartitionSpec.num_partitions
PartitionSpec.partition_by
PartitionSpec.presort
PartitionSpec.presort_expr
parse_presort_exp()
- fugue.collections.sql
- fugue.collections.yielded
- fugue.collections.partition
- fugue.column
- fugue.column.expressions
- fugue.column.functions
- fugue.column.sql
SQLExpressionGenerator
SelectColumns
SelectColumns.agg_funcs
SelectColumns.all_cols
SelectColumns.assert_all_with_names()
SelectColumns.assert_no_agg()
SelectColumns.assert_no_wildcard()
SelectColumns.group_keys
SelectColumns.has_agg
SelectColumns.has_literals
SelectColumns.is_distinct
SelectColumns.literals
SelectColumns.non_agg_funcs
SelectColumns.replace_wildcard()
SelectColumns.simple
SelectColumns.simple_cols
- fugue.dataframe
- fugue.dataframe.api
- fugue.dataframe.array_dataframe
- fugue.dataframe.arrow_dataframe
ArrowDataFrame
ArrowDataFrame.alter_columns()
ArrowDataFrame.as_array()
ArrowDataFrame.as_array_iterable()
ArrowDataFrame.as_arrow()
ArrowDataFrame.as_dict_iterable()
ArrowDataFrame.as_dicts()
ArrowDataFrame.as_pandas()
ArrowDataFrame.count()
ArrowDataFrame.empty
ArrowDataFrame.head()
ArrowDataFrame.native
ArrowDataFrame.native_as_df()
ArrowDataFrame.peek_array()
ArrowDataFrame.peek_dict()
ArrowDataFrame.rename()
- fugue.dataframe.dataframe
DataFrame
DataFrame.alter_columns()
DataFrame.as_array()
DataFrame.as_array_iterable()
DataFrame.as_arrow()
DataFrame.as_dict_iterable()
DataFrame.as_dicts()
DataFrame.as_local()
DataFrame.as_local_bounded()
DataFrame.as_pandas()
DataFrame.columns
DataFrame.drop()
DataFrame.get_info_str()
DataFrame.head()
DataFrame.native_as_df()
DataFrame.peek_array()
DataFrame.peek_dict()
DataFrame.rename()
DataFrame.schema
DataFrame.schema_discovered
DataFrameDisplay
LocalBoundedDataFrame
LocalDataFrame
LocalUnboundedDataFrame
YieldedDataFrame
as_fugue_df()
- fugue.dataframe.dataframe_iterable_dataframe
IterableArrowDataFrame
IterablePandasDataFrame
LocalDataFrameIterableDataFrame
LocalDataFrameIterableDataFrame.alter_columns()
LocalDataFrameIterableDataFrame.as_array()
LocalDataFrameIterableDataFrame.as_array_iterable()
LocalDataFrameIterableDataFrame.as_arrow()
LocalDataFrameIterableDataFrame.as_local_bounded()
LocalDataFrameIterableDataFrame.as_pandas()
LocalDataFrameIterableDataFrame.empty
LocalDataFrameIterableDataFrame.head()
LocalDataFrameIterableDataFrame.native
LocalDataFrameIterableDataFrame.peek_array()
LocalDataFrameIterableDataFrame.rename()
- fugue.dataframe.dataframes
- fugue.dataframe.function_wrapper
- fugue.dataframe.iterable_dataframe
- fugue.dataframe.pandas_dataframe
PandasDataFrame
PandasDataFrame.alter_columns()
PandasDataFrame.as_array()
PandasDataFrame.as_array_iterable()
PandasDataFrame.as_arrow()
PandasDataFrame.as_dict_iterable()
PandasDataFrame.as_dicts()
PandasDataFrame.as_pandas()
PandasDataFrame.count()
PandasDataFrame.empty
PandasDataFrame.head()
PandasDataFrame.native
PandasDataFrame.native_as_df()
PandasDataFrame.peek_array()
PandasDataFrame.rename()
- fugue.dataframe.utils
- fugue.dataset
- fugue.execution
- fugue.execution.api
aggregate()
anti_join()
assign()
broadcast()
clear_global_engine()
cross_join()
distinct()
dropna()
engine_context()
fillna()
filter()
full_outer_join()
get_context_engine()
get_current_conf()
get_current_parallelism()
inner_join()
intersect()
join()
left_outer_join()
load()
persist()
repartition()
right_outer_join()
run_engine_function()
sample()
save()
select()
semi_join()
set_global_engine()
subtract()
take()
union()
- fugue.execution.execution_engine
EngineFacet
ExecutionEngine
ExecutionEngine.aggregate()
ExecutionEngine.as_context()
ExecutionEngine.assign()
ExecutionEngine.broadcast()
ExecutionEngine.comap()
ExecutionEngine.conf
ExecutionEngine.convert_yield_dataframe()
ExecutionEngine.create_default_map_engine()
ExecutionEngine.create_default_sql_engine()
ExecutionEngine.distinct()
ExecutionEngine.dropna()
ExecutionEngine.fillna()
ExecutionEngine.filter()
ExecutionEngine.get_current_parallelism()
ExecutionEngine.in_context
ExecutionEngine.intersect()
ExecutionEngine.is_global
ExecutionEngine.join()
ExecutionEngine.load_df()
ExecutionEngine.load_yielded()
ExecutionEngine.map_engine
ExecutionEngine.on_enter_context()
ExecutionEngine.on_exit_context()
ExecutionEngine.persist()
ExecutionEngine.repartition()
ExecutionEngine.sample()
ExecutionEngine.save_df()
ExecutionEngine.select()
ExecutionEngine.set_global()
ExecutionEngine.set_sql_engine()
ExecutionEngine.sql_engine
ExecutionEngine.stop()
ExecutionEngine.stop_engine()
ExecutionEngine.subtract()
ExecutionEngine.take()
ExecutionEngine.union()
ExecutionEngine.zip()
ExecutionEngineParam
FugueEngineBase
MapEngine
SQLEngine
- fugue.execution.factory
- fugue.execution.native_execution_engine
NativeExecutionEngine
NativeExecutionEngine.broadcast()
NativeExecutionEngine.create_default_map_engine()
NativeExecutionEngine.create_default_sql_engine()
NativeExecutionEngine.distinct()
NativeExecutionEngine.dropna()
NativeExecutionEngine.fillna()
NativeExecutionEngine.get_current_parallelism()
NativeExecutionEngine.intersect()
NativeExecutionEngine.is_distributed
NativeExecutionEngine.join()
NativeExecutionEngine.load_df()
NativeExecutionEngine.log
NativeExecutionEngine.persist()
NativeExecutionEngine.pl_utils
NativeExecutionEngine.repartition()
NativeExecutionEngine.sample()
NativeExecutionEngine.save_df()
NativeExecutionEngine.subtract()
NativeExecutionEngine.take()
NativeExecutionEngine.to_df()
NativeExecutionEngine.union()
PandasMapEngine
QPDPandasEngine
- fugue.execution.api
- fugue.extensions
- fugue.extensions.creator
- fugue.extensions.outputter
- fugue.extensions.processor
- fugue.extensions.transformer
- fugue.extensions.context
ExtensionContext
ExtensionContext.callback
ExtensionContext.cursor
ExtensionContext.execution_engine
ExtensionContext.has_callback
ExtensionContext.key_schema
ExtensionContext.output_schema
ExtensionContext.params
ExtensionContext.partition_spec
ExtensionContext.rpc_server
ExtensionContext.validate_on_compile()
ExtensionContext.validate_on_runtime()
ExtensionContext.validation_rules
ExtensionContext.workflow_conf
- fugue.rpc
- fugue.sql
- fugue.workflow
- fugue.workflow.api
- fugue.workflow.input
- fugue.workflow.module
- fugue.workflow.workflow
FugueWorkflow
FugueWorkflow.add()
FugueWorkflow.assert_eq()
FugueWorkflow.assert_not_eq()
FugueWorkflow.conf
FugueWorkflow.create()
FugueWorkflow.create_data()
FugueWorkflow.df()
FugueWorkflow.get_result()
FugueWorkflow.intersect()
FugueWorkflow.join()
FugueWorkflow.last_df
FugueWorkflow.load()
FugueWorkflow.out_transform()
FugueWorkflow.output()
FugueWorkflow.process()
FugueWorkflow.run()
FugueWorkflow.select()
FugueWorkflow.set_op()
FugueWorkflow.show()
FugueWorkflow.spec_uuid()
FugueWorkflow.subtract()
FugueWorkflow.transform()
FugueWorkflow.union()
FugueWorkflow.yields
FugueWorkflow.zip()
FugueWorkflowResult
WorkflowDataFrame
WorkflowDataFrame.aggregate()
WorkflowDataFrame.alter_columns()
WorkflowDataFrame.anti_join()
WorkflowDataFrame.as_array()
WorkflowDataFrame.as_array_iterable()
WorkflowDataFrame.as_local()
WorkflowDataFrame.as_local_bounded()
WorkflowDataFrame.assert_eq()
WorkflowDataFrame.assert_not_eq()
WorkflowDataFrame.assign()
WorkflowDataFrame.broadcast()
WorkflowDataFrame.checkpoint()
WorkflowDataFrame.compute()
WorkflowDataFrame.count()
WorkflowDataFrame.cross_join()
WorkflowDataFrame.deterministic_checkpoint()
WorkflowDataFrame.distinct()
WorkflowDataFrame.drop()
WorkflowDataFrame.dropna()
WorkflowDataFrame.empty
WorkflowDataFrame.fillna()
WorkflowDataFrame.filter()
WorkflowDataFrame.full_outer_join()
WorkflowDataFrame.head()
WorkflowDataFrame.inner_join()
WorkflowDataFrame.intersect()
WorkflowDataFrame.is_bounded
WorkflowDataFrame.is_local
WorkflowDataFrame.join()
WorkflowDataFrame.left_anti_join()
WorkflowDataFrame.left_outer_join()
WorkflowDataFrame.left_semi_join()
WorkflowDataFrame.name
WorkflowDataFrame.native
WorkflowDataFrame.native_as_df()
WorkflowDataFrame.num_partitions
WorkflowDataFrame.out_transform()
WorkflowDataFrame.output()
WorkflowDataFrame.partition()
WorkflowDataFrame.partition_by()
WorkflowDataFrame.partition_spec
WorkflowDataFrame.peek_array()
WorkflowDataFrame.per_partition_by()
WorkflowDataFrame.per_row()
WorkflowDataFrame.persist()
WorkflowDataFrame.process()
WorkflowDataFrame.rename()
WorkflowDataFrame.result
WorkflowDataFrame.right_outer_join()
WorkflowDataFrame.sample()
WorkflowDataFrame.save()
WorkflowDataFrame.save_and_use()
WorkflowDataFrame.schema
WorkflowDataFrame.select()
WorkflowDataFrame.semi_join()
WorkflowDataFrame.show()
WorkflowDataFrame.spec_uuid()
WorkflowDataFrame.strong_checkpoint()
WorkflowDataFrame.subtract()
WorkflowDataFrame.take()
WorkflowDataFrame.transform()
WorkflowDataFrame.union()
WorkflowDataFrame.weak_checkpoint()
WorkflowDataFrame.workflow
WorkflowDataFrame.yield_dataframe_as()
WorkflowDataFrame.yield_file_as()
WorkflowDataFrame.yield_table_as()
WorkflowDataFrame.zip()
WorkflowDataFrames
fugue.api
fugue.constants
- fugue.constants.register_global_conf(conf, on_dup=0)[source]
Register global Fugue configs that can be picked up by any Fugue execution engines as the base configs.
- Parameters:
conf (Dict[str, Any]) – the config dictionary
on_dup (int) – see triad.collections.dict.ParamDict.update(), defaults to ParamDict.OVERWRITE
- Return type:
None
Note
When using ParamDict.THROW as on_dup, it’s transactional. If any key in conf is already in global config and the value is different from the new value, then ValueError will be thrown.
Examples
from fugue import register_global_conf, NativeExecutionEngine

register_global_conf({"my.value": 1})

engine = NativeExecutionEngine()
assert 1 == engine.conf["my.value"]

engine = NativeExecutionEngine({"my.value": 2})
assert 2 == engine.conf["my.value"]
fugue.dev
All modules for developing and extending Fugue
fugue.exceptions
- exception fugue.exceptions.FugueBug[source]
Bases:
FugueError
Fugue internal bug
- exception fugue.exceptions.FugueDataFrameError[source]
Bases:
FugueError
Fugue dataframe related error
- exception fugue.exceptions.FugueDataFrameInitError[source]
Bases:
FugueDataFrameError
Fugue dataframe initialization error
- exception fugue.exceptions.FugueDataFrameOperationError[source]
Bases:
FugueDataFrameError
Fugue dataframe invalid operation
- exception fugue.exceptions.FugueDatasetEmptyError[source]
Bases:
FugueDataFrameError
Fugue dataframe is empty
- exception fugue.exceptions.FugueInterfacelessError[source]
Bases:
FugueWorkflowCompileError
Fugue interfaceless exceptions
- exception fugue.exceptions.FugueInvalidOperation[source]
Bases:
FugueError
Invalid operation on the Fugue framework
- exception fugue.exceptions.FuguePluginsRegistrationError[source]
Bases:
FugueError
Fugue plugins registration error
- exception fugue.exceptions.FugueSQLError[source]
Bases:
FugueWorkflowCompileError
Fugue SQL error
- exception fugue.exceptions.FugueSQLRuntimeError[source]
Bases:
FugueWorkflowRuntimeError
Fugue SQL runtime error
- exception fugue.exceptions.FugueSQLSyntaxError[source]
Bases:
FugueSQLError
Fugue SQL syntax error
- exception fugue.exceptions.FugueWorkflowCompileError[source]
Bases:
FugueWorkflowError
Fugue workflow compile time error
- exception fugue.exceptions.FugueWorkflowCompileValidationError[source]
Bases:
FugueWorkflowCompileError
Fugue workflow compile time validation error
- exception fugue.exceptions.FugueWorkflowError[source]
Bases:
FugueError
Fugue workflow exceptions
- exception fugue.exceptions.FugueWorkflowRuntimeError[source]
Bases:
FugueWorkflowError
Fugue workflow runtime error
- exception fugue.exceptions.FugueWorkflowRuntimeValidationError[source]
Bases:
FugueWorkflowRuntimeError
Fugue workflow runtime validation error