from ppc_robot_lib.steps.abstract_step import AbstractStep
from ppc_robot_lib.tasks import TaskContextInterface, StepPerformance
[docs]
class GroupByColumnStep(AbstractStep):
    """
    Splits a table into groups keyed by one or more columns.

    The table stored under ``output_table`` is the direct result of calling ``groupby`` on the input table, i.e.
    a :py:class:`pandas.core.groupby.DataFrameGroupBy` and **not** a :py:class:`pandas.DataFrame`. No aggregation
    is performed; use :py:class:`ppc_robot_lib.steps.transformations.group_by_and_aggregate.GroupByAndAggregateStep`
    if you need an aggregated data frame. Steps consuming the output must be able to work with a group-by object.

    See :ref:`pandas:groupby.split` for more information about working with
    :py:class:`pandas.core.groupby.DataFrameGroupBy` objects.

    **Example:**

    >>> from ppc_robot_lib.steps.transformations import GroupByColumnStep
    >>> GroupByColumnStep("in_table", ["Campaign", "AdGroup"], output_table="out_table")
    """

    def __init__(self, input_table: str, columns: str | list[str] | tuple[str, ...], output_table: str, sort=False):
        """
        :param input_table: Name of the table to read from the work set.
        :param columns: Single column name or a sequence of column names used as grouping keys.
        :param output_table: Name under which the grouped object is stored in the work set.
        :param sort: Passed to ``groupby`` as its ``sort`` argument; set to ``True`` to have the groups sorted
            by the grouping columns.
        """
        self.input_table = input_table
        self.output_table = output_table
        self.columns = columns
        self.sort = sort

    def execute(self, task_ctx: TaskContextInterface) -> StepPerformance:
        """
        Group the input table and store the grouped object under the output table name.

        :param task_ctx: Task context whose work set holds the input table and receives the output.
        :return: Performance record built from the *input* table; both ``rows_in`` and ``rows_out`` are set
            to the number of rows of the input table.
        """
        work_set = task_ctx.work_set
        source = work_set.get_table(self.input_table)

        grouped = source.groupby(by=self.columns, sort=self.sort)
        work_set.set_table(self.output_table, grouped)

        # Grouping only partitions the rows, so the input row count is reported on both sides.
        total_rows = len(source.index)
        return StepPerformance(source, rows_in=total_rows, rows_out=total_rows)