Source code for ppc_robot_lib.steps.transformations.group_by_column

from ppc_robot_lib.steps.abstract_step import AbstractStep
from ppc_robot_lib.tasks import TaskContextInterface, StepPerformance


class GroupByColumnStep(AbstractStep):
    """
    Split a table into groups keyed by one or more columns.

    The value stored in the output table is the raw result of
    ``groupby`` -- a :py:class:`pandas.core.groupby.DataFrameGroupBy` --
    and *not* a :py:class:`pandas.DataFrame`. This is the difference from
    :py:class:`ppc_robot_lib.steps.transformations.group_by_and_aggregate.GroupByAndAggregateStep`,
    so take care when consuming the result in later steps.

    See :ref:`pandas:groupby.split` for details on working with
    :py:class:`pandas.core.groupby.DataFrameGroupBy` objects.

    **Example:**

        >>> from ppc_robot_lib.steps.transformations import GroupByColumnStep
        >>> GroupByColumnStep("in_table", ["Campaign", "AdGroup"], output_table="out_table")
    """

    def __init__(self, input_table: str, columns: str | list[str] | tuple[str, ...], output_table: str, sort=False):
        """
        :param input_table: Name of the table to read from the work set.
        :param columns: Column name, or a list/tuple of column names, to group by.
        :param output_table: Name under which the grouped result is stored.
        :param sort: Pass ``True`` to have the result sorted by the grouping
            columns. Defaults to ``False``.
        """
        self.input_table = input_table
        self.columns = columns
        self.output_table = output_table
        self.sort = sort

    def execute(self, task_ctx: TaskContextInterface) -> StepPerformance:
        """
        Group the input table and store the grouped object in the work set.

        :param task_ctx: Task context whose work set holds the tables.
        :return: Performance record built from the *input* table; both
            ``rows_in`` and ``rows_out`` are the input table's row count,
            since grouping itself does not drop any rows.
        """
        source = task_ctx.work_set.get_table(self.input_table)
        grouped = source.groupby(by=self.columns, sort=self.sort)
        task_ctx.work_set.set_table(self.output_table, grouped)

        # The grouped object has no row count of its own, so the length of
        # the source table's index is reported on both sides.
        n_rows = len(source.index)
        return StepPerformance(source, rows_in=n_rows, rows_out=n_rows)