from ppc_robot_lib.steps import AbstractStep
from ppc_robot_lib.tasks import TaskContextInterface, StepPerformance
import pandas
[docs]
class ConcatStep(AbstractStep):
    """
    Stacks the rows of several tables on top of each other and stores the result as one new table.

    The work is delegated to :py:func:`pandas.concat` along the row axis, so its semantics apply. The call is made
    with ``ignore_index=True``, so the row labels of the inputs are not carried over to the result.

    **Example:**

    >>> from ppc_robot_lib.steps.transformations import ConcatStep
    >>> ConcatStep(["urls_ads", "urls_keywords", "urls_sitelinks"], output_table="urls")
    """

    def __init__(self, input_tables: list[str], output_table: str):
        """
        :param input_tables: Names of the tables whose rows are concatenated, in the given order.
        :param output_table: Name of the table that receives the result. The output table must not already exist.
        """
        self.output_table = output_table
        self.input_tables = input_tables

    def execute(self, task_ctx: TaskContextInterface) -> StepPerformance:
        """
        Fetches every input table from the work set, concatenates them and registers the result.

        :param task_ctx: Task context whose ``work_set`` is read from and written to.
        :return: Performance record built from the new table; the same row count is reported as both
            ``rows_in`` and ``rows_out``.
        """
        work_set = task_ctx.work_set

        # Collect the frames in the order the caller listed them; concat keeps that order.
        frames = []
        for name in self.input_tables:
            frames.append(work_set.get_table(name))

        # copy=False asks pandas not to copy the underlying data when it can avoid it.
        combined: pandas.DataFrame = pandas.concat(frames, ignore_index=True, copy=False)
        work_set.set_table(self.output_table, combined)

        # Concatenation neither adds nor drops rows, so one count serves for input and output.
        row_count = len(combined.index)
        return StepPerformance(combined, rows_in=row_count, rows_out=row_count)