Source code for ppc_robot_lib.steps.transformations.rename

from ppc_robot_lib.steps.abstract_step import AbstractStep
from ppc_robot_lib.tasks.task_context import TaskContextInterface, StepPerformance
from pandas import DataFrame


class RenameStep(AbstractStep):
    """
    Allows you to rename or drop columns from a table. You have to specify ``rename``, ``drop`` or both.
    Optional argument ``drop_not_renamed`` allows you to drop columns that were not renamed by this operation.

    **Example:**

    >>> from ppc_robot_lib.steps.transformations import RenameStep
    >>> RenameStep("in_table", rename={'original': 'new', 'col1': "renamed_col1"},
    ...            drop=['unnecessary_column', "internal_value_users_shall_not_see"])
    """

    def __init__(
        self,
        input_table: str,
        rename: dict[str, str] | None = None,
        drop: list[str] | None = None,
        output_table: str | None = None,
        drop_not_renamed: bool = False,
    ):
        """
        :param input_table: Input table.
        :param rename: Dictionary of columns, key is the original name and value is the new name.
        :param drop: List of column names to drop. At least one of ``drop`` and ``rename`` must be given.
        :param output_table: Output table. If not given, operations are performed in-place on the input table.
        :param drop_not_renamed: Drop columns that are not present in the ``rename`` argument.
        :raises ValueError: If neither ``rename`` nor ``drop`` is given, or if ``drop_not_renamed`` is set
            without ``rename``.
        """
        if rename is None and drop is None:
            raise ValueError('Either rename or drop arguments must be specified.')

        # Truthiness check (instead of ``is True``) so that values such as ``1`` are validated here
        # rather than failing later in ``execute`` when ``rename`` is dereferenced.
        if drop_not_renamed and rename is None:
            raise ValueError('The rename argument must be specified when drop_not_renamed is set to true.')

        self.input_table = input_table
        self.rename = rename
        self.drop = drop
        # Without an explicit output table the step works in-place on the input table.
        self.output_table = output_table if output_table is not None else input_table
        self.drop_not_renamed = drop_not_renamed

    def execute(self, task_ctx: TaskContextInterface) -> StepPerformance:
        """
        Rename and/or drop columns of the input table and store the result as the output table.

        When the input and output table names are equal, the table is modified in-place; otherwise
        the input table is left untouched and a new table is stored under the output name.

        :param task_ctx: Task context; its ``work_set`` is used to read the input table and to
            store the output table.
        :return: Step performance constructed from the resulting table, with ``rows_in`` and
            ``rows_out`` both set to its row count (this step never adds or removes rows).
        """
        inplace = self.input_table == self.output_table
        table = task_ctx.work_set.get_table(self.input_table)
        new_table: DataFrame | None = None

        if self.rename:
            # With inplace=True pandas returns None and mutates ``table`` itself.
            result = table.rename(columns=self.rename, inplace=inplace)
            new_table = table if inplace else result

        # Work on a copy: extending ``self.drop`` directly would mutate the step's configuration
        # (and the list passed in by the caller), accumulating columns on every execution.
        drop_cols = list(self.drop) if self.drop else []

        if self.drop_not_renamed:
            target_names = set((self.rename or {}).values())
            # ``new_table`` is still None when ``rename`` is an empty dict; fall back to the input.
            current = table if new_table is None else new_table
            drop_cols.extend(col for col in current.columns.values if col not in target_names)

        if drop_cols:
            if new_table is not None:
                # ``new_table`` is either the in-place input or a fresh copy produced by rename,
                # so dropping in-place here never touches a table that must stay intact.
                # Axis 0 = Rows, Axis 1 = Columns
                new_table.drop(axis=1, labels=drop_cols, inplace=True, errors='ignore')
            else:
                result = table.drop(axis=1, labels=drop_cols, inplace=inplace, errors='ignore')
                new_table = table if inplace else result

        if new_table is None:
            # Nothing to rename and nothing to drop (e.g. ``rename={}`` with ``drop=[]``):
            # pass the data through unchanged instead of storing None.
            new_table = table if inplace else table.copy()

        if self.output_table in task_ctx.work_set:
            task_ctx.work_set.delete_table(self.output_table)
        task_ctx.work_set.set_table(self.output_table, new_table)

        rows = len(new_table.index)
        return StepPerformance(new_table, rows_in=rows, rows_out=rows)