from ppc_robot_lib.steps.abstract_step import AbstractStep
from ppc_robot_lib.tasks.task_context import TaskContextInterface, StepPerformance
from pandas import DataFrame
[docs]
class RenameStep(AbstractStep):
    """
    Allows you to rename or drop columns from a table. You have to specify ``rename``, ``drop`` or both.
    Optional argument ``drop_not_renamed`` allows you to drop columns that were not renamed by this operation.

    Renaming is applied before dropping, so the names given in ``drop`` are matched against the column
    names as they are *after* the rename. Names in ``drop`` that do not exist in the table are ignored.

    **Example:**

        >>> from ppc_robot_lib.steps.transformations import RenameStep
        >>> RenameStep("in_table", rename={'original': 'new', 'col1': "renamed_col1"},
        ...            drop=['unnecessary_column', "internal_value_users_shall_not_see"])
    """

    def __init__(
        self,
        input_table: str,
        rename: dict[str, str] | None = None,
        drop: list[str] | None = None,
        output_table: str | None = None,
        drop_not_renamed: bool = False,
    ):
        """
        :param input_table: Input table.
        :param rename: Dictionary of columns, key is the original name and value is the new name.
        :param drop: List of column names to drop. At least one of ``drop`` and ``rename`` must be given.
        :param output_table: Output table. If not given, operations are performed in-place on the input table.
        :param drop_not_renamed: Drop columns that are not present in the ``rename`` argument.
        :raises ValueError: If neither ``rename`` nor ``drop`` is given, or if ``drop_not_renamed`` is set
            without ``rename``.
        """
        if rename is None and drop is None:
            raise ValueError('Either rename or drop arguments must be specified.')

        # Checked by truthiness, exactly as execute() does, so that a bad combination is rejected here
        # instead of failing later on ``None.values()``.
        if drop_not_renamed and rename is None:
            raise ValueError('The rename argument must be specified when drop_not_renamed is set to true.')

        self.input_table = input_table
        self.rename = rename
        self.drop = drop
        # Without an explicit output table the step overwrites its input.
        self.output_table = output_table if output_table is not None else input_table
        self.drop_not_renamed = drop_not_renamed

    def execute(self, task_ctx: TaskContextInterface) -> StepPerformance:
        """
        Rename and/or drop columns of the input table and store the result as the output table.

        When the input and output table names are equal, the input ``DataFrame`` object itself is modified;
        otherwise the input is left untouched and a new ``DataFrame`` is stored.

        :param task_ctx: Task context; its ``work_set`` is used to read the input and store the output table.
        :return: Step performance built from the resulting table, with its row count reported both as
            ``rows_in`` and ``rows_out`` (this step never adds or removes rows).
        """
        inplace = self.input_table == self.output_table
        table = task_ctx.work_set.get_table(self.input_table)
        new_table: DataFrame | None = None

        if self.rename:
            # With inplace=True pandas returns None and mutates ``table`` itself.
            result = table.rename(columns=self.rename, inplace=inplace)
            new_table = table if inplace else result

        drop_cols = self.drop
        if self.drop_not_renamed:
            # Look at the post-rename column names; fall back to the input when nothing was renamed.
            current = new_table if new_table is not None else table
            target_names = set(self.rename.values())
            not_renamed_cols = [col for col in current.columns if col not in target_names]
            # Build a fresh list: extending ``self.drop`` would mutate the step's configuration (and the
            # caller's list), so the dropped columns would pile up across repeated executions.
            drop_cols = list(drop_cols) + not_renamed_cols if drop_cols else not_renamed_cols

        if drop_cols:
            if new_table is not None:
                # ``new_table`` is either the input itself (in-place mode) or the copy produced by
                # rename(), so dropping in place is correct in both modes.
                new_table.drop(columns=drop_cols, inplace=True, errors='ignore')
            else:
                result = table.drop(columns=drop_cols, inplace=inplace, errors='ignore')
                new_table = table if inplace else result

        if new_table is None:
            # Nothing had to be renamed or dropped (e.g. ``rename={}`` or ``drop=[]``): pass the data
            # through unchanged instead of storing ``None``. A copy keeps the output independent of the input.
            new_table = table if inplace else table.copy()

        # NOTE(review): the existing output table is removed first; presumably set_table() does not
        # replace an existing entry on its own -- confirm against the work set implementation.
        if self.output_table in task_ctx.work_set:
            task_ctx.work_set.delete_table(self.output_table)
        task_ctx.work_set.set_table(self.output_table, new_table)

        rows = len(new_table.index)
        return StepPerformance(new_table, rows_in=rows, rows_out=rows)