Source code for ppc_robot_lib.steps.input.sklik_report

from collections.abc import Callable, Iterable
from ppc_robot_lib.steps.abstract_step import AbstractStep
from ppc_robot_lib.sklik import Query, ReportDownloader, SklikCredentials
from ppc_robot_lib.tasks.task_context import TaskContextInterface, StepPerformance
import pandas


class SklikReportStep(AbstractStep):
    """
    Input step that downloads a report from the Sklik API into a table.

    The download goes through the PPC Robots Sklik connector, so all
    necessary paging and data normalization is performed automatically.
    Only reports defined in the :py:mod:`ppc_robot_lib.sklik.reports` module
    can be used. See :doc:`/connectors/sklik/reports` for more details.

    The ``query`` parameter should be a
    :py:class:`ppc_robot_lib.sklik.query.Query` instance that selects from
    one of those reports.

    **Example:**

    >>> from ppc_robot_lib.sklik import Query, Condition, Op, During, Granularity
    >>> from ppc_robot_lib.sklik.types import StatusEnum
    >>> from ppc_robot_lib.steps.input import SklikReportStep
    >>> query = Query(
    ...     select=['Name', 'Status', 'Clicks', 'Impressions'],
    ...     from_report='group',
    ...     where=[
    ...         Condition('Status', Op.EQ, StatusEnum.ACTIVE),
    ...         Condition('Impressions', Op.GT, 0),
    ...     ],
    ...     during=During.LAST_30_DAYS,
    ...     granularity=Granularity.DAILY,
    ... )
    >>> SklikReportStep(query, output_table='ad_groups')
    """

    def __init__(
        self,
        query: Query,
        output_table: str,
        allow_empty_statistics=False,
        transformation: Callable[[Iterable], Iterable] = None,
        custom_columns: list[str] = None,
    ):
        """
        :param query: Query to execute.
        :param output_table: Name of the table the downloaded data is stored under.
        :param allow_empty_statistics: Should rows with zero impressions be included?
        :param transformation: Optional custom transformation function. It is
            called with an iterable of downloaded records (currently
            a generator, but this may change in the future) and must return
            another iterable that can be passed to
            :py:class:`pandas.DataFrame` as the ``data`` argument.
        :param custom_columns: Optional column names for the output table.
            When none are given, the names selected by the query are used.
            Useful when a custom transformation changes the columns.
        """
        self.query = query
        self.output_table = output_table
        self.allow_empty_statistics = allow_empty_statistics
        self.transformation = transformation
        self.custom_columns = custom_columns

    def execute(self, task_ctx: TaskContextInterface) -> StepPerformance:
        """
        Download the report, build the output table and register it.

        :param task_ctx: Task context that supplies the credentials, the work
            set receiving the table and the performance counters.
        :return: Performance record for the created table; the number of
            output rows equals the number of rows in the table.
        """
        credentials: SklikCredentials = task_ctx.credentials
        client = credentials.get_client(task_ctx)
        downloader = ReportDownloader(client)

        definition = downloader.create_report_definition(self.query)
        rows = downloader.download(definition, self.allow_empty_statistics)

        # Let the caller-supplied hook reshape the records, if there is one.
        if self.transformation:
            rows = self.transformation(rows)

        # Explicit column names win; otherwise fall back to the query's select list.
        names = self.custom_columns or self.query.get_select()

        # Drop repeated names while keeping their first-seen order: two columns
        # of the same name in one DataFrame would cause many problems.
        unique_names = list(dict.fromkeys(names))

        table = pandas.DataFrame(data=rows, columns=unique_names)
        task_ctx.work_set.set_table(self.output_table, table)

        row_count = len(table.index)
        task_ctx.performance.add_input_rows(row_count)
        return StepPerformance(table, rows_out=row_count)

    def get_label_args(self):
        """
        :return: Values describing this step instance: the ``repr`` of the
            query, the output table name and the empty-statistics flag.
        """
        query_repr = repr(self.query)
        return [query_repr, self.output_table, self.allow_empty_statistics]