from collections.abc import Callable, Iterable
from ppc_robot_lib.steps.abstract_step import AbstractStep
from ppc_robot_lib.sklik import Query, ReportDownloader, SklikCredentials
from ppc_robot_lib.tasks.task_context import TaskContextInterface, StepPerformance
import pandas
[docs]
class SklikReportStep(AbstractStep):
    """
    Input step that downloads one report from the Sklik API into a work-set table.

    The download goes through the PPC Robot Sklik connector, so all necessary paging and data
    normalization is performed automatically.

    Only reports defined in the :py:mod:`ppc_robot_lib.sklik.reports` module can be requested.
    See :doc:`/connectors/sklik/reports` for more details.

    The ``query`` parameter should be a :py:class:`ppc_robot_lib.sklik.query.Query` instance that selects
    from one of the reports mentioned above.

    **Example:**

    >>> from ppc_robot_lib.sklik import Query, Condition, Op, During, Granularity
    >>> from ppc_robot_lib.sklik.types import StatusEnum
    >>> from ppc_robot_lib.steps.input import SklikReportStep
    >>> query = Query(
    ...     select=['Name', 'Status', 'Clicks', 'Impressions'],
    ...     from_report='group',
    ...     where=[
    ...         Condition('Status', Op.EQ, StatusEnum.ACTIVE),
    ...         Condition('Impressions', Op.GT, 0),
    ...     ],
    ...     during=During.LAST_30_DAYS,
    ...     granularity=Granularity.DAILY,
    ... )
    >>> SklikReportStep(query, output_table='ad_groups')
    """

    def __init__(
        self,
        query: Query,
        output_table: str,
        allow_empty_statistics=False,
        transformation: Callable[[Iterable], Iterable] = None,
        custom_columns: list[str] = None,
    ):
        """
        :param query: Query to execute.
        :param output_table: Name of the table the downloaded report is stored under.
        :param allow_empty_statistics: Should rows with zero impressions be included?
        :param transformation: Optional custom transformation function. It receives an iterable (currently
            a generator, but this behaviour can change in the future) and must return another iterable
            that can be passed to :py:class:`pandas.DataFrame` as the ``data`` argument.
        :param custom_columns: Optional column names for the output table. When no columns are given, the
            names from the query are used. Useful together with a custom transformation that changes
            the columns.
        """
        # What to download and where to put it.
        self.query = query
        self.output_table = output_table
        self.allow_empty_statistics = allow_empty_statistics
        # Optional post-processing of the downloaded data.
        self.transformation = transformation
        self.custom_columns = custom_columns

    def execute(self, task_ctx: TaskContextInterface) -> StepPerformance:
        """
        Download the report, turn it into a DataFrame and store it in the work set.

        :param task_ctx: Task context; provides the credentials, the work set that receives the table
            and the performance counters.
        :return: Performance record for the created table, with ``rows_out`` set to its row count.
        """
        credentials: SklikCredentials = task_ctx.credentials
        client = credentials.get_client(task_ctx)
        downloader = ReportDownloader(client)

        definition = downloader.create_report_definition(self.query)
        rows = downloader.download(definition, self.allow_empty_statistics)

        # Let the caller-supplied transformation reshape the rows, if one was given.
        if self.transformation:
            rows = self.transformation(rows)

        # Custom columns take precedence; otherwise fall back to the columns selected by the query.
        column_names = self.custom_columns or self.query.get_select()

        # Drop repeated column names (keeping the first occurrence and the original order), because
        # two columns of the same name in one DataFrame would cause many problems.
        column_names = list(dict.fromkeys(column_names))

        table = pandas.DataFrame(data=rows, columns=column_names)
        task_ctx.work_set.set_table(self.output_table, table)

        row_count = len(table.index)
        task_ctx.performance.add_input_rows(row_count)

        return StepPerformance(table, rows_out=row_count)

    def get_label_args(self):
        """
        :return: List with the ``repr`` of the query, the output table name and the
            ``allow_empty_statistics`` flag.
        """
        label_args = [
            repr(self.query),
            self.output_table,
            self.allow_empty_statistics,
        ]
        return label_args