Source code for ppc_robot_lib.reporting.input.sklik

from copy import deepcopy

from ppc_robot_lib import tasks
from ppc_robot_lib import sklik
from collections.abc import Callable, Iterable
from ppc_robot_lib.utils import iter
import pandas


def _create_downloader() -> sklik.ReportDownloader:
    """
    Creates a report downloader for the currently running task.

    The task context is obtained from :py:func:`tasks.get_context`; the credentials stored on that context
    are asked for a client, and the client is wrapped in a :py:class:`sklik.ReportDownloader`.

    :return: Downloader bound to the client created from the task context credentials.
    """
    context = tasks.get_context()
    # The context is passed back to the credentials so they can build the client for this task.
    client = context.credentials.get_client(context)
    return sklik.ReportDownloader(client)


def download_sklik_report(
    query: sklik.Query,
    allow_empty_statistics: bool = False,
    transformation: Callable[[Iterable], Iterable] | None = None,
    custom_columns: list[str] | None = None,
) -> pandas.DataFrame:
    """
    Downloads a report from Sklik API. This step uses the PPC Robots Sklik connector to download the report,
    this means that it automatically performs all necessary paging and data normalization.

    Only reports defined in the :py:mod:`ppc_robot_lib.sklik.reports` module can be used.
    See :doc:`/connectors/sklik/reports` for more details.

    The ``query`` parameter should be a :py:class:`ppc_robot_lib.sklik.query.Query` instance with one of
    the reports above.

    **Example:**

    >>> from ppc_robot_lib.sklik import Query, Condition, Op, During, Granularity
    >>> from ppc_robot_lib.sklik.types import StatusEnum
    >>> from ppc_robot_lib.reporting.input import download_sklik_report
    >>> query = Query(
    ...     select=['Name', 'Status', 'Clicks', 'Impressions'],
    ...     from_report='group',
    ...     where=[
    ...         Condition('Status', Op.EQ, StatusEnum.ACTIVE),
    ...         Condition('Impressions', Op.GT, 0),
    ...     ],
    ...     during=During.LAST_30_DAYS,
    ...     granularity=Granularity.DAILY,
    ... )
    >>> df = download_sklik_report(query)

    :param query: Query to execute.
    :param allow_empty_statistics: Should we include rows with zero impressions?
    :param transformation: Optional custom transformation function. The function receives an iterable
        (a generator, to be more precise, but this behaviour can change in the future) and must return
        another iterable, that can be passed to :py:class:`pandas.DataFrame` as the ``data`` argument.
        Pass ``None`` (the default) to use no custom transformation.
    :param custom_columns: Optional custom column names for the output table. If no columns are given,
        names from the query are used. Useful if you use a custom transformation that can change the columns.
    :return: DataFrame with downloaded data.
    """
    downloader = _create_downloader()
    # Arguments are forwarded positionally, in the order the downloader was originally called with.
    return downloader.download_as_dataframe(query, allow_empty_statistics, transformation, custom_columns)


def download_sklik_report_details(
    base_query: sklik.Query,
    cond_column: str,
    in_values: Iterable[str | int | float],
    allow_empty_statistics: bool = False,
    transformation: Callable[[Iterable], Iterable] | None = None,
    custom_columns: list[str] | None = None,
    batch_size: int = 10000,
) -> pandas.DataFrame:
    """
    Downloads additional details for a given set of values.

    This step takes a copy of the base query and adds a new IN[] condition on ``cond_column`` to it.
    The given values are split into batches of at most ``batch_size`` items, the query is executed once
    for each batch, and the partial results are then concatenated into a single table.

    Falsy values (e.g. ``None``, ``0`` or an empty string) are removed from ``in_values`` before
    the batches are built. The ``base_query`` object itself is never modified, a deep copy is used instead.

    Example::

        >>> from ppc_robot_lib.sklik import Query, During
        >>> from ppc_robot_lib.reporting.input import download_sklik_report_details
        >>> ids = [12, 15, 36, 85, 94, 101]  # This can be any iterable, such as column from a DataFrame.
        >>> base_query = Query(
        ...     select=['Id', 'Name', 'Impressions', 'Clicks'],
        ...     from_report='campaign',
        ...     during=During.LAST_30_DAYS,
        ... )
        >>> campaign_details = download_sklik_report_details(
        ...     base_query=base_query,
        ...     cond_column='Id',
        ...     in_values=ids,
        ...     batch_size=5000,
        ... )

    :param base_query: Base Query used to build a query for each batch.
    :param cond_column: Name of the column that will be used in the condition.
    :param in_values: Values that will be used for the IN[] condition.
    :param allow_empty_statistics: Should we include rows with zero impressions?
    :param transformation: Optional custom transformation function. The function receives an iterable
        (a generator, to be more precise, but this behaviour can change in the future) and must return
        another iterable, that can be passed to :py:class:`pandas.DataFrame` as the ``data`` argument.
    :param custom_columns: Optional custom column names for the output table. If no columns are given,
        names from the query are used. Useful if you use a custom transformation that can change the columns.
    :param batch_size: Number of values in the IN[] condition for each batch.
    :return: DataFrame with downloaded data. If there are no values to query for, an empty DataFrame
        with ``custom_columns`` (or the columns selected by the query) is returned.
    """
    downloader = _create_downloader()
    # filter(None, ...) drops every falsy value, not only None.
    unique_values = list(filter(None, iter.unique(in_values)))

    # Work on a copy so that the caller's query does not receive the extra condition.
    query = deepcopy(base_query)
    # NOTE(review): the condition is created with Op.EQ and a list value; presumably the connector
    # treats this as an IN[] condition -- confirm against the sklik connector.
    in_cond = sklik.Condition(cond_column, sklik.Op.EQ, [])
    query.add_condition(in_cond)

    partial_tables = []
    for in_expr in iter.chunks(unique_values, batch_size):
        # The same condition object is reused; only its value changes between batches.
        in_cond.value = in_expr
        partial_tables.append(
            downloader.download_as_dataframe(query, allow_empty_statistics, transformation, custom_columns)
        )

    if partial_tables:
        return pandas.concat(partial_tables, ignore_index=True)

    # Nothing was downloaded: return an empty table that still has the expected columns.
    columns = custom_columns if custom_columns else query.get_select()
    return pandas.DataFrame(data=[], columns=columns)