From 80a9dd9193a634176ca7fe257a5f61fa3738a393 Mon Sep 17 00:00:00 2001 From: Jan Kadlec Date: Fri, 9 Jan 2026 12:56:23 +0100 Subject: [PATCH] feat: enable paging customization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on user feedback, the result page size needs to be configurable, because the default page size of 1,000 rows is not optimal for large datasets. JIRA: PSDK-227 risk: low --- .../src/gooddata_pandas/data_access.py | 10 ++++++++- .../src/gooddata_pandas/dataframe.py | 22 ++++++++++++++++++- .../src/gooddata_pandas/series.py | 9 ++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/packages/gooddata-pandas/src/gooddata_pandas/data_access.py b/packages/gooddata-pandas/src/gooddata_pandas/data_access.py index c6f39e9ab..096fc7bf6 100644 --- a/packages/gooddata-pandas/src/gooddata_pandas/data_access.py +++ b/packages/gooddata-pandas/src/gooddata_pandas/data_access.py @@ -358,6 +358,7 @@ def _extract_from_attributes_and_maybe_metrics( col_to_attr_idx: dict[str, int], col_to_metric_idx: dict[str, int], index_to_attr_idx: Optional[dict[str, int]] = None, + result_page_len: Optional[int] = None, ) -> tuple[dict, dict]: """ Internal function that extracts data from execution response with attributes columns and @@ -371,6 +372,8 @@ def _extract_from_attributes_and_maybe_metrics( col_to_metric_idx (dict[str, int]): A mapping of pandas column names to metric dimension indices. index_to_attr_idx (Optional[dict[str, int]]): An optional mapping of pandas index names to attribute dimension indices. + result_page_len (Optional[int]): Optional page size for result pagination. + Defaults to _RESULT_PAGE_LEN (1000). Larger values can improve performance for large result sets. 
Returns: tuple: A tuple containing the following dictionaries: @@ -379,7 +382,8 @@ def _extract_from_attributes_and_maybe_metrics( """ exec_def = execution.exec_def offset = [0 for _ in exec_def.dimensions] - limit = [len(exec_def.metrics), _RESULT_PAGE_LEN] if exec_def.has_metrics() else [_RESULT_PAGE_LEN] + page_len = result_page_len if result_page_len is not None else _RESULT_PAGE_LEN + limit = [len(exec_def.metrics), page_len] if exec_def.has_metrics() else [page_len] attribute_dim = 1 if exec_def.has_metrics() else 0 result = execution.read_result(limit=limit, offset=offset) safe_index_to_attr_idx = index_to_attr_idx if index_to_attr_idx is not None else dict() @@ -421,6 +425,7 @@ def compute_and_extract( filter_by: Optional[Union[Filter, list[Filter]]] = None, on_execution_submitted: Optional[Callable[[Execution], None]] = None, is_cancellable: bool = False, + result_page_len: Optional[int] = None, ) -> tuple[dict, dict]: """ Convenience function that computes and extracts data from the execution response. @@ -435,6 +440,8 @@ def compute_and_extract( submitted to the backend. is_cancellable (bool, optional): Whether the execution of this definition should be cancelled when the connection is interrupted. + result_page_len (Optional[int]): Optional page size for result pagination. + Defaults to 1000. Larger values can improve performance for large result sets. 
Returns: tuple: A tuple containing the following dictionaries: @@ -472,4 +479,5 @@ def compute_and_extract( col_to_attr_idx, col_to_metric_idx, index_to_attr_idx, + result_page_len=result_page_len, ) diff --git a/packages/gooddata-pandas/src/gooddata_pandas/dataframe.py b/packages/gooddata-pandas/src/gooddata_pandas/dataframe.py index e5d3b943f..fbfcee414 100644 --- a/packages/gooddata-pandas/src/gooddata_pandas/dataframe.py +++ b/packages/gooddata-pandas/src/gooddata_pandas/dataframe.py @@ -75,6 +75,7 @@ def indexed( filter_by: Optional[Union[Filter, list[Filter]]] = None, on_execution_submitted: Optional[Callable[[Execution], None]] = None, is_cancellable: bool = False, + result_page_len: Optional[int] = None, ) -> pandas.DataFrame: """ Creates a data frame indexed by values of the label. The data frame columns will be created from either @@ -90,6 +91,8 @@ def indexed( on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was submitted to the backend. is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted. + result_page_len (Optional[int]): Optional page size for result pagination. + Defaults to 1000. Larger values can improve performance for large result sets. Returns: pandas.DataFrame: A DataFrame instance. @@ -102,6 +105,7 @@ def indexed( filter_by=filter_by, on_execution_submitted=on_execution_submitted, is_cancellable=is_cancellable, + result_page_len=result_page_len, ) _idx = make_pandas_index(index) @@ -114,6 +118,7 @@ def not_indexed( filter_by: Optional[Union[Filter, list[Filter]]] = None, on_execution_submitted: Optional[Callable[[Execution], None]] = None, is_cancellable: bool = False, + result_page_len: Optional[int] = None, ) -> pandas.DataFrame: """ Creates a data frame with columns created from metrics and or labels. 
@@ -125,6 +130,8 @@ def not_indexed( on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was submitted to the backend. is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted. + result_page_len (Optional[int]): Optional page size for result pagination. + Defaults to 1000. Larger values can improve performance for large result sets. Returns: pandas.DataFrame: A DataFrame instance. @@ -137,6 +144,7 @@ def not_indexed( filter_by=filter_by, on_execution_submitted=on_execution_submitted, is_cancellable=is_cancellable, + result_page_len=result_page_len, ) return pandas.DataFrame(data=data) @@ -148,6 +156,7 @@ def for_items( auto_index: bool = True, on_execution_submitted: Optional[Callable[[Execution], None]] = None, is_cancellable: bool = False, + result_page_len: Optional[int] = None, ) -> pandas.DataFrame: """ Creates a data frame for named items. This is a convenience method that will create DataFrame with or @@ -162,6 +171,8 @@ def for_items( on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was submitted to the backend. is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted. + result_page_len (Optional[int]): Optional page size for result pagination. + Defaults to 1000. Larger values can improve performance for large result sets. Returns: pandas.DataFrame: A DataFrame instance. 
@@ -184,7 +195,11 @@ def for_items( if not auto_index or not has_measures or not has_attributes: columns: ColumnsDef = {**resolved_attr_cols, **resolved_measure_cols} - return self.not_indexed(columns=columns, filter_by=filter_by) + return self.not_indexed( + columns=columns, + filter_by=filter_by, + result_page_len=result_page_len, + ) return self.indexed( index_by=resolved_attr_cols, @@ -192,6 +207,7 @@ def for_items( filter_by=filter_by, on_execution_submitted=on_execution_submitted, is_cancellable=is_cancellable, + result_page_len=result_page_len, ) def for_visualization( @@ -200,6 +216,7 @@ def for_visualization( auto_index: bool = True, on_execution_submitted: Optional[Callable[[Execution], None]] = None, is_cancellable: bool = False, + result_page_len: Optional[int] = None, ) -> pandas.DataFrame: """ Creates a data frame with columns based on the content of the visualization with the provided identifier. @@ -211,6 +228,8 @@ def for_visualization( on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was submitted to the backend. is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted. + result_page_len (Optional[int]): Optional page size for result pagination. + Defaults to 1000. Larger values can improve performance for large result sets. Returns: pandas.DataFrame: A DataFrame instance. 
@@ -231,6 +250,7 @@ def for_visualization( auto_index=auto_index, on_execution_submitted=on_execution_submitted, is_cancellable=is_cancellable, + result_page_len=result_page_len, ) def for_created_visualization( diff --git a/packages/gooddata-pandas/src/gooddata_pandas/series.py b/packages/gooddata-pandas/src/gooddata_pandas/series.py index 6833c1448..e7e938a8a 100644 --- a/packages/gooddata-pandas/src/gooddata_pandas/series.py +++ b/packages/gooddata-pandas/src/gooddata_pandas/series.py @@ -30,6 +30,7 @@ def indexed( filter_by: Optional[Union[Filter, list[Filter]]] = None, on_execution_submitted: Optional[Callable[[Execution], None]] = None, is_cancellable: bool = False, + result_page_len: Optional[int] = None, ) -> pandas.Series: """Creates pandas Series from data points calculated from a single `data_by`. @@ -68,6 +69,9 @@ def indexed( is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted. + result_page_len (Optional[int]): Optional page size for result pagination. + Defaults to 1000. Larger values can improve performance for large result sets. + Returns: pandas.Series: pandas series instance """ @@ -80,6 +84,7 @@ def indexed( filter_by=filter_by, on_execution_submitted=on_execution_submitted, is_cancellable=is_cancellable, + result_page_len=result_page_len, ) _idx = make_pandas_index(index) @@ -93,6 +98,7 @@ def not_indexed( filter_by: Optional[Union[Filter, list[Filter]]] = None, on_execution_submitted: Optional[Callable[[Execution], None]] = None, is_cancellable: bool = False, + result_page_len: Optional[int] = None, ) -> pandas.Series: """ Creates a pandas.Series from data points calculated from a single `data_by` without constructing an index. @@ -122,6 +128,8 @@ def not_indexed( on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was submitted to the backend. 
is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted. + result_page_len (Optional[int]): Optional page size for result pagination. + Defaults to 1000. Larger values can improve performance for large result sets. Returns: pandas.Series: The resulting pandas Series instance. @@ -140,6 +148,7 @@ def not_indexed( filter_by=filter_by, on_execution_submitted=on_execution_submitted, is_cancellable=is_cancellable, + result_page_len=result_page_len, ) return pandas.Series(data=data["_series"])