From 80a9dd9193a634176ca7fe257a5f61fa3738a393 Mon Sep 17 00:00:00 2001
From: Jan Kadlec <jan.kadlec@gooddata.com>
Date: Fri, 9 Jan 2026 12:56:23 +0100
Subject: [PATCH] feat: enable paging customization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Based on user feedback, add an option to customize the result page size, because the default page size of 1_000 is not optimal for large datasets.

JIRA: PSDK-227
risk: low
---
 .../src/gooddata_pandas/data_access.py        | 10 ++++++++-
 .../src/gooddata_pandas/dataframe.py          | 22 ++++++++++++++++++-
 .../src/gooddata_pandas/series.py             |  9 ++++++++
 3 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/packages/gooddata-pandas/src/gooddata_pandas/data_access.py b/packages/gooddata-pandas/src/gooddata_pandas/data_access.py
index c6f39e9ab..096fc7bf6 100644
--- a/packages/gooddata-pandas/src/gooddata_pandas/data_access.py
+++ b/packages/gooddata-pandas/src/gooddata_pandas/data_access.py
@@ -358,6 +358,7 @@ def _extract_from_attributes_and_maybe_metrics(
     col_to_attr_idx: dict[str, int],
     col_to_metric_idx: dict[str, int],
     index_to_attr_idx: Optional[dict[str, int]] = None,
+    result_page_len: Optional[int] = None,
 ) -> tuple[dict, dict]:
     """
     Internal function that extracts data from execution response with attributes columns and
@@ -371,6 +372,8 @@ def _extract_from_attributes_and_maybe_metrics(
         col_to_metric_idx (dict[str, int]): A mapping of pandas column names to metric dimension indices.
         index_to_attr_idx (Optional[dict[str, int]]):
             An optional mapping of pandas index names to attribute dimension indices.
+        result_page_len (Optional[int]): Optional page size for result pagination.
+            Defaults to _RESULT_PAGE_LEN (1000). Larger values can improve performance for large result sets.
 
     Returns:
         tuple: A tuple containing the following dictionaries:
@@ -379,7 +382,8 @@ def _extract_from_attributes_and_maybe_metrics(
     """
     exec_def = execution.exec_def
     offset = [0 for _ in exec_def.dimensions]
-    limit = [len(exec_def.metrics), _RESULT_PAGE_LEN] if exec_def.has_metrics() else [_RESULT_PAGE_LEN]
+    page_len = result_page_len if result_page_len is not None else _RESULT_PAGE_LEN
+    limit = [len(exec_def.metrics), page_len] if exec_def.has_metrics() else [page_len]
     attribute_dim = 1 if exec_def.has_metrics() else 0
     result = execution.read_result(limit=limit, offset=offset)
     safe_index_to_attr_idx = index_to_attr_idx if index_to_attr_idx is not None else dict()
@@ -421,6 +425,7 @@ def compute_and_extract(
     filter_by: Optional[Union[Filter, list[Filter]]] = None,
     on_execution_submitted: Optional[Callable[[Execution], None]] = None,
     is_cancellable: bool = False,
+    result_page_len: Optional[int] = None,
 ) -> tuple[dict, dict]:
     """
     Convenience function that computes and extracts data from the execution response.
@@ -435,6 +440,8 @@ def compute_and_extract(
             submitted to the backend.
         is_cancellable (bool, optional): Whether the execution of this definition should be cancelled when
             the connection is interrupted.
+        result_page_len (Optional[int]): Optional page size for result pagination.
+            Defaults to 1000. Larger values can improve performance for large result sets.
 
     Returns:
         tuple: A tuple containing the following dictionaries:
@@ -472,4 +479,5 @@ def compute_and_extract(
             col_to_attr_idx,
             col_to_metric_idx,
             index_to_attr_idx,
+            result_page_len=result_page_len,
         )
diff --git a/packages/gooddata-pandas/src/gooddata_pandas/dataframe.py b/packages/gooddata-pandas/src/gooddata_pandas/dataframe.py
index e5d3b943f..fbfcee414 100644
--- a/packages/gooddata-pandas/src/gooddata_pandas/dataframe.py
+++ b/packages/gooddata-pandas/src/gooddata_pandas/dataframe.py
@@ -75,6 +75,7 @@ def indexed(
         filter_by: Optional[Union[Filter, list[Filter]]] = None,
         on_execution_submitted: Optional[Callable[[Execution], None]] = None,
         is_cancellable: bool = False,
+        result_page_len: Optional[int] = None,
     ) -> pandas.DataFrame:
         """
         Creates a data frame indexed by values of the label. The data frame columns will be created from either
@@ -90,6 +91,8 @@ def indexed(
             on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
                 submitted to the backend.
             is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
+            result_page_len (Optional[int]): Optional page size for result pagination.
+                Defaults to 1000. Larger values can improve performance for large result sets.
 
         Returns:
             pandas.DataFrame: A DataFrame instance.
@@ -102,6 +105,7 @@ def indexed(
             filter_by=filter_by,
             on_execution_submitted=on_execution_submitted,
             is_cancellable=is_cancellable,
+            result_page_len=result_page_len,
         )
 
         _idx = make_pandas_index(index)
@@ -114,6 +118,7 @@ def not_indexed(
         filter_by: Optional[Union[Filter, list[Filter]]] = None,
         on_execution_submitted: Optional[Callable[[Execution], None]] = None,
         is_cancellable: bool = False,
+        result_page_len: Optional[int] = None,
     ) -> pandas.DataFrame:
         """
         Creates a data frame with columns created from metrics and or labels.
@@ -125,6 +130,8 @@ def not_indexed(
             on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
                 submitted to the backend.
             is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
+            result_page_len (Optional[int]): Optional page size for result pagination.
+                Defaults to 1000. Larger values can improve performance for large result sets.
 
         Returns:
             pandas.DataFrame: A DataFrame instance.
@@ -137,6 +144,7 @@ def not_indexed(
             filter_by=filter_by,
             on_execution_submitted=on_execution_submitted,
             is_cancellable=is_cancellable,
+            result_page_len=result_page_len,
         )
 
         return pandas.DataFrame(data=data)
@@ -148,6 +156,7 @@ def for_items(
         auto_index: bool = True,
         on_execution_submitted: Optional[Callable[[Execution], None]] = None,
         is_cancellable: bool = False,
+        result_page_len: Optional[int] = None,
     ) -> pandas.DataFrame:
         """
         Creates a data frame for named items. This is a convenience method that will create DataFrame with or
@@ -162,6 +171,8 @@ def for_items(
             on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
                 submitted to the backend.
             is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
+            result_page_len (Optional[int]): Optional page size for result pagination.
+                Defaults to 1000. Larger values can improve performance for large result sets.
 
         Returns:
             pandas.DataFrame: A DataFrame instance.
@@ -184,7 +195,11 @@ def for_items(
         if not auto_index or not has_measures or not has_attributes:
             columns: ColumnsDef = {**resolved_attr_cols, **resolved_measure_cols}
 
-            return self.not_indexed(columns=columns, filter_by=filter_by)
+            return self.not_indexed(
+                columns=columns,
+                filter_by=filter_by,
+                result_page_len=result_page_len,
+            )
 
         return self.indexed(
             index_by=resolved_attr_cols,
@@ -192,6 +207,7 @@ def for_items(
             filter_by=filter_by,
             on_execution_submitted=on_execution_submitted,
             is_cancellable=is_cancellable,
+            result_page_len=result_page_len,
         )
 
     def for_visualization(
@@ -200,6 +216,7 @@ def for_visualization(
         auto_index: bool = True,
         on_execution_submitted: Optional[Callable[[Execution], None]] = None,
         is_cancellable: bool = False,
+        result_page_len: Optional[int] = None,
     ) -> pandas.DataFrame:
         """
         Creates a data frame with columns based on the content of the visualization with the provided identifier.
@@ -211,6 +228,8 @@ def for_visualization(
             on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
                 submitted to the backend.
             is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
+            result_page_len (Optional[int]): Optional page size for result pagination.
+                Defaults to 1000. Larger values can improve performance for large result sets.
 
         Returns:
             pandas.DataFrame: A DataFrame instance.
@@ -231,6 +250,7 @@ def for_visualization(
             auto_index=auto_index,
             on_execution_submitted=on_execution_submitted,
             is_cancellable=is_cancellable,
+            result_page_len=result_page_len,
         )
 
     def for_created_visualization(
diff --git a/packages/gooddata-pandas/src/gooddata_pandas/series.py b/packages/gooddata-pandas/src/gooddata_pandas/series.py
index 6833c1448..e7e938a8a 100644
--- a/packages/gooddata-pandas/src/gooddata_pandas/series.py
+++ b/packages/gooddata-pandas/src/gooddata_pandas/series.py
@@ -30,6 +30,7 @@ def indexed(
         filter_by: Optional[Union[Filter, list[Filter]]] = None,
         on_execution_submitted: Optional[Callable[[Execution], None]] = None,
         is_cancellable: bool = False,
+        result_page_len: Optional[int] = None,
     ) -> pandas.Series:
         """Creates pandas Series from data points calculated from a single `data_by`.
 
@@ -68,6 +69,9 @@ def indexed(
 
             is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
 
+            result_page_len (Optional[int]): Optional page size for result pagination.
+                Defaults to 1000. Larger values can improve performance for large result sets.
+
         Returns:
             pandas.Series: pandas series instance
         """
@@ -80,6 +84,7 @@ def indexed(
             filter_by=filter_by,
             on_execution_submitted=on_execution_submitted,
             is_cancellable=is_cancellable,
+            result_page_len=result_page_len,
         )
 
         _idx = make_pandas_index(index)
@@ -93,6 +98,7 @@ def not_indexed(
         filter_by: Optional[Union[Filter, list[Filter]]] = None,
         on_execution_submitted: Optional[Callable[[Execution], None]] = None,
         is_cancellable: bool = False,
+        result_page_len: Optional[int] = None,
     ) -> pandas.Series:
         """
         Creates a pandas.Series from data points calculated from a single `data_by` without constructing an index.
@@ -122,6 +128,8 @@ def not_indexed(
             on_execution_submitted (Optional[Callable[[Execution], None]]): Callback to call when the execution was
                 submitted to the backend.
             is_cancellable (bool, optional): Whether the execution should be cancelled when the connection is interrupted.
+            result_page_len (Optional[int]): Optional page size for result pagination.
+                Defaults to 1000. Larger values can improve performance for large result sets.
 
         Returns:
             pandas.Series: The resulting pandas Series instance.
@@ -140,6 +148,7 @@ def not_indexed(
             filter_by=filter_by,
             on_execution_submitted=on_execution_submitted,
             is_cancellable=is_cancellable,
+            result_page_len=result_page_len,
         )
 
         return pandas.Series(data=data["_series"])