Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions pyiceberg/catalog/rest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,8 @@ class ScanPlanningMode(Enum):
VIEW_ENDPOINTS_SUPPORTED = "view-endpoints-supported"
VIEW_ENDPOINTS_SUPPORTED_DEFAULT = False

PAGE_SIZE = "rest-page-size"

NAMESPACE_SEPARATOR_PROPERTY = "namespace-separator"
DEFAULT_NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8)

Expand Down Expand Up @@ -1042,11 +1044,17 @@ def list_tables(self, namespace: str | Identifier) -> list[Identifier]:
namespace_concat = self._encode_namespace_path(namespace_tuple)
url = self.url(Endpoints.list_tables, namespace=namespace_concat)

params: dict[str, str] = {}
page_size = property_as_int(self.properties, PAGE_SIZE, None)
if page_size is not None:
if page_size <= 0:
raise ValueError(f"{PAGE_SIZE} must be a positive integer")
params["pageSize"] = str(page_size)

tables: list[Identifier] = []
page_token: str | None = None

while True:
params: dict[str, str] = {}
if page_token:
params["pageToken"] = page_token
response = self._session.get(url, params=params)
Expand Down Expand Up @@ -1150,11 +1158,20 @@ def list_views(self, namespace: str | Identifier) -> list[Identifier]:
namespace_concat = self._encode_namespace_path(namespace_tuple)
url = self.url(Endpoints.list_views, namespace=namespace_concat)

params: dict[str, str] = {}
page_size = property_as_int(self.properties, PAGE_SIZE, None)
if page_size is not None:
if page_size <= 0:
raise ValueError(f"{PAGE_SIZE} must be a positive integer")
params["pageSize"] = str(page_size)

views: list[Identifier] = []
page_token: str | None = None

while True:
params = {"pageToken": page_token} if page_token else None
if page_token:
params["pageToken"] = page_token
Comment on lines +1172 to +1173
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if page_token:
params["pageToken"] = page_token
params: dict[str, str] = {}
if page_size is not None:
params["pageSize"] = str(page_size)
if page_token:
params["pageToken"] = page_token

nit: follows the same pattern as list_namespaces (#3347) and list_tables (#3348)

Copy link
Copy Markdown
Member Author

@ebyhr ebyhr May 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The page size remains constant throughout the while loop, unlike the page token. That is why I set the value outside the loop.

Let me know if you still want me to commit this suggestion.


response = self._session.get(url, params=params)
try:
response.raise_for_status()
Expand Down Expand Up @@ -1263,11 +1280,17 @@ def list_namespaces(self, namespace: str | Identifier = ()) -> list[Identifier]:
self._check_endpoint(Capability.V1_LIST_NAMESPACES)
namespace_tuple = self.identifier_to_tuple(namespace)

params: dict[str, str] = {}
page_size = property_as_int(self.properties, PAGE_SIZE, None)
if page_size is not None:
if page_size <= 0:
raise ValueError(f"{PAGE_SIZE} must be a positive integer")
params["pageSize"] = str(page_size)

namespaces: list[Identifier] = []
page_token: str | None = None

while True:
params: dict[str, str] = {}
if namespace_tuple:
params["parent"] = self._encode_namespace_path(namespace_tuple)
if page_token:
Expand Down
85 changes: 85 additions & 0 deletions tests/catalog/test_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
DEFAULT_ENDPOINTS,
EMPTY_BODY_SHA256,
OAUTH2_SERVER_URI,
PAGE_SIZE,
SIGV4_MAX_RETRIES,
SIGV4_MAX_RETRIES_DEFAULT,
SNAPSHOT_LOADING_MODE,
Expand Down Expand Up @@ -564,6 +565,29 @@ def test_list_tables_paginated_200_none_next_page_token(rest_mock: Mocker) -> No
]


def test_list_tables_page_size(rest_mock: Mocker) -> None:
namespace = "examples"
rest_mock.get(
f"{TEST_URI}v1/namespaces/{namespace}/tables",
json={
"identifiers": [
{"namespace": ["examples"], "name": "table1"},
{"namespace": ["examples"], "name": "table2"},
],
},
status_code=200,
request_headers=TEST_HEADERS,
)

result = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{PAGE_SIZE: "100"}).list_tables(namespace)
assert rest_mock.last_request.url == f"{TEST_URI}v1/namespaces/examples/tables?pageSize=100"

assert result == [
("examples", "table1"),
("examples", "table2"),
]


def test_list_tables_200_sigv4(rest_mock: Mocker) -> None:
namespace = "examples"
rest_mock.get(
Expand Down Expand Up @@ -810,6 +834,48 @@ def test_list_views_paginated_200_none_next_page_token(rest_mock: Mocker) -> Non
]


def test_list_views_page_size(rest_mock: Mocker) -> None:
namespace = "examples"
rest_mock.get(
f"{TEST_URI}v1/namespaces/{namespace}/views",
json={
"identifiers": [
{"namespace": ["examples"], "name": "view1"},
{"namespace": ["examples"], "name": "view2"},
],
},
status_code=200,
request_headers=TEST_HEADERS,
)

result = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{PAGE_SIZE: "100"}).list_views(namespace)
assert rest_mock.last_request.url == f"{TEST_URI}v1/namespaces/examples/views?pageSize=100"

assert result == [
("examples", "view1"),
("examples", "view2"),
]


def test_list_views_invalid_page_size(rest_mock: Mocker) -> None:
namespace = "examples"
rest_mock.get(
f"{TEST_URI}v1/namespaces/{namespace}/views",
json={
"identifiers": [
{"namespace": ["examples"], "name": "view1"},
{"namespace": ["examples"], "name": "view2"},
],
},
status_code=200,
request_headers=TEST_HEADERS,
)

with pytest.raises(ValueError) as e:
RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{PAGE_SIZE: "0"}).list_views(namespace)
assert str(e.value) == "rest-page-size must be a positive integer"


def test_list_views_200_sigv4(rest_mock: Mocker) -> None:
namespace = "examples"
rest_mock.get(
Expand Down Expand Up @@ -1006,6 +1072,25 @@ def test_list_namespaces_paginated_200_none_next_page_token(rest_mock: Mocker) -
]


def test_list_namespaces_page_size(rest_mock: Mocker) -> None:
rest_mock.get(
f"{TEST_URI}v1/namespaces",
json={
"namespaces": [["ns1"], ["ns2"]],
},
status_code=200,
request_headers=TEST_HEADERS,
)

result = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{PAGE_SIZE: "100"}).list_namespaces()
assert rest_mock.last_request.url == f"{TEST_URI}v1/namespaces?pageSize=100"

assert result == [
("ns1",),
("ns2",),
]


def test_list_namespace_with_parent_404(rest_mock: Mocker) -> None:
rest_mock.get(
f"{TEST_URI}v1/namespaces?parent=some_namespace",
Expand Down