Skip to content

Commit f6ce940

Browse files
Mahhheshh and PGijsbers authored
add extra error messages and fix response schema (#253)
Fixes #114. - Adds additional error messages when something goes wrong. - Fixes the response schema. TODO: - Find a valid `dataset_id` for code 362. PHP reference: https://github.com/openml/OpenML/blob/b1265bda1b52780a6f10a7d239e3f54914517c2f/openml_OS/models/api/v1/Api_data.php#L1957-L2019 --------- Co-authored-by: PGijsbers <p.gijsbers@tue.nl>
1 parent a74b056 commit f6ce940

3 files changed

Lines changed: 84 additions & 43 deletions

File tree

src/core/errors.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,20 @@ class ServiceNotFoundError(ProblemDetailError):
374374
_default_status_code = HTTPStatus.NOT_FOUND
375375

376376

377+
# =============================================================================
378+
# Quality Errors
379+
# =============================================================================
380+
381+
382+
class NoQualitiesError(ProblemDetailError):
    """Signal that the requested dataset has no quality values stored."""

    # Defaults applied when the raiser passes no explicit code or status
    # (presumably consumed by ProblemDetailError; confirm against the base).
    _default_code = 362
    _default_status_code = HTTPStatus.PRECONDITION_FAILED

    # Problem type identifier and its short human-readable summary.
    title = "No Qualities Found"
    uri = "https://openml.org/problems/quality-no-qualities"
389+
390+
377391
# =============================================================================
378392
# Internal Errors
379393
# =============================================================================

src/routers/openml/qualities.py

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from http import HTTPStatus
21
from typing import Annotated, Literal
32

43
from fastapi import APIRouter, Depends
@@ -7,7 +6,12 @@
76
import database.datasets
87
import database.qualities
98
from core.access import _user_has_access
10-
from core.errors import DatasetNotFoundError
9+
from core.errors import (
10+
DatasetNotFoundError,
11+
DatasetNotProcessedError,
12+
DatasetProcessingError,
13+
NoQualitiesError,
14+
)
1115
from database.users import User
1216
from routers.dependencies import expdb_connection, fetch_user
1317
from schemas.datasets.openml import Quality
@@ -35,19 +39,24 @@ async def get_qualities(
3539
) -> list[Quality]:
3640
dataset = await database.datasets.get(dataset_id, expdb)
3741
if not dataset or not await _user_has_access(dataset, user):
38-
# Backwards compatibility: PHP API returns 412 with code 113
3942
msg = f"Dataset with id {dataset_id} not found."
40-
no_data_file = 113
4143
raise DatasetNotFoundError(
4244
msg,
43-
code=no_data_file,
44-
status_code=HTTPStatus.PRECONDITION_FAILED,
45-
)
46-
return await database.qualities.get_for_dataset(dataset_id, expdb)
47-
# The PHP API provided (sometime) helpful error messages
48-
# if not qualities:
49-
# check if dataset exists: error 360
50-
# check if user has access: error 361
51-
# check if there is a data processed entry and forward the error: 364
52-
# if nothing in process table: 363
53-
# otherwise: error 362
45+
code=361,
46+
) from None
47+
48+
processing = await database.datasets.get_latest_processing_update(dataset_id, expdb)
49+
if processing is None:
50+
msg = f"Dataset not processed yet for dataset {dataset_id}."
51+
raise DatasetNotProcessedError(msg, code=363)
52+
53+
if processing.error:
54+
msg = processing.error.strip() or "Error occurred during processing."
55+
raise DatasetProcessingError(msg, code=364)
56+
57+
qualities = await database.qualities.get_for_dataset(dataset_id, expdb)
58+
if not qualities:
59+
msg = f"No qualities found for dataset {dataset_id}."
60+
raise NoQualitiesError(msg)
61+
62+
return qualities

tests/routers/openml/qualities_test.py

Lines changed: 46 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import asyncio
2+
import re
23
from http import HTTPStatus
34

45
import deepdiff
@@ -7,8 +8,6 @@
78
from sqlalchemy import text
89
from sqlalchemy.ext.asyncio import AsyncConnection
910

10-
from core.errors import DatasetNotFoundError
11-
1211

1312
async def _remove_quality_from_database(quality_name: str, expdb_test: AsyncConnection) -> None:
1413
await expdb_test.execute(
@@ -287,7 +286,7 @@ async def test_get_quality(py_api: httpx.AsyncClient) -> None:
287286

288287
@pytest.mark.parametrize(
289288
"data_id",
290-
list(set(range(1, 132)) - {55, 56, 59, 116, 130}),
289+
[*list(set(range(1, 133))), 9999999],
291290
)
292291
async def test_get_quality_identical(
293292
data_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient
@@ -296,8 +295,24 @@ async def test_get_quality_identical(
296295
py_api.get(f"/datasets/qualities/{data_id}"),
297296
php_api.get(f"/data/qualities/{data_id}"),
298297
)
299-
assert python_response.status_code == php_response.status_code
298+
if php_response.status_code == HTTPStatus.OK:
299+
_assert_get_quality_success_equal(python_response, php_response)
300+
return
301+
302+
php_error_code = int(php_response.json()["error"]["code"])
303+
if php_error_code == 361: # noqa: PLR2004
304+
_assert_get_quality_error_dataset_not_found(python_response, php_response)
305+
elif php_error_code == 364: # noqa: PLR2004
306+
_assert_get_quality_error_dataset_process_error(python_response, php_response)
307+
else:
308+
msg = f"Dataset {data_id} response not under test:", php_response.json()
309+
raise AssertionError(msg)
300310

311+
312+
def _assert_get_quality_success_equal(
313+
python_response: httpx.Response, php_response: httpx.Response
314+
) -> None:
315+
assert python_response.status_code == php_response.status_code
301316
expected = [
302317
{
303318
"name": quality["name"],
@@ -308,28 +323,31 @@ async def test_get_quality_identical(
308323
assert python_response.json() == expected
309324

310325

311-
@pytest.mark.parametrize(
312-
"data_id",
313-
[55, 56, 59, 116, 130, 132],
314-
)
315-
async def test_get_quality_identical_error(
316-
data_id: int,
317-
py_api: httpx.AsyncClient,
318-
php_api: httpx.AsyncClient,
326+
def _assert_get_quality_error_dataset_not_found(
    python_response: httpx.Response, php_response: httpx.Response
) -> None:
    """Assert that both APIs report an unknown dataset for the same request.

    The two APIs are expected to disagree on the HTTP status (the PHP API
    answers 412, the Python API answers 404) but to agree on the error code.

    Args:
        python_response: Response of the Python API for the qualities request.
        php_response: Response of the PHP API for the same dataset id.

    Raises:
        AssertionError: If a status code, the error code, or one of the
            messages deviates from what is expected.
    """
    assert php_response.status_code == HTTPStatus.PRECONDITION_FAILED
    assert python_response.status_code == HTTPStatus.NOT_FOUND

    php_error = php_response.json()["error"]
    py_error = python_response.json()

    assert php_error["code"] == py_error["code"]
    assert php_error["message"] == "Unknown dataset"
    # Use fullmatch with an escaped full stop: a plain `re.match` with a bare
    # `.` would accept any character there and ignore trailing garbage.
    assert re.fullmatch(r"Dataset with id \d+ not found\.", py_error["detail"])
338+
339+
340+
def _assert_get_quality_error_dataset_process_error(
    python_response: httpx.Response, php_response: httpx.Response
) -> None:
    """Assert that both APIs report the same dataset processing failure.

    Both responses must share the HTTP status and error code. The PHP
    ``additional_information`` text is compared with the Python ``detail``
    only on its first and last 30 characters.

    Args:
        python_response: Response of the Python API for the qualities request.
        php_response: Response of the PHP API for the same dataset id.

    Raises:
        AssertionError: If any of the compared fields differ.
    """
    assert php_response.status_code == python_response.status_code

    php_payload = php_response.json()["error"]
    py_payload = python_response.json()

    assert php_payload["code"] == py_payload["code"]
    assert php_payload["message"] == "Dataset processed with error"
    assert py_payload["title"] == "Dataset Processing Error"

    # PHP may add unnecessary escapes inside the message, so only the leading
    # and trailing 30 characters are required to be identical.
    for window in (slice(None, 30), slice(-30, None)):
        assert php_payload["additional_information"][window] == py_payload["detail"][window]

0 commit comments

Comments
 (0)