@@ -20,82 +20,82 @@ async def test_dataset_response_is_identical( # noqa: C901, PLR0912
2020 py_api : httpx .AsyncClient ,
2121 php_api : httpx .AsyncClient ,
2222) -> None :
23- new , original = await asyncio .gather (
23+ py_response , php_response = await asyncio .gather (
2424 py_api .get (f"/datasets/{ dataset_id } " ),
2525 php_api .get (f"/data/{ dataset_id } " ),
2626 )
2727
28- if new .status_code == HTTPStatus .FORBIDDEN :
29- assert original .status_code == HTTPStatus .PRECONDITION_FAILED
28+ if py_response .status_code == HTTPStatus .FORBIDDEN :
29+ assert php_response .status_code == HTTPStatus .PRECONDITION_FAILED
3030 else :
31- assert new .status_code == original .status_code
31+ assert py_response .status_code == php_response .status_code
3232
33- if new .status_code != HTTPStatus .OK :
33+ if py_response .status_code != HTTPStatus .OK :
3434 # RFC 9457: Python API now returns problem+json format
35- assert new .headers ["content-type" ] == "application/problem+json"
35+ assert py_response .headers ["content-type" ] == "application/problem+json"
3636 # Both APIs should return error responses in the same cases
37- assert new .json ()["code" ] == original .json ()["error" ]["code" ]
38- old_error_message = original .json ()["error" ]["message" ]
39- assert new .json ()["detail" ].startswith (old_error_message )
37+ assert py_response .json ()["code" ] == php_response .json ()["error" ]["code" ]
38+ old_error_message = php_response .json ()["error" ]["message" ]
39+ assert py_response .json ()["detail" ].startswith (old_error_message )
4040 return
4141
4242 try :
43- original_json = original .json ()["data_set_description" ]
43+ php_json = php_response .json ()["data_set_description" ]
4444 except json .decoder .JSONDecodeError :
4545 pytest .skip ("A PHP error occurred on the test server." )
4646
47- if "div" in original_json :
47+ if "div" in php_json :
4848 pytest .skip ("A PHP error occurred on the test server." )
4949
5050 # There are a few changes between the old API and the new API, so we convert here:
5151 # The new API has normalized `format` field:
52- original_json ["format" ] = original_json ["format" ].lower ()
52+ php_json ["format" ] = php_json ["format" ].lower ()
5353
5454 # Pydantic HttpURL serialization omits port 80 for HTTP urls.
55- original_json ["url" ] = original_json ["url" ].replace (":80" , "" )
55+ php_json ["url" ] = php_json ["url" ].replace (":80" , "" )
5656
5757 # There is odd behavior in the live server that I don't want to recreate:
5858 # when the creator is a list of csv names, it can either be a str or a list
5959 # depending on whether the names are quoted. E.g.:
6060 # '"Alice", "Bob"' -> ["Alice", "Bob"]
6161 # 'Alice, Bob' -> 'Alice, Bob'
6262 if (
63- "creator" in original_json
64- and isinstance (original_json ["creator" ], str )
65- and len (original_json ["creator" ].split ("," )) > 1
63+ "creator" in php_json
64+ and isinstance (php_json ["creator" ], str )
65+ and len (php_json ["creator" ].split ("," )) > 1
6666 ):
67- original_json ["creator" ] = [name .strip () for name in original_json ["creator" ].split ("," )]
67+ php_json ["creator" ] = [name .strip () for name in php_json ["creator" ].split ("," )]
6868
69- new_body = new .json ()
70- if processing_data := new_body .get ("processing_date" ):
71- new_body ["processing_date" ] = str (processing_data ).replace ("T" , " " )
69+ py_json = py_response .json ()
70+ if processing_data := py_json .get ("processing_date" ):
71+ py_json ["processing_date" ] = str (processing_data ).replace ("T" , " " )
7272
7373 manual = []
7474 # ref test.openml.org/d/33 (contributor) and d/34 (creator)
7575 # contributor/creator in database is '""'
7676 # json content is []
7777 for field in ["contributor" , "creator" ]:
78- if new_body [field ] == ["" ]:
79- new_body [field ] = []
78+ if py_json [field ] == ["" ]:
79+ py_json [field ] = []
8080 manual .append (field )
8181
82- if isinstance (new_body ["original_data_url" ], list ):
83- new_body ["original_data_url" ] = ", " .join (str (url ) for url in new_body ["original_data_url" ])
82+ if isinstance (py_json ["original_data_url" ], list ):
83+ py_json ["original_data_url" ] = ", " .join (str (url ) for url in py_json ["original_data_url" ])
8484
85- for field , value in list (new_body .items ()):
85+ for field , value in list (py_json .items ()):
8686 if field in manual :
8787 continue
8888 if isinstance (value , int ):
89- new_body [field ] = str (value )
89+ py_json [field ] = str (value )
9090 elif isinstance (value , list ) and len (value ) == 1 :
91- new_body [field ] = str (value [0 ])
92- if not new_body [field ]:
93- del new_body [field ]
91+ py_json [field ] = str (value [0 ])
92+ if not py_json [field ]:
93+ del py_json [field ]
9494
95- if "description" not in new_body :
96- new_body ["description" ] = []
95+ if "description" not in py_json :
96+ py_json ["description" ] = []
9797
98- assert new_body == original_json
98+ assert py_json == php_json
9999
100100
101101@pytest .mark .parametrize (
@@ -141,13 +141,13 @@ async def test_private_dataset_owner_access(
141141 api_key : str ,
142142) -> None :
143143 [private_dataset ] = tests .constants .PRIVATE_DATASET_ID
144- new_response , old_response = await asyncio .gather (
144+ py_response , php_response = await asyncio .gather (
145145 py_api .get (f"/datasets/{ private_dataset } ?api_key={ api_key } " ),
146146 php_api .get (f"/data/{ private_dataset } ?api_key={ api_key } " ),
147147 )
148- assert old_response .status_code == HTTPStatus .OK
149- assert new_response .status_code == old_response .status_code
150- assert new_response .json ()["id" ] == private_dataset
148+ assert php_response .status_code == HTTPStatus .OK
149+ assert py_response .status_code == php_response .status_code
150+ assert py_response .json ()["id" ] == private_dataset
151151
152152
153153@pytest .mark .mut
@@ -173,13 +173,13 @@ async def test_dataset_tag_response_is_identical(
173173 php_api : httpx .AsyncClient ,
174174) -> None :
175175 # PHP request must happen first to check state, can't parallelize
176- original = await php_api .post (
176+ php_response = await php_api .post (
177177 "/data/tag" ,
178178 data = {"api_key" : api_key , "tag" : tag , "data_id" : dataset_id },
179179 )
180180 already_tagged = (
181- original .status_code == HTTPStatus .INTERNAL_SERVER_ERROR
182- and "already tagged" in original .json ()["error" ]["message" ]
181+ php_response .status_code == HTTPStatus .INTERNAL_SERVER_ERROR
182+ and "already tagged" in php_response .json ()["error" ]["message" ]
183183 )
184184 if not already_tagged :
185185 # undo the tag, because we don't want to persist this change to the database
@@ -189,36 +189,36 @@ async def test_dataset_tag_response_is_identical(
189189 data = {"api_key" : api_key , "tag" : tag , "data_id" : dataset_id },
190190 )
191191 if (
192- original .status_code != HTTPStatus .OK
193- and original .json ()["error" ]["message" ] == "An Elastic Search Exception occured."
192+ php_response .status_code != HTTPStatus .OK
193+ and php_response .json ()["error" ]["message" ] == "An Elastic Search Exception occured."
194194 ):
195195 pytest .skip ("Encountered Elastic Search error." )
196- new = await py_api .post (
196+ py_response = await py_api .post (
197197 f"/datasets/tag?api_key={ api_key } " ,
198198 json = {"data_id" : dataset_id , "tag" : tag },
199199 )
200200
201201 # RFC 9457: Tag conflict now returns 409 instead of 500
202- if original .status_code == HTTPStatus .INTERNAL_SERVER_ERROR and already_tagged :
203- assert new .status_code == HTTPStatus .CONFLICT
204- assert new .json ()["code" ] == original .json ()["error" ]["code" ]
205- assert original .json ()["error" ]["message" ] == "Entity already tagged by this tag."
202+ if php_response .status_code == HTTPStatus .INTERNAL_SERVER_ERROR and already_tagged :
203+ assert py_response .status_code == HTTPStatus .CONFLICT
204+ assert py_response .json ()["code" ] == php_response .json ()["error" ]["code" ]
205+ assert php_response .json ()["error" ]["message" ] == "Entity already tagged by this tag."
206206 assert re .match (
207207 pattern = r"Dataset \d+ already tagged with " + f"'{ tag } '." ,
208- string = new .json ()["detail" ],
208+ string = py_response .json ()["detail" ],
209209 )
210210 return
211211
212- assert new .status_code == original .status_code , original .json ()
213- if new .status_code != HTTPStatus .OK :
214- assert new .json ()["code" ] == original .json ()["error" ]["code" ]
215- assert new .json ()["detail" ] == original .json ()["error" ]["message" ]
212+ assert py_response .status_code == php_response .status_code , php_response .json ()
213+ if py_response .status_code != HTTPStatus .OK :
214+ assert py_response .json ()["code" ] == php_response .json ()["error" ]["code" ]
215+ assert py_response .json ()["detail" ] == php_response .json ()["error" ]["message" ]
216216 return
217217
218- original = original .json ()
219- new = new .json ()
220- new = nested_remove_single_element_list (new )
221- assert new == original
218+ php_json = php_response .json ()
219+ py_json = py_response .json ()
220+ py_json = nested_remove_single_element_list (py_json )
221+ assert py_json == php_json
222222
223223
224224@pytest .mark .parametrize (
@@ -230,24 +230,24 @@ async def test_datasets_feature_is_identical(
230230 py_api : httpx .AsyncClient ,
231231 php_api : httpx .AsyncClient ,
232232) -> None :
233- new , original = await asyncio .gather (
233+ py_response , php_response = await asyncio .gather (
234234 py_api .get (f"/datasets/features/{ data_id } " ),
235235 php_api .get (f"/data/features/{ data_id } " ),
236236 )
237- assert new .status_code == original .status_code
237+ assert py_response .status_code == php_response .status_code
238238
239- if new .status_code != HTTPStatus .OK :
240- error = original .json ()["error" ]
241- assert new .json ()["code" ] == error ["code" ]
239+ if py_response .status_code != HTTPStatus .OK :
240+ error = php_response .json ()["error" ]
241+ assert py_response .json ()["code" ] == error ["code" ]
242242 if error ["message" ] == "No features found. Additionally, dataset processed with error" :
243243 pattern = r"No features found. Additionally, dataset \d+ processed with error\."
244- assert re .match (pattern , new .json ()["detail" ])
244+ assert re .match (pattern , py_response .json ()["detail" ])
245245 else :
246- assert new .json ()["detail" ] == error ["message" ]
246+ assert py_response .json ()["detail" ] == error ["message" ]
247247 return
248248
249- python_body = new .json ()
250- for feature in python_body :
249+ py_json = py_response .json ()
250+ for feature in py_json :
251251 for key , value in list (feature .items ()):
252252 if key == "nominal_values" :
253253 # The old API uses `nominal_value` instead of `nominal_values`
@@ -261,5 +261,5 @@ async def test_datasets_feature_is_identical(
261261 else :
262262 # The old API formats bool as string in lower-case
263263 feature [key ] = str (value ) if not isinstance (value , bool ) else str (value ).lower ()
264- original_features = original .json ()["data_features" ]["feature" ]
265- assert python_body == original_features
264+ php_features = php_response .json ()["data_features" ]["feature" ]
265+ assert py_json == php_features
0 commit comments