Skip to content

Commit 058fea0

Browse files
authored
Add more logging (#308)
Add common logging for authentication and also add logging to endpoints which permute the data.
1 parent 2a8de11 commit 058fea0

9 files changed

Lines changed: 123 additions & 16 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ dev = [
3434
"pytest-mock",
3535
"pytest-asyncio",
3636
"httpx",
37+
"asgi-lifespan",
3738
"hypothesis",
3839
"deepdiff",
3940
"pytest-xdist",

src/core/logging.py

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Utility functions for logging."""
22

33
import sys
4+
import time
45
import uuid
56
from collections.abc import Awaitable, Callable
67
from pathlib import Path
@@ -20,7 +21,11 @@ def setup_log_sinks(configuration_file: Path | None = None) -> None:
2021
sink = sink_configuration.pop("sink")
2122
if sink == "sys.stderr":
2223
sink = sys.stderr
23-
logger.add(sink, serialize=True, **sink_configuration)
24+
# Logs the additionally provided data as JSON.
25+
sink_configuration.setdefault("serialize", True)
26+
# Decouples log calls from I/O and makes it multiprocessing safe.
27+
sink_configuration.setdefault("enqueue", True)
28+
logger.add(sink, **sink_configuration)
2429

2530

2631
async def add_request_context_to_log(
@@ -29,10 +34,42 @@ async def add_request_context_to_log(
2934
) -> Response:
3035
"""Add a unique request id to each log call."""
3136
identifier = uuid.uuid4().hex
32-
with logger.contextualize(request_id=identifier):
37+
with logger.contextualize(
38+
request_id=identifier,
39+
method=request.method,
40+
path=request.url.path,
41+
):
3342
return await call_next(request)
3443

3544

45+
async def log_request_duration(
46+
request: Request,
47+
call_next: Callable[[Request], Awaitable[Response]],
48+
) -> Response:
49+
"""Log the process and wallclock time for each call.
50+
51+
Reported times cannot be attributed solely to processing the request.
52+
As multiple requests can be handled concurrently in the same process,
53+
process time may be spent on other requests as well. The same goes for
54+
wallclock time, which is additionally influenced by e.g., context switches.
55+
"""
56+
start_mono_ns = time.monotonic_ns()
57+
start_process_ns = time.process_time_ns()
58+
response: Response = await call_next(request)
59+
60+
duration_mono_ns = time.monotonic_ns() - start_mono_ns
61+
duration_process_ns = time.process_time_ns() - start_process_ns
62+
logger.info(
63+
"Request took {mono_ms} ms wallclock time (process time {process_ms} ms)",
64+
mono_ms=int(duration_mono_ns / 1_000_000),
65+
process_ms=int(duration_process_ns / 1_000_000),
66+
wallclock_time_ns=duration_mono_ns,
67+
process_time_ns=duration_process_ns,
68+
status=response.status_code,
69+
)
70+
return response
71+
72+
3673
async def request_response_logger(
3774
request: Request,
3875
call_next: Callable[[Request], Awaitable[Response]],

src/main.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import argparse
2+
import asyncio
23
import sys
34
from collections.abc import AsyncGenerator
45
from contextlib import asynccontextmanager
@@ -10,7 +11,12 @@
1011

1112
from config import load_configuration
1213
from core.errors import ProblemDetailError, problem_detail_exception_handler
13-
from core.logging import add_request_context_to_log, request_response_logger, setup_log_sinks
14+
from core.logging import (
15+
add_request_context_to_log,
16+
log_request_duration,
17+
request_response_logger,
18+
setup_log_sinks,
19+
)
1420
from database.setup import close_databases
1521
from routers.mldcat_ap.dataset import router as mldcat_ap_router
1622
from routers.openml.datasets import router as datasets_router
@@ -26,10 +32,13 @@
2632

2733

2834
@asynccontextmanager
29-
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: # noqa: ARG001
35+
async def lifespan(app: FastAPI | None) -> AsyncGenerator[None, None]: # noqa: ARG001
3036
"""Manage application lifespan - startup and shutdown events."""
3137
yield
32-
await close_databases()
38+
asyncio.gather(
39+
logger.complete(),
40+
close_databases(),
41+
)
3342

3443

3544
def _parse_args() -> argparse.Namespace:
@@ -72,6 +81,7 @@ def create_api(configuration_file: Path | None = None) -> FastAPI:
7281
# Order matters! Each added middleware wraps the previous, creating a stack.
7382
# See also: https://fastapi.tiangolo.com/tutorial/middleware/#multiple-middleware-execution-order
7483
app.middleware("http")(request_response_logger)
84+
app.middleware("http")(log_request_duration)
7585
app.middleware("http")(add_request_context_to_log)
7686

7787
app.add_exception_handler(ProblemDetailError, problem_detail_exception_handler) # type: ignore[arg-type]

src/routers/dependencies.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from typing import Annotated
33

44
from fastapi import Depends
5+
from loguru import logger
56
from pydantic import BaseModel
67
from sqlalchemy.ext.asyncio import AsyncConnection
78

@@ -25,21 +26,32 @@ async def userdb_connection() -> AsyncGenerator[AsyncConnection, None]:
2526
async def fetch_user(
2627
api_key: APIKey | None = None,
2728
user_data: Annotated[AsyncConnection | None, Depends(userdb_connection)] = None,
28-
) -> User | None:
29+
) -> AsyncGenerator[User | None, None]:
2930
if not (api_key and user_data):
30-
return None
31+
yield None
32+
return
3133

3234
user = await User.fetch(api_key, user_data)
33-
if user:
34-
return user
35-
msg = "Invalid API key provided."
36-
raise AuthenticationFailedError(msg)
35+
masked_key = api_key[-4:]
36+
if not user:
37+
logger.info("Authentication failed.", api_key=masked_key)
38+
msg = "Invalid API key provided."
39+
raise AuthenticationFailedError(msg)
40+
41+
logger.info(
42+
"User {identifier} authenticated with api key ending in '{api_key}'.",
43+
identifier=user.user_id,
44+
api_key=masked_key,
45+
)
46+
with logger.contextualize(user_id=user.user_id):
47+
yield user
3748

3849

3950
def fetch_user_or_raise(
4051
user: Annotated[User | None, Depends(fetch_user)] = None,
4152
) -> User:
4253
if user is None:
54+
logger.info("Unauthenticated user tried to access endpoint that requires authentication.")
4355
msg = "No API key provided."
4456
raise AuthenticationRequiredError(msg)
4557
return user

src/routers/openml/datasets.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from typing import Annotated, Any, Literal, NamedTuple
66

77
from fastapi import APIRouter, Body, Depends
8+
from loguru import logger
89
from sqlalchemy import bindparam, text
910
from sqlalchemy.engine import Row
1011
from sqlalchemy.ext.asyncio import AsyncConnection
@@ -61,6 +62,7 @@ async def tag_dataset(
6162
raise TagAlreadyExistsError(msg)
6263

6364
await database.datasets.tag(data_id, tag, user_id=user.user_id, connection=expdb_db)
65+
logger.info("Dataset {dataset_id} tagged '{tag}'.", dataset_id=data_id, tag=tag)
6466
return {
6567
"data_tag": {"id": str(data_id), "tag": [*tags, tag]},
6668
}
@@ -375,6 +377,12 @@ async def update_dataset_status(
375377
msg = f"Unknown status transition: {current_status} -> {status}"
376378
raise InternalError(msg)
377379

380+
logger.info(
381+
"Dataset {dataset_id} changed from {previous} to {current}",
382+
dataset_id=dataset_id,
383+
previous=current_status.status if current_status else DatasetStatus.IN_PREPARATION,
384+
current=status,
385+
)
378386
return {"dataset_id": dataset_id, "status": status}
379387

380388

src/routers/openml/setups.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from typing import Annotated
55

66
from fastapi import APIRouter, Body, Depends, Path
7+
from loguru import logger
78
from sqlalchemy.ext.asyncio import AsyncConnection
89

910
import database.setups
@@ -65,6 +66,7 @@ async def tag_setup(
6566
raise TagAlreadyExistsError(msg)
6667

6768
await database.setups.tag(setup_id, tag, user.user_id, expdb_db)
69+
logger.info("Setup {setup_id} tagged '{tag}'.", setup_id=setup_id, tag=tag)
6870
all_tags = [t.tag for t in setup_tags] + [tag]
6971
return {"setup_tag": {"id": str(setup_id), "tag": all_tags}}
7072

@@ -94,9 +96,15 @@ async def untag_setup(
9496
msg = (
9597
f"You may not remove tag {tag!r} of setup {setup_id} because it was not created by you."
9698
)
99+
logger.warning(
100+
"User attempted to remove tag '{tag}' from setup {setup_id}.",
101+
setup_id=setup_id,
102+
tag=tag,
103+
)
97104
raise TagNotOwnedError(msg)
98105

99106
await database.setups.untag(setup_id, matched_tag_row.tag, expdb_db)
107+
logger.info("Setup {setup_id} had tag '{tag}' removed.", setup_id=setup_id, tag=tag)
100108
remaining_tags = [
101109
t.tag for t in setup_tags if t.tag.casefold() != matched_tag_row.tag.casefold()
102110
]

src/routers/openml/study.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from typing import Annotated, Literal
22

33
from fastapi import APIRouter, Body, Depends
4+
from loguru import logger
45
from pydantic import BaseModel
56
from sqlalchemy.engine import Row
67
from sqlalchemy.ext.asyncio import AsyncConnection
@@ -73,6 +74,12 @@ async def attach_to_study(
7374
# PHP lets *anyone* edit *any* study. We're not going to do that.
7475
if study.creator != user.user_id and not await user.is_admin():
7576
msg = f"Study {study_id} can only be edited by its creator."
77+
logger.warning(
78+
"User {user_id} attempted to attach entities to study they do not own.",
79+
study_id=study_id,
80+
entity_ids=entity_ids,
81+
user_id=user.user_id,
82+
)
7683
raise StudyNotEditableError(msg)
7784
if study.status != StudyStatus.IN_PREPARATION:
7885
msg = f"Study {study_id} can only be edited while in preparation."
@@ -93,6 +100,12 @@ async def attach_to_study(
93100
except ValueError as e:
94101
msg = str(e)
95102
raise StudyConflictError(msg) from e
103+
logger.info(
104+
"User {user_id} attached entities to study {study_id}.",
105+
study_id=study_id,
106+
entity_ids=entity_ids,
107+
user_id=user.user_id,
108+
)
96109
return AttachDetachResponse(study_id=study_id, main_entity_type=study.type_)
97110

98111

@@ -124,6 +137,11 @@ async def create_study(
124137
user=user,
125138
expdb=expdb,
126139
)
140+
logger.info(
141+
"User {user_id} created study {study_id}.",
142+
study_id=study_id,
143+
user_id=user.user_id,
144+
)
127145
# Make sure that invalid fields raise an error (e.g., "task_ids")
128146
return {"study_id": study_id}
129147

tests/conftest.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import contextlib
22
import json
3-
from collections.abc import AsyncIterator, Iterable, Iterator
3+
from collections.abc import AsyncGenerator, AsyncIterator, Iterable, Iterator
44
from pathlib import Path
55
from typing import Any, NamedTuple
66

@@ -13,7 +13,7 @@
1313
from sqlalchemy.ext.asyncio import AsyncConnection, AsyncEngine
1414

1515
from database.setup import expdb_database, user_database
16-
from main import create_api
16+
from main import create_api, lifespan
1717
from routers.dependencies import expdb_connection, userdb_connection
1818

1919
PHP_API_URL = "http://php-api:80/api/v1/json"
@@ -51,6 +51,12 @@ async def temporary_records(
5151
await connection.commit()
5252

5353

54+
@pytest.fixture(autouse=True, scope="session")
55+
async def one_lifespan() -> AsyncGenerator[None, None]:
56+
async with lifespan(app=None):
57+
yield
58+
59+
5460
@pytest.fixture
5561
async def expdb_test() -> AsyncIterator[AsyncConnection]:
5662
async with automatic_rollback(expdb_database()) as connection:
@@ -85,6 +91,8 @@ async def override_userdb() -> AsyncIterator[AsyncConnection]:
8591

8692
app.dependency_overrides[expdb_connection] = override_expdb
8793
app.dependency_overrides[userdb_connection] = override_userdb
94+
# We do not use the Lifespan manager for now because our auto-use fixture
95+
# `one_lifespan` will do setup and teardown at a session scope level instead.
8896
async with httpx.AsyncClient(
8997
transport=httpx.ASGITransport(app=app),
9098
base_url="http://test",

tests/dependencies/fetch_user_test.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from contextlib import aclosing
2+
13
import pytest
24
from sqlalchemy.ext.asyncio import AsyncConnection
35

@@ -16,19 +18,22 @@
1618
],
1719
)
1820
async def test_fetch_user(api_key: str, user: User, user_test: AsyncConnection) -> None:
19-
db_user = await fetch_user(api_key, user_data=user_test)
21+
async with aclosing(fetch_user(api_key, user_data=user_test)) as agen:
22+
db_user = await anext(agen)
2023
assert isinstance(db_user, User)
2124
assert user.user_id == db_user.user_id
2225
assert set(await user.get_groups()) == set(await db_user.get_groups())
2326

2427

2528
async def test_fetch_user_no_key_no_user() -> None:
26-
assert await fetch_user(api_key=None) is None
29+
async with aclosing(fetch_user(api_key=None)) as agen:
30+
assert await anext(agen) is None
2731

2832

2933
async def test_fetch_user_invalid_key_raises(user_test: AsyncConnection) -> None:
3034
with pytest.raises(AuthenticationFailedError):
31-
await fetch_user(api_key=ApiKey.INVALID, user_data=user_test)
35+
async with aclosing(fetch_user(api_key=ApiKey.INVALID, user_data=user_test)) as agen:
36+
await anext(agen)
3237

3338

3439
async def test_fetch_user_or_raise_raises_if_no_user() -> None:

0 commit comments

Comments
 (0)