Skip to content

Commit 232aac7

Browse files
authored
migrate /api/books and /api/volumes to fastapi (#11922)
1 parent f1b7125 commit 232aac7

6 files changed

Lines changed: 642 additions & 8 deletions

File tree

openlibrary/asgi_app.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ def health() -> dict[str, str]:
211211
return {"status": "ok"}
212212

213213
from openlibrary.fastapi.account import router as account_router
214+
from openlibrary.fastapi.books import router as books_router
214215
from openlibrary.fastapi.cdn import router as cdn_router
215216
from openlibrary.fastapi.checkins import router as checkins_router
216217
from openlibrary.fastapi.internal.api import router as internal_router
@@ -227,6 +228,7 @@ def health() -> dict[str, str]:
227228

228229
# Include routers
229230
app.include_router(account_router)
231+
app.include_router(books_router)
230232
app.include_router(cdn_router)
231233
app.include_router(checkins_router)
232234
app.include_router(internal_router)

openlibrary/fastapi/books.py

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
"""FastAPI Books API endpoints.
2+
3+
DO NOT follow the example of this code... it is horrible and not following
4+
the best practices like Response Models because JSONP makes it quite complicated.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
import re
10+
import urllib.parse
11+
from typing import Annotated, Any, Literal
12+
13+
import web
14+
from fastapi import APIRouter, Query, Request
15+
from pydantic import BaseModel, BeforeValidator, Field, TypeAdapter
16+
17+
from openlibrary.fastapi.models import parse_comma_separated_list, wrap_jsonp
18+
from openlibrary.plugins.books import dynlinks, readlinks
19+
from openlibrary.plugins.books.dynlinks import DynlinksOptions
20+
21+
router = APIRouter()
22+
23+
24+
class BooksAPIQueryParams(BaseModel):
    """Query-string model accepted by the Books API endpoint."""

    # Raw input is normalised into a list of keys by
    # ``parse_comma_separated_list`` before pydantic validates it.
    bibkeys: Annotated[
        list[str],
        BeforeValidator(parse_comma_separated_list),
    ] = Field(
        ...,
        description="Comma-separated list of bibliography keys (ISBN, LCCN, OCLC, etc.)",
    )
    # Kept as the literal strings "true"/"false" rather than a bool.
    details: Literal["true", "false"] = Field(
        default="false",
        description="Include detailed book information",
    )
    jscmd: Literal["details", "data", "viewapi"] | None = Field(
        default=None,
        description="Format of returned data",
    )
    callback: str | None = Field(
        default=None,
        description="JSONP callback function name",
    )
    high_priority: bool = Field(
        default=False,
        description="Attempt import immediately for missing ISBNs",
    )
    format: Literal["json", "js"] | None = Field(
        default=None,
        description="Explicitly set response format (overrides path-based detection)",
    )
36+
37+
38+
# Note: We don't set response_model on these endpoints because they support a
# JSONP callback. When the callback parameter is present, the function returns a
# Response object directly, which should bypass response model validation.
# Setting response_model on the decorator can interfere with the Response
# object's content-type headers.
@router.get("/api/books", include_in_schema=False)
@router.get("/api/books.json")
async def get_books(
    request: Request,
    params: Annotated[BooksAPIQueryParams, Query()],
) -> Any:
    """
    Get book metadata by bibliography keys.

    This endpoint provides basic information about books, including URLs,
    thumbnail links, and preview availability. Supports ISBNs, LCCNs,
    OCLC numbers, and Open Library IDs.

    When `high_priority=true`, API will attempt to import editions
    from ISBNs that don't exist in database immediately, rather
    than queueing them for later lookup.
    """
    # Set up web context for dynlinks compatibility.
    # This ensures web.ctx.get("home") returns the correct base URL.
    web.ctx.home = f"{request.url.scheme}://{request.url.netloc}"

    # Build the options dict from the parameters the client actually sent.
    # `bibkeys` is excluded explicitly because it is handed to dynlinks as its
    # own argument below; previously it was left in the dump and its removal
    # depended on how DynlinksOptions validation treats unknown keys.
    options: DynlinksOptions = TypeAdapter(DynlinksOptions).validate_python(
        params.model_dump(exclude_unset=True, exclude={"bibkeys"})
    )

    # Format determination priority:
    # 1. format parameter (highest) - already present in `options` when sent
    # 2. callback parameter (medium) - when present, overrides path-based format
    # 3. path-based detection (lowest)
    if not params.format:
        if params.callback:
            options["format"] = "js"
        elif request.url.path.endswith(".json"):
            options["format"] = "json"

    # Call existing business logic; it returns an already-serialized string.
    result_str = dynlinks.dynlinks(bib_keys=params.bibkeys, options=options)

    return wrap_jsonp(request, result_str)
82+
83+
# Matches the multi-lookup volumes path: group 1 is "brief" or "full",
# group 2 is everything that follows "/json/" (greedy, so it includes any suffix).
MULTIGET_PATH_RE = re.compile(r"/api/volumes/(brief|full)/json/(.+)")
84+
85+
86+
@router.get("/api/volumes/{brief_or_full}/json/{req}", include_in_schema=False)
@router.get("/api/volumes/{brief_or_full}/json/{req}.json")
async def get_volumes_multiget(
    request: Request,
    brief_or_full: Literal["brief", "full"],
    req: str,
) -> Any:
    """
    Get volume information for multiple identifiers.

    This endpoint handles multi-lookup form of Hathi-style API,
    allowing multiple identifiers in a single request.

    Example:
    GET /api/volumes/brief/json/isbn:059035342X|oclc:123456.json
    """
    # Set up web context for readlinks compatibility.
    web.ctx.home = f"{request.url.scheme}://{request.url.netloc}"

    # The identifier list is taken from the percent-decoded request path
    # rather than from the `req` path parameter.
    decoded_path = urllib.parse.unquote(request.url.path)

    # A successful match always has exactly two groups, so only the match
    # itself needs checking.
    m = MULTIGET_PATH_RE.match(decoded_path)
    if not m:
        return {}

    # The pattern's last group is greedy and therefore also swallows the
    # ".json" suffix of the documented `{req}.json` route. Strip it so the
    # final identifier is looked up as e.g. "oclc:123456" and not
    # "oclc:123456.json".
    req = m.group(2).removesuffix(".json")

    result = readlinks.readlinks(req, {})
    return wrap_jsonp(request, result)
116+
117+
118+
# Left without a strict response model since it can return either a
# VolumeResponse dict or an empty list [] for non-existent identifiers. Pydantic
# can't represent this union cleanly without losing validation benefits.
@router.get("/api/volumes/{brief_or_full}/{idtype}/{idval}", include_in_schema=False)
@router.get("/api/volumes/{brief_or_full}/{idtype}/{idval}.json")
async def get_volume(
    request: Request,
    brief_or_full: Literal["brief", "full"],
    idtype: Literal["oclc", "lccn", "issn", "isbn", "htid", "olid", "recordnumber"],
    idval: str,
    show_all_items: Annotated[bool, Query(description="Show all items including restricted ones")] = False,
) -> Any:
    """
    Get volume information by identifier type and value.

    This endpoint provides detailed information about a specific volume,
    modeled after HathiTrust Bibliographic API. Includes information
    about loans and other editions of same work.

    Example:
    GET /api/volumes/brief/isbn/059035342X.json
    """
    # Local import: this module does not import json at file level.
    import json

    # Set up web context for readlinks compatibility.
    # This ensures web.ctx.get("home") returns the correct base URL.
    web.ctx.home = f"{request.url.scheme}://{request.url.netloc}"

    # Build request identifier
    req = f"{idtype}:{idval}"

    # Build options dict from query parameters; the flag is only forwarded
    # when it was switched on.
    options: dict[str, str | bool] = {}
    if show_all_items:
        options["show_all_items"] = show_all_items

    # Call existing business logic and keep only the entry for this request
    # key, falling back to an empty list when the identifier is unknown.
    record = readlinks.readlinks(req, options).get(req, [])

    # Serialize here instead of passing the raw value on: the fallback is a
    # list, and a helper that only serializes dicts would hand that list to
    # the Response unserialized. A pre-serialized string is always accepted.
    return wrap_jsonp(request, json.dumps(record))

openlibrary/fastapi/models.py

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,41 @@
11
from __future__ import annotations
22

3+
import json
34
import re
45
from typing import Self
56

6-
from fastapi import HTTPException, Request
7+
from fastapi import HTTPException, Request, Response
78
from pydantic import BaseModel, Field, model_validator
89

910
from openlibrary.core.env import get_ol_env
1011

12+
# Accepted shape of a JSONP callback name: the first character is a letter,
# "_" or "$"; the remaining characters may also be digits or ".".
JS_CALLBACK_RE = re.compile(r"^[A-Za-z_$][A-Za-z0-9_$.]*$")
13+
14+
15+
def parse_comma_separated_list(v: str | list[str] | None) -> list[str]:
    """
    Parse comma-separated string values into a flat list of strings.

    This validator handles string, list and empty inputs, converting:
    - "a,b,c" → ["a", "b", "c"]
    - ["a", "b,c"] → ["a", "b", "c"]
    - None, "" or [] → []

    Used for query parameters that accept comma-separated values like:
    - Search fields: "key,name,author_key"
    - Bibliography keys: "ISBN1,ISBN2,ISBN3"

    Args:
        v: Input value (string, list of strings, or None)

    Returns:
        List of trimmed strings with empty items filtered out
    """
    # Any falsy input (None, "", []) means "no values".
    if not v:
        return []
    # Treat a single string as a one-element list so both shapes share one path.
    if isinstance(v, str):
        v = [v]
    # Split every element on commas, trim whitespace, and drop empty pieces.
    return [f.strip() for item in v for f in str(item).split(",") if f.strip()]
38+
1139

1240
class Pagination(BaseModel):
1341
"""Reusable pagination parameters for API endpoints."""
@@ -30,12 +58,18 @@ class PaginationLimit20(Pagination):
3058
limit: int = Field(20, ge=0, description="Maximum number of results to return.")
3159

3260

33-
def parse_comma_separated_list(v: str | list[str] | None) -> list[str]:
34-
if not v:
35-
return []
36-
if isinstance(v, str):
37-
v = [v]
38-
return [f.strip() for item in v for f in str(item).split(",") if f.strip()]
61+
def wrap_jsonp(request: Request, data: dict | list | str) -> Response:
    """Wrap data in a JSONP callback if the ``callback`` query param is present.

    Always returns a Response object.

    Args:
        request: Incoming request; only its ``callback`` query parameter is read.
        data: Either a pre-serialized JSON string (used as-is) or a
            JSON-serializable value such as a dict or list (serialized here).

    Returns:
        ``application/javascript`` response of the form ``callback(<json>);``
        when a callback is given, otherwise an ``application/json`` response
        carrying the JSON text.

    Raises:
        ValueError: If the callback does not match ``JS_CALLBACK_RE``.
    """
    # Serialize everything that is not already a string. Checking only for
    # dict let other JSON values (e.g. an empty list) reach Response
    # unserialized.
    json_string = data if isinstance(data, str) else json.dumps(data)

    callback = request.query_params.get("callback")
    if not callback:
        return Response(content=json_string, media_type="application/json")

    # fullmatch, not match: with match() the pattern's trailing "$" also
    # accepts a name followed by a newline.
    # NOTE(review): a ValueError here is not an HTTPException, so it likely
    # surfaces as a server error rather than a 4xx - confirm whether an
    # exception handler maps it.
    if not JS_CALLBACK_RE.fullmatch(callback):
        raise ValueError("Invalid callback parameter: must be a valid JavaScript identifier (only letters, numbers, underscore, $, and . allowed)")
    return Response(content=f"{callback}({json_string});", media_type="application/javascript")
3973

4074

4175
class SolrInternalsParams(BaseModel):

openlibrary/plugins/books/code.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
import urllib
66

77
import web
8+
from typing_extensions import deprecated
89

910
from infogami.plugins.api.code import jsonapi
1011
from infogami.utils import delegate
1112
from openlibrary.plugins.books import dynlinks, readlinks
1213

1314

15+
@deprecated("migrated to fastapi")
1416
class books_json(delegate.page):
1517
"""
1618
Endpoint for mapping bib keys (e.g. ISBN, LCCN) to certain links associated
@@ -48,6 +50,7 @@ def GET(self):
4850
return dynlinks.dynlinks(bib_keys=i.bibkeys.split(","), options=i)
4951

5052

53+
@deprecated("migrated to fastapi")
5154
class read_singleget(delegate.page):
5255
"""Handle the single-lookup form of the Hathi-style API"""
5356

@@ -67,6 +70,7 @@ def GET(self, brief_or_full, idtype, idval):
6770
return json.dumps(result)
6871

6972

73+
@deprecated("migrated to fastapi")
7074
class read_multiget(delegate.page):
7175
"""Handle the multi-lookup form of the Hathi-style API"""
7276

0 commit comments

Comments
 (0)