Skip to content

Commit 18bc63e

Browse files
committed
first attempt at a client library, very much a work in progress, basic uploading and job handling works
1 parent da1f599 commit 18bc63e

2 files changed

Lines changed: 327 additions & 0 deletions

File tree

pyproject.toml

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
[build-system]
2+
requires = ["setuptools"]
3+
build-backend = "setuptools.build_meta"
4+
5+
[project]
6+
authors = [
7+
{email = "thomas@gibfest.dk"},
8+
{name = "Thomas Steen Rasmussen"}
9+
]
10+
classifiers = [
11+
"Programming Language :: Python :: 3",
12+
"Operating System :: OS Independent",
13+
]
14+
dependencies = [
15+
"exifread==3.0.0",
16+
"httpx==0.27.2",
17+
"pillow==11.0.0",
18+
]
19+
description = "BornHack Media Archive Python Client Library"
20+
name = "bma-client"
21+
version = "0.1"
22+
readme = "README.md"
23+
requires-python = ">=3.10"
24+
25+
[project.optional-dependencies]
26+
dev = [
27+
"pre-commit==4.0.0",
28+
]
29+
30+
[project.urls]
31+
homepage = "https://github.com/bornhack/bma-client-python"
32+
33+
[tool.setuptools]
34+
package-dir = {"" = "src"}
35+
36+
[tool.setuptools.packages.find]
37+
where = ["src"]
38+
39+
[tool.ruff]
40+
target-version = "py310"
41+
extend-exclude = [
42+
".git",
43+
"__pycache__",
44+
]
45+
lint.select = ["ALL"]
46+
lint.ignore = [
47+
"G004", # https://docs.astral.sh/ruff/rules/logging-f-string/
48+
"ANN101", # https://docs.astral.sh/ruff/rules/missing-type-self/
49+
"ANN102", # https://docs.astral.sh/ruff/rules/missing-type-cls/
50+
"EM101", # https://docs.astral.sh/ruff/rules/raw-string-in-exception/
51+
"EM102", # https://docs.astral.sh/ruff/rules/f-string-in-exception/
52+
"COM812", # missing-trailing-comma (https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules)
53+
"ISC001", # single-line-implicit-string-concatenation (https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules)
54+
"ARG001", # https://docs.astral.sh/ruff/rules/unused-function-argument/
55+
"ARG002", # https://docs.astral.sh/ruff/rules/unused-method-argument/
56+
"ARG004", # https://docs.astral.sh/ruff/rules/unused-static-method-argument/
57+
]
58+
line-length = 120
59+
60+
[tool.ruff.lint.pydocstyle]
61+
convention = "google"

src/bma_client.py

Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
"""BMA client library."""
2+
3+
import json
4+
import logging
5+
import math
6+
import time
7+
import uuid
8+
from fractions import Fraction
9+
from http import HTTPStatus
10+
from pathlib import Path
11+
from typing import TYPE_CHECKING
12+
13+
import exifread
14+
import httpx
15+
from PIL import Image, ImageOps
16+
17+
# Module-level logger; every message from this module is emitted under the fixed "bma_client" name.
logger = logging.getLogger("bma_client")
18+
19+
if TYPE_CHECKING:
    from collections.abc import Generator
    from io import BytesIO

    from django.http import HttpRequest
23+
24+
# maybe these should come from server settings
25+
# Exif tag names that BmaClient.get_exif() leaves out of its result; the same list is
# also assigned to each client instance as skip_exif_tags.
SKIP_EXIF_TAGS = ["JPEGThumbnail", "TIFFThumbnail", "Filename"]
26+
27+
28+
class BmaBearerAuth(httpx.Auth):
    """An httpx.Auth subclass that adds a Bearer token to outgoing requests."""

    def __init__(self, token: str) -> None:
        """Store the token.

        Args:
            token: The access token to send in the Authorization header.
        """
        self.token = token

    def auth_flow(self, request: httpx.Request) -> "Generator[httpx.Request, httpx.Response, None]":
        """Add the Bearer token to the request headers.

        The request object is an httpx request (not a Django one); this method is a
        generator because httpx drives authentication flows by iterating them.

        Args:
            request: The outgoing request; its headers are modified in place.

        Yields:
            The same request with the Authorization header set.
        """
        request.headers["Authorization"] = f"Bearer {self.token}"
        yield request
39+
40+
41+
class BmaClient:
42+
"""The main BMA Client class."""
43+
44+
def __init__(
45+
self,
46+
oauth_client_id: str,
47+
refresh_token: str,
48+
path: Path,
49+
base_url: str = "https://media.bornhack.dk",
50+
client_uuid: uuid.UUID | None = None,
51+
) -> None:
52+
"""Save refresh token, get access token, get or set client uuid."""
53+
self.oauth_client_id = oauth_client_id
54+
self.refresh_token = refresh_token
55+
self.base_url = base_url
56+
logger.debug("Updating oauth token...")
57+
self.update_access_token()
58+
self.uuid = client_uuid if client_uuid else uuid.uuid4()
59+
self.path = path
60+
self.skip_exif_tags = SKIP_EXIF_TAGS
61+
self.get_server_settings()
62+
63+
def update_access_token(self) -> None:
64+
"""Set or update self.access_token using self.refresh_token."""
65+
r = httpx.post(
66+
self.base_url + "/o/token/",
67+
data={
68+
"client_id": self.oauth_client_id,
69+
"refresh_token": self.refresh_token,
70+
"grant_type": "refresh_token",
71+
},
72+
).raise_for_status()
73+
data = r.json()
74+
self.refresh_token = data["refresh_token"]
75+
logger.warning(f"got new refresh_token: {self.refresh_token}")
76+
self.access_token = data["access_token"]
77+
logger.warning(f"got new access_token: {self.access_token}")
78+
self.auth = BmaBearerAuth(token=self.access_token)
79+
self.client = httpx.Client(auth=self.auth)
80+
81+
def get_server_settings(self) -> dict[str, dict[str, dict[str, list[str]]]]:
82+
"""Get BMA settings from server, return as dict."""
83+
r = self.client.get(
84+
self.base_url + "/api/v1/json/jobs/settings/",
85+
).raise_for_status()
86+
self.settings = r.json()["bma_response"]["settings"]
87+
return r.json()
88+
89+
def get_jobs(self, job_filter: str = "?limit=0") -> list[dict[str, str]]:
90+
"""Get a filtered list of the jobs this user has access to."""
91+
r = self.client.get(self.base_url + f"/api/v1/json/jobs/{job_filter}").raise_for_status()
92+
response = r.json()["bma_response"]
93+
logger.debug(f"Returning {len(response)} jobs")
94+
return response
95+
96+
def get_file_info(self, file_uuid: uuid.UUID) -> dict[str, str]:
97+
"""Get metadata for a file."""
98+
r = self.client.get(self.base_url + f"/api/v1/json/files/{file_uuid}/").raise_for_status()
99+
return r.json()["bma_response"]
100+
101+
def download(self, file_uuid: uuid.UUID) -> bytes:
102+
"""Download a file from BMA."""
103+
info = self.get_file_info(file_uuid=file_uuid)
104+
path = self.path / info["filename"]
105+
if not path.exists():
106+
url = self.base_url + info["links"]["downloads"]["original"]
107+
logger.debug(f"Downloading file {url} ...")
108+
r = self.client.get(url).raise_for_status()
109+
logger.debug(f"Done downloading {len(r.content)} bytes, saving to {path}")
110+
with path.open("wb") as f:
111+
f.write(r.content)
112+
return info
113+
114+
def get_job_assignment(self, file_uuid: uuid.UUID | None = None) -> list[dict[str, dict[str, str]]]:
115+
"""Ask for new job(s) from the API."""
116+
url = self.base_url + "/api/v1/json/jobs/assign/"
117+
if file_uuid:
118+
url += f"?file_uuid={file_uuid}"
119+
data = {"client_uuid": self.uuid}
120+
try:
121+
r = self.client.post(url, data=json.dumps(data)).raise_for_status()
122+
response = r.json()["bma_response"]
123+
except httpx.HTTPStatusError as e:
124+
if e.response.status_code == HTTPStatus.NotFound:
125+
response = []
126+
else:
127+
raise
128+
logger.debug(f"Returning {len(response)} jobs")
129+
return response
130+
131+
def upload_file(self, path: Path, attribution: str, file_license: str) -> dict[str, dict[str, str]]:
132+
"""Upload a file."""
133+
# is this an image?
134+
for _mimetype, extensions in self.settings["filetypes"]["images"].values():
135+
extension = path.suffix[1:]
136+
if extension.lower() in extensions:
137+
# this file has the extension of a supported image
138+
logger.debug(f"Extension {extension} is supported...")
139+
break
140+
else:
141+
# file type not supported
142+
raise ValueError(f"{path.suffix}")
143+
144+
# get image dimensions
145+
with Image.open(path) as image:
146+
rotated = ImageOps.exif_transpose(image) # creates a copy with rotation normalised
147+
logger.debug(
148+
f"Image has exif rotation info, using post-rotate size {rotated.size} instead of raw size {image.size}"
149+
)
150+
width, height = rotated.size
151+
152+
# open file
153+
with path.open("rb") as fh:
154+
files = {"f": (path.name, fh)}
155+
# build metadata
156+
data = {
157+
"attribution": attribution,
158+
"license": file_license,
159+
"width": width,
160+
"height": height,
161+
}
162+
# doit
163+
r = self.client.post(
164+
self.base_url + "/api/v1/json/files/upload/",
165+
data={"metadata": json.dumps(data)},
166+
files=files,
167+
)
168+
return r.json()
169+
170+
def handle_job(self, job: dict[str, str], orig: Path) -> tuple[Image.Image, Image.Exif]:
171+
"""Do the thing and return the result."""
172+
if job["job_type"] == "ImageConversionJob":
173+
return self.handle_image_conversion_job(job=job, orig=orig)
174+
if job["job_type"] == "ImageExifExtractionJob":
175+
return self.get_exif(orig)
176+
logger.error(f"Unsupported job type {job['job_type']}")
177+
return None
178+
179+
def handle_image_conversion_job(self, job: dict[str, str], orig: Path) -> tuple[Image.Image, Image.Exif]:
180+
"""Handle image conversion job."""
181+
# load original image
182+
start = time.time()
183+
logger.debug(f"Opening original image {orig}...")
184+
image = Image.open(orig)
185+
logger.debug(
186+
f"Opening {orig.stat().st_size} bytes {image.size} source image took {time.time() - start} seconds"
187+
)
188+
189+
logger.debug("Rotating image (if needed)...")
190+
start = time.time()
191+
image = ImageOps.exif_transpose(image) # creates a copy with rotation normalised
192+
logger.debug(f"Rotating image took {time.time() - start} seconds, image is now {image.size}")
193+
194+
logger.debug("Getting exif metadata from image...")
195+
start = time.time()
196+
exif = image.getexif()
197+
logger.debug(f"Getting exif data took {time.time() - start} seconds")
198+
199+
logger.debug("Calculating size and ratio...")
200+
start = time.time()
201+
if job["aspect_ratio_numerator"] and job["aspect_ratio_denominator"]:
202+
# height is calculated based on requested width and AR
203+
ratio = Fraction(job["aspect_ratio_numerator"], job["aspect_ratio_denominator"])
204+
height = math.floor(job["width"] / ratio)
205+
else:
206+
# height is a fraction of width, keeping AR the same
207+
ratio = None
208+
height = math.floor(job["width"] / Fraction(*image.size))
209+
size = math.floor(job["width"]), math.floor(height)
210+
logger.debug(f"Calculating size and AR took {time.time() - start} seconds")
211+
212+
logger.debug(f"Desired image size is {size}, AR {ratio}, converting image...")
213+
start = time.time()
214+
# custom AR or not?
215+
if ratio:
216+
image = ImageOps.fit(image, size)
217+
else:
218+
image.thumbnail(size)
219+
logger.debug(f"Converting image size and AR took {time.time() - start} seconds")
220+
221+
logger.debug("Done, returning result...")
222+
return image, exif
223+
224+
def upload_job_result(self, job_uuid: uuid.UUID, buf: "BytesIO", filename: str) -> dict:
225+
"""Upload the result of a job."""
226+
size = buf.getbuffer().nbytes
227+
logger.debug(f"Uploading {size} bytes result for job {job_uuid} with filename {filename}")
228+
start = time.time()
229+
files = {"f": (filename, buf)}
230+
# build metadata
231+
data = {
232+
"client_uuid": self.uuid,
233+
}
234+
# doit
235+
r = self.client.post(
236+
self.base_url + f"/api/v1/json/jobs/{job_uuid}/result/",
237+
data={"assign": json.dumps(data)},
238+
files=files,
239+
).raise_for_status()
240+
t = time.time() - start
241+
logger.debug(f"Done, it took {t} seconds to upload {size} bytes, speed {round(size/t)} bytes/sec")
242+
return r.json()
243+
244+
def get_exif(self, fname: Path) -> dict[str, dict[str, str]]:
245+
"""Return a dict with exif data as read by exifread from the file.
246+
247+
exifread returns a flat dict of key: value pairs where the key
248+
is a space seperated "IDF: Key" thing, split and group accordingly
249+
Key: "Image ExifOffset", len 3, value 266
250+
Key: "GPS GPSVersionID", len 12, value [2, 3, 0, 0]
251+
"""
252+
with fname.open("rb") as f:
253+
tags = exifread.process_file(f, details=True)
254+
grouped = {}
255+
for tag, value in tags.items():
256+
if tag in SKIP_EXIF_TAGS:
257+
logger.debug(f"Skipping exif tag {tag}")
258+
continue
259+
# group by IDF
260+
group, *key = tag.split(" ")
261+
key = key[-1]
262+
logger.debug(f"Group: {group} Key: {key}, type {value.field_type}, len {len(str(value))}, value {value}")
263+
if group not in grouped:
264+
grouped[group] = {}
265+
grouped[group][key] = str(value)
266+
return grouped

0 commit comments

Comments
 (0)