|
| 1 | +"""BMA client library.""" |
| 2 | + |
import json
import logging
import math
import time
import uuid
from collections.abc import Iterator
from fractions import Fraction
from http import HTTPStatus
from pathlib import Path
from typing import TYPE_CHECKING

import exifread
import httpx
from PIL import Image, ImageOps
| 16 | + |
| 17 | +logger = logging.getLogger("bma_client") |
| 18 | + |
| 19 | +if TYPE_CHECKING: |
| 20 | + from io import BytesIO |
| 21 | + |
| 22 | + from django.http import HttpRequest |
| 23 | + |
| 24 | +# maybe these should come from server settings |
| 25 | +SKIP_EXIF_TAGS = ["JPEGThumbnail", "TIFFThumbnail", "Filename"] |
| 26 | + |
| 27 | + |
class BmaBearerAuth(httpx.Auth):
    """An httpx.Auth subclass to add a Bearer token to requests."""

    def __init__(self, token: str) -> None:
        """Just set the token."""
        self.token = token

    def auth_flow(self, request: httpx.Request) -> Iterator[httpx.Request]:
        """Add Bearer token to request headers.

        httpx auth flows are generators: they receive an httpx.Request
        (not a Django HttpRequest, as the old annotation claimed) and
        yield it back with the Authorization header attached.
        """
        request.headers["Authorization"] = f"Bearer {self.token}"
        yield request
| 39 | + |
| 40 | + |
class BmaClient:
    """The main BMA Client class."""

    def __init__(
        self,
        oauth_client_id: str,
        refresh_token: str,
        path: Path,
        base_url: str = "https://media.bornhack.dk",
        client_uuid: uuid.UUID | None = None,
    ) -> None:
        """Save refresh token, get access token, get or set client uuid.

        Args:
            oauth_client_id: The OAuth2 application client_id on the BMA server.
            refresh_token: OAuth2 refresh token used to obtain access tokens.
            path: Local directory used when downloading files.
            base_url: Base URL of the BMA server.
            client_uuid: Stable identifier for this client; a random uuid4 is
                generated when not supplied.
        """
        self.oauth_client_id = oauth_client_id
        self.refresh_token = refresh_token
        self.base_url = base_url
        logger.debug("Updating oauth token...")
        self.update_access_token()
        self.uuid = client_uuid if client_uuid else uuid.uuid4()
        self.path = path
        self.skip_exif_tags = SKIP_EXIF_TAGS
        self.get_server_settings()

    def update_access_token(self) -> None:
        """Set or update self.access_token using self.refresh_token.

        The server rotates the refresh token on every use, so self.refresh_token
        is replaced as well, and the authenticated httpx client is (re)created.
        """
        r = httpx.post(
            self.base_url + "/o/token/",
            data={
                "client_id": self.oauth_client_id,
                "refresh_token": self.refresh_token,
                "grant_type": "refresh_token",
            },
        ).raise_for_status()
        data = r.json()
        self.refresh_token = data["refresh_token"]
        # NOTE(review): these two warnings write credentials to the log;
        # consider removing them or downgrading to debug in production.
        logger.warning(f"got new refresh_token: {self.refresh_token}")
        self.access_token = data["access_token"]
        logger.warning(f"got new access_token: {self.access_token}")
        self.auth = BmaBearerAuth(token=self.access_token)
        self.client = httpx.Client(auth=self.auth)

    def get_server_settings(self) -> dict[str, dict[str, dict[str, list[str]]]]:
        """Get BMA settings from server, save in self.settings, return as dict."""
        r = self.client.get(
            self.base_url + "/api/v1/json/jobs/settings/",
        ).raise_for_status()
        # parse the body once instead of calling r.json() twice
        response = r.json()
        self.settings = response["bma_response"]["settings"]
        return response

    def get_jobs(self, job_filter: str = "?limit=0") -> list[dict[str, str]]:
        """Get a filtered list of the jobs this user has access to.

        Args:
            job_filter: Querystring appended to the jobs endpoint.
        """
        r = self.client.get(self.base_url + f"/api/v1/json/jobs/{job_filter}").raise_for_status()
        response = r.json()["bma_response"]
        logger.debug(f"Returning {len(response)} jobs")
        return response

    def get_file_info(self, file_uuid: uuid.UUID) -> dict[str, str]:
        """Get metadata for a file."""
        r = self.client.get(self.base_url + f"/api/v1/json/files/{file_uuid}/").raise_for_status()
        return r.json()["bma_response"]

    def download(self, file_uuid: uuid.UUID) -> dict[str, str]:
        """Download a file from BMA unless it already exists locally.

        Returns:
            The file metadata dict (not the raw bytes, as the previous
            annotation claimed); the file contents are written to self.path.
        """
        info = self.get_file_info(file_uuid=file_uuid)
        path = self.path / info["filename"]
        if not path.exists():
            url = self.base_url + info["links"]["downloads"]["original"]
            logger.debug(f"Downloading file {url} ...")
            r = self.client.get(url).raise_for_status()
            logger.debug(f"Done downloading {len(r.content)} bytes, saving to {path}")
            with path.open("wb") as f:
                f.write(r.content)
        return info

    def get_job_assignment(self, file_uuid: uuid.UUID | None = None) -> list[dict[str, dict[str, str]]]:
        """Ask for new job(s) from the API.

        Args:
            file_uuid: Optionally restrict assignment to jobs for this file.

        Returns:
            A list of assigned jobs, empty when the server has none (HTTP 404).
        """
        url = self.base_url + "/api/v1/json/jobs/assign/"
        if file_uuid:
            url += f"?file_uuid={file_uuid}"
        # uuid.UUID is not JSON serialisable, send the string form
        data = {"client_uuid": str(self.uuid)}
        try:
            r = self.client.post(url, data=json.dumps(data)).raise_for_status()
            response = r.json()["bma_response"]
        except httpx.HTTPStatusError as e:
            # the server answers 404 when there are no jobs to assign
            if e.response.status_code == HTTPStatus.NOT_FOUND:
                response = []
            else:
                raise
        logger.debug(f"Returning {len(response)} jobs")
        return response

    def upload_file(self, path: Path, attribution: str, file_license: str) -> dict[str, dict[str, str]]:
        """Upload a file.

        Args:
            path: Path of the image file to upload.
            attribution: Attribution text for the file.
            file_license: License identifier for the file.

        Raises:
            ValueError: If the file extension is not a supported image type.
        """
        # is this an image? check the extension against the server settings,
        # which map mimetype -> list of extensions (iterate .items(), the keys
        # and values together, not just .values())
        extension = path.suffix[1:].lower()
        for _mimetype, extensions in self.settings["filetypes"]["images"].items():
            if extension in extensions:
                # this file has the extension of a supported image
                logger.debug(f"Extension {extension} is supported...")
                break
        else:
            # file type not supported
            raise ValueError(f"{path.suffix}")

        # get image dimensions from the rotation-normalised copy
        with Image.open(path) as image:
            rotated = ImageOps.exif_transpose(image)  # creates a copy with rotation normalised
            logger.debug(
                f"Image has exif rotation info, using post-rotate size {rotated.size} instead of raw size {image.size}"
            )
            width, height = rotated.size

        # open file
        with path.open("rb") as fh:
            files = {"f": (path.name, fh)}
            # build metadata
            data = {
                "attribution": attribution,
                "license": file_license,
                "width": width,
                "height": height,
            }
            # doit - raise on HTTP errors like every other API call here
            r = self.client.post(
                self.base_url + "/api/v1/json/files/upload/",
                data={"metadata": json.dumps(data)},
                files=files,
            ).raise_for_status()
            return r.json()

    def handle_job(
        self, job: dict[str, str], orig: Path
    ) -> "tuple[Image.Image, Image.Exif] | dict[str, dict[str, str]] | None":
        """Do the thing and return the result.

        Returns:
            An (image, exif) tuple for conversion jobs, a grouped exif dict for
            exif extraction jobs, or None for unsupported job types.
        """
        if job["job_type"] == "ImageConversionJob":
            return self.handle_image_conversion_job(job=job, orig=orig)
        if job["job_type"] == "ImageExifExtractionJob":
            return self.get_exif(orig)
        logger.error(f"Unsupported job type {job['job_type']}")
        return None

    def handle_image_conversion_job(self, job: dict[str, str], orig: Path) -> tuple[Image.Image, Image.Exif]:
        """Handle image conversion job.

        Opens the original image, normalises its rotation, calculates the
        target size (optionally with a custom aspect ratio from the job) and
        converts it, returning the converted image and its exif data.
        """
        # load original image
        start = time.time()
        logger.debug(f"Opening original image {orig}...")
        image = Image.open(orig)
        logger.debug(
            f"Opening {orig.stat().st_size} bytes {image.size} source image took {time.time() - start} seconds"
        )

        logger.debug("Rotating image (if needed)...")
        start = time.time()
        image = ImageOps.exif_transpose(image)  # creates a copy with rotation normalised
        logger.debug(f"Rotating image took {time.time() - start} seconds, image is now {image.size}")

        logger.debug("Getting exif metadata from image...")
        start = time.time()
        exif = image.getexif()
        logger.debug(f"Getting exif data took {time.time() - start} seconds")

        logger.debug("Calculating size and ratio...")
        start = time.time()
        if job["aspect_ratio_numerator"] and job["aspect_ratio_denominator"]:
            # height is calculated based on requested width and AR
            ratio = Fraction(job["aspect_ratio_numerator"], job["aspect_ratio_denominator"])
            height = math.floor(job["width"] / ratio)
        else:
            # height is a fraction of width, keeping AR the same
            ratio = None
            height = math.floor(job["width"] / Fraction(*image.size))
        size = math.floor(job["width"]), math.floor(height)
        logger.debug(f"Calculating size and AR took {time.time() - start} seconds")

        logger.debug(f"Desired image size is {size}, AR {ratio}, converting image...")
        start = time.time()
        # custom AR or not? fit() crops to the exact AR, thumbnail() keeps it
        if ratio:
            image = ImageOps.fit(image, size)
        else:
            image.thumbnail(size)
        logger.debug(f"Converting image size and AR took {time.time() - start} seconds")

        logger.debug("Done, returning result...")
        return image, exif

    def upload_job_result(self, job_uuid: uuid.UUID, buf: "BytesIO", filename: str) -> dict:
        """Upload the result of a job.

        Args:
            job_uuid: The uuid of the job this result belongs to.
            buf: In-memory buffer holding the result bytes.
            filename: Filename to attach to the uploaded result.
        """
        size = buf.getbuffer().nbytes
        logger.debug(f"Uploading {size} bytes result for job {job_uuid} with filename {filename}")
        start = time.time()
        files = {"f": (filename, buf)}
        # build metadata; uuid.UUID is not JSON serialisable, send the string form
        data = {
            "client_uuid": str(self.uuid),
        }
        # doit
        r = self.client.post(
            self.base_url + f"/api/v1/json/jobs/{job_uuid}/result/",
            data={"assign": json.dumps(data)},
            files=files,
        ).raise_for_status()
        t = time.time() - start
        logger.debug(f"Done, it took {t} seconds to upload {size} bytes, speed {round(size/t)} bytes/sec")
        return r.json()

    def get_exif(self, fname: Path) -> dict[str, dict[str, str]]:
        """Return a dict with exif data as read by exifread from the file.

        exifread returns a flat dict of key: value pairs where the key
        is a space seperated "IDF: Key" thing, split and group accordingly
        Key: "Image ExifOffset", len 3, value 266
        Key: "GPS GPSVersionID", len 12, value [2, 3, 0, 0]
        """
        with fname.open("rb") as f:
            tags = exifread.process_file(f, details=True)
        grouped = {}
        for tag, value in tags.items():
            # use the instance attribute set in __init__ rather than the
            # module constant, so per-client overrides take effect
            if tag in self.skip_exif_tags:
                logger.debug(f"Skipping exif tag {tag}")
                continue
            # group by IDF; the last space-separated element is the key
            group, *key = tag.split(" ")
            key = key[-1]
            logger.debug(f"Group: {group} Key: {key}, type {value.field_type}, len {len(str(value))}, value {value}")
            if group not in grouped:
                grouped[group] = {}
            grouped[group][key] = str(value)
        return grouped
0 commit comments