Skip to content

Commit a26d64b

Browse files
feat(event_handler): add File parameter support for multipart/form-data uploads (#8093)
* feat(event_handler): add support for File field in OpenAPI utility * feat(event_handler): add support for File field in OpenAPI utility * feat(event_handler): add support for File field in OpenAPI utility
1 parent 7c9589c commit a26d64b

8 files changed

Lines changed: 944 additions & 9 deletions

File tree

aws_lambda_powertools/event_handler/middlewares/openapi_validation.py

Lines changed: 166 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from __future__ import annotations
22

3+
import base64
34
import dataclasses
45
import json
56
import logging
7+
import warnings
68
from typing import TYPE_CHECKING, Any, Callable, Mapping, MutableMapping, Sequence, Union, cast
79
from urllib.parse import parse_qs
810

@@ -25,7 +27,7 @@
2527
RequestValidationError,
2628
ResponseValidationError,
2729
)
28-
from aws_lambda_powertools.event_handler.openapi.params import Param
30+
from aws_lambda_powertools.event_handler.openapi.params import Param, UploadFile
2931
from aws_lambda_powertools.event_handler.openapi.types import UnionType
3032

3133
if TYPE_CHECKING:
@@ -44,6 +46,7 @@
4446
CONTENT_DISPOSITION_NAME_PARAM = "name="
4547
APPLICATION_JSON_CONTENT_TYPE = "application/json"
4648
APPLICATION_FORM_CONTENT_TYPE = "application/x-www-form-urlencoded"
49+
MULTIPART_FORM_DATA_CONTENT_TYPE = "multipart/form-data"
4750

4851

4952
class OpenAPIRequestValidationMiddleware(BaseMiddlewareHandler):
@@ -141,14 +144,18 @@ def _get_body(self, app: EventHandlerInstance) -> dict[str, Any]:
141144
elif content_type.startswith(APPLICATION_FORM_CONTENT_TYPE):
142145
return self._parse_form_data(app)
143146

147+
# Handle multipart/form-data (file uploads)
148+
elif content_type.startswith(MULTIPART_FORM_DATA_CONTENT_TYPE):
149+
return self._parse_multipart_data(app, content_type)
150+
144151
else:
145152
raise RequestUnsupportedContentType(
146-
"Only JSON body or Form() are supported",
153+
"Unsupported content type",
147154
errors=[
148155
{
149156
"type": "unsupported_content_type",
150157
"loc": ("body",),
151-
"msg": "Only JSON body or Form() are supported",
158+
"msg": f"Unsupported content type: {content_type}",
152159
"input": {},
153160
"ctx": {},
154161
},
@@ -195,6 +202,49 @@ def _parse_form_data(self, app: EventHandlerInstance) -> dict[str, Any]:
195202
],
196203
) from e
197204

205+
def _parse_multipart_data(self, app: EventHandlerInstance, content_type: str) -> dict[str, Any]:
    """
    Turn a multipart/form-data request body into a mapping of field names to values.

    Parameters
    ----------
    app : EventHandlerInstance
        Resolver whose ``current_event`` supplies the body and its base64 flag.
    content_type : str
        Full Content-Type header value; the multipart boundary is read from it.

    Returns
    -------
    dict[str, Any]
        Whatever ``_parse_multipart_body`` produces for the decoded body.

    Raises
    ------
    ValueError
        When the header carries no boundary. Any other ``ValueError`` raised while
        decoding or parsing is re-raised unchanged as well.
    RequestValidationError
        For every other exception, reported as a ``multipart_invalid`` error on ``body``.

    NOTE(review): ``binascii.Error`` (bad base64) and ``UnicodeDecodeError`` are
    ``ValueError`` subclasses, so they currently escape unwrapped — confirm that is intended.
    """
    try:
        boundary = _extract_multipart_boundary(content_type)
        if not boundary:
            raise ValueError("Missing boundary in multipart/form-data content-type header")

        payload = app.current_event.body or ""

        if not app.current_event.is_base64_encoded:
            warnings.warn(
                "Received multipart/form-data without base64 encoding. "
                "Binary file uploads may be corrupted. "
                "If using API Gateway REST API (v1), configure Binary Media Types "
                "to include 'multipart/form-data'. "
                "See: https://docs.aws.amazon.com/apigateway/latest/developerguide/"
                "api-gateway-payload-encodings.html",
                stacklevel=2,
            )
            # latin-1 maps code points 0-255 straight onto single bytes, so a body that
            # carries raw binary (not valid UTF-8) keeps every byte value intact
            return _parse_multipart_body(payload.encode("latin-1"), boundary)

        return _parse_multipart_body(base64.b64decode(payload), boundary)

    except ValueError:
        # Deliberately not wrapped: surfaces to the caller as-is
        raise
    except Exception as exc:
        failure = {
            "type": "multipart_invalid",
            "loc": ("body",),
            "msg": "Multipart form data parsing error",
            "input": {},
            "ctx": {"error": str(exc)},
        }
        raise RequestValidationError([failure]) from exc
247+
198248

199249
class OpenAPIResponseValidationMiddleware(BaseMiddlewareHandler):
200250
"""
@@ -398,7 +448,12 @@ def _request_body_to_args(
398448
continue
399449

400450
value = _normalize_field_value(value=value, field_info=field.field_info)
401-
values[field.name] = _validate_field(field=field, value=value, loc=loc, existing_errors=errors)
451+
452+
# UploadFile objects bypass Pydantic validation — they're already constructed
453+
if isinstance(value, UploadFile):
454+
values[field.name] = value
455+
else:
456+
values[field.name] = _validate_field(field=field, value=value, loc=loc, existing_errors=errors)
402457

403458
return values, errors
404459

@@ -474,6 +529,10 @@ def _is_or_contains_sequence(annotation: Any) -> bool:
474529

475530
def _normalize_field_value(value: Any, field_info: FieldInfo) -> Any:
476531
"""Normalize field value, converting lists to single values for non-sequence fields."""
532+
# When annotation is bytes but value is UploadFile, extract raw content
533+
if isinstance(value, UploadFile) and field_info.annotation is bytes:
534+
return value.content
535+
477536
if _is_or_contains_sequence(field_info.annotation):
478537
return value
479538
elif isinstance(value, list) and value:
@@ -587,3 +646,106 @@ def _get_param_value(
587646
value = input_dict.get(field_name)
588647

589648
return value
649+
650+
651+
def _extract_multipart_boundary(content_type: str) -> str | None:
652+
"""Extract the boundary string from a multipart/form-data content-type header."""
653+
for segment in content_type.split(";"):
654+
stripped = segment.strip()
655+
if stripped.startswith("boundary="):
656+
boundary = stripped[len("boundary=") :]
657+
# Remove optional quotes around boundary
658+
if boundary.startswith('"') and boundary.endswith('"'):
659+
boundary = boundary[1:-1]
660+
return boundary
661+
return None
662+
663+
664+
def _parse_multipart_body(body: bytes, boundary: str) -> dict[str, Any]:
    """
    Parse a multipart/form-data body into a dict of field names to values.

    Parts whose Content-Disposition carries a ``filename`` become ``UploadFile`` objects
    (content, filename and the part's Content-Type); all other parts are decoded to
    UTF-8 strings. Multiple values for the same field name are collected into a list,
    in order of appearance.

    Parts with no header/body separator or no ``name`` parameter are skipped. The
    preamble before the first delimiter and anything after the closing delimiter are ignored.

    Parameters
    ----------
    body : bytes
        Raw request body.
    boundary : str
        Boundary string from the Content-Type header, without the leading ``--``.

    Returns
    -------
    dict[str, Any]
        Field name mapped to a ``str``, an ``UploadFile``, or a list of those.

    Raises
    ------
    UnicodeDecodeError
        If a part's headers, or the value of a non-file field, are not valid UTF-8.
    """
    # A delimiter is CRLF + "--" + boundary (RFC 2046 section 5.1.1). Anchoring on the CRLF
    # stops a boundary-like byte run in the middle of a file's content from being treated
    # as a delimiter, and it consumes the CRLF that belongs to the delimiter rather than
    # to the part's content. A CRLF is prepended so a delimiter that opens the body
    # matches the same pattern.
    delimiter = b"\r\n--" + boundary.encode()
    segments = (b"\r\n" + body).split(delimiter)

    result: dict[str, Any] = {}

    # segments[0] is the preamble (whatever precedes the first delimiter)
    for segment in segments[1:]:
        # "--" straight after the boundary marks the closing delimiter; the rest is epilogue
        if segment.startswith(b"--"):
            break

        # Drop the CRLF that terminates the delimiter line
        part = segment[2:] if segment.startswith(b"\r\n") else segment

        # Split headers from body at the double CRLF
        header_end = part.find(b"\r\n\r\n")
        if header_end == -1:
            continue

        header_section = part[:header_end].decode("utf-8")
        body_section = part[header_end + 4 :]

        # Parse Content-Disposition to get the field name and optional filename
        field_name = None
        filename = None
        content_type_header = None

        for header_line in header_section.split("\r\n"):
            header_lower = header_line.lower()
            if header_lower.startswith("content-disposition:"):
                field_name = _extract_header_param(header_line, "name")
                filename = _extract_header_param(header_line, "filename")
            elif header_lower.startswith("content-type:"):
                content_type_header = header_line.split(":", 1)[1].strip()

        if field_name is None:
            continue

        # If it has a filename, it's a file upload — wrap as UploadFile
        # Otherwise it's a regular form field — decode to string
        if filename is not None:
            value: Any = UploadFile(content=body_section, filename=filename, content_type=content_type_header)
        else:
            value = body_section.decode("utf-8")

        # Collect multiple values for same field name into a list
        if field_name in result:
            existing = result[field_name]
            if isinstance(existing, list):
                existing.append(value)
            else:
                result[field_name] = [existing, value]
        else:
            result[field_name] = value

    return result
739+
740+
741+
def _extract_header_param(header_line: str, param_name: str) -> str | None:
742+
"""Extract a parameter value from a header line (e.g., name="file" from Content-Disposition)."""
743+
search = f'{param_name}="'
744+
idx = header_line.find(search)
745+
if idx == -1:
746+
return None
747+
start = idx + len(search)
748+
end = header_line.find('"', start)
749+
if end == -1:
750+
return None
751+
return header_line[start:end]

aws_lambda_powertools/event_handler/openapi/dependant.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@
1313
from aws_lambda_powertools.event_handler.openapi.params import (
1414
Body,
1515
Dependant,
16+
File,
1617
Form,
1718
Param,
1819
ParamTypes,
19-
_File,
2020
analyze_param,
2121
create_response_field,
2222
get_flat_dependant,
@@ -370,9 +370,9 @@ def get_body_field_info(
370370
if not required:
371371
body_field_info_kwargs["default"] = None
372372

373-
if any(isinstance(f.field_info, _File) for f in flat_dependant.body_params):
374-
# MAINTENANCE: body_field_info: type[Body] = _File
375-
raise NotImplementedError("_File fields are not supported in request bodies")
373+
if any(isinstance(f.field_info, File) for f in flat_dependant.body_params):
374+
body_field_info = Body
375+
body_field_info_kwargs["media_type"] = "multipart/form-data"
376376
elif any(isinstance(f.field_info, Form) for f in flat_dependant.body_params):
377377
body_field_info = Body
378378
body_field_info_kwargs["media_type"] = "application/x-www-form-urlencoded"

aws_lambda_powertools/event_handler/openapi/params.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -902,7 +902,57 @@ def __init__(
902902
)
903903

904904

905-
class _File(Form): # type: ignore[misc]
905+
class UploadFile:
    """
    Container for one uploaded file together with the metadata sent alongside it.

    Annotate a parameter as ``Annotated[UploadFile, File()]`` to get the content plus
    its filename and content type. When only the raw bytes are needed, annotate it as
    ``Annotated[bytes, File()]`` instead.

    Attributes
    ----------
    filename : str | None
        Filename supplied with the upload.
    content_type : str | None
        MIME type declared by the client (e.g. ``image/jpeg``).
    content : bytes
        Raw file content.
    """

    __slots__ = ("content", "content_type", "filename")

    def __init__(self, *, content: bytes, filename: str | None = None, content_type: str | None = None):
        self.content, self.filename, self.content_type = content, filename, content_type

    def __len__(self) -> int:
        # Length of the raw content
        payload = self.content
        return len(payload)

    def __repr__(self) -> str:
        # Show the size rather than the content itself
        size = len(self.content)
        return f"UploadFile(filename={self.filename!r}, content_type={self.content_type!r}, size={size})"

    @classmethod
    def __get_pydantic_core_schema__(cls, _source_type: Any, _handler: Any) -> Any:
        # Imported lazily, exactly where it is needed
        from pydantic_core import core_schema

        # Serialization hands the instance back untouched
        passthrough = core_schema.plain_serializer_function_ser_schema(lambda v: v, info_arg=False)
        return core_schema.no_info_plain_validator_function(cls._validate, serialization=passthrough)

    @classmethod
    def _validate(cls, v: Any) -> UploadFile:
        # Accept only existing instances; nothing is coerced into an UploadFile
        if not isinstance(v, cls):
            raise ValueError(f"Expected UploadFile, got {type(v).__name__}")
        return v

    @classmethod
    def __get_pydantic_json_schema__(cls, _schema: Any, handler: Any) -> dict[str, Any]:
        # Fixed schema: a string in binary format
        return {"type": "string", "format": "binary"}
953+
954+
955+
class File(Form): # type: ignore[misc]
906956
"""
907957
A class used to represent a file parameter in a path operation.
908958
"""

docs/core/event_handler/api_gateway.md

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,57 @@ You can use the `Form` type to tell the Event Handler that a parameter expects f
605605
--8<-- "examples/event_handler_rest/src/working_with_form_data.py"
606606
```
607607

608+
#### Handling file uploads
609+
610+
!!! info "You must set `enable_validation=True` to handle file uploads via type annotation."
611+
612+
You can use the `File` type to accept `multipart/form-data` file uploads. This automatically sets the correct OpenAPI schema, and Swagger UI will render a file picker for each `File()` parameter.
613+
614+
There are two ways to receive uploaded files:
615+
616+
* **`bytes`** — receive raw file content only
617+
* **`UploadFile`** — receive file content along with metadata (filename, content type)
618+
619+
=== "working_with_file_uploads.py"
620+
621+
```python hl_lines="4 11"
622+
--8<-- "examples/event_handler_rest/src/working_with_file_uploads.py"
623+
```
624+
625+
1. `File` is a special OpenAPI type for `multipart/form-data` file uploads. When annotated as `bytes`, you receive the raw file content.
626+
627+
=== "working_with_file_uploads_metadata.py"
628+
629+
```python hl_lines="4 11 15-16"
630+
--8<-- "examples/event_handler_rest/src/working_with_file_uploads_metadata.py"
631+
```
632+
633+
1. Using `UploadFile` instead of `bytes` gives you access to file metadata.
634+
2. `filename` and `content_type` come from the multipart headers sent by the client.
635+
636+
=== "working_with_file_uploads_mixed.py"
637+
638+
You can combine `File()` and `Form()` parameters in the same route to accept file uploads with additional form fields.
639+
640+
```python hl_lines="6 14-15"
641+
--8<-- "examples/event_handler_rest/src/working_with_file_uploads_mixed.py"
642+
```
643+
644+
1. File upload parameter — receives the uploaded file with metadata.
645+
2. Regular form field — receives a string value from the same multipart request.
646+
647+
!!! warning "API Gateway REST API (v1) requires Binary Media Types configuration"
648+
When using API Gateway REST API (v1), you must configure Binary Media Types to include `multipart/form-data`, otherwise binary file content will be corrupted.
649+
650+
```yaml title="SAM template.yaml"
651+
Globals:
652+
Api:
653+
BinaryMediaTypes:
654+
- "multipart~1form-data"
655+
```
656+
657+
API Gateway HTTP API (v2), Lambda Function URL, and ALB handle binary encoding automatically — no extra configuration needed.
658+
608659
#### Supported types for response serialization
609660

610661
With data validation enabled, we natively support serializing the following data types to JSON:
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from typing import Annotated
2+
3+
from aws_lambda_powertools.event_handler import APIGatewayRestResolver
4+
from aws_lambda_powertools.event_handler.openapi.params import File
5+
6+
app = APIGatewayRestResolver(enable_validation=True)  # validation must be enabled to handle File() uploads
7+
8+
9+
@app.post("/upload")
def upload_file(
    file_data: Annotated[bytes, File(description="File to upload")],  # (1)!
):
    # Annotated as bytes, the File() parameter is the raw file content, so len() is its size
    return {"file_size": len(file_data)}
14+
15+
16+
def lambda_handler(event, context):
    # Delegate the invocation to the resolver and return its result
    return app.resolve(event, context)

0 commit comments

Comments
 (0)