diff --git a/sanic/request.py b/sanic/request.py index 8b8d9530..592869e5 100644 --- a/sanic/request.py +++ b/sanic/request.py @@ -27,6 +27,7 @@ if TYPE_CHECKING: from sanic.app import Sanic import email.utils +import unicodedata import uuid from collections import defaultdict @@ -1084,6 +1085,16 @@ def parse_multipart_form(body, boundary): form_parameters["filename*"] ) file_name = unquote(value, encoding=encoding) + + # Normalize to NFC (Apple MacOS/iOS send NFD) + # Notes: + # - No effect for Windows, Linux or Android clients which + # already send NFC + # - Python open() is tricky (creates files in NFC no matter + # which form you use) + if file_name is not None: + file_name = unicodedata.normalize("NFC", file_name) + elif form_header_field == "content-type": content_type = form_header_value content_charset = form_parameters.get("charset", "utf-8") diff --git a/tests/test_requests.py b/tests/test_requests.py index 9a984bb4..b8392964 100644 --- a/tests/test_requests.py +++ b/tests/test_requests.py @@ -1293,6 +1293,24 @@ async def test_request_string_representation_asgi(app): "------sanic--\r\n", "filename_\u00A0_test", ), + # Umlaut using NFC normalization (Windows, Linux, Android) + ( + "------sanic\r\n" + 'content-disposition: form-data; filename*="utf-8\'\'filename_%C3%A4_test"; name="test"\r\n' + "\r\n" + "OK\r\n" + "------sanic--\r\n", + "filename_\u00E4_test", + ), + # Umlaut using NFD normalization (MacOS client) + ( + "------sanic\r\n" + 'content-disposition: form-data; filename*="utf-8\'\'filename_a%CC%88_test"; name="test"\r\n' + "\r\n" + "OK\r\n" + "------sanic--\r\n", + "filename_\u00E4_test", # Sanic should normalize to NFC + ), ], ) def test_request_multipart_files(app, payload, filename):