Filename normalisation of form-data/multipart file uploads (umlauts on Apple clients) (#2625)

Co-authored-by: L. Karkkainen <tronic@users.noreply.github.com>
This commit is contained in:
L. Kärkkäinen 2022-12-13 06:36:21 +00:00 committed by GitHub
parent 92e7463721
commit 13e9ab7ba9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 29 additions and 0 deletions

View File

@@ -27,6 +27,7 @@ if TYPE_CHECKING:
from sanic.app import Sanic from sanic.app import Sanic
import email.utils import email.utils
import unicodedata
import uuid import uuid
from collections import defaultdict from collections import defaultdict
@@ -1084,6 +1085,16 @@ def parse_multipart_form(body, boundary):
form_parameters["filename*"] form_parameters["filename*"]
) )
file_name = unquote(value, encoding=encoding) file_name = unquote(value, encoding=encoding)
# Normalize to NFC (Apple macOS/iOS clients send NFD)
# Notes:
# - No effect for Windows, Linux or Android clients which
# already send NFC
# - Python open() is tricky (creates files in NFC no matter
# which form you use)
if file_name is not None:
file_name = unicodedata.normalize("NFC", file_name)
elif form_header_field == "content-type": elif form_header_field == "content-type":
content_type = form_header_value content_type = form_header_value
content_charset = form_parameters.get("charset", "utf-8") content_charset = form_parameters.get("charset", "utf-8")

View File

@@ -1293,6 +1293,24 @@ async def test_request_string_representation_asgi(app):
"------sanic--\r\n", "------sanic--\r\n",
"filename_\u00A0_test", "filename_\u00A0_test",
), ),
# Umlaut using NFC normalization (Windows, Linux, Android)
(
"------sanic\r\n"
'content-disposition: form-data; filename*="utf-8\'\'filename_%C3%A4_test"; name="test"\r\n'
"\r\n"
"OK\r\n"
"------sanic--\r\n",
"filename_\u00E4_test",
),
# Umlaut using NFD normalization (macOS client)
(
"------sanic\r\n"
'content-disposition: form-data; filename*="utf-8\'\'filename_a%CC%88_test"; name="test"\r\n'
"\r\n"
"OK\r\n"
"------sanic--\r\n",
"filename_\u00E4_test", # Sanic should normalize to NFC
),
], ],
) )
def test_request_multipart_files(app, payload, filename): def test_request_multipart_files(app, payload, filename):