Filename normalisation of form-data/multipart file uploads (umlauts on Apple clients) (#2625)
Co-authored-by: L. Karkkainen <tronic@users.noreply.github.com>
This commit is contained in:
		| @@ -27,6 +27,7 @@ if TYPE_CHECKING: | |||||||
|     from sanic.app import Sanic |     from sanic.app import Sanic | ||||||
|  |  | ||||||
| import email.utils | import email.utils | ||||||
|  | import unicodedata | ||||||
| import uuid | import uuid | ||||||
|  |  | ||||||
| from collections import defaultdict | from collections import defaultdict | ||||||
| @@ -1084,6 +1085,16 @@ def parse_multipart_form(body, boundary): | |||||||
|                         form_parameters["filename*"] |                         form_parameters["filename*"] | ||||||
|                     ) |                     ) | ||||||
|                     file_name = unquote(value, encoding=encoding) |                     file_name = unquote(value, encoding=encoding) | ||||||
|  |  | ||||||
|  |                 # Normalize to NFC (Apple macOS/iOS clients send NFD) | ||||||
|  |                 # Notes: | ||||||
|  |                 # - No effect for Windows, Linux or Android clients which | ||||||
|  |                 #   already send NFC | ||||||
|  |                 # - Python open() is tricky (creates files in NFC no matter | ||||||
|  |                 #   which form you use) | ||||||
|  |                 if file_name is not None: | ||||||
|  |                     file_name = unicodedata.normalize("NFC", file_name) | ||||||
|  |  | ||||||
|             elif form_header_field == "content-type": |             elif form_header_field == "content-type": | ||||||
|                 content_type = form_header_value |                 content_type = form_header_value | ||||||
|                 content_charset = form_parameters.get("charset", "utf-8") |                 content_charset = form_parameters.get("charset", "utf-8") | ||||||
|   | |||||||
| @@ -1293,6 +1293,24 @@ async def test_request_string_representation_asgi(app): | |||||||
|             "------sanic--\r\n", |             "------sanic--\r\n", | ||||||
|             "filename_\u00A0_test", |             "filename_\u00A0_test", | ||||||
|         ), |         ), | ||||||
|  |         # Umlaut using NFC normalization (Windows, Linux, Android) | ||||||
|  |         ( | ||||||
|  |             "------sanic\r\n" | ||||||
|  |             'content-disposition: form-data; filename*="utf-8\'\'filename_%C3%A4_test"; name="test"\r\n' | ||||||
|  |             "\r\n" | ||||||
|  |             "OK\r\n" | ||||||
|  |             "------sanic--\r\n", | ||||||
|  |             "filename_\u00E4_test", | ||||||
|  |         ), | ||||||
|  |         # Umlaut using NFD normalization (macOS client) | ||||||
|  |         ( | ||||||
|  |             "------sanic\r\n" | ||||||
|  |             'content-disposition: form-data; filename*="utf-8\'\'filename_a%CC%88_test"; name="test"\r\n' | ||||||
|  |             "\r\n" | ||||||
|  |             "OK\r\n" | ||||||
|  |             "------sanic--\r\n", | ||||||
|  |             "filename_\u00E4_test",  # Sanic should normalize to NFC | ||||||
|  |         ), | ||||||
|     ], |     ], | ||||||
| ) | ) | ||||||
| def test_request_multipart_files(app, payload, filename): | def test_request_multipart_files(app, payload, filename): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 L. Kärkkäinen
					L. Kärkkäinen