diff --git a/sanic/headers.py b/sanic/headers.py new file mode 100644 index 00000000..8062b57c --- /dev/null +++ b/sanic/headers.py @@ -0,0 +1,37 @@ +import re +import typing + + +Options = typing.Dict[str, str] # key=value fields in various headers + +token, quoted = r"([\w!#$%&'*+\-.^_`|~]+)", r'"([^"]*)"' +parameter = re.compile(fr";\s*{token}=(?:{token}|{quoted})", re.ASCII) +firefox_quote_escape = re.compile(r'\\"(?!; |\s*$)') + +# RFC's quoted-pair escapes are mostly ignored by browsers. Chrome, Firefox and +# curl all have different escaping, that we try to handle as well as possible, +# even though no client escapes in a way that would allow perfect handling. + +# For more information, consult ../tests/test_requests.py + + +def parse_content_header(value: str) -> typing.Tuple[str, Options]: + """Parse content-type and content-disposition header values. + + E.g. 'form-data; name=upload; filename=\"file.txt\"' to + ('form-data', {'name': 'upload', 'filename': 'file.txt'}) + + Mostly identical to cgi.parse_header and werkzeug.parse_options_header + but runs faster and handles special characters better. Unescapes quotes. 
+ """ + value = firefox_quote_escape.sub("%22", value) + pos = value.find(";") + if pos == -1: + options = {} + else: + options = { + m.group(1).lower(): m.group(2) or m.group(3).replace("%22", '"') + for m in parameter.finditer(value[pos:]) + } + value = value[:pos] + return value.strip().lower(), options diff --git a/sanic/request.py b/sanic/request.py index 8356d579..9663063b 100644 --- a/sanic/request.py +++ b/sanic/request.py @@ -4,7 +4,6 @@ import json import sys import warnings -from cgi import parse_header from collections import defaultdict, namedtuple from http.cookies import SimpleCookie from urllib.parse import parse_qs, parse_qsl, unquote, urlunparse @@ -12,6 +11,7 @@ from urllib.parse import parse_qs, parse_qsl, unquote, urlunparse from httptools import parse_url from sanic.exceptions import InvalidUsage +from sanic.headers import parse_content_header from sanic.log import error_logger, logger @@ -177,7 +177,7 @@ class Request(dict): content_type = self.headers.get( "Content-Type", DEFAULT_HTTP_CONTENT_TYPE ) - content_type, parameters = parse_header(content_type) + content_type, parameters = parse_content_header(content_type) try: if content_type == "application/x-www-form-urlencoded": self.parsed_form = RequestParameters( @@ -561,7 +561,7 @@ def parse_multipart_form(body, boundary): colon_index = form_line.index(":") form_header_field = form_line[0:colon_index].lower() - form_header_value, form_parameters = parse_header( + form_header_value, form_parameters = parse_content_header( form_line[colon_index + 2 :] ) diff --git a/tests/test_headers.py b/tests/test_headers.py new file mode 100644 index 00000000..e228f386 --- /dev/null +++ b/tests/test_headers.py @@ -0,0 +1,57 @@ +import pytest + +from sanic import headers + + +@pytest.mark.parametrize( + "input, expected", + [ + ("text/plain", ("text/plain", {})), + ("text/vnd.just.made.this.up ; ", ("text/vnd.just.made.this.up", {})), + ("text/plain;charset=us-ascii", ("text/plain", {"charset": 
"us-ascii"})), + ('text/plain ; charset="us-ascii"', ("text/plain", {"charset": "us-ascii"})), + ( + 'text/plain ; charset="us-ascii"; another=opt', + ("text/plain", {"charset": "us-ascii", "another": "opt"}) + ), + ( + 'attachment; filename="silly.txt"', + ("attachment", {"filename": "silly.txt"}) + ), + ( + 'attachment; filename="strange;name"', + ("attachment", {"filename": "strange;name"}) + ), + ( + 'attachment; filename="strange;name";size=123;', + ("attachment", {"filename": "strange;name", "size": "123"}) + ), + ( + 'form-data; name="files"; filename="fo\\"o;bar\\"', + ('form-data', {'name': 'files', 'filename': 'fo"o;bar\\'}) + # cgi.parse_header: + # ('form-data', {'name': 'files', 'filename': 'fo"o;bar\\'}) + # werkzeug.parse_options_header: + # ('form-data', {'name': 'files', 'filename': '"fo\\"o', 'bar\\"': None}) + ), + # with Unicode filename! + ( + # Chrome: + # Content-Disposition: form-data; name="foo%22;bar\"; filename="😀" + 'form-data; name="foo%22;bar\\"; filename="😀"', + ('form-data', {'name': 'foo";bar\\', 'filename': '😀'}) + # cgi: ('form-data', {'name': 'foo%22;bar"; filename="😀'}) + # werkzeug: ('form-data', {'name': 'foo%22;bar"; filename='}) + ), + ( + # Firefox: + # Content-Disposition: form-data; name="foo\";bar\"; filename="😀" + 'form-data; name="foo\\";bar\\"; filename="😀"', + ('form-data', {'name': 'foo";bar\\', 'filename': '😀'}) + # cgi: ('form-data', {'name': 'foo";bar"; filename="😀'}) + # werkzeug: ('form-data', {'name': 'foo";bar"; filename='}) + ), + ] +) +def test_parse_headers(input, expected): + assert headers.parse_content_header(input) == expected