diff --git a/sanic/headers.py b/sanic/headers.py
index d623b171..5e3a9e54 100644
--- a/sanic/headers.py
+++ b/sanic/headers.py
@@ -1,19 +1,32 @@
 import re
 import typing
 
+Options = typing.Dict[str, str]  # key=value fields in various headers
 
-token, quoted = r"([\w!#$%&'*+\-.^_`|~]+)", r'"((?:[^"]|\\")*)"'
+token, quoted = r"([\w!#$%&'*+\-.^_`|~]+)", r'"([^"]*)"'
 parameter = re.compile(fr";\s*{token}=(?:{token}|{quoted})", re.ASCII)
 
+# Note: this intentionally leaves out the quoted-pair escape sequence specified
+# in RFCs because browsers escape quotes as %22 and do not escape backslashes.
+# In particular, a file upload named foo"bar\ is sent as filename="foo%22bar\"
+# by all browsers, and would parse incorrectly if quoted-pair were handled.
 
-def parse_options_header(value: str) -> typing.Tuple[str, dict]:
-    """Parse HTTP header values of Content-Type format."""
+
+def parse_content_header(value: str) -> typing.Tuple[str, Options]:
+    """Parse content-type and content-disposition header values.
+
+    E.g. 'form-data; name=upload; filename=\"file.txt\"' to
+    ('form-data', {'name': 'upload', 'filename': 'file.txt'})
+
+    Mostly identical to cgi.parse_header and werkzeug.parse_options_header
+    but runs faster. Like the others, does NOT unescape anything.
+    """
     pos = value.find(";")
     if pos == -1:
         options = {}
     else:
         options = {
-            m.group(1).lower(): m.group(2) or m.group(3).replace('\\"', '"')
+            m.group(1).lower(): m.group(2) or m.group(3)
             for m in parameter.finditer(value[pos:])
         }
         value = value[:pos]
diff --git a/sanic/request.py b/sanic/request.py
index 5fd50353..c3c44729 100644
--- a/sanic/request.py
+++ b/sanic/request.py
@@ -11,7 +11,7 @@ from urllib.parse import parse_qs, parse_qsl, unquote, urlunparse
 from httptools import parse_url
 
 from sanic.exceptions import InvalidUsage
-from sanic.headers import parse_options_header
+from sanic.headers import parse_content_header
 from sanic.log import error_logger, logger
 
 
@@ -177,7 +177,7 @@ class Request(dict):
             content_type = self.headers.get(
                 "Content-Type", DEFAULT_HTTP_CONTENT_TYPE
             )
-            content_type, parameters = parse_options_header(content_type)
+            content_type, parameters = parse_content_header(content_type)
             try:
                 if content_type == "application/x-www-form-urlencoded":
                     self.parsed_form = RequestParameters(
@@ -551,7 +551,7 @@ def parse_multipart_form(body, boundary):
 
             colon_index = form_line.index(":")
             form_header_field = form_line[0:colon_index].lower()
-            form_header_value, form_parameters = parse_options_header(
+            form_header_value, form_parameters = parse_content_header(
                 form_line[colon_index + 2 :]
             )
 