Simplified parse_content_header escaping (#2707)

This commit is contained in:
L. Kärkkäinen 2023-03-06 04:39:16 +00:00 committed by GitHub
parent cb49c2b26d
commit 259e458847
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 24 deletions

View File

@ -19,7 +19,6 @@ OptionsIterable = Iterable[Tuple[str, str]] # May contain duplicate keys
_token, _quoted = r"([\w!#$%&'*+\-.^_`|~]+)", r'"([^"]*)"'
_param = re.compile(rf";\s*{_token}=(?:{_token}|{_quoted})", re.ASCII)
_firefox_quote_escape = re.compile(r'\\"(?!; |\s*$)')
_ipv6 = "(?:[0-9A-Fa-f]{0,4}:){2,7}[0-9A-Fa-f]{0,4}"
_ipv6_re = re.compile(_ipv6)
_host_re = re.compile(
@ -268,19 +267,23 @@ def parse_accept(accept: Optional[str]) -> AcceptList:
def parse_content_header(value: str) -> Tuple[str, Options]:
"""Parse content-type and content-disposition header values.
E.g. 'form-data; name=upload; filename=\"file.txt\"' to
E.g. `form-data; name=upload; filename="file.txt"` to
('form-data', {'name': 'upload', 'filename': 'file.txt'})
Mostly identical to cgi.parse_header and werkzeug.parse_options_header
but runs faster and handles special characters better. Unescapes quotes.
but runs faster and handles special characters better.
Unescapes %22 to `"` and %0D%0A to `\n` in field values.
"""
value = _firefox_quote_escape.sub("%22", value)
pos = value.find(";")
if pos == -1:
options: Dict[str, Union[int, str]] = {}
else:
options = {
m.group(1).lower(): m.group(2) or m.group(3).replace("%22", '"')
m.group(1)
.lower(): (m.group(2) or m.group(3))
.replace("%22", '"')
.replace("%0D%0A", "\n")
for m in _param.finditer(value[pos:])
}
value = value[:pos]

View File

@ -49,32 +49,17 @@ def raised_ceiling():
("attachment", {"filename": "strange;name", "size": "123"}),
),
(
'form-data; name="files"; filename="fo\\"o;bar\\"',
("form-data", {"name": "files", "filename": 'fo"o;bar\\'})
# cgi.parse_header:
# ('form-data', {'name': 'files', 'filename': 'fo"o;bar\\'})
# werkzeug.parse_options_header:
# (
# "form-data",
# {"name": "files", "filename": '"fo\\"o', 'bar\\"': None},
# ),
'form-data; name="foo"; value="%22\\%0D%0A"',
("form-data", {"name": "foo", "value": '\"\\\n'})
),
# <input type=file name="foo&quot;;bar\"> with Unicode filename!
(
# Chrome:
# Chrome, Firefox:
# Content-Disposition: form-data; name="foo%22;bar\"; filename="😀"
'form-data; name="foo%22;bar\\"; filename="😀"',
("form-data", {"name": 'foo";bar\\', "filename": "😀"})
# cgi: ('form-data', {'name': 'foo%22;bar"; filename="😀'})
# werkzeug: ('form-data', {'name': 'foo%22;bar"; filename='})
),
(
# Firefox:
# Content-Disposition: form-data; name="foo\";bar\"; filename="😀"
'form-data; name="foo\\";bar\\"; filename="😀"',
("form-data", {"name": 'foo";bar\\', "filename": "😀"})
# cgi: ('form-data', {'name': 'foo";bar"; filename="😀'})
# werkzeug: ('form-data', {'name': 'foo";bar"; filename='})
# werkzeug (pre 2.3.0): ('form-data', {'name': 'foo%22;bar"; filename='})
),
],
)