PEP 594 has cgi module scheduled for deprecation in Python 3.8 (#1649)
* PEP 594 has cgi module scheduled for deprecation in Python 3.8. Reimplement
cgi.parse_header in Sanic. The new implementation is much faster than either
cgi.parse_header or equivalent werkzeug.parse_options_header, and unlike the
two, also handles quoted values with semicolons or \" in them.
* Fix string escape.
* Useless linter complaints.
* More linter issues
* Add return type hint.
* Do not support quoted-pair escapes.
- Improved documentation and renamed the function more aptly as it only seems
to apply to content-type and content-disposition headers.
* Unquote filenames also in normal mode.
* Add tests for headers. Adapted from CPython parse_header tests with changes on the final test.
* Linter
* Revert "Unquote filenames also in normal mode."
This reverts commit bf0d502bcd.
* Improved parse_content_header and added tests with Firefox and Chrome.
- Unescaping of quotes moved to parse_content_header because it affects all fields,
not just filenames.
- It is impossible to handle all cases correctly but the current heuristics should
suffice well for typical cases and beyond.
- Added comparisons with cgi.parse_header and werkzeug.parse_options_header.
* Updated comments as well.
This commit is contained in:
parent
228a31ee0a
commit
2011f3a0b2
37
sanic/headers.py
Normal file
37
sanic/headers.py
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
import re
|
||||||
|
import typing
|
||||||
|
|
||||||
|
|
||||||
|
# Mapping of parameter name to parameter value: the key=value fields that
# follow the main value in various headers.
Options = typing.Dict[str, str]

# Building blocks for the parameter pattern. Each captures its contents:
# an unquoted token, and a double-quoted string without the quotes.
token = r"([\w!#$%&'*+\-.^_`|~]+)"
quoted = r'"([^"]*)"'

# One ";name=value" parameter. Group 1 is the name, group 2 the value when it
# is an unquoted token, group 3 the value when it is a quoted string.
parameter = re.compile(fr";\s*{token}=(?:{token}|{quoted})", re.ASCII)

# A backslash-escaped double quote, except where it is followed by "; " or by
# optional whitespace and the end of the string (there the quote is taken to
# be the one that closes the value).
firefox_quote_escape = re.compile(r'\\"(?!; |\s*$)')
|
||||||
|
|
||||||
|
# RFC's quoted-pair escapes are mostly ignored by browsers. Chrome, Firefox and
|
||||||
|
# curl all have different escaping, that we try to handle as well as possible,
|
||||||
|
# even though no client escapes in a way that would allow perfect handling.
|
||||||
|
|
||||||
|
# For more information, consult ../tests/test_requests.py
|
||||||
|
|
||||||
|
|
||||||
|
def parse_content_header(value: str) -> typing.Tuple[str, Options]:
    """Split a content-type or content-disposition value into its parts.

    The result is a tuple of the main value (stripped and lower-cased) and a
    dict of the parameters that follow the first semicolon. For example,
    'form-data; name=upload; filename="file.txt"' is parsed to
    ('form-data', {'name': 'upload', 'filename': 'file.txt'}).

    Parameter names are lower-cased. Quoted parameter values are returned
    without the surrounding quotes, and escaped quotes inside them (either
    backslash-escaped or written as %22) come back as plain double quotes.
    """
    # Rewrite backslash-escaped quotes as %22 so that the quoted-string
    # pattern does not end a value early at an escaped quote.
    escaped = firefox_quote_escape.sub("%22", value)

    semicolon = escaped.find(";")
    if semicolon < 0:
        # No parameters at all: the whole string is the main value.
        return escaped.strip().lower(), {}

    options = {}
    for match in parameter.finditer(escaped[semicolon:]):
        name, bare, in_quotes = match.groups()
        # Exactly one of the two value groups matched; only quoted values
        # have their %22 placeholders turned back into quotes.
        options[name.lower()] = bare or in_quotes.replace("%22", '"')

    return escaped[:semicolon].strip().lower(), options
|
|
@ -4,7 +4,6 @@ import json
|
||||||
import sys
|
import sys
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from cgi import parse_header
|
|
||||||
from collections import defaultdict, namedtuple
|
from collections import defaultdict, namedtuple
|
||||||
from http.cookies import SimpleCookie
|
from http.cookies import SimpleCookie
|
||||||
from urllib.parse import parse_qs, parse_qsl, unquote, urlunparse
|
from urllib.parse import parse_qs, parse_qsl, unquote, urlunparse
|
||||||
|
@ -12,6 +11,7 @@ from urllib.parse import parse_qs, parse_qsl, unquote, urlunparse
|
||||||
from httptools import parse_url
|
from httptools import parse_url
|
||||||
|
|
||||||
from sanic.exceptions import InvalidUsage
|
from sanic.exceptions import InvalidUsage
|
||||||
|
from sanic.headers import parse_content_header
|
||||||
from sanic.log import error_logger, logger
|
from sanic.log import error_logger, logger
|
||||||
|
|
||||||
|
|
||||||
|
@ -177,7 +177,7 @@ class Request(dict):
|
||||||
content_type = self.headers.get(
|
content_type = self.headers.get(
|
||||||
"Content-Type", DEFAULT_HTTP_CONTENT_TYPE
|
"Content-Type", DEFAULT_HTTP_CONTENT_TYPE
|
||||||
)
|
)
|
||||||
content_type, parameters = parse_header(content_type)
|
content_type, parameters = parse_content_header(content_type)
|
||||||
try:
|
try:
|
||||||
if content_type == "application/x-www-form-urlencoded":
|
if content_type == "application/x-www-form-urlencoded":
|
||||||
self.parsed_form = RequestParameters(
|
self.parsed_form = RequestParameters(
|
||||||
|
@ -561,7 +561,7 @@ def parse_multipart_form(body, boundary):
|
||||||
|
|
||||||
colon_index = form_line.index(":")
|
colon_index = form_line.index(":")
|
||||||
form_header_field = form_line[0:colon_index].lower()
|
form_header_field = form_line[0:colon_index].lower()
|
||||||
form_header_value, form_parameters = parse_header(
|
form_header_value, form_parameters = parse_content_header(
|
||||||
form_line[colon_index + 2 :]
|
form_line[colon_index + 2 :]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
57
tests/test_headers.py
Normal file
57
tests/test_headers.py
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from sanic import headers
|
||||||
|
|
||||||
|
|
||||||
|
# Each case pairs a raw header value with the (value, options) tuple that
# parse_content_header is expected to return for it.
PARSE_CASES = [
    ("text/plain", ("text/plain", {})),
    ("text/vnd.just.made.this.up ; ", ("text/vnd.just.made.this.up", {})),
    ("text/plain;charset=us-ascii", ("text/plain", {"charset": "us-ascii"})),
    (
        'text/plain ; charset="us-ascii"',
        ("text/plain", {"charset": "us-ascii"}),
    ),
    (
        'text/plain ; charset="us-ascii"; another=opt',
        ("text/plain", {"charset": "us-ascii", "another": "opt"}),
    ),
    (
        'attachment; filename="silly.txt"',
        ("attachment", {"filename": "silly.txt"}),
    ),
    (
        'attachment; filename="strange;name"',
        ("attachment", {"filename": "strange;name"}),
    ),
    (
        'attachment; filename="strange;name";size=123;',
        ("attachment", {"filename": "strange;name", "size": "123"}),
    ),
    (
        'form-data; name="files"; filename="fo\\"o;bar\\"',
        ("form-data", {"name": "files", "filename": 'fo"o;bar\\'}),
        # cgi.parse_header:
        # ('form-data', {'name': 'files', 'filename': 'fo"o;bar\\'})
        # werkzeug.parse_options_header:
        # ('form-data', {'name': 'files', 'filename': '"fo\\"o', 'bar\\"': None})
    ),
    # The next two cases are what browsers send for
    # <input type=file name="foo";bar\"> with a Unicode filename.
    (
        # Chrome:
        # Content-Disposition: form-data; name="foo%22;bar\"; filename="😀"
        'form-data; name="foo%22;bar\\"; filename="😀"',
        ("form-data", {"name": 'foo";bar\\', "filename": "😀"}),
        # cgi: ('form-data', {'name': 'foo%22;bar"; filename="😀'})
        # werkzeug: ('form-data', {'name': 'foo%22;bar"; filename='})
    ),
    (
        # Firefox:
        # Content-Disposition: form-data; name="foo\";bar\"; filename="😀"
        'form-data; name="foo\\";bar\\"; filename="😀"',
        ("form-data", {"name": 'foo";bar\\', "filename": "😀"}),
        # cgi: ('form-data', {'name': 'foo";bar"; filename="😀'})
        # werkzeug: ('form-data', {'name': 'foo";bar"; filename='})
    ),
]


@pytest.mark.parametrize("input, expected", PARSE_CASES)
def test_parse_headers(input, expected):
    """Check parse_content_header against each case in PARSE_CASES."""
    parsed = headers.parse_content_header(input)
    assert parsed == expected
|
Loading…
Reference in New Issue
Block a user