PEP 594 has cgi module scheduled for deprecation in Python 3.8 (#1649)
* PEP 594 has cgi module scheduled for deprecation in Python 3.8. Reimplement
cgi.parse_header in Sanic. The new implementation is much faster than either
cgi.parse_header or the equivalent werkzeug.parse_options_header and, unlike
those two, also handles quoted values with semicolons or \" in them.
* Fix string escape.
* Useless linter complaints.
* More linter issues
* Add return type hint.
* Do not support quoted-pair escapes.
- Improved documentation and renamed the function more aptly as it only seems
  to apply to content-type and content-disposition headers.
* Unquote filenames also in normal mode.
* Add tests for headers. Adapted from CPython parse_header tests with changes on the final test.
* Linter
* Revert "Unquote filenames also in normal mode."
This reverts commit bf0d502bcd.
* Improved parse_content_header and added tests with Firefox and Chrome.
- Unescaping of quotes moved to parse_content_header because it affects all fields,
  not just filenames.
- It is impossible to handle all cases correctly but the current heuristics should
  suffice well for typical cases and beyond.
- Added comparisons with cgi.parse_header and werkzeug.parse_options_header.
* Updated comments as well.
			
			
This commit is contained in:
		 L. Kärkkäinen
					L. Kärkkäinen
				
			
				
					committed by
					
						 Stephen Sadowski
						Stephen Sadowski
					
				
			
			
				
	
			
			
			 Stephen Sadowski
						Stephen Sadowski
					
				
			
						parent
						
							228a31ee0a
						
					
				
				
					commit
					2011f3a0b2
				
			
							
								
								
									
										37
									
								
								sanic/headers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								sanic/headers.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,37 @@ | |||||||
|  | import re | ||||||
|  | import typing | ||||||
|  |  | ||||||
|  |  | ||||||
Options = typing.Dict[str, str]  # key=value fields in various headers

# Regex fragments, each with one capture group: an unquoted token, and the
# content of a double-quoted string (which may not itself contain '"').
token = r"([\w!#$%&'*+\-.^_`|~]+)"
quoted = r'"([^"]*)"'

# One ";key=value" parameter. Group 1 is the key, group 2 an unquoted value
# and group 3 the content of a quoted value (exactly one of 2/3 is set).
parameter = re.compile(fr";\s*{token}=(?:{token}|{quoted})", re.ASCII)

# A backslash-escaped quote, unless it is followed by "; " or by nothing but
# whitespace up to the end of the string.
firefox_quote_escape = re.compile(r'\\"(?!; |\s*$)')

# RFC's quoted-pair escapes are mostly ignored by browsers. Chrome, Firefox and
# curl all have different escaping, that we try to handle as well as possible,
# even though no client escapes in a way that would allow perfect handling.

# For more information, consult ../tests/test_requests.py


def parse_content_header(value: str) -> typing.Tuple[str, Options]:
    """Split a content-type or content-disposition value into its parts.

    For example 'form-data; name=upload; filename="file.txt"' becomes
    ('form-data', {'name': 'upload', 'filename': 'file.txt'}).

    The main value and the option keys are lower-cased; option values keep
    their case. Backslash-escaped quotes matched by ``firefox_quote_escape``
    and literal ``%22`` sequences inside quoted values are returned as ``"``.

    Mostly identical to cgi.parse_header and werkzeug.parse_options_header
    but runs faster and handles special characters better.

    :param value: the raw header value
    :return: tuple of (main value, dict of options)
    """
    # Rewrite \" as %22 first so an escaped quote cannot end a quoted value.
    normalized = firefox_quote_escape.sub("%22", value)
    split_at = normalized.find(";")
    if split_at < 0:
        # No parameters at all: the whole string is the main value.
        return normalized.strip().lower(), {}

    options: Options = {}
    for match in parameter.finditer(normalized[split_at:]):
        key, bare, in_quotes = match.groups()
        # Only quoted values get %22 turned back into a quote character.
        options[key.lower()] = bare if bare else in_quotes.replace("%22", '"')
    return normalized[:split_at].strip().lower(), options
| @@ -4,7 +4,6 @@ import json | |||||||
| import sys | import sys | ||||||
| import warnings | import warnings | ||||||
|  |  | ||||||
| from cgi import parse_header |  | ||||||
| from collections import defaultdict, namedtuple | from collections import defaultdict, namedtuple | ||||||
| from http.cookies import SimpleCookie | from http.cookies import SimpleCookie | ||||||
| from urllib.parse import parse_qs, parse_qsl, unquote, urlunparse | from urllib.parse import parse_qs, parse_qsl, unquote, urlunparse | ||||||
| @@ -12,6 +11,7 @@ from urllib.parse import parse_qs, parse_qsl, unquote, urlunparse | |||||||
| from httptools import parse_url | from httptools import parse_url | ||||||
|  |  | ||||||
| from sanic.exceptions import InvalidUsage | from sanic.exceptions import InvalidUsage | ||||||
|  | from sanic.headers import parse_content_header | ||||||
| from sanic.log import error_logger, logger | from sanic.log import error_logger, logger | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -177,7 +177,7 @@ class Request(dict): | |||||||
|             content_type = self.headers.get( |             content_type = self.headers.get( | ||||||
|                 "Content-Type", DEFAULT_HTTP_CONTENT_TYPE |                 "Content-Type", DEFAULT_HTTP_CONTENT_TYPE | ||||||
|             ) |             ) | ||||||
|             content_type, parameters = parse_header(content_type) |             content_type, parameters = parse_content_header(content_type) | ||||||
|             try: |             try: | ||||||
|                 if content_type == "application/x-www-form-urlencoded": |                 if content_type == "application/x-www-form-urlencoded": | ||||||
|                     self.parsed_form = RequestParameters( |                     self.parsed_form = RequestParameters( | ||||||
| @@ -561,7 +561,7 @@ def parse_multipart_form(body, boundary): | |||||||
|  |  | ||||||
|             colon_index = form_line.index(":") |             colon_index = form_line.index(":") | ||||||
|             form_header_field = form_line[0:colon_index].lower() |             form_header_field = form_line[0:colon_index].lower() | ||||||
|             form_header_value, form_parameters = parse_header( |             form_header_value, form_parameters = parse_content_header( | ||||||
|                 form_line[colon_index + 2 :] |                 form_line[colon_index + 2 :] | ||||||
|             ) |             ) | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										57
									
								
								tests/test_headers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								tests/test_headers.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,57 @@ | |||||||
|  | import pytest | ||||||
|  |  | ||||||
|  | from sanic import headers | ||||||
|  |  | ||||||
|  |  | ||||||
@pytest.mark.parametrize(
    "header, expected",
    [
        ("text/plain", ("text/plain", {})),
        ("text/vnd.just.made.this.up ; ", ("text/vnd.just.made.this.up", {})),
        ("text/plain;charset=us-ascii", ("text/plain", {"charset": "us-ascii"})),
        ('text/plain ; charset="us-ascii"', ("text/plain", {"charset": "us-ascii"})),
        (
            'text/plain ; charset="us-ascii"; another=opt',
            ("text/plain", {"charset": "us-ascii", "another": "opt"}),
        ),
        (
            'attachment; filename="silly.txt"',
            ("attachment", {"filename": "silly.txt"}),
        ),
        (
            'attachment; filename="strange;name"',
            ("attachment", {"filename": "strange;name"}),
        ),
        (
            'attachment; filename="strange;name";size=123;',
            ("attachment", {"filename": "strange;name", "size": "123"}),
        ),
        (
            'form-data; name="files"; filename="fo\\"o;bar\\"',
            ('form-data', {'name': 'files', 'filename': 'fo"o;bar\\'}),
            # cgi.parse_header:
            # ('form-data', {'name': 'files', 'filename': 'fo"o;bar\\'})
            # werkzeug.parse_options_header:
            # ('form-data', {'name': 'files', 'filename': '"fo\\"o', 'bar\\"': None})
        ),
        # <input type=file name="foo";bar\"> with Unicode filename!
        (
            # Chrome:
            # Content-Disposition: form-data; name="foo%22;bar\"; filename="😀"
            'form-data; name="foo%22;bar\\"; filename="😀"',
            ('form-data', {'name': 'foo";bar\\', 'filename': '😀'}),
            # cgi: ('form-data', {'name': 'foo%22;bar"; filename="😀'})
            # werkzeug: ('form-data', {'name': 'foo%22;bar"; filename='})
        ),
        (
            # Firefox:
            # Content-Disposition: form-data; name="foo\";bar\"; filename="😀"
            'form-data; name="foo\\";bar\\"; filename="😀"',
            ('form-data', {'name': 'foo";bar\\', 'filename': '😀'}),
            # cgi: ('form-data', {'name': 'foo";bar"; filename="😀'})
            # werkzeug: ('form-data', {'name': 'foo";bar"; filename='})
        ),
    ],
)
def test_parse_headers(header, expected):
    """Check parse_content_header against each (header, expected) pair.

    The argument is named ``header`` rather than ``input`` so that it does
    not shadow the builtin of the same name.
    """
    assert headers.parse_content_header(header) == expected
		Reference in New Issue
	
	Block a user