From fdcba79f45f5b953459787c44321bc82dd9c9344 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=2E=20K=C3=A4rkk=C3=A4inen?=
Date: Mon, 29 Jul 2019 18:20:03 +0300
Subject: [PATCH] PEP 594 has cgi module scheduled for deprecation in Python 3.8.

Reimplement cgi.parse_header in Sanic. The new implementation is much
faster than either cgi.parse_header or the equivalent
werkzeug.parse_options_header, and unlike the two, also handles quoted
values with semicolons or \" in them.
---
Reviewer notes:
- The quoted-string regex now tries the \" escape before [^"]; with the
  previous order the backslash was consumed by [^"] and the escaped quote
  terminated the value early.
- The parameter pattern is built from a raw f-string so that \s is not
  treated as a (deprecated, invalid) string escape.
- This patch text was re-flowed from a whitespace-collapsed copy. The
  indentation and blank context lines of the sanic/request.py hunks are
  reconstructed from Python syntax and the hunk line counts; verify them
  against the tree before applying. The blob id on the sanic/headers.py
  index line predates the regex fix and is stale.

 sanic/headers.py | 26 ++++++++++++++++++++++++++
 sanic/request.py |  7 +++----
 2 files changed, 29 insertions(+), 4 deletions(-)
 create mode 100644 sanic/headers.py

diff --git a/sanic/headers.py b/sanic/headers.py
new file mode 100644
index 00000000..fccef120
--- /dev/null
+++ b/sanic/headers.py
@@ -0,0 +1,26 @@
+import re
+
+# Parameter names and unquoted values are HTTP tokens; quoted values may
+# contain \" escapes. The \" alternative must be tried first: [^"] would
+# otherwise consume the backslash and let the escaped quote end the value.
+token, quoted = r"([\w!#$%&'*+\-.^_`|~]+)", r'"((?:\\"|[^"])*)"'
+parameter = re.compile(rf';\s*{token}=(?:{token}|{quoted})', re.ASCII)
+
+
+def parse_options_header(value: str):
+    """Parse HTTP header values of Content-Type format.
+
+    Return the stripped, lower-cased main value and a dict of its
+    parameters. Parameter names are lower-cased; backslash-escaped
+    double quotes inside quoted values are unescaped.
+    """
+    pos = value.find(';')
+    if pos == -1:
+        options = {}
+    else:
+        options = {
+            m.group(1).lower(): m.group(2) or m.group(3).replace(r'\"', '"')
+            for m in parameter.finditer(value[pos:])
+        }
+        value = value[:pos]
+    return value.strip().lower(), options
diff --git a/sanic/request.py b/sanic/request.py
index 285333c7..9191f8fa 100644
--- a/sanic/request.py
+++ b/sanic/request.py
@@ -4,7 +4,6 @@ import json
 import sys
 import warnings
 
-from cgi import parse_header
 from collections import defaultdict, namedtuple
 from http.cookies import SimpleCookie
 from urllib.parse import parse_qs, parse_qsl, unquote, urlunparse
@@ -13,7 +12,7 @@ from httptools import parse_url
 
 from sanic.exceptions import InvalidUsage
 from sanic.log import error_logger, logger
-
+from sanic.headers import parse_options_header
 
 try:
     from ujson import loads as json_loads
@@ -177,7 +176,7 @@ class Request(dict):
             content_type = self.headers.get(
                 "Content-Type", DEFAULT_HTTP_CONTENT_TYPE
             )
-            content_type, parameters = parse_header(content_type)
+            content_type, parameters = parse_options_header(content_type)
             try:
                 if content_type == "application/x-www-form-urlencoded":
                     self.parsed_form = RequestParameters(
@@ -551,7 +550,7 @@ def parse_multipart_form(body, boundary):
 
             colon_index = form_line.index(":")
             form_header_field = form_line[0:colon_index].lower()
-            form_header_value, form_parameters = parse_header(
+            form_header_value, form_parameters = parse_options_header(
                 form_line[colon_index + 2 :]
             )
 