From 372957856e85d1d41149d96c55bcea102b52b700 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=2E=20K=C3=A4rkk=C3=A4inen?=
 <tronic@users.noreply.github.com>
Date: Fri, 23 Aug 2019 11:50:41 +0300
Subject: [PATCH] Do not support quoted-pair escapes.

- Improved documentation and renamed parse_options_header to
  parse_content_header, as it only seems to apply to content-type and
  content-disposition headers.
---
 sanic/headers.py | 21 +++++++++++++++++----
 sanic/request.py |  6 +++---
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/sanic/headers.py b/sanic/headers.py
index d623b171..5e3a9e54 100644
--- a/sanic/headers.py
+++ b/sanic/headers.py
@@ -1,19 +1,32 @@
 import re
 import typing
 
+Options = typing.Dict[str, str]  # key=value fields in various headers
 
-token, quoted = r"([\w!#$%&'*+\-.^_`|~]+)", r'"((?:[^"]|\\")*)"'
+token, quoted = r"([\w!#$%&'*+\-.^_`|~]+)", r'"([^"]*)"'
 parameter = re.compile(fr";\s*{token}=(?:{token}|{quoted})", re.ASCII)
 
+# Note: this intentionally leaves out the quoted-pair escape sequence specified
+# in RFCs because browsers escape quotes as %22 and do not escape backslashes.
+# In particular, a file upload named foo"bar\ is sent as filename="foo%22bar\"
+# by all browsers; quoted-pair handling would misread the final \" as escaped.
 
-def parse_options_header(value: str) -> typing.Tuple[str, dict]:
-    """Parse HTTP header values of Content-Type format."""
+
+def parse_content_header(value: str) -> typing.Tuple[str, Options]:
+    """Parse content-type and content-disposition header values.
+
+    E.g. 'form-data; name=upload; filename=\"file.txt\"' to
+    ('form-data', {'name': 'upload', 'filename': 'file.txt'})
+
+    Mostly identical to cgi.parse_header and werkzeug.parse_options_header
+    but runs faster. Does NOT unescape anything: values are returned verbatim.
+    """
     pos = value.find(";")
     if pos == -1:
         options = {}
     else:
         options = {
-            m.group(1).lower(): m.group(2) or m.group(3).replace('\\"', '"')
+            m.group(1).lower(): m.group(2) or m.group(3)
             for m in parameter.finditer(value[pos:])
         }
         value = value[:pos]
diff --git a/sanic/request.py b/sanic/request.py
index 5fd50353..c3c44729 100644
--- a/sanic/request.py
+++ b/sanic/request.py
@@ -11,7 +11,7 @@ from urllib.parse import parse_qs, parse_qsl, unquote, urlunparse
 from httptools import parse_url
 
 from sanic.exceptions import InvalidUsage
-from sanic.headers import parse_options_header
+from sanic.headers import parse_content_header
 from sanic.log import error_logger, logger
 
 
@@ -177,7 +177,7 @@ class Request(dict):
             content_type = self.headers.get(
                 "Content-Type", DEFAULT_HTTP_CONTENT_TYPE
             )
-            content_type, parameters = parse_options_header(content_type)
+            content_type, parameters = parse_content_header(content_type)
             try:
                 if content_type == "application/x-www-form-urlencoded":
                     self.parsed_form = RequestParameters(
@@ -551,7 +551,7 @@ def parse_multipart_form(body, boundary):
 
             colon_index = form_line.index(":")
             form_header_field = form_line[0:colon_index].lower()
-            form_header_value, form_parameters = parse_options_header(
+            form_header_value, form_parameters = parse_content_header(
                 form_line[colon_index + 2 :]
             )