From ed1c563d1fc610a9b4cc6dac4036c72e6c3b3916 Mon Sep 17 00:00:00 2001 From: Dirk Guijt Date: Thu, 1 Feb 2018 11:30:24 +0100 Subject: [PATCH 1/8] fixed bug in multipart/form-data parser Sanic automatically assumes that a form field is a file if it has a content-type header, even though the header is text/plain or application/json. This is a fix for it, I took into account the RFC7578 specification regarding the defaults. --- sanic/request.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sanic/request.py b/sanic/request.py index ecc41d13..98bb049f 100644 --- a/sanic/request.py +++ b/sanic/request.py @@ -284,7 +284,8 @@ def parse_multipart_form(body, boundary): form_parts = body.split(boundary) for form_part in form_parts[1:-1]: file_name = None - file_type = None + content_type = "text/plain" + content_charset = "utf-8" field_name = None line_index = 2 line_end_index = 0 @@ -304,19 +305,21 @@ def parse_multipart_form(body, boundary): if form_header_field == 'content-disposition': if 'filename' in form_parameters: file_name = form_parameters['filename'] - field_name = form_parameters.get('name') + field_name = form_parameters['name'] elif form_header_field == 'content-type': - file_type = form_header_value + content_type = form_header_value + if 'charset' in form_parameters: + content_charset = form_parameters['charset'] post_data = form_part[line_index:-4] - if file_name or file_type: - file = File(type=file_type, name=file_name, body=post_data) + if file_name: + file = File(type=content_type, name=file_name, body=post_data) if field_name in files: files[field_name].append(file) else: files[field_name] = [file] else: - value = post_data.decode('utf-8') + value = post_data.decode(content_charset) if field_name in fields: fields[field_name].append(value) else: From a76d8108fe519182e5972c702c94024ea3b2899f Mon Sep 17 00:00:00 2001 From: DirkGuijt <922322+DirkGuijt@users.noreply.github.com> Date: Thu, 1 Feb 2018 11:55:30 +0100 
Subject: [PATCH 2/8] small code style change changed double quotes to single quotes to match the coding style --- sanic/request.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sanic/request.py b/sanic/request.py index 98bb049f..908f8c7a 100644 --- a/sanic/request.py +++ b/sanic/request.py @@ -284,8 +284,8 @@ def parse_multipart_form(body, boundary): form_parts = body.split(boundary) for form_part in form_parts[1:-1]: file_name = None - content_type = "text/plain" - content_charset = "utf-8" + content_type = 'text/plain' + content_charset = 'utf-8' field_name = None line_index = 2 line_end_index = 0 From 788253cbe8e2da5d0b5756885f74b49b00195363 Mon Sep 17 00:00:00 2001 From: Dirk Guijt Date: Fri, 2 Feb 2018 00:55:51 +0100 Subject: [PATCH 3/8] changes based on discussion on PR #1109 --- sanic/request.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sanic/request.py b/sanic/request.py index 98bb049f..33844cd7 100644 --- a/sanic/request.py +++ b/sanic/request.py @@ -303,13 +303,11 @@ def parse_multipart_form(body, boundary): form_line[colon_index + 2:]) if form_header_field == 'content-disposition': - if 'filename' in form_parameters: - file_name = form_parameters['filename'] - field_name = form_parameters['name'] + file_name = form_parameters.get('filename') + field_name = form_parameters.get('name') elif form_header_field == 'content-type': content_type = form_header_value - if 'charset' in form_parameters: - content_charset = form_parameters['charset'] + content_charset = form_parameters.get('charset', 'utf-8') post_data = form_part[line_index:-4] if file_name: From 5c341a2b00c2ef2f7898bcc1e0783c0f373e0421 Mon Sep 17 00:00:00 2001 From: Dirk Guijt Date: Fri, 2 Feb 2018 09:43:42 +0100 Subject: [PATCH 4/8] made field name mandatory in multipart/form-data headers A field name in the Content-Disposition header is required by the multipart/form-data spec. 
If one field/part does not have it, it will be omitted from the request. When this happens, we log it to DEBUG. --- sanic/request.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/sanic/request.py b/sanic/request.py index b37f9f9d..0660337f 100644 --- a/sanic/request.py +++ b/sanic/request.py @@ -18,7 +18,7 @@ except ImportError: json_loads = json.loads from sanic.exceptions import InvalidUsage -from sanic.log import error_logger +from sanic.log import error_logger, logger DEFAULT_HTTP_CONTENT_TYPE = "application/octet-stream" @@ -309,18 +309,21 @@ def parse_multipart_form(body, boundary): content_type = form_header_value content_charset = form_parameters.get('charset', 'utf-8') - post_data = form_part[line_index:-4] - if file_name: - file = File(type=content_type, name=file_name, body=post_data) - if field_name in files: - files[field_name].append(file) + if field_name: + post_data = form_part[line_index:-4] + if file_name: + file = File(type=content_type, name=file_name, body=post_data) + if field_name in files: + files[field_name].append(file) + else: + files[field_name] = [file] else: - files[field_name] = [file] + value = post_data.decode(content_charset) + if field_name in fields: + fields[field_name].append(value) + else: + fields[field_name] = [value] else: - value = post_data.decode(content_charset) - if field_name in fields: - fields[field_name].append(value) - else: - fields[field_name] = [value] + logger.debug('Form-data field does not have a name parameter in the Content-Disposition header') return fields, files From 1eecffce9726ccaa1d5b67c80ae6c59e2711e182 Mon Sep 17 00:00:00 2001 From: Dirk Guijt Date: Fri, 2 Feb 2018 09:57:06 +0100 Subject: [PATCH 5/8] fixed minor flake8 style problem --- sanic/request.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sanic/request.py b/sanic/request.py index 0660337f..e330e085 100644 --- a/sanic/request.py +++ b/sanic/request.py @@ -324,6 +324,7 
@@ def parse_multipart_form(body, boundary): else: fields[field_name] = [value] else: - logger.debug('Form-data field does not have a name parameter in the Content-Disposition header') + logger.debug('Form-data field does not have a \'name\' parameter \ + in the Content-Disposition header') return fields, files From ddf2a604d1a9cfb30b89fdfb33f47ac56ba5e930 Mon Sep 17 00:00:00 2001 From: Dirk Guijt Date: Sat, 3 Feb 2018 03:07:07 +0100 Subject: [PATCH 6/8] changed 'file' variable to 'form_file' to prevent overwriting the reserved word --- sanic/request.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sanic/request.py b/sanic/request.py index e330e085..01863bd0 100644 --- a/sanic/request.py +++ b/sanic/request.py @@ -312,11 +312,11 @@ def parse_multipart_form(body, boundary): if field_name: post_data = form_part[line_index:-4] if file_name: - file = File(type=content_type, name=file_name, body=post_data) + form_file = File(type=content_type, name=file_name, body=post_data) if field_name in files: - files[field_name].append(file) + files[field_name].append(form_file) else: - files[field_name] = [file] + files[field_name] = [form_file] else: value = post_data.decode(content_charset) if field_name in fields: From 48d45f1ca4a682487e57689f172f888ac646cfc2 Mon Sep 17 00:00:00 2001 From: Dirk Guijt Date: Sat, 3 Feb 2018 03:14:04 +0100 Subject: [PATCH 7/8] sorry, style issue again --- sanic/request.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sanic/request.py b/sanic/request.py index 01863bd0..4b27d7e8 100644 --- a/sanic/request.py +++ b/sanic/request.py @@ -312,7 +312,8 @@ def parse_multipart_form(body, boundary): if field_name: post_data = form_part[line_index:-4] if file_name: - form_file = File(type=content_type, name=file_name, body=post_data) + form_file = \ + File(type=content_type, name=file_name, body=post_data) if field_name in files: files[field_name].append(form_file) else: From 
e083224df1ee5d320dd1d2148e1c37bdbf00b9eb Mon Sep 17 00:00:00 2001 From: Dirk Guijt Date: Wed, 7 Feb 2018 09:29:44 +0100 Subject: [PATCH 8/8] changed newline formatting --- sanic/request.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sanic/request.py b/sanic/request.py index 4b27d7e8..cd7071d7 100644 --- a/sanic/request.py +++ b/sanic/request.py @@ -312,8 +312,9 @@ def parse_multipart_form(body, boundary): if field_name: post_data = form_part[line_index:-4] if file_name: - form_file = \ - File(type=content_type, name=file_name, body=post_data) + form_file = File(type=content_type, + name=file_name, + body=post_data) if field_name in files: files[field_name].append(form_file) else: