From 4942769fbe9ff2d53f92a1ea8ce65e29937acbe3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kyle=20Bl=C3=B6m?= Date: Thu, 26 Jan 2017 18:37:16 -0800 Subject: [PATCH 1/6] Added Range request options for static files --- sanic/exceptions.py | 16 +++++++- sanic/response.py | 95 +++++++++++++++++++++++++++++++++++---------- sanic/sanic.py | 18 ++++----- sanic/static.py | 47 +++++++++++++++++----- 4 files changed, 137 insertions(+), 39 deletions(-) diff --git a/sanic/exceptions.py b/sanic/exceptions.py index 2596a97a..a98c9e14 100644 --- a/sanic/exceptions.py +++ b/sanic/exceptions.py @@ -104,6 +104,7 @@ INTERNAL_SERVER_ERROR_HTML = ''' class SanicException(Exception): def __init__(self, message, status_code=None): super().__init__(message) + if status_code is not None: self.status_code = status_code @@ -137,6 +138,17 @@ class PayloadTooLarge(SanicException): status_code = 413 +class ContentRangeError(SanicException): + status_code = 416 + + def __init__(self, message, content_range): + super().__init__(message) + self.headers = { + 'Content-Type': 'text/plain', + "Content-Range": "bytes */%s" % (content_range.total,) + } + + class Handler: handlers = None @@ -191,7 +203,9 @@ class Handler: if issubclass(type(exception), SanicException): return text( 'Error: {}'.format(exception), - status=getattr(exception, 'status_code', 500)) + status=getattr(exception, 'status_code', 500), + headers=getattr(exception, 'headers', dict()) + ) elif self.debug: html_output = self._render_traceback_html(exception, request) diff --git a/sanic/response.py b/sanic/response.py index c29a473e..dee0f5bc 100644 --- a/sanic/response.py +++ b/sanic/response.py @@ -97,21 +97,27 @@ class HTTPResponse: def output(self, version="1.1", keep_alive=False, keep_alive_timeout=None): # This is all returned in a kind-of funky way # We tried to make this as fast as possible in pure python - timeout_header = b'' if keep_alive and keep_alive_timeout: - timeout_header = b'Keep-Alive: timeout=%d\r\n' % 
keep_alive_timeout - + if 'Keep-Alive' not in self.headers: + self.headers['Keep-Alive'] = keep_alive_timeout + if 'Connection' not in self.headers: + if keep_alive: + self.headers['Connection'] = 'keep-alive' + else: + self.headers['Connection'] = 'close' + if 'Content-Length' not in self.headers: + self.headers['Content-Length'] = len(self.body) + if 'Content-Type' not in self.headers: + self.headers['Content-Type'] = self.content_type headers = b'' if self.headers: for name, value in self.headers.items(): try: - headers += ( - b'%b: %b\r\n' % (name.encode(), value.encode('utf-8'))) + headers += (b'%b: %b\r\n' % ( + name.encode(), value.encode('utf-8'))) except AttributeError: - headers += ( - b'%b: %b\r\n' % ( - str(name).encode(), str(value).encode('utf-8'))) - + headers += (b'%b: %b\r\n' % ( + str(name).encode(), str(value).encode('utf-8'))) # Try to pull from the common codes first # Speeds up response rate 6% over pulling from all status = COMMON_STATUS_CODES.get(self.status) @@ -119,18 +125,11 @@ class HTTPResponse: status = ALL_STATUS_CODES.get(self.status) return (b'HTTP/%b %d %b\r\n' - b'Content-Type: %b\r\n' - b'Content-Length: %d\r\n' - b'Connection: %b\r\n' - b'%b%b\r\n' + b'%b\r\n' b'%b') % ( version.encode(), self.status, status, - self.content_type.encode(), - len(self.body), - b'keep-alive' if keep_alive else b'close', - timeout_header, headers, self.body ) @@ -142,13 +141,62 @@ class HTTPResponse: return self._cookies +class ContentRangeHandler: + """ + This class is for parsing the request header + """ + __slots__ = ('start', 'end', 'size', 'total', 'headers') + + def __init__(self, request, stats): + self.start = self.size = 0 + self.end = None + self.headers = dict() + self.total = stats.st_size + range_header = request.headers.get('Range') + if range_header: + self.start, self.end = ContentRangeHandler.parse_range(range_header) + if self.start is not None and self.end is not None: + self.size = self.end - self.start + elif self.end is not 
None: + self.size = self.end + elif self.start is not None: + self.size = self.total - self.start + else: + self.size = self.total + self.headers['Content-Range'] = "bytes %s-%s/%s" % ( + self.start, self.end, self.total) + else: + self.size = self.total + + def __bool__(self): + return self.size > 0 + + @staticmethod + def parse_range(range_header): + unit, _, value = tuple(map(str.strip, range_header.partition('='))) + if unit != 'bytes': + return None + start_b, _, end_b = tuple(map(str.strip, value.partition('-'))) + try: + start = int(start_b) if start_b.strip() else None + end = int(end_b) if end_b.strip() else None + except ValueError: + return None + if end is not None: + if start is None: + if end != 0: + start = -end + end = None + return start, end + + def json(body, status=200, headers=None, **kwargs): """ Returns response object with body in json format. :param body: Response data to be serialized. :param status: Response code. :param headers: Custom Headers. - :param \**kwargs: Remaining arguments that are passed to the json encoder. + :param kwargs: Remaining arguments that are passed to the json encoder. """ return HTTPResponse(json_dumps(body, **kwargs), headers=headers, status=status, content_type="application/json") @@ -176,17 +224,24 @@ def html(body, status=200, headers=None): content_type="text/html; charset=utf-8") -async def file(location, mime_type=None, headers=None): +async def file(location, mime_type=None, headers=None, _range=None): """ Returns response object with file data. :param location: Location of file on system. :param mime_type: Specific mime_type. :param headers: Custom Headers. 
+ :param _range: """ filename = path.split(location)[-1] async with open_async(location, mode='rb') as _file: - out_stream = await _file.read() + if _range: + await _file.seek(_range.start) + out_stream = await _file.read(_range.size) + headers['Content-Range'] = 'bytes %s-%s/%s' % ( + _range.start, _range.end, _range.total) + else: + out_stream = await _file.read() mime_type = mime_type or guess_type(filename)[0] or 'text/plain' diff --git a/sanic/sanic.py b/sanic/sanic.py index cea09470..8b9dbc78 100644 --- a/sanic/sanic.py +++ b/sanic/sanic.py @@ -78,22 +78,22 @@ class Sanic: # Shorthand method decorators def get(self, uri, host=None): - return self.route(uri, methods=["GET"], host=host) + return self.route(uri, methods=frozenset({"GET"}), host=host) def post(self, uri, host=None): - return self.route(uri, methods=["POST"], host=host) + return self.route(uri, methods=frozenset({"POST"}), host=host) def put(self, uri, host=None): - return self.route(uri, methods=["PUT"], host=host) + return self.route(uri, methods=frozenset({"PUT"}), host=host) def head(self, uri, host=None): - return self.route(uri, methods=["HEAD"], host=host) + return self.route(uri, methods=frozenset({"HEAD"}), host=host) def options(self, uri, host=None): - return self.route(uri, methods=["OPTIONS"], host=host) + return self.route(uri, methods=frozenset({"OPTIONS"}), host=host) def patch(self, uri, host=None): - return self.route(uri, methods=["PATCH"], host=host) + return self.route(uri, methods=frozenset({"PATCH"}), host=host) def add_route(self, handler, uri, methods=None, host=None): """ @@ -117,7 +117,7 @@ class Sanic: """ Decorates a function to be registered as a handler for exceptions - :param \*exceptions: exceptions + :param exceptions: exceptions :return: decorated function """ @@ -152,13 +152,13 @@ class Sanic: # Static Files def static(self, uri, file_or_directory, pattern='.+', - use_modified_since=True): + use_modified_since=True, use_content_range=False): """ Registers a root 
to serve files from. The input can either be a file or a directory. See """ static_register(self, uri, file_or_directory, pattern, - use_modified_since) + use_modified_since, use_content_range) def blueprint(self, blueprint, **options): """ diff --git a/sanic/static.py b/sanic/static.py index 1d0bff0f..a3df5918 100644 --- a/sanic/static.py +++ b/sanic/static.py @@ -2,14 +2,16 @@ from aiofiles.os import stat from os import path from re import sub from time import strftime, gmtime +from mimetypes import guess_type from urllib.parse import unquote -from .exceptions import FileNotFound, InvalidUsage -from .response import file, HTTPResponse +from .exceptions import FileNotFound, InvalidUsage, ContentRangeError +from .response import file, HTTPResponse, ContentRangeHandler -def register(app, uri, file_or_directory, pattern, use_modified_since): - # TODO: Though sanic is not a file server, I feel like we should atleast +def register(app, uri, file_or_directory, pattern, + use_modified_since, use_content_range): + # TODO: Though sanic is not a file server, I feel like we should at least # make a good effort here. 
Modified-since is nice, but we could # also look into etags, expires, and caching """ @@ -23,8 +25,9 @@ def register(app, uri, file_or_directory, pattern, use_modified_since): :param use_modified_since: If true, send file modified time, and return not modified if the browser's matches the server's + :param use_content_range: If true, process header for range requests + and sends the file part that is requested """ - # If we're not trying to match a file directly, # serve from the folder if not path.isfile(file_or_directory): @@ -50,6 +53,7 @@ def register(app, uri, file_or_directory, pattern, use_modified_since): headers = {} # Check if the client has been sent this file before # and it has not been modified since + stats = None if use_modified_since: stats = await stat(file_path) modified_since = strftime('%a, %d %b %Y %H:%M:%S GMT', @@ -57,11 +61,36 @@ def register(app, uri, file_or_directory, pattern, use_modified_since): if request.headers.get('If-Modified-Since') == modified_since: return HTTPResponse(status=304) headers['Last-Modified'] = modified_since - - return await file(file_path, headers=headers) - except: + _range = None + if use_content_range: + if not stats: + stats = await stat(file_path) + headers['Accept-Ranges'] = 'bytes' + headers['Content-Length'] = str(stats.st_size) + if request.method != 'HEAD': + _range = ContentRangeHandler(request, stats) + # If the start byte is greater than the size + # of the entire file or if the end is + if _range.start >= _range.total or _range.end == 0: + raise ContentRangeError('Content-Range malformed', + _range) + if _range.start == 0 and _range.size == _range.total: + _range = None + else: + headers['Content-Length'] = str(_range.size) + for k, v in _range.headers.items(): + headers[k] = v + if request.method == 'HEAD': + return HTTPResponse( + headers=headers, + content_type=guess_type(file_path)[0] or 'text/plain') + else: + return await file(file_path, headers=headers, _range=_range) + except 
ContentRangeError: + raise + except Exception: raise FileNotFound('File not found', path=file_or_directory, relative_url=file_uri) - app.route(uri, methods=['GET'])(_handler) + app.route(uri, methods=['GET', 'HEAD'])(_handler) From abbb7cdaf07f7b4a75bc615a626be7611cc024c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kyle=20Bl=C3=B6m?= Date: Thu, 26 Jan 2017 18:37:51 -0800 Subject: [PATCH 2/6] PEP8 format changes --- tests/test_requests.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_requests.py b/tests/test_requests.py index b2ee8e78..9450630a 100644 --- a/tests/test_requests.py +++ b/tests/test_requests.py @@ -112,7 +112,8 @@ def test_query_string(): async def handler(request): return text('OK') - request, response = sanic_endpoint_test(app, params=[("test1", "1"), ("test2", "false"), ("test2", "true")]) + request, response = sanic_endpoint_test( + app, params=[("test1", "1"), ("test2", "false"), ("test2", "true")]) assert request.args.get('test1') == '1' assert request.args.get('test2') == 'false' @@ -150,7 +151,8 @@ def test_post_json(): payload = {'test': 'OK'} headers = {'content-type': 'application/json'} - request, response = sanic_endpoint_test(app, data=json_dumps(payload), headers=headers) + request, response = sanic_endpoint_test( + app, data=json_dumps(payload), headers=headers) assert request.json.get('test') == 'OK' assert response.text == 'OK' From 31ad850e37964332763fd98b2f3f071873ae82a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kyle=20Bl=C3=B6m?= Date: Thu, 26 Jan 2017 18:38:32 -0800 Subject: [PATCH 3/6] added Range request test cases --- tests/test_static.py | 56 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tests/test_static.py b/tests/test_static.py index 82b0d1f9..5f9d9e09 100644 --- a/tests/test_static.py +++ b/tests/test_static.py @@ -60,3 +60,59 @@ def test_static_url_decode_file(static_file_directory): request, response = sanic_endpoint_test(app, uri='/dir/decode 
me.txt') assert response.status == 200 assert response.body == decode_me_contents + + +def test_static_head_request(static_file_path, static_file_content): + app = Sanic('test_static') + app.static('/testing.file', static_file_path, use_content_range=True) + + request, response = sanic_endpoint_test( + app, uri='/testing.file', method='head') + assert response.status == 200 + assert 'Accept-Ranges' in response.headers + assert 'Content-Length' in response.headers + assert int(response.headers['Content-Length']) == len(static_file_content) + + +def test_static_content_range(static_file_path, static_file_content): + app = Sanic('test_static') + app.static('/testing.file', static_file_path, use_content_range=True) + + headers = { + 'Range': 'bytes=12-19' + } + request, response = sanic_endpoint_test( + app, uri='/testing.file', headers=headers) + assert response.status == 200 + assert 'Content-Length' in response.headers + assert 'Content-Range' in response.headers + assert int(response.headers['Content-Length']) == 19-12 + assert response.body == bytes(static_file_content)[12:19] + + +def test_static_content_range_empty(static_file_path, static_file_content): + app = Sanic('test_static') + app.static('/testing.file', static_file_path, use_content_range=True) + + request, response = sanic_endpoint_test(app, uri='/testing.file') + assert response.status == 200 + assert 'Content-Length' in response.headers + assert 'Content-Range' not in response.headers + assert int(response.headers['Content-Length']) == len(static_file_content) + assert response.body == bytes(static_file_content) + + +def test_static_content_range_error(static_file_path, static_file_content): + app = Sanic('test_static') + app.static('/testing.file', static_file_path, use_content_range=True) + + headers = { + 'Range': 'bytes=1-0' + } + request, response = sanic_endpoint_test( + app, uri='/testing.file', headers=headers) + assert response.status == 416 + assert 'Content-Length' in response.headers + 
assert 'Content-Range' in response.headers + assert response.headers['Content-Range'] == "bytes */%s" % ( + len(static_file_content),) From ee5e145e2d302770e5db06013ac4338de7930227 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kyle=20Bl=C3=B6m?= Date: Fri, 27 Jan 2017 08:00:41 -0800 Subject: [PATCH 4/6] fixed line too long notice --- sanic/response.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sanic/response.py b/sanic/response.py index dee0f5bc..b8ea5ee8 100644 --- a/sanic/response.py +++ b/sanic/response.py @@ -152,9 +152,9 @@ class ContentRangeHandler: self.end = None self.headers = dict() self.total = stats.st_size - range_header = request.headers.get('Range') - if range_header: - self.start, self.end = ContentRangeHandler.parse_range(range_header) + _range = request.headers.get('Range') + if _range: + self.start, self.end = ContentRangeHandler.parse_range(_range) if self.start is not None and self.end is not None: self.size = self.end - self.start elif self.end is not None: From 8619e50845f4d1e48124ffde7d31760bda0a314c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kyle=20Bl=C3=B6m?= Date: Sat, 28 Jan 2017 11:18:52 -0800 Subject: [PATCH 5/6] Changed output to use a default_header dictionary and a ChainMap to avoid unnecessary conditionals and simplified range parsing logic --- sanic/response.py | 82 +++++++++++++++++--------------------------- sanic/static.py | 2 +- tests/test_static.py | 2 +- 3 files changed, 34 insertions(+), 52 deletions(-) diff --git a/sanic/response.py b/sanic/response.py index b8ea5ee8..287eec26 100644 --- a/sanic/response.py +++ b/sanic/response.py @@ -1,6 +1,7 @@ from aiofiles import open as open_async from mimetypes import guess_type from os import path +from collections import ChainMap from ujson import dumps as json_dumps @@ -97,26 +98,24 @@ class HTTPResponse: def output(self, version="1.1", keep_alive=False, keep_alive_timeout=None): # This is all returned in a kind-of funky way # We tried to make this as fast as
possible in pure python - if keep_alive and keep_alive_timeout: - if 'Keep-Alive' not in self.headers: - self.headers['Keep-Alive'] = keep_alive_timeout - if 'Connection' not in self.headers: - if keep_alive: - self.headers['Connection'] = 'keep-alive' - else: - self.headers['Connection'] = 'close' - if 'Content-Length' not in self.headers: - self.headers['Content-Length'] = len(self.body) - if 'Content-Type' not in self.headers: - self.headers['Content-Type'] = self.content_type + default_header = dict() + if keep_alive: + if keep_alive_timeout: + default_header['Keep-Alive'] = keep_alive_timeout + default_header['Connection'] = 'keep-alive' + else: + default_header['Connection'] = 'close' + default_header['Content-Length'] = len(self.body) + default_header['Content-Type'] = self.content_type headers = b'' - if self.headers: - for name, value in self.headers.items(): - try: - headers += (b'%b: %b\r\n' % ( + for name, value in ChainMap(self.headers, default_header).items(): + try: + headers += ( + b'%b: %b\r\n' % ( name.encode(), value.encode('utf-8'))) - except AttributeError: - headers += (b'%b: %b\r\n' % ( + except AttributeError: + headers += ( + b'%b: %b\r\n' % ( str(name).encode(), str(value).encode('utf-8'))) # Try to pull from the common codes first # Speeds up response rate 6% over pulling from all @@ -148,46 +147,29 @@ class ContentRangeHandler: __slots__ = ('start', 'end', 'size', 'total', 'headers') def __init__(self, request, stats): - self.start = self.size = 0 + self.size = self.start = 0 self.end = None self.headers = dict() self.total = stats.st_size _range = request.headers.get('Range') - if _range: - self.start, self.end = ContentRangeHandler.parse_range(_range) - if self.start is not None and self.end is not None: - self.size = self.end - self.start - elif self.end is not None: - self.size = self.end - elif self.start is not None: - self.size = self.total - self.start - else: - self.size = self.total - self.headers['Content-Range'] = "bytes 
%s-%s/%s" % ( - self.start, self.end, self.total) - else: - self.size = self.total - - def __bool__(self): - return self.size > 0 - - @staticmethod - def parse_range(range_header): - unit, _, value = tuple(map(str.strip, range_header.partition('='))) + if _range is None: + return + unit, _, value = tuple(map(str.strip, _range.partition('='))) if unit != 'bytes': - return None + return start_b, _, end_b = tuple(map(str.strip, value.partition('-'))) try: - start = int(start_b) if start_b.strip() else None - end = int(end_b) if end_b.strip() else None + self.start = int(start_b) if start_b else 0 + self.end = int(end_b) if end_b else 0 except ValueError: - return None - if end is not None: - if start is None: - if end != 0: - start = -end - end = None - return start, end + self.start = self.end = 0 + return + self.size = self.end - self.start + self.headers['Content-Range'] = "bytes %s-%s/%s" % ( + self.start, self.end, self.total) + + def __bool__(self): + return self.size != 0 def json(body, status=200, headers=None, **kwargs): diff --git a/sanic/static.py b/sanic/static.py index a3df5918..e0255334 100644 --- a/sanic/static.py +++ b/sanic/static.py @@ -74,7 +74,7 @@ def register(app, uri, file_or_directory, pattern, if _range.start >= _range.total or _range.end == 0: raise ContentRangeError('Content-Range malformed', _range) - if _range.start == 0 and _range.size == _range.total: + if _range.start == 0 and _range.size == 0: _range = None else: headers['Content-Length'] = str(_range.size) diff --git a/tests/test_static.py b/tests/test_static.py index 5f9d9e09..601b0deb 100644 --- a/tests/test_static.py +++ b/tests/test_static.py @@ -74,7 +74,7 @@ def test_static_head_request(static_file_path, static_file_content): assert int(response.headers['Content-Length']) == len(static_file_content) -def test_static_content_range(static_file_path, static_file_content): +def test_static_content_range_correct(static_file_path, static_file_content): app = Sanic('test_static') 
app.static('/testing.file', static_file_path, use_content_range=True) From cedf1d0b0023b07e0ed97bce6e1c6e11a50257f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kyle=20Bl=C3=B6m?= Date: Mon, 30 Jan 2017 09:13:43 -0800 Subject: [PATCH 6/6] Added new tests, new request logic, and handler file Added new tests for alternate uses for alternate range request types. Changed error handling for new request logic that simplifies the integration logic. Moved the error handler and the content range handler to their own handler file to prevent circular imports. --- sanic/exceptions.py | 78 ++------------------------ sanic/handlers.py | 127 +++++++++++++++++++++++++++++++++++++++++++ sanic/response.py | 32 ----------- sanic/sanic.py | 4 +- sanic/static.py | 32 +++++------ tests/test_static.py | 39 ++++++++++++- 6 files changed, 188 insertions(+), 124 deletions(-) create mode 100644 sanic/handlers.py diff --git a/sanic/exceptions.py b/sanic/exceptions.py index a98c9e14..370882be 100644 --- a/sanic/exceptions.py +++ b/sanic/exceptions.py @@ -1,8 +1,3 @@ -from .response import text, html -from .log import log -from traceback import format_exc, extract_tb -import sys - TRACEBACK_STYLE = '''