From 4537544fde54bf3b08d56cdde375882b1e072202 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=2E=20K=C3=A4rkk=C3=A4inen?= Date: Wed, 4 Sep 2019 17:15:32 +0300 Subject: [PATCH 1/6] HTTP1 header formatting moved to headers.format_http1 and rewritten. - New implementation is one line of code and twice as fast as the old one. - Whole header block encoded to UTF-8 in one pass. - No longer supports custom encode method on header values. - Cookie objects now have __str__ in addition to encode, to work with this. --- sanic/cookies.py | 6 +++++- sanic/headers.py | 13 +++++++++++-- sanic/response.py | 16 ++-------------- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/sanic/cookies.py b/sanic/cookies.py index 19907945..ed672fba 100644 --- a/sanic/cookies.py +++ b/sanic/cookies.py @@ -130,6 +130,10 @@ class Cookie(dict): :return: Cookie encoded in a codec of choosing. :except: UnicodeEncodeError """ + return str(self).encode(encoding) + + def __str__(self): + """Format as a Set-Cookie header value.""" output = ["%s=%s" % (self.key, _quote(self.value))] for key, value in self.items(): if key == "max-age": @@ -147,4 +151,4 @@ class Cookie(dict): else: output.append("%s=%s" % (self._keys[key], value)) - return "; ".join(output).encode(encoding) + return "; ".join(output) diff --git a/sanic/headers.py b/sanic/headers.py index 6c9fa221..e1ac48b3 100644 --- a/sanic/headers.py +++ b/sanic/headers.py @@ -1,9 +1,9 @@ import re -from typing import Dict, Iterable, Optional, Tuple +from typing import Any, Dict, Iterable, Optional, Tuple from urllib.parse import unquote - +HeaderIterable = Iterable[Tuple[str, Any]] # Values convertible to str Options = Dict[str, str] # key=value fields in various headers OptionsIterable = Iterable[Tuple[str, str]] # May contain duplicate keys @@ -165,3 +165,12 @@ def parse_host(host: str) -> Tuple[Optional[str], Optional[int]]: return None, None host, port = m.groups() return host.lower(), port and int(port) + + +def format_http1(headers: 
HeaderIterable) -> bytes: + """Convert a headers iterable into HTTP/1 header format. + + - Outputs UTF-8 bytes where each header line ends with \\r\\n. + - Values are converted into strings if necessary. + """ + return "".join(f"{name}: {val}\r\n" for name, val in headers).encode() diff --git a/sanic/response.py b/sanic/response.py index 6f937c95..83eacd53 100644 --- a/sanic/response.py +++ b/sanic/response.py @@ -7,6 +7,7 @@ from aiofiles import open as open_async from sanic.compat import Header from sanic.cookies import CookieJar +from sanic.headers import format_http1 from sanic.helpers import STATUS_CODES, has_message_body, remove_entity_headers @@ -30,20 +31,7 @@ class BaseHTTPResponse: return str(data).encode() def _parse_headers(self): - headers = b"" - for name, value in self.headers.items(): - try: - headers += b"%b: %b\r\n" % ( - name.encode(), - value.encode("utf-8"), - ) - except AttributeError: - headers += b"%b: %b\r\n" % ( - str(name).encode(), - str(value).encode("utf-8"), - ) - - return headers + return format_http1(self.headers.items()) @property def cookies(self): From d248dbb72bb1b9df9ecb956d11e9c91bd015b5a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=2E=20K=C3=A4rkk=C3=A4inen?= Date: Wed, 4 Sep 2019 17:51:59 +0300 Subject: [PATCH 2/6] Linter --- sanic/headers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sanic/headers.py b/sanic/headers.py index e1ac48b3..ec3dc237 100644 --- a/sanic/headers.py +++ b/sanic/headers.py @@ -3,6 +3,7 @@ import re from typing import Any, Dict, Iterable, Optional, Tuple from urllib.parse import unquote + HeaderIterable = Iterable[Tuple[str, Any]] # Values convertible to str Options = Dict[str, str] # key=value fields in various headers OptionsIterable = Iterable[Tuple[str, str]] # May contain duplicate keys From 7dc683913f6da0c252d8a08b717fb23c6f578659 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=2E=20K=C3=A4rkk=C3=A4inen?= Date: Thu, 5 Sep 2019 11:43:23 +0300 Subject: [PATCH 3/6] format_http1_response --- 
sanic/headers.py | 20 ++++++++++++++++ sanic/response.py | 60 ++++++++++++----------------------------------- 2 files changed, 35 insertions(+), 45 deletions(-) diff --git a/sanic/headers.py b/sanic/headers.py index ec3dc237..cc9a0dcf 100644 --- a/sanic/headers.py +++ b/sanic/headers.py @@ -3,6 +3,8 @@ import re from typing import Any, Dict, Iterable, Optional, Tuple from urllib.parse import unquote +from sanic.helpers import STATUS_CODES + HeaderIterable = Iterable[Tuple[str, Any]] # Values convertible to str Options = Dict[str, str] # key=value fields in various headers @@ -175,3 +177,21 @@ def format_http1(headers: HeaderIterable) -> bytes: - Values are converted into strings if necessary. """ return "".join(f"{name}: {val}\r\n" for name, val in headers).encode() + + +def format_http1_response( + status: int, headers: HeaderIterable, body=b"" +) -> bytes: + """Format a full HTTP/1.1 response. + + - If `body` is included, content-length must be specified in headers. + """ + headers = format_http1(headers) + if status == 200: + return b"HTTP/1.1 200 OK\r\n%b\r\n%b" % (headers, body) + return b"HTTP/1.1 %d %b\r\n%b\r\n%b" % ( + status, + STATUS_CODES.get(status, b"UNKNOWN"), + headers, + body, + ) diff --git a/sanic/response.py b/sanic/response.py index 83eacd53..92362c3d 100644 --- a/sanic/response.py +++ b/sanic/response.py @@ -7,8 +7,8 @@ from aiofiles import open as open_async from sanic.compat import Header from sanic.cookies import CookieJar -from sanic.headers import format_http1 -from sanic.helpers import STATUS_CODES, has_message_body, remove_entity_headers +from sanic.headers import format_http1, format_http1_response +from sanic.helpers import has_message_body, remove_entity_headers try: @@ -104,33 +104,17 @@ class StreamingHTTPResponse(BaseHTTPResponse): def get_headers( self, version="1.1", keep_alive=False, keep_alive_timeout=None ): - # This is all returned in a kind-of funky way - # We tried to make this as fast as possible in pure python - 
timeout_header = b"" + if "Content-Type" not in self.headers: + self.headers["Content-Type"] = self.content_type + if keep_alive and keep_alive_timeout is not None: - timeout_header = b"Keep-Alive: %d\r\n" % keep_alive_timeout + self.headers["Keep-Alive"] = keep_alive_timeout if self.chunked and version == "1.1": self.headers["Transfer-Encoding"] = "chunked" self.headers.pop("Content-Length", None) - self.headers["Content-Type"] = self.headers.get( - "Content-Type", self.content_type - ) - headers = self._parse_headers() - - if self.status == 200: - status = b"OK" - else: - status = STATUS_CODES.get(self.status) - - return (b"HTTP/%b %d %b\r\n" b"%b" b"%b\r\n") % ( - version.encode(), - self.status, - status, - timeout_header, - headers, - ) + return format_http1_response(self.status, self.headers.items()) class HTTPResponse(BaseHTTPResponse): @@ -156,11 +140,8 @@ class HTTPResponse(BaseHTTPResponse): self._cookies = None def output(self, version="1.1", keep_alive=False, keep_alive_timeout=None): - # This is all returned in a kind-of funky way - # We tried to make this as fast as possible in pure python - timeout_header = b"" - if keep_alive and keep_alive_timeout is not None: - timeout_header = b"Keep-Alive: %d\r\n" % keep_alive_timeout + if "Content-Type" not in self.headers: + self.headers["Content-Type"] = self.content_type body = b"" if has_message_body(self.status): @@ -176,24 +157,13 @@ class HTTPResponse(BaseHTTPResponse): if self.status in (304, 412): self.headers = remove_entity_headers(self.headers) - headers = self._parse_headers() + if keep_alive and keep_alive_timeout is not None: + self.headers["Connection"] = "keep-alive" + self.headers["Keep-Alive"] = keep_alive_timeout + elif not keep_alive: + self.headers["Connection"] = "close" - if self.status == 200: - status = b"OK" - else: - status = STATUS_CODES.get(self.status, b"UNKNOWN RESPONSE") - - return ( - b"HTTP/%b %d %b\r\n" b"Connection: %b\r\n" b"%b" b"%b\r\n" b"%b" - ) % ( - version.encode(), - 
self.status, - status, - b"keep-alive" if keep_alive else b"close", - timeout_header, - headers, - body, - ) + return format_http1_response(self.status, self.headers.items(), body) @property def cookies(self): From 82bc2cfba987337bccc5e7206c2ffd708cff6641 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=2E=20K=C3=A4rkk=C3=A4inen?= Date: Sun, 8 Sep 2019 11:49:51 +0300 Subject: [PATCH 4/6] Replace encode_body with faster implementation based on f-string. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Benchmarks: def encode_body(data): try: # Try to encode it regularly return data.encode() except AttributeError: # Convert it to a str if you can't return str(data).encode() def encode_body2(data): return f"{data}".encode() def encode_body3(data): return str(data).encode() data_str, data_int = "foo", 123 %timeit encode_body(data_int) 928 ns ± 2.96 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each) %timeit encode_body2(data_int) 280 ns ± 2.09 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each) %timeit encode_body3(data_int) 387 ns ± 1.7 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each) %timeit encode_body(data_str) 202 ns ± 1.9 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each) %timeit encode_body2(data_str) 197 ns ± 0.507 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each) %timeit encode_body3(data_str) 313 ns ± 1.28 ns per loop (mean ± std. dev. 
of 7 runs, 1000000 loops each) --- sanic/response.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sanic/response.py b/sanic/response.py index 92362c3d..95faa7e6 100644 --- a/sanic/response.py +++ b/sanic/response.py @@ -23,12 +23,7 @@ except BaseException: class BaseHTTPResponse: def _encode_body(self, data): - try: - # Try to encode it regularly - return data.encode() - except AttributeError: - # Convert it to a str if you can't - return str(data).encode() + return f"{data}".encode() def _parse_headers(self): return format_http1(self.headers.items()) From fff519fae475ec56e3872a116ab3b24850a37b7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=2E=20K=C3=A4rkk=C3=A4inen?= Date: Sun, 8 Sep 2019 11:58:31 +0300 Subject: [PATCH 5/6] Wtf linter --- sanic/response.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sanic/response.py b/sanic/response.py index 95faa7e6..5f42fc1e 100644 --- a/sanic/response.py +++ b/sanic/response.py @@ -173,7 +173,7 @@ def json( headers=None, content_type="application/json", dumps=json_dumps, - **kwargs + **kwargs, ): """ Returns response object with body in json format. From 1acd1d7d88d55b6580f7d31bdf3af69da6e41dac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=2E=20K=C3=A4rkk=C3=A4inen?= Date: Tue, 10 Sep 2019 14:58:22 +0300 Subject: [PATCH 6/6] Content-type fixes. 
--- sanic/response.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sanic/response.py b/sanic/response.py index 5f42fc1e..031edace 100644 --- a/sanic/response.py +++ b/sanic/response.py @@ -51,7 +51,7 @@ class StreamingHTTPResponse(BaseHTTPResponse): streaming_fn, status=200, headers=None, - content_type="text/plain", + content_type="text/plain; charset=utf-8", chunked=True, ): self.content_type = content_type @@ -120,7 +120,7 @@ class HTTPResponse(BaseHTTPResponse): body=None, status=200, headers=None, - content_type="text/plain", + content_type="text/plain; charset=utf-8", body_bytes=b"", ): self.content_type = content_type @@ -145,10 +145,6 @@ class HTTPResponse(BaseHTTPResponse): "Content-Length", len(self.body) ) - self.headers["Content-Type"] = self.headers.get( - "Content-Type", self.content_type - ) - if self.status in (304, 412): self.headers = remove_entity_headers(self.headers)