More robust response datatype handling (#1674)

* HTTP1 header formatting moved to headers.format_headers and rewritten.

- New implementation is one line of code and twice as fast as the old one.
- Whole header block encoded to UTF-8 in one pass.
- No longer supports custom encode method on header values.
- Cookie objects now have __str__ in addition to encode, to work with this.

* Linter

* format_http1_response

* Replace encode_body with faster implementation based on f-string.

Benchmarks:

def encode_body(data):
    try:
        # Try to encode it regularly
        return data.encode()
    except AttributeError:
        # Convert it to a str if you can't
        return str(data).encode()

def encode_body2(data):
    return f"{data}".encode()

def encode_body3(data):
    return str(data).encode()

data_str, data_int = "foo", 123

%timeit encode_body(data_int)
928 ns ± 2.96 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)

%timeit encode_body2(data_int)
280 ns ± 2.09 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)

%timeit encode_body3(data_int)
387 ns ± 1.7 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)

%timeit encode_body(data_str)
202 ns ± 1.9 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)

%timeit encode_body2(data_str)
197 ns ± 0.507 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)

%timeit encode_body3(data_str)
313 ns ± 1.28 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)

* Wtf linter

* Content-type fixes.

* Body encoding sanitation, first pass.
- body/data type autodetection fixed.
- do not repr(body).encode() bytes-ish values.
- support __html__ and _repr_html_ in sanic.response.html().

* <any type>-to-str response autoconversion limited to sanic.response.text() only.

* Workaround MyPy issue.

* Add an empty line to make isort happy.

* Add html test for __html__ and _repr_html_.

* Remove StreamingHTTPResponse.get_headers helper function.

* Add back HTTPResponse Keep-Alive removed by earlier merge or something.

* Revert "Remove StreamingHTTPResponse.get_headers helper function."

Tests depend on this otherwise useless function.

This reverts commit 9651e6ae01.

* Add deprecation warnings instead of asserts for a wrong HTTP version and for non-string response.text.

* Add back missing import.

* Avoid duplicate response header tweaking code.

* Linter errors
This commit is contained in:
L. Kärkkäinen 2020-01-20 18:34:32 +02:00 committed by Stephen Sadowski
parent e908ca8cef
commit bffdb3b5c2
4 changed files with 119 additions and 66 deletions

View File

@ -3,6 +3,8 @@ import re
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
from urllib.parse import unquote from urllib.parse import unquote
from sanic.helpers import STATUS_CODES
HeaderIterable = Iterable[Tuple[str, Any]] # Values convertible to str HeaderIterable = Iterable[Tuple[str, Any]] # Values convertible to str
Options = Dict[str, Union[int, str]] # key=value fields in various headers Options = Dict[str, Union[int, str]] # key=value fields in various headers
@ -180,3 +182,19 @@ def format_http1(headers: HeaderIterable) -> bytes:
- Values are converted into strings if necessary. - Values are converted into strings if necessary.
""" """
return "".join(f"{name}: {val}\r\n" for name, val in headers).encode() return "".join(f"{name}: {val}\r\n" for name, val in headers).encode()
def format_http1_response(
    status: int, headers: HeaderIterable, body=b""
) -> bytes:
    """Build a complete HTTP/1.1 response: status line, headers and body.

    The body is appended verbatim after the blank line that ends the header
    block. When a body is supplied, the caller is responsible for including
    a matching content-length entry in `headers`.
    """
    # Serialize the headers first, then look up the reason phrase for the
    # status code (falling back to b"UNKNOWN" for codes not in the table).
    header_block = format_http1(headers)
    reason = STATUS_CODES.get(status, b"UNKNOWN")
    return b"HTTP/1.1 %d %b\r\n%b\r\n%b" % (status, reason, header_block, body)

View File

@ -1,3 +1,5 @@
import warnings
from functools import partial from functools import partial
from mimetypes import guess_type from mimetypes import guess_type
from os import path from os import path
@ -5,8 +7,8 @@ from urllib.parse import quote_plus
from sanic.compat import Header, open_async from sanic.compat import Header, open_async
from sanic.cookies import CookieJar from sanic.cookies import CookieJar
from sanic.headers import format_http1 from sanic.headers import format_http1, format_http1_response
from sanic.helpers import STATUS_CODES, has_message_body, remove_entity_headers from sanic.helpers import has_message_body, remove_entity_headers
try: try:
@ -21,12 +23,7 @@ except ImportError:
class BaseHTTPResponse: class BaseHTTPResponse:
def _encode_body(self, data): def _encode_body(self, data):
try: return data.encode() if hasattr(data, "encode") else data
# Try to encode it regularly
return data.encode()
except AttributeError:
# Convert it to a str if you can't
return str(data).encode()
def _parse_headers(self): def _parse_headers(self):
return format_http1(self.headers.items()) return format_http1(self.headers.items())
@ -37,6 +34,37 @@ class BaseHTTPResponse:
self._cookies = CookieJar(self.headers) self._cookies = CookieJar(self.headers)
return self._cookies return self._cookies
def get_headers(
    self,
    version="1.1",
    keep_alive=False,
    keep_alive_timeout=None,
    body=b"",
):
    """Finalize ``self.headers`` and format the full HTTP/1.1 response.

    .. deprecated:: 20.3:
       This function is not public API and will be removed.

    :param version: HTTP version requested by the caller. Any value other
        than "1.1" only triggers a DeprecationWarning.
    :param keep_alive: If true, ``Connection: keep-alive`` is set;
        otherwise ``Connection: close`` is set.
    :param keep_alive_timeout: Value for the ``Keep-Alive`` header. Only
        used when ``keep_alive`` is true and the value is not None.
    :param body: Payload handed to ``format_http1_response`` together with
        the status and the finalized headers.
    :return: The result of ``format_http1_response``.
    """
    if version != "1.1":
        # The message must be an f-string, otherwise the literal text
        # "{version}" is shown instead of the offending value.
        warnings.warn(
            f"Only HTTP/1.1 is currently supported (got {version})",
            DeprecationWarning,
        )

    # self.headers get priority over content_type
    if self.content_type and "Content-Type" not in self.headers:
        self.headers["Content-Type"] = self.content_type

    if keep_alive:
        self.headers["Connection"] = "keep-alive"
        if keep_alive_timeout is not None:
            self.headers["Keep-Alive"] = keep_alive_timeout
    else:
        self.headers["Connection"] = "close"

    # For 304 and 412 the headers are passed through remove_entity_headers
    # before the response is formatted.
    if self.status in (304, 412):
        self.headers = remove_entity_headers(self.headers)

    return format_http1_response(self.status, self.headers.items(), body)
class StreamingHTTPResponse(BaseHTTPResponse): class StreamingHTTPResponse(BaseHTTPResponse):
__slots__ = ( __slots__ = (
@ -54,7 +82,7 @@ class StreamingHTTPResponse(BaseHTTPResponse):
streaming_fn, streaming_fn,
status=200, status=200,
headers=None, headers=None,
content_type="text/plain", content_type="text/plain; charset=utf-8",
chunked=True, chunked=True,
): ):
self.content_type = content_type self.content_type = content_type
@ -67,10 +95,9 @@ class StreamingHTTPResponse(BaseHTTPResponse):
async def write(self, data): async def write(self, data):
"""Writes a chunk of data to the streaming response. """Writes a chunk of data to the streaming response.
:param data: bytes-ish data to be written. :param data: str or bytes-ish data to be written.
""" """
if type(data) != bytes: data = self._encode_body(data)
data = self._encode_body(data)
if self.chunked: if self.chunked:
await self.protocol.push_data(b"%x\r\n%b\r\n" % (len(data), data)) await self.protocol.push_data(b"%x\r\n%b\r\n" % (len(data), data))
@ -102,28 +129,11 @@ class StreamingHTTPResponse(BaseHTTPResponse):
def get_headers( def get_headers(
self, version="1.1", keep_alive=False, keep_alive_timeout=None self, version="1.1", keep_alive=False, keep_alive_timeout=None
): ):
# This is all returned in a kind-of funky way
# We tried to make this as fast as possible in pure python
timeout_header = b""
if keep_alive and keep_alive_timeout is not None:
timeout_header = b"Keep-Alive: %d\r\n" % keep_alive_timeout
if self.chunked and version == "1.1": if self.chunked and version == "1.1":
self.headers["Transfer-Encoding"] = "chunked" self.headers["Transfer-Encoding"] = "chunked"
self.headers.pop("Content-Length", None) self.headers.pop("Content-Length", None)
self.headers["Content-Type"] = self.headers.get(
"Content-Type", self.content_type
)
headers = self._parse_headers() return super().get_headers(version, keep_alive, keep_alive_timeout)
status = STATUS_CODES.get(self.status, b"UNKNOWN RESPONSE")
return (b"HTTP/%b %d %b\r\n" b"%b" b"%b\r\n") % (
version.encode(),
self.status,
status,
timeout_header,
headers,
)
class HTTPResponse(BaseHTTPResponse): class HTTPResponse(BaseHTTPResponse):
@ -138,23 +148,12 @@ class HTTPResponse(BaseHTTPResponse):
body_bytes=b"", body_bytes=b"",
): ):
self.content_type = content_type self.content_type = content_type
self.body = body_bytes if body is None else self._encode_body(body)
if body is not None:
self.body = self._encode_body(body)
else:
self.body = body_bytes
self.status = status self.status = status
self.headers = Header(headers or {}) self.headers = Header(headers or {})
self._cookies = None self._cookies = None
def output(self, version="1.1", keep_alive=False, keep_alive_timeout=None): def output(self, version="1.1", keep_alive=False, keep_alive_timeout=None):
# This is all returned in a kind-of funky way
# We tried to make this as fast as possible in pure python
timeout_header = b""
if keep_alive and keep_alive_timeout is not None:
timeout_header = b"Keep-Alive: %d\r\n" % keep_alive_timeout
body = b"" body = b""
if has_message_body(self.status): if has_message_body(self.status):
body = self.body body = self.body
@ -162,26 +161,7 @@ class HTTPResponse(BaseHTTPResponse):
"Content-Length", len(self.body) "Content-Length", len(self.body)
) )
# self.headers get priority over content_type return self.get_headers(version, keep_alive, keep_alive_timeout, body)
if self.content_type and "Content-Type" not in self.headers:
self.headers["Content-Type"] = self.content_type
if self.status in (304, 412):
self.headers = remove_entity_headers(self.headers)
headers = self._parse_headers()
status = STATUS_CODES.get(self.status, b"UNKNOWN RESPONSE")
return (
b"HTTP/%b %d %b\r\n" b"Connection: %b\r\n" b"%b" b"%b\r\n" b"%b"
) % (
version.encode(),
self.status,
status,
b"keep-alive" if keep_alive else b"close",
timeout_header,
headers,
body,
)
@property @property
def cookies(self): def cookies(self):
@ -206,7 +186,7 @@ def json(
headers=None, headers=None,
content_type="application/json", content_type="application/json",
dumps=json_dumps, dumps=json_dumps,
**kwargs **kwargs,
): ):
""" """
Returns response object with body in json format. Returns response object with body in json format.
@ -235,6 +215,21 @@ def text(
:param headers: Custom Headers. :param headers: Custom Headers.
:param content_type: the content type (string) of the response :param content_type: the content type (string) of the response
""" """
if not isinstance(body, str):
warnings.warn(
"Types other than str will be deprecated in future versions for"
f" response.text, got type {type(body).__name__})",
DeprecationWarning,
)
# Type conversions are deprecated and quite b0rked but still supported for
# text() until applications get fixed. This try-except should be removed.
try:
# Avoid repr(body).encode() b0rkage for body that is already encoded.
# memoryview used only to test bytes-ishness.
with memoryview(body):
pass
except TypeError:
body = f"{body}" # no-op if body is already str
return HTTPResponse( return HTTPResponse(
body, status=status, headers=headers, content_type=content_type body, status=status, headers=headers, content_type=content_type
) )
@ -263,10 +258,14 @@ def html(body, status=200, headers=None):
""" """
Returns response object with body in html format. Returns response object with body in html format.
:param body: Response data to be encoded. :param body: str or bytes-ish, or an object with __html__ or _repr_html_.
:param status: Response code. :param status: Response code.
:param headers: Custom Headers. :param headers: Custom Headers.
""" """
if hasattr(body, "__html__"):
body = body.__html__()
elif hasattr(body, "_repr_html_"):
body = body._repr_html_()
return HTTPResponse( return HTTPResponse(
body, body,
status=status, status=status,

View File

@ -11,7 +11,7 @@ import pytest
from sanic import Blueprint, Sanic from sanic import Blueprint, Sanic
from sanic.exceptions import ServerError from sanic.exceptions import ServerError
from sanic.request import DEFAULT_HTTP_CONTENT_TYPE, Request, RequestParameters from sanic.request import DEFAULT_HTTP_CONTENT_TYPE, Request, RequestParameters
from sanic.response import json, text from sanic.response import html, json, text
from sanic.testing import ASGI_HOST, HOST, PORT from sanic.testing import ASGI_HOST, HOST, PORT
@ -72,6 +72,41 @@ def test_text(app):
assert response.text == "Hello" assert response.text == "Hello"
def test_html(app):
    """Check html() with a plain string and with __html__/_repr_html_ objects."""

    # Defines both hooks; the assertions below expect __html__ to be used.
    class Foo:
        def __html__(self):
            return "<h1>Foo</h1>"

        def _repr_html_(self):
            return "<h1>Foo object repr</h1>"

    # Defines only _repr_html_, so that is the hook expected to be used.
    class Bar:
        def _repr_html_(self):
            return "<h1>Bar object repr</h1>"

    @app.route("/")
    async def handler(request):
        return html("<h1>Hello</h1>")

    @app.route("/foo")
    async def handler(request):
        return html(Foo())

    @app.route("/bar")
    async def handler(request):
        return html(Bar())

    # Plain string body: content type and text are both checked.
    request, response = app.test_client.get("/")
    assert response.content_type == "text/html; charset=utf-8"
    assert response.text == "<h1>Hello</h1>"

    # Object with __html__ and _repr_html_: the __html__ result is expected.
    request, response = app.test_client.get("/foo")
    assert response.text == "<h1>Foo</h1>"

    # Object with only _repr_html_: its result is expected.
    request, response = app.test_client.get("/bar")
    assert response.text == "<h1>Bar object repr</h1>"
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_text_asgi(app): async def test_text_asgi(app):
@app.route("/") @app.route("/")

View File

@ -20,6 +20,7 @@ from sanic.response import (
json, json,
raw, raw,
stream, stream,
text,
) )
from sanic.response import empty from sanic.response import empty
from sanic.server import HttpProtocol from sanic.server import HttpProtocol
@ -35,7 +36,7 @@ def test_response_body_not_a_string(app):
@app.route("/hello") @app.route("/hello")
async def hello_route(request): async def hello_route(request):
return HTTPResponse(body=random_num) return text(random_num)
request, response = app.test_client.get("/hello") request, response = app.test_client.get("/hello")
assert response.text == str(random_num) assert response.text == str(random_num)