From dfc0704831d8c6c41062adfbac742527ccec0cc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=2E=20K=C3=A4rkk=C3=A4inen?= <98187+Tronic@users.noreply.github.com> Date: Sun, 26 Feb 2023 06:25:10 +0000 Subject: [PATCH] Error page rendering format selection (#2668) Co-authored-by: Adam Hopkins Co-authored-by: L. Karkkainen --- sanic/errorpages.py | 165 ++++++++++++++++----------------------- sanic/headers.py | 1 - sanic/request.py | 9 ++- tests/test_errorpages.py | 117 ++++++++++++++++++++++++--- tests/test_request.py | 34 +++++--- 5 files changed, 204 insertions(+), 122 deletions(-) diff --git a/sanic/errorpages.py b/sanic/errorpages.py index c20896db..77b1a3b7 100644 --- a/sanic/errorpages.py +++ b/sanic/errorpages.py @@ -22,6 +22,7 @@ from traceback import extract_tb from sanic.exceptions import BadRequest, SanicException from sanic.helpers import STATUS_CODES +from sanic.log import deprecation, logger from sanic.response import html, json, text @@ -42,6 +43,7 @@ FALLBACK_TEXT = ( "cannot complete your request." ) FALLBACK_STATUS = 500 +JSON = "application/json" class BaseRenderer: @@ -390,21 +392,18 @@ def escape(text): return f"{text}".replace("&", "&").replace("<", "<") -RENDERERS_BY_CONFIG = { - "html": HTMLRenderer, - "json": JSONRenderer, - "text": TextRenderer, +MIME_BY_CONFIG = { + "text": "text/plain", + "json": "application/json", + "html": "text/html", } - +CONFIG_BY_MIME = {v: k for k, v in MIME_BY_CONFIG.items()} RENDERERS_BY_CONTENT_TYPE = { "text/plain": TextRenderer, "application/json": JSONRenderer, "multipart/form-data": HTMLRenderer, "text/html": HTMLRenderer, } -CONTENT_TYPE_BY_RENDERERS = { - v: k for k, v in RENDERERS_BY_CONTENT_TYPE.items() -} # Handler source code is checked for which response types it returns with the # route error_format="auto" (default) to determine which format to use. @@ -420,7 +419,7 @@ RESPONSE_MAPPING = { def check_error_format(format): - if format not in RENDERERS_BY_CONFIG and format != "auto": + if format not in MIME_BY_CONFIG and format != "auto": raise SanicException(f"Unknown format: {format}") @@ -435,94 +434,68 @@ def exception_response( """ Render a response for the default FALLBACK exception handler. """ - content_type = None - if not renderer: - # Make sure we have something set - renderer = base - render_format = fallback - - if request: - # If there is a request, try and get the format - # from the route - if request.route: - try: - if request.route.extra.error_format: - render_format = request.route.extra.error_format - except AttributeError: - ... - - content_type = request.headers.getone("content-type", "").split( - ";" - )[0] - - acceptable = request.accept - - # If the format is auto still, make a guess - if render_format == "auto": - # First, if there is an Accept header, check if text/html - # is the first option - # According to MDN Web Docs, all major browsers use text/html - # as the primary value in Accept (with the exception of IE 8, - # and, well, if you are supporting IE 8, then you have bigger - # problems to concern yourself with than what default exception - # renderer is used) - # Source: - # https://developer.mozilla.org/en-US/docs/Web/HTTP/Content_negotiation/List_of_default_Accept_values - - if acceptable and acceptable.match( - "text/html", accept_wildcards=False - ): - renderer = HTMLRenderer - - # Second, if there is an Accept header, check if - # application/json is an option, or if the content-type - # is application/json - elif ( - acceptable - and acceptable.match( - "application/json", accept_wildcards=False - ) - or content_type == "application/json" - ): - renderer = JSONRenderer - - # Third, if there is no Accept header, assume we want text. - # The likely use case here is a raw socket. - elif not acceptable: - renderer = TextRenderer - else: - # Fourth, look to see if there was a JSON body - # When in this situation, the request is probably coming - # from curl, an API client like Postman or Insomnia, or a - # package like requests or httpx - try: - # Give them the benefit of the doubt if they did: - # $ curl localhost:8000 -d '{"foo": "bar"}' - # And provide them with JSONRenderer - renderer = JSONRenderer if request.json else base - except BadRequest: - renderer = base - else: - renderer = RENDERERS_BY_CONFIG.get(render_format, renderer) - - # Lastly, if there is an Accept header, make sure - # our choice is okay - if acceptable: - type_ = CONTENT_TYPE_BY_RENDERERS.get(renderer) # type: ignore - if type_ and not acceptable.match(type_): - # If the renderer selected is not in the Accept header - # look through what is in the Accept header, and select - # the first option that matches. Otherwise, just drop back - # to the original default - for accept in acceptable: - mtype = f"{accept.type}/{accept.subtype}" - maybe = RENDERERS_BY_CONTENT_TYPE.get(mtype) - if maybe: - renderer = maybe - break - else: - renderer = base + mt = guess_mime(request, fallback) + renderer = RENDERERS_BY_CONTENT_TYPE.get(mt, base) renderer = t.cast(t.Type[BaseRenderer], renderer) return renderer(request, exception, debug).render() + + +def guess_mime(req: Request, fallback: str) -> str: + # Attempt to find a suitable MIME format for the response. + # Insertion-ordered map of formats["html"] = "source of that suggestion" + formats = {} + name = "" + # Route error_format (by magic from handler code if auto, the default) + if req.route: + name = req.route.name + f = req.route.extra.error_format + if f in MIME_BY_CONFIG: + formats[f] = name + + if not formats and fallback in MIME_BY_CONFIG: + formats[fallback] = "FALLBACK_ERROR_FORMAT" + + # If still not known, check for the request for clues of JSON + if not formats and fallback == "auto" and req.accept.match(JSON): + if JSON in req.accept: # Literally, not wildcard + formats["json"] = "request.accept" + elif JSON in req.headers.getone("content-type", ""): + formats["json"] = "content-type" + # DEPRECATION: Remove this block in 24.3 + else: + c = None + try: + c = req.json + except BadRequest: + pass + if c: + formats["json"] = "request.json" + deprecation( + "Response type was determined by the JSON content of " + "the request. This behavior is deprecated and will be " + "removed in v24.3. Please specify the format either by\n" + f' error_format="json" on route {name}, by\n' + ' FALLBACK_ERROR_FORMAT = "json", or by adding header\n' + " accept: application/json to your requests.", + 24.3, + ) + + # Any other supported formats + if fallback == "auto": + for k in MIME_BY_CONFIG: + if k not in formats: + formats[k] = "any" + + mimes = [MIME_BY_CONFIG[k] for k in formats] + m = req.accept.match(*mimes) + if m: + format = CONFIG_BY_MIME[m.mime] + source = formats[format] + logger.debug( + f"The client accepts {m.header}, using '{format}' from {source}" + ) + else: + logger.debug(f"No format found, the client accepts {req.accept!r}") + return m.mime diff --git a/sanic/headers.py b/sanic/headers.py index b192fa24..067e985b 100644 --- a/sanic/headers.py +++ b/sanic/headers.py @@ -166,7 +166,6 @@ class Matched: def _compare(self, other) -> Tuple[bool, Matched]: if isinstance(other, str): - # return self.mime == other, Accept.parse(other) parsed = Matched.parse(other) if self.mime == other: return True, parsed diff --git a/sanic/request.py b/sanic/request.py index 62ed6cf2..26fa17a8 100644 --- a/sanic/request.py +++ b/sanic/request.py @@ -500,13 +500,16 @@ class Request: @property def accept(self) -> AcceptList: - """ + """Accepted response content types. + + A convenience handler for easier RFC-compliant matching of MIME types, + parsed as a list that can match wildcards and includes */* by default. + :return: The ``Accept`` header parsed :rtype: AcceptList """ if self.parsed_accept is None: - accept_header = self.headers.getone("accept", "") - self.parsed_accept = parse_accept(accept_header) + self.parsed_accept = parse_accept(self.headers.get("accept")) return self.parsed_accept @property diff --git a/tests/test_errorpages.py b/tests/test_errorpages.py index fda629ca..538acf1b 100644 --- a/tests/test_errorpages.py +++ b/tests/test_errorpages.py @@ -1,12 +1,14 @@ +import logging + import pytest from sanic import Sanic from sanic.config import Config -from sanic.errorpages import HTMLRenderer, exception_response +from sanic.errorpages import TextRenderer, guess_mime, exception_response from sanic.exceptions import NotFound, SanicException from sanic.handlers import ErrorHandler from sanic.request import Request -from sanic.response import HTTPResponse, html, json, text +from sanic.response import HTTPResponse, empty, html, json, text @pytest.fixture @@ -17,6 +19,45 @@ def app(): def err(request): raise Exception("something went wrong") + @app.get("/forced_json/", error_format="json") + def manual_fail(request, fail): + if fail == "fail": + raise Exception + return html("") # Should be ignored + + @app.get("/empty/") + def empty_fail(request, fail): + if fail == "fail": + raise Exception + return empty() + + @app.get("/json/") + def json_fail(request, fail): + if fail == "fail": + raise Exception + # After 23.3 route format should become json, older versions think it + # is mixed due to empty mapping to html, and don't find any format. + return json({"foo": "bar"}) if fail == "json" else empty() + + @app.get("/html/") + def html_fail(request, fail): + if fail == "fail": + raise Exception + return html("

foo

") + + @app.get("/text/") + def text_fail(request, fail): + if fail == "fail": + raise Exception + return text("foo") + + @app.get("/mixed/") + def mixed_fail(request, param): + if param not in ("json", "html"): + raise Exception + return json({}) if param == "json" else html("") + + return app @@ -28,14 +69,14 @@ def fake_request(app): @pytest.mark.parametrize( "fallback,content_type, exception, status", ( - (None, "text/html; charset=utf-8", Exception, 500), + (None, "text/plain; charset=utf-8", Exception, 500), ("html", "text/html; charset=utf-8", Exception, 500), - ("auto", "text/html; charset=utf-8", Exception, 500), + ("auto", "text/plain; charset=utf-8", Exception, 500), ("text", "text/plain; charset=utf-8", Exception, 500), ("json", "application/json", Exception, 500), - (None, "text/html; charset=utf-8", NotFound, 404), + (None, "text/plain; charset=utf-8", NotFound, 404), ("html", "text/html; charset=utf-8", NotFound, 404), - ("auto", "text/html; charset=utf-8", NotFound, 404), + ("auto", "text/plain; charset=utf-8", NotFound, 404), ("text", "text/plain; charset=utf-8", NotFound, 404), ("json", "application/json", NotFound, 404), ), @@ -43,6 +84,10 @@ def fake_request(app): def test_should_return_html_valid_setting( fake_request, fallback, content_type, exception, status ): + # Note: if fallback is None or "auto", prior to PR #2668 base was returned + # and after that a text response is given because it matches */*. Changed + # base to TextRenderer in this test, like it is in Sanic itself, so the + # test passes with either version but still covers everything that it did. if fallback: fake_request.app.config.FALLBACK_ERROR_FORMAT = fallback @@ -53,7 +98,7 @@ def test_should_return_html_valid_setting( fake_request, e, True, - base=HTMLRenderer, + base=TextRenderer, fallback=fake_request.app.config.FALLBACK_ERROR_FORMAT, ) @@ -259,15 +304,17 @@ def test_fallback_with_content_type_mismatch_accept(app): "accept,content_type,expected", ( (None, None, "text/plain; charset=utf-8"), - ("foo/bar", None, "text/html; charset=utf-8"), + ("foo/bar", None, "text/plain; charset=utf-8"), ("application/json", None, "application/json"), ("application/json,text/plain", None, "application/json"), ("text/plain,application/json", None, "application/json"), ("text/plain,foo/bar", None, "text/plain; charset=utf-8"), - # Following test is valid after v22.3 - # ("text/plain,text/html", None, "text/plain; charset=utf-8"), - ("*/*", "foo/bar", "text/html; charset=utf-8"), + ("text/plain,text/html", None, "text/plain; charset=utf-8"), + ("*/*", "foo/bar", "text/plain; charset=utf-8"), ("*/*", "application/json", "application/json"), + # App wants text/plain but accept has equal entries for it + ("text/*,*/plain", None, "text/plain; charset=utf-8"), + ), ) def test_combinations_for_auto(fake_request, accept, content_type, expected): @@ -286,7 +333,7 @@ def test_combinations_for_auto(fake_request, accept, content_type, expected): fake_request, e, True, - base=HTMLRenderer, + base=TextRenderer, fallback="auto", ) @@ -376,3 +423,49 @@ def test_config_fallback_bad_value(app): message = "Unknown format: fake" with pytest.raises(SanicException, match=message): app.config.FALLBACK_ERROR_FORMAT = "fake" + + +@pytest.mark.parametrize( + "route_format,fallback,accept,expected", + ( + ("json", "html", "*/*", "The client accepts */*, using 'json' from fakeroute"), + ("json", "auto", "text/html,*/*;q=0.8", "The client accepts text/html, using 'html' from any"), + ("json", "json", "text/html,*/*;q=0.8", "The client accepts */*;q=0.8, using 'json' from fakeroute"), + ("", "html", "text/*,*/plain", "The client accepts text/*, using 'html' from FALLBACK_ERROR_FORMAT"), + ("", "json", "text/*,*/*", "The client accepts */*, using 'json' from FALLBACK_ERROR_FORMAT"), + ("", "auto", "*/*,application/json;q=0.5", "The client accepts */*, using 'json' from request.accept"), + ("", "auto", "*/*", "The client accepts */*, using 'json' from content-type"), + ("", "auto", "text/html,text/plain", "The client accepts text/plain, using 'text' from any"), + ("", "auto", "text/html,text/plain;q=0.9", "The client accepts text/html, using 'html' from any"), + ("html", "json", "application/xml", "No format found, the client accepts [application/xml]"), + ("", "auto", "*/*", "The client accepts */*, using 'text' from any"), + ("", "", "*/*", "No format found, the client accepts [*/*]"), + # DEPRECATED: remove in 24.3 + ("", "auto", "*/*", "The client accepts */*, using 'json' from request.json"), + ), +) +def test_guess_mime_logging(caplog, fake_request, route_format, fallback, accept, expected): + class FakeObject: + pass + fake_request.route = FakeObject() + fake_request.route.name = "fakeroute" + fake_request.route.extra = FakeObject() + fake_request.route.extra.error_format = route_format + if accept is None: + del fake_request.headers["accept"] + else: + fake_request.headers["accept"] = accept + + if "content-type" in expected: + fake_request.headers["content-type"] = "application/json" + + # Fake JSON content (DEPRECATED: remove in 24.3) + if "request.json" in expected: + fake_request.parsed_json = {"foo": "bar"} + + with caplog.at_level(logging.DEBUG, logger="sanic.root"): + guess_mime(fake_request, fallback) + + logmsg, = [r.message for r in caplog.records if r.funcName == "guess_mime"] + + assert logmsg == expected diff --git a/tests/test_request.py b/tests/test_request.py index a4757b52..c7c13910 100644 --- a/tests/test_request.py +++ b/tests/test_request.py @@ -150,26 +150,29 @@ def test_request_accept(): async def get(request): return response.empty() + header_value = "text/plain;format=flowed, text/plain, text/*, */*" request, _ = app.test_client.get( "/", - headers={ - "Accept": "text/*, text/plain, text/plain;format=flowed, */*" - }, + headers={"Accept": header_value}, ) - assert [str(i) for i in request.accept] == [ + assert str(request.accept) == header_value + match = request.accept.match( + "*/*;format=flowed", "text/plain;format=flowed", "text/plain", "text/*", "*/*", - ] + ) + assert match == "*/*;format=flowed" + assert match.header.mime == "text/plain" + assert match.header.params == {"format": "flowed"} + header_value = ( + "text/plain; q=0.5, text/html, text/x-dvi; q=0.8, text/x-c" + ) request, _ = app.test_client.get( "/", - headers={ - "Accept": ( - "text/plain; q=0.5, text/html, text/x-dvi; q=0.8, text/x-c" - ) - }, + headers={"Accept": header_value}, ) assert [str(i) for i in request.accept] == [ "text/html", @@ -177,6 +180,17 @@ def test_request_accept(): "text/x-dvi;q=0.8", "text/plain;q=0.5", ] + match = request.accept.match( + "application/json", + "text/plain", # Has lower q in accept header + "text/html;format=flowed", # Params mismatch + "text/*", # Matches + "*/*", + ) + assert match == "text/*" + assert match.header.mime == "text/html" + assert match.header.q == 1.0 + assert not match.header.params def test_bad_url_parse():