From a733d3271536682d2e08cdcf8e741f11c8ffc371 Mon Sep 17 00:00:00 2001 From: Adam Hopkins Date: Wed, 3 Mar 2021 16:33:34 +0200 Subject: [PATCH] Add raw header info to request object (#2032) --- sanic/constants.py | 1 + sanic/http.py | 8 +++++--- sanic/mixins/routes.py | 4 ++-- sanic/request.py | 20 ++++++++++++++------ sanic/response.py | 3 ++- tests/test_headers.py | 36 +++++++++++++++++++++++++++++++++--- 6 files changed, 57 insertions(+), 15 deletions(-) diff --git a/sanic/constants.py b/sanic/constants.py index 8bd87fe9..cb2e8ffa 100644 --- a/sanic/constants.py +++ b/sanic/constants.py @@ -1 +1,2 @@ HTTP_METHODS = ("GET", "POST", "PUT", "HEAD", "OPTIONS", "PATCH", "DELETE") +DEFAULT_HTTP_CONTENT_TYPE = "application/octet-stream" diff --git a/sanic/http.py b/sanic/http.py index 066411cb..0303d4cf 100644 --- a/sanic/http.py +++ b/sanic/http.py @@ -189,8 +189,9 @@ class Http: # Parse header content try: - raw_headers = buf[:pos].decode(errors="surrogateescape") - reqline, *raw_headers = raw_headers.split("\r\n") + head = buf[:pos] + raw_headers = head.decode(errors="surrogateescape") + reqline, *split_headers = raw_headers.split("\r\n") method, self.url, protocol = reqline.split(" ") if protocol == "HTTP/1.1": @@ -204,7 +205,7 @@ class Http: request_body = False headers = [] - for name, value in (h.split(":", 1) for h in raw_headers): + for name, value in (h.split(":", 1) for h in split_headers): name, value = h = name.lower(), value.lstrip() if name in ("content-length", "transfer-encoding"): @@ -223,6 +224,7 @@ class Http: request = self.protocol.request_class( url_bytes=self.url.encode(), headers=headers_instance, + head=bytes(head), version=protocol[5:], method=method, transport=self.protocol.transport, diff --git a/sanic/mixins/routes.py b/sanic/mixins/routes.py index 060fd327..a8451ab2 100644 --- a/sanic/mixins/routes.py +++ b/sanic/mixins/routes.py @@ -11,7 +11,7 @@ from urllib.parse import unquote from sanic_routing.route import Route # type: ignore from sanic.compat import stat_async -from sanic.constants import HTTP_METHODS +from sanic.constants import DEFAULT_HTTP_CONTENT_TYPE, HTTP_METHODS from sanic.exceptions import ( ContentRangeError, FileNotFound, @@ -689,7 +689,7 @@ class RouteMixin: content_type = ( content_type or guess_type(file_path)[0] - or "application/octet-stream" + or DEFAULT_HTTP_CONTENT_TYPE ) if "charset=" not in content_type and ( diff --git a/sanic/request.py b/sanic/request.py index f50e1e5c..6296419e 100644 --- a/sanic/request.py +++ b/sanic/request.py @@ -31,6 +31,7 @@ from urllib.parse import parse_qs, parse_qsl, unquote, urlunparse from httptools import parse_url # type: ignore from sanic.compat import CancelledErrors, Header +from sanic.constants import DEFAULT_HTTP_CONTENT_TYPE from sanic.exceptions import InvalidUsage from sanic.headers import ( Options, @@ -49,12 +50,6 @@ try: except ImportError: from json import loads as json_loads # type: ignore -DEFAULT_HTTP_CONTENT_TYPE = "application/octet-stream" - -# HTTP/1.1: https://www.w3.org/Protocols/rfc2616/rfc2616-sec7.html#sec7.2.1 -# > If the media type remains unknown, the recipient SHOULD treat it -# > as type "application/octet-stream" - class RequestParameters(dict): """ @@ -95,6 +90,7 @@ class Request: "conn_info", "ctx", "endpoint", + "head", "headers", "method", "name", @@ -121,6 +117,7 @@ class Request: method: str, transport: TransportProtocol, app: Sanic, + head: bytes = b"", ): self.raw_url = url_bytes # TODO: Content-Encoding detection @@ -132,6 +129,7 @@ class Request: self.version = version self.method = method self.transport = transport + self.head = head # Init but do not inhale self.body = b"" @@ -207,6 +205,16 @@ class Request: if not self.body: self.body = b"".join([data async for data in self.stream]) + @property + def raw_headers(self): + _, headers = self.head.split(b"\r\n", 1) + return bytes(headers) + + @property + def request_line(self): + reqline, _ = self.head.split(b"\r\n", 1) + return bytes(reqline) + @property def id(self) -> Optional[Union[uuid.UUID, str, int]]: """ diff --git a/sanic/response.py b/sanic/response.py index 10070aa2..e17b080d 100644 --- a/sanic/response.py +++ b/sanic/response.py @@ -17,6 +17,7 @@ from urllib.parse import quote_plus from warnings import warn from sanic.compat import Header, open_async +from sanic.constants import DEFAULT_HTTP_CONTENT_TYPE from sanic.cookies import CookieJar from sanic.helpers import has_message_body, remove_entity_headers from sanic.http import Http @@ -297,7 +298,7 @@ def raw( body: Optional[AnyStr], status: int = 200, headers: Optional[Dict[str, str]] = None, - content_type: str = "application/octet-stream", + content_type: str = DEFAULT_HTTP_CONTENT_TYPE, ) -> HTTPResponse: """ Returns response object without encoding the body. diff --git a/tests/test_headers.py b/tests/test_headers.py index 7d552fb8..4580a073 100644 --- a/tests/test_headers.py +++ b/tests/test_headers.py @@ -2,11 +2,9 @@ from unittest.mock import Mock import pytest -from sanic import Sanic, headers -from sanic.compat import Header +from sanic import headers, text from sanic.exceptions import PayloadTooLarge from sanic.http import Http -from sanic.request import Request @pytest.mark.parametrize( @@ -85,3 +83,35 @@ async def test_header_size_exceeded(): with pytest.raises(PayloadTooLarge): await http.http1_request_header() + + +def test_raw_headers(app): + app.route("/")(lambda _: text("")) + request, _ = app.test_client.get( + "/", + headers={ + "FOO": "bar", + "Host": "example.com", + "User-Agent": "Sanic-Testing", + }, + ) + + assert request.raw_headers == ( + b"Host: example.com\r\nAccept: */*\r\nAccept-Encoding: gzip, " + b"deflate\r\nConnection: keep-alive\r\nUser-Agent: " + b"Sanic-Testing\r\nFOO: bar" + ) + + +def test_request_line(app): + app.route("/")(lambda _: text("")) + request, _ = app.test_client.get( + "/", + headers={ + "FOO": "bar", + "Host": "example.com", + "User-Agent": "Sanic-Testing", + }, + ) + + assert request.request_line == b"GET / HTTP/1.1"