Stricter charset handling and escaping of request URLs (#2710)
Co-authored-by: L. Karkkainen <tronic@users.noreply.github.com>
This commit is contained in:
		| @@ -3,7 +3,6 @@ from __future__ import annotations | ||||
| import warnings | ||||
|  | ||||
| from typing import TYPE_CHECKING, Optional | ||||
| from urllib.parse import quote | ||||
|  | ||||
| from sanic.compat import Header | ||||
| from sanic.exceptions import BadRequest, ServerError | ||||
| @@ -146,14 +145,6 @@ class ASGIApp: | ||||
|             raise BadRequest( | ||||
|                 "Header names can only contain US-ASCII characters" | ||||
|             ) | ||||
|         path = ( | ||||
|             scope["path"][1:] | ||||
|             if scope["path"].startswith("/") | ||||
|             else scope["path"] | ||||
|         ) | ||||
|         url = "/".join([scope.get("root_path", ""), quote(path)]) | ||||
|         url_bytes = url.encode("latin-1") | ||||
|         url_bytes += b"?" + scope["query_string"] | ||||
|  | ||||
|         if scope["type"] == "http": | ||||
|             version = scope["http_version"] | ||||
| @@ -168,6 +159,13 @@ class ASGIApp: | ||||
|         else: | ||||
|             raise ServerError("Received unknown ASGI scope") | ||||
|  | ||||
|         url_bytes, query = scope["raw_path"], scope["query_string"] | ||||
|         if query: | ||||
|             # httpx ASGI client sends query string as part of raw_path | ||||
|             url_bytes = url_bytes.split(b"?", 1)[0] | ||||
|             # All servers send them separately | ||||
|             url_bytes = b"%b?%b" % (url_bytes, query) | ||||
|  | ||||
|         request_class = sanic_app.request_class or Request | ||||
|         instance.request = request_class( | ||||
|             url_bytes, | ||||
|   | ||||
| @@ -240,9 +240,14 @@ class Http(Stream, metaclass=TouchUpMeta): | ||||
|             headers_instance.getone("upgrade", "").lower() == "websocket" | ||||
|         ) | ||||
|  | ||||
|         try: | ||||
|             url_bytes = self.url.encode("ASCII") | ||||
|         except UnicodeEncodeError: | ||||
|             raise BadRequest("URL may only contain US-ASCII characters.") | ||||
|  | ||||
|         # Prepare a Request object | ||||
|         request = self.protocol.request_class( | ||||
|             url_bytes=self.url.encode(), | ||||
|             url_bytes=url_bytes, | ||||
|             headers=headers_instance, | ||||
|             head=bytes(head), | ||||
|             version=protocol[5:], | ||||
| @@ -445,9 +450,18 @@ class Http(Stream, metaclass=TouchUpMeta): | ||||
|         bogus response for error handling use. | ||||
|         """ | ||||
|  | ||||
|         # Reformat any URL already received with \xHH escapes for better logs | ||||
|         url_bytes = ( | ||||
|             self.url.encode(errors="surrogateescape") | ||||
|             .decode("ASCII", errors="backslashreplace") | ||||
|             .encode("ASCII") | ||||
|             if self.url | ||||
|             else b"*" | ||||
|         ) | ||||
|  | ||||
|         # FIXME: Avoid this by refactoring error handling and response code | ||||
|         self.request = self.protocol.request_class( | ||||
|             url_bytes=self.url.encode() if self.url else b"*", | ||||
|             url_bytes=url_bytes, | ||||
|             headers=Header({}), | ||||
|             version="1.1", | ||||
|             method="NONE", | ||||
|   | ||||
| @@ -18,7 +18,12 @@ from typing import ( | ||||
|  | ||||
| from sanic.compat import Header | ||||
| from sanic.constants import LocalCertCreator | ||||
| from sanic.exceptions import PayloadTooLarge, SanicException, ServerError | ||||
| from sanic.exceptions import ( | ||||
|     BadRequest, | ||||
|     PayloadTooLarge, | ||||
|     SanicException, | ||||
|     ServerError, | ||||
| ) | ||||
| from sanic.helpers import has_message_body | ||||
| from sanic.http.constants import Stage | ||||
| from sanic.http.stream import Stream | ||||
| @@ -333,7 +338,17 @@ class Http3: | ||||
|         return self.receivers[stream_id] | ||||
|  | ||||
|     def _make_request(self, event: HeadersReceived) -> Request: | ||||
|         headers = Header(((k.decode(), v.decode()) for k, v in event.headers)) | ||||
|         try: | ||||
|             headers = Header( | ||||
|                 ( | ||||
|                     (k.decode("ASCII"), v.decode(errors="surrogateescape")) | ||||
|                     for k, v in event.headers | ||||
|                 ) | ||||
|             ) | ||||
|         except UnicodeDecodeError: | ||||
|             raise BadRequest( | ||||
|                 "Header names may only contain US-ASCII characters." | ||||
|             ) | ||||
|         method = headers[":method"] | ||||
|         path = headers[":path"] | ||||
|         scheme = headers.pop(":scheme", "") | ||||
| @@ -342,9 +357,14 @@ class Http3: | ||||
|         if authority: | ||||
|             headers["host"] = authority | ||||
|  | ||||
|         try: | ||||
|             url_bytes = path.encode("ASCII") | ||||
|         except UnicodeEncodeError: | ||||
|             raise BadRequest("URL may only contain US-ASCII characters.") | ||||
|  | ||||
|         transport = HTTP3Transport(self.protocol) | ||||
|         request = self.protocol.request_class( | ||||
|             path.encode(), | ||||
|             url_bytes, | ||||
|             headers, | ||||
|             "3", | ||||
|             method, | ||||
|   | ||||
| @@ -133,7 +133,8 @@ class Request: | ||||
|         try: | ||||
|             self._parsed_url = parse_url(url_bytes) | ||||
|         except HttpParserInvalidURLError: | ||||
|             raise BadURL(f"Bad URL: {url_bytes.decode()}") | ||||
|             url = url_bytes.decode(errors="backslashreplace") | ||||
|             raise BadURL(f"Bad URL: {url}") | ||||
|         self._id: Optional[Union[uuid.UUID, str, int]] = None | ||||
|         self._name: Optional[str] = None | ||||
|         self._stream_id = stream_id | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 L. Kärkkäinen
					L. Kärkkäinen