Stricter charset handling and escaping of request URLs (#2710)
Co-authored-by: L. Karkkainen <tronic@users.noreply.github.com>
This commit is contained in:
		| @@ -3,7 +3,6 @@ from __future__ import annotations | ||||
| import warnings | ||||
|  | ||||
| from typing import TYPE_CHECKING, Optional | ||||
| from urllib.parse import quote | ||||
|  | ||||
| from sanic.compat import Header | ||||
| from sanic.exceptions import BadRequest, ServerError | ||||
| @@ -146,14 +145,6 @@ class ASGIApp: | ||||
|             raise BadRequest( | ||||
|                 "Header names can only contain US-ASCII characters" | ||||
|             ) | ||||
|         path = ( | ||||
|             scope["path"][1:] | ||||
|             if scope["path"].startswith("/") | ||||
|             else scope["path"] | ||||
|         ) | ||||
|         url = "/".join([scope.get("root_path", ""), quote(path)]) | ||||
|         url_bytes = url.encode("latin-1") | ||||
|         url_bytes += b"?" + scope["query_string"] | ||||
|  | ||||
|         if scope["type"] == "http": | ||||
|             version = scope["http_version"] | ||||
| @@ -168,6 +159,13 @@ class ASGIApp: | ||||
|         else: | ||||
|             raise ServerError("Received unknown ASGI scope") | ||||
|  | ||||
|         url_bytes, query = scope["raw_path"], scope["query_string"] | ||||
|         if query: | ||||
|             # httpx ASGI client sends query string as part of raw_path | ||||
|             url_bytes = url_bytes.split(b"?", 1)[0] | ||||
|             # All servers send them separately | ||||
|             url_bytes = b"%b?%b" % (url_bytes, query) | ||||
|  | ||||
|         request_class = sanic_app.request_class or Request | ||||
|         instance.request = request_class( | ||||
|             url_bytes, | ||||
|   | ||||
| @@ -240,9 +240,14 @@ class Http(Stream, metaclass=TouchUpMeta): | ||||
|             headers_instance.getone("upgrade", "").lower() == "websocket" | ||||
|         ) | ||||
|  | ||||
|         try: | ||||
|             url_bytes = self.url.encode("ASCII") | ||||
|         except UnicodeEncodeError: | ||||
|             raise BadRequest("URL may only contain US-ASCII characters.") | ||||
|  | ||||
|         # Prepare a Request object | ||||
|         request = self.protocol.request_class( | ||||
|             url_bytes=self.url.encode(), | ||||
|             url_bytes=url_bytes, | ||||
|             headers=headers_instance, | ||||
|             head=bytes(head), | ||||
|             version=protocol[5:], | ||||
| @@ -445,9 +450,18 @@ class Http(Stream, metaclass=TouchUpMeta): | ||||
|         bogus response for error handling use. | ||||
|         """ | ||||
|  | ||||
|         # Reformat any URL already received with \xHH escapes for better logs | ||||
|         url_bytes = ( | ||||
|             self.url.encode(errors="surrogateescape") | ||||
|             .decode("ASCII", errors="backslashreplace") | ||||
|             .encode("ASCII") | ||||
|             if self.url | ||||
|             else b"*" | ||||
|         ) | ||||
|  | ||||
|         # FIXME: Avoid this by refactoring error handling and response code | ||||
|         self.request = self.protocol.request_class( | ||||
|             url_bytes=self.url.encode() if self.url else b"*", | ||||
|             url_bytes=url_bytes, | ||||
|             headers=Header({}), | ||||
|             version="1.1", | ||||
|             method="NONE", | ||||
|   | ||||
| @@ -18,7 +18,12 @@ from typing import ( | ||||
|  | ||||
| from sanic.compat import Header | ||||
| from sanic.constants import LocalCertCreator | ||||
| from sanic.exceptions import PayloadTooLarge, SanicException, ServerError | ||||
| from sanic.exceptions import ( | ||||
|     BadRequest, | ||||
|     PayloadTooLarge, | ||||
|     SanicException, | ||||
|     ServerError, | ||||
| ) | ||||
| from sanic.helpers import has_message_body | ||||
| from sanic.http.constants import Stage | ||||
| from sanic.http.stream import Stream | ||||
| @@ -333,7 +338,17 @@ class Http3: | ||||
|         return self.receivers[stream_id] | ||||
|  | ||||
|     def _make_request(self, event: HeadersReceived) -> Request: | ||||
|         headers = Header(((k.decode(), v.decode()) for k, v in event.headers)) | ||||
|         try: | ||||
|             headers = Header( | ||||
|                 ( | ||||
|                     (k.decode("ASCII"), v.decode(errors="surrogateescape")) | ||||
|                     for k, v in event.headers | ||||
|                 ) | ||||
|             ) | ||||
|         except UnicodeDecodeError: | ||||
|             raise BadRequest( | ||||
|                 "Header names may only contain US-ASCII characters." | ||||
|             ) | ||||
|         method = headers[":method"] | ||||
|         path = headers[":path"] | ||||
|         scheme = headers.pop(":scheme", "") | ||||
| @@ -342,9 +357,14 @@ class Http3: | ||||
|         if authority: | ||||
|             headers["host"] = authority | ||||
|  | ||||
|         try: | ||||
|             url_bytes = path.encode("ASCII") | ||||
|         except UnicodeEncodeError: | ||||
|             raise BadRequest("URL may only contain US-ASCII characters.") | ||||
|  | ||||
|         transport = HTTP3Transport(self.protocol) | ||||
|         request = self.protocol.request_class( | ||||
|             path.encode(), | ||||
|             url_bytes, | ||||
|             headers, | ||||
|             "3", | ||||
|             method, | ||||
|   | ||||
| @@ -133,7 +133,8 @@ class Request: | ||||
|         try: | ||||
|             self._parsed_url = parse_url(url_bytes) | ||||
|         except HttpParserInvalidURLError: | ||||
|             raise BadURL(f"Bad URL: {url_bytes.decode()}") | ||||
|             url = url_bytes.decode(errors="backslashreplace") | ||||
|             raise BadURL(f"Bad URL: {url}") | ||||
|         self._id: Optional[Union[uuid.UUID, str, int]] = None | ||||
|         self._name: Optional[str] = None | ||||
|         self._stream_id = stream_id | ||||
|   | ||||
							
								
								
									
										2
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								setup.py
									
									
									
									
									
								
							| @@ -116,7 +116,7 @@ requirements = [ | ||||
| ] | ||||
|  | ||||
| tests_require = [ | ||||
|     "sanic-testing>=22.9.0", | ||||
|     "sanic-testing@git+https://github.com/sanic-org/sanic-testing.git@main#egg=sanic-testing>=22.12.0", | ||||
|     "pytest==7.1.*", | ||||
|     "coverage", | ||||
|     "beautifulsoup4", | ||||
|   | ||||
| @@ -11,7 +11,7 @@ from aioquic.quic.events import ProtocolNegotiated | ||||
| from sanic import Request, Sanic | ||||
| from sanic.compat import Header | ||||
| from sanic.config import DEFAULT_CONFIG | ||||
| from sanic.exceptions import PayloadTooLarge | ||||
| from sanic.exceptions import BadRequest, PayloadTooLarge | ||||
| from sanic.http.constants import Stage | ||||
| from sanic.http.http3 import Http3, HTTPReceiver | ||||
| from sanic.models.server_types import ConnInfo | ||||
| @@ -292,3 +292,48 @@ def test_request_conn_info(app): | ||||
|     receiver = http3.get_receiver_by_stream_id(1) | ||||
|  | ||||
|     assert isinstance(receiver.request.conn_info, ConnInfo) | ||||
|  | ||||
|  | ||||
| def test_request_header_encoding(app): | ||||
|     protocol = generate_protocol(app) | ||||
|     http3 = Http3(protocol, protocol.transmit) | ||||
|     with pytest.raises(BadRequest) as exc_info: | ||||
|         http3.http_event_received( | ||||
|             HeadersReceived( | ||||
|                 [ | ||||
|                     (b":method", b"GET"), | ||||
|                     (b":path", b"/location"), | ||||
|                     (b":scheme", b"https"), | ||||
|                     (b":authority", b"localhost:8443"), | ||||
|                     ("foo\u00A0".encode(), b"bar"), | ||||
|                 ], | ||||
|                 1, | ||||
|                 False, | ||||
|             ) | ||||
|         ) | ||||
|     assert exc_info.value.status_code == 400 | ||||
|     assert ( | ||||
|         str(exc_info.value) | ||||
|         == "Header names may only contain US-ASCII characters." | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def test_request_url_encoding(app): | ||||
|     protocol = generate_protocol(app) | ||||
|     http3 = Http3(protocol, protocol.transmit) | ||||
|     with pytest.raises(BadRequest) as exc_info: | ||||
|         http3.http_event_received( | ||||
|             HeadersReceived( | ||||
|                 [ | ||||
|                     (b":method", b"GET"), | ||||
|                     (b":path", b"/location\xA0"), | ||||
|                     (b":scheme", b"https"), | ||||
|                     (b":authority", b"localhost:8443"), | ||||
|                     (b"foo", b"bar"), | ||||
|                 ], | ||||
|                 1, | ||||
|                 False, | ||||
|             ) | ||||
|         ) | ||||
|     assert exc_info.value.status_code == 400 | ||||
|     assert str(exc_info.value) == "URL may only contain US-ASCII characters." | ||||
|   | ||||
| @@ -652,3 +652,17 @@ async def test_asgi_headers_decoding(app: Sanic, monkeypatch: MonkeyPatch): | ||||
|  | ||||
|     _, response = await app.asgi_client.get("/", headers={"Test-Header": "😅"}) | ||||
|     assert response.status_code == 200 | ||||
|  | ||||
|  | ||||
| @pytest.mark.asyncio | ||||
| async def test_asgi_url_decoding(app): | ||||
|     @app.get("/dir/<name>", unquote=True) | ||||
|     def _request(request: Request, name): | ||||
|         return text(name) | ||||
|  | ||||
|     # 2F should not become a path separator (unquoted later) | ||||
|     _, response = await app.asgi_client.get("/dir/some%2Fpath") | ||||
|     assert response.text == "some/path" | ||||
|  | ||||
|     _, response = await app.asgi_client.get("/dir/some%F0%9F%98%80path") | ||||
|     assert response.text == "some😀path" | ||||
|   | ||||
| @@ -98,3 +98,17 @@ def test_transfer_chunked(client): | ||||
|     data = stdjson.loads(body) | ||||
|  | ||||
|     assert data == ["foo", "bar"] | ||||
|  | ||||
|  | ||||
| def test_url_encoding(client): | ||||
|     client.send( | ||||
|         """ | ||||
|         GET /invalid\xA0url HTTP/1.1 | ||||
|  | ||||
|         """ | ||||
|     ) | ||||
|     response = client.recv() | ||||
|     headers, body = response.rsplit(b"\r\n\r\n", 1) | ||||
|  | ||||
|     assert b"400 Bad Request" in headers | ||||
|     assert b"URL may only contain US-ASCII characters." in body | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 L. Kärkkäinen
					L. Kärkkäinen