2023-11-08 20:38:40 +00:00
|
|
|
import asyncio
|
|
|
|
import datetime
|
2023-10-23 02:51:39 +01:00
|
|
|
import mimetypes
|
2023-11-08 20:38:40 +00:00
|
|
|
from concurrent.futures import ThreadPoolExecutor
|
2023-11-12 19:35:20 +00:00
|
|
|
from pathlib import Path, PurePath, PurePosixPath
|
2023-11-08 20:38:40 +00:00
|
|
|
from stat import S_IFDIR, S_IFREG
|
2023-10-23 02:51:39 +01:00
|
|
|
from urllib.parse import unquote
|
2023-11-08 20:38:40 +00:00
|
|
|
from wsgiref.handlers import format_date_time
|
2023-10-14 23:29:50 +01:00
|
|
|
|
2023-11-08 20:38:40 +00:00
|
|
|
import brotli
|
|
|
|
import sanic.helpers
|
|
|
|
from blake3 import blake3
|
|
|
|
from sanic import Blueprint, Sanic, empty, raw
|
2023-11-12 19:35:20 +00:00
|
|
|
from sanic.exceptions import Forbidden, NotFound, ServerError
|
2023-11-08 20:38:40 +00:00
|
|
|
from sanic.log import logging
|
|
|
|
from stream_zip import ZIP_AUTO, stream_zip
|
2023-10-14 23:29:50 +01:00
|
|
|
|
2023-10-23 02:51:39 +01:00
|
|
|
from cista import auth, config, session, watching
|
|
|
|
from cista.api import bp
|
|
|
|
from cista.util.apphelpers import handle_sanic_exception
|
|
|
|
|
2023-11-08 20:38:40 +00:00
|
|
|
# Workaround until Sanic PR #2824 is merged: empty Sanic's set of entity headers.
# NOTE(review): presumably this keeps headers such as etag and cache-control on
# the 304 responses sent by wwwroot() -- confirm against the PR.
sanic.helpers._ENTITY_HEADERS = frozenset()

# The application object, with the auth and API blueprints attached and a single
# handler registered for every Exception.
app = Sanic("cista", strict_slashes=True)
app.blueprint(auth.bp)
app.blueprint(bp)
app.exception(Exception)(handle_sanic_exception)
|
2023-10-14 23:29:50 +01:00
|
|
|
|
2023-10-26 15:18:59 +01:00
|
|
|
|
2023-10-23 02:51:39 +01:00
|
|
|
@app.before_server_start
async def main_start(app, loop):
    """Load the configuration, start `watching` and create the I/O thread pool.

    The pool is stored as ``app.ctx.threadexec`` so that handlers can run
    blocking work off the event loop.
    """
    config.load_config()
    await watching.start(app, loop)
    io_pool = ThreadPoolExecutor(max_workers=8, thread_name_prefix="cista-ioworker")
    app.ctx.threadexec = io_pool
|
2023-11-08 20:38:40 +00:00
|
|
|
|
2023-10-23 02:51:39 +01:00
|
|
|
|
|
|
|
@app.after_server_stop
async def main_stop(app, loop):
    """Stop `watching`, then shut down the I/O thread pool made by main_start."""
    await watching.stop(app, loop)
    io_pool = app.ctx.threadexec
    io_pool.shutdown()
|
|
|
|
|
2023-10-14 23:29:50 +01:00
|
|
|
|
2023-10-21 02:44:43 +01:00
|
|
|
@app.on_request
async def use_session(req):
    """Attach session and user information to the request and reject CSRF.

    Sets ``req.ctx.session``, ``req.ctx.username`` and ``req.ctx.user``; the
    latter two are None when there is no usable session or the user is not in
    the configuration.

    Raises:
        Forbidden: for a non-GET request or a websocket upgrade whose Origin
            header is present but does not match the request host.
    """
    req.ctx.session = session.get(req)
    try:
        req.ctx.username = req.ctx.session["username"]  # type: ignore
        req.ctx.user = config.config.users[req.ctx.username]
    except (AttributeError, KeyError, TypeError):
        req.ctx.username = None
        req.ctx.user = None
    # CSRF protection
    if req.method == "GET" and req.headers.upgrade != "websocket":
        return  # Ordinary GET requests are fine
    # Check that origin matches host, for browsers which should all send Origin.
    # Curl doesn't send any Origin header, so we allow it anyway.
    origin = req.headers.origin
    # partition() rather than split()[1]: an Origin without "//" (browsers send
    # "null" for opaque origins) yields "" and is rejected with 403 instead of
    # crashing with IndexError.
    if origin and origin.partition("//")[2] != req.host:
        raise Forbidden("Invalid origin: Cross-Site requests not permitted")
|
|
|
|
|
2023-10-26 15:18:59 +01:00
|
|
|
|
2023-10-14 23:29:50 +01:00
|
|
|
@app.before_server_start
def http_fileserver(app, _):
    """Serve the configured path under /files/ behind auth.verify.

    Runs at server start, so ``config.config.path`` is read at that time
    rather than at import time.
    """
    fileserver = Blueprint("fileserver")
    fileserver.on_request(auth.verify)
    static_options = {
        "use_content_range": True,
        "stream_large_files": True,
        "directory_view": True,
    }
    fileserver.static("/files/", config.config.path, **static_options)
    app.blueprint(fileserver)
|
|
|
|
|
2023-10-26 15:18:59 +01:00
|
|
|
|
2023-11-08 20:38:40 +00:00
|
|
|
# Cache of frontend files: maps a POSIX path relative to wwwroot ("" stands for
# index.html) to a (data, br, headers) tuple as built by _load_wwwroot().
www = {}
|
|
|
|
|
|
|
|
|
|
|
|
def _load_wwwroot(www):
    """Build a fresh frontend file cache from the ``wwwroot`` directory.

    The directory next to this module is walked breadth-first. Each file is
    stored under its POSIX relative path ("" for index.html) as a tuple
    ``(data, br, headers)``, where ``br`` is the Brotli-compressed content, or
    False when compression does not make the file smaller.

    Args:
        www: The previous cache. Entries whose etag is unchanged are reused
            as-is instead of being rebuilt and recompressed.

    Returns:
        The new cache dict.

    Raises:
        ServerError: if no files were found at all.
    """
    fresh = {}
    base = Path(__file__).with_name("wwwroot")
    pending = [PurePath()]
    while pending:
        directory = base / pending.pop(0)
        for entry in directory.iterdir():
            relative = entry.relative_to(base)
            if entry.is_dir():
                pending.append(relative)
                continue
            name = relative.as_posix()
            mime = mimetypes.guess_type(name)[0] or "application/octet-stream"
            mtime = entry.stat().st_mtime
            data = entry.read_bytes()
            etag = blake3(data).hexdigest(length=8)
            if name == "index.html":
                name = ""
            # Use old data if not changed
            previous = www.get(name)
            if previous is not None and previous[2]["etag"] == etag:
                fresh[name] = previous
                continue
            # Add charset definition
            if mime.startswith("text/"):
                mime += "; charset=UTF-8"
            # Asset files names will change whenever the content changes
            if name.startswith("assets/"):
                cache_control = "max-age=31536000, immutable"
            else:
                cache_control = "no-cache"
            headers = {
                "etag": etag,
                "last-modified": format_date_time(mtime),
                "cache-control": cache_control,
                "content-type": mime,
            }
            # Precompress with Brotli, keeping the result only if it is smaller
            compressed = brotli.compress(data)
            br = compressed if len(compressed) < len(data) else False
            fresh[name] = data, br, headers
    if not fresh:
        raise ServerError(
            "Web frontend missing. Did you forget npm run build?",
            extra={"wwwroot": str(base)},
            quiet=True,
        )
    return fresh
|
|
|
|
|
|
|
|
|
2023-11-11 10:48:33 +00:00
|
|
|
@app.before_server_start
async def start(app):
    """Load the frontend files; in debug mode also start the refresh task."""
    await load_wwwroot(app)
    if not app.debug:
        return
    app.add_task(refresh_wwwroot())
|
|
|
|
|
|
|
|
|
|
|
|
async def load_wwwroot(app):
    """Rebuild the global ``www`` cache on the app's I/O thread pool.

    The current cache is passed to _load_wwwroot so unchanged entries can be
    reused.
    """
    global www
    loop = asyncio.get_event_loop()
    www = await loop.run_in_executor(app.ctx.threadexec, _load_wwwroot, www)
|
|
|
|
|
|
|
|
|
2023-11-08 20:38:40 +00:00
|
|
|
async def refresh_wwwroot():
    """Reload the frontend files every half second and print what changed.

    Errors while reloading are printed and the loop continues. The loop ends
    once ``app.debug`` is no longer set.
    """
    while True:
        await asyncio.sleep(0.5)
        try:
            previous = www
            await load_wwwroot(app)
            lines = []
            for name in sorted(www):
                entry = www[name]
                if previous.get(name) == entry:
                    continue
                headers = entry[2]
                lines.append(f"{headers['last-modified']} {headers['etag']} /{name}\n")
            removed = sorted(set(previous) - set(www))
            lines.extend(f"Deleted /{name}\n" for name in removed)
            report = "".join(lines)
            if report:
                print(f"Updated wwwroot:\n{report}", end="", flush=True)
        except Exception as e:
            print("Error loading wwwroot", e)
        if not app.debug:
            return
|
|
|
|
|
|
|
|
|
|
|
|
def _accepts_brotli(accept_encoding):
    """Return True if an Accept-Encoding header value allows the "br" coding.

    Handles values without a space after the comma, mixed case, and quality
    parameters; "br;q=0" counts as a refusal.
    """
    for item in accept_encoding.split(","):
        coding, _, params = item.partition(";")
        if coding.strip().lower() != "br":
            continue
        for param in params.split(";"):
            key, _, value = param.partition("=")
            if key.strip().lower() == "q":
                try:
                    return float(value) > 0
                except ValueError:
                    return False
        return True
    return False


@app.route("/<path:path>", methods=["GET", "HEAD"])
async def wwwroot(req, path=""):
    """Serve a frontend file from the in-memory ``www`` cache.

    Responds 304 when If-None-Match equals the file's etag, and sends the
    Brotli variant when one exists and the client accepts it.

    Raises:
        NotFound: if the unquoted path is not in the cache.
    """
    name = unquote(path)
    if name not in www:
        raise NotFound(f"File not found: /{path}", extra={"name": name})
    data, br, headers = www[name]
    if br:
        # The body depends on Accept-Encoding, so caches must key on it.
        # Copy the dict: the cached headers are shared between requests.
        headers = {**headers, "vary": "accept-encoding"}
    if req.headers.if_none_match == headers["etag"]:
        # The client has it cached, respond 304 Not Modified
        return empty(304, headers=headers)
    # Brotli compressed?
    if br and _accepts_brotli(req.headers.accept_encoding):
        headers = {**headers, "content-encoding": "br"}
        data = br
    return raw(data, headers=headers)
|
|
|
|
|
|
|
|
|
2023-11-12 19:35:20 +00:00
|
|
|
def get_files(wanted: set) -> list[tuple[PurePosixPath, Path]]:
    """Collect the entries selected by ``wanted`` and everything below them.

    Walks ``watching.state.root`` in order while holding its lock, rebuilding
    each entry's path from its ``level`` and ``name``. An entry whose ``key``
    is in ``wanted`` starts a selection that lasts until an entry at the same
    or a shallower level is reached.

    Args:
        wanted: Keys to look for. This set is modified: the key of every
            collected entry is discarded, so whatever remains afterwards was
            not found.

    Returns:
        Pairs of (path relative to the selected entry's parent, filesystem
        path under ``watching.rootpath``).
    """
    found = []
    current = PurePosixPath()
    sel_level: int | None = None
    sel_parent: PurePosixPath | None = None
    with watching.state.lock:
        for entry in watching.state.root:
            current = PurePosixPath(*current.parts[: entry.level - 1]) / entry.name
            # Moved out of the subtree of the last selected entry
            if sel_parent is not None and entry.level <= sel_level:
                sel_level = sel_parent = None
            if entry.key in wanted:
                sel_level, sel_parent = entry.level, current.parent
            if sel_parent is None:
                continue
            wanted.discard(entry.key)
            relative = current.relative_to(sel_parent)
            found.append((relative, watching.rootpath / current))
    return found
|
2023-11-11 10:48:33 +00:00
|
|
|
|
|
|
|
|
|
|
|
@app.get("/zip/<keys>/<zipfile:ext=zip>")
async def zip_download(req, keys, zipfile, ext):
    """Download a zip archive of the given keys

    ``keys`` is a "+"-separated list of entry keys. The archive is produced by
    a worker thread and streamed through a one-slot queue, so the event loop
    is never blocked by file I/O.

    Raises:
        NotFound: if no files matched, or if some of the keys were not found.
    """

    wanted = set(keys.split("+"))
    files = get_files(wanted)

    if not files:
        raise NotFound(
            "No files found",
            context={"keys": keys, "zipfile": f"{zipfile}.{ext}", "wanted": wanted},
        )
    # get_files() discards every key it found; leftovers are missing
    if wanted:
        raise NotFound("Files not found", context={"missing": wanted})

    def local_files(files):
        """Yield one stream_zip member tuple per collected path."""
        for rel, p in files:
            s = p.stat()
            size = s.st_size
            modified = datetime.datetime.fromtimestamp(s.st_mtime, datetime.UTC)
            name = rel.as_posix()
            if p.is_dir():
                yield f"{name}/", modified, S_IFDIR | 0o755, ZIP_AUTO(size), iter(b"")
            else:
                yield name, modified, S_IFREG | 0o644, ZIP_AUTO(size), contents(p, size)

    def contents(name, size):
        """Yield exactly ``size`` bytes of the file in chunks of up to 1 MiB."""
        with name.open("rb") as f:
            while size > 0 and (chunk := f.read(min(size, 1 << 20))):
                size -= len(chunk)
                yield chunk
        # Explicit check instead of assert, which is stripped under -O: a file
        # that shrank after stat() must fail the download, not be truncated.
        if size != 0:
            raise RuntimeError(f"File size changed while creating ZIP: {name}")

    def worker():
        """Produce the archive in a thread, handing chunks to the event loop."""
        try:
            for chunk in stream_zip(local_files(files)):
                # .result() blocks until the consumer has taken the chunk
                asyncio.run_coroutine_threadsafe(queue.put(chunk), loop).result()
        except Exception:
            logging.exception("Error streaming ZIP")
            raise
        finally:
            # None marks the end of the stream, also after an error
            asyncio.run_coroutine_threadsafe(queue.put(None), loop)

    # Don't block the event loop: run in a thread
    queue = asyncio.Queue(maxsize=1)
    loop = asyncio.get_event_loop()
    thread = loop.run_in_executor(app.ctx.threadexec, worker)

    # Stream the response
    res = await req.respond(
        content_type="application/zip",
        headers={"cache-control": "no-store"},
    )
    while chunk := await queue.get():
        await res.send(chunk)

    await thread  # If it raises, the response will fail download
|