From bd680e36681be2a31b6e8b2c9388dc79407b0926 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Tue, 17 Oct 2023 21:33:31 +0300 Subject: [PATCH] Major upgrades, much code and docs rewritten. --- README.md | 10 +-- cista/app.py | 50 +++-------- cista/config.py | 156 ++++++++++++++++++++++++++++++++ cista/fileio.py | 18 ++-- cista/protocol.py | 35 +++++++- cista/static/index.html | 191 +++++++++++++++++++++++++++++----------- cista/watching.py | 124 ++++++++++++++++++++------ pyproject.toml | 2 + 8 files changed, 455 insertions(+), 131 deletions(-) create mode 100644 cista/config.py diff --git a/README.md b/README.md index 7356e4b..9629b1b 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,10 @@ # Web File Storage -Development install: - +Run directly from repository with Hatch (or use pip install as usual): ```sh -pip install hatch --break-system-packages -hatch run sanic cista --reload --dev # Runs on localhost:8000 +hatch run sanic cista --reload --dev ``` -Environment variable `STORAGE=` may be used to choose which folder it serves. The default is current directory. +Configuration file is created `.cista.toml` in current directory, which is also shared by default. Edit while the server is not running to set share path and other parameters. -No authentication is supported, so implement access control externally or be careful with your files. +No authentication yet, so implement access control externally or be careful with your files. diff --git a/cista/app.py b/cista/app.py index 8e13304..a5272e5 100644 --- a/cista/app.py +++ b/cista/app.py @@ -1,13 +1,10 @@ import asyncio from importlib.resources import files -from pathlib import Path import msgspec from sanic import Sanic from sanic.log import logger from sanic.response import html -from watchdog.events import FileSystemEventHandler -from watchdog.observers import Observer from . import watching from .fileio import ROOT, FileServer @@ -15,6 +12,7 @@ from .protocol import ErrorMsg, FileRange, StatusMsg app = Sanic("cista") fileserver = FileServer() +watching.register(app, "/api/watch") def asend(ws, msg): return ws.send(msg if isinstance(msg, bytes) else msgspec.json.encode(msg).decode()) @@ -27,22 +25,6 @@ async def start_fileserver(app, _): async def stop_fileserver(app, _): await fileserver.stop() -@app.before_server_start -async def start_watcher(app, _): - class Handler(FileSystemEventHandler): - def on_any_event(self, event): - watching.update(Path(event.src_path).relative_to(ROOT)) - app.ctx.observer = Observer() - app.ctx.observer.schedule(Handler(), str(ROOT), recursive=True) - app.ctx.observer.start() - -@app.after_server_stop -async def stop_watcher(app, _): - app.ctx.observer.stop() - app.ctx.observer.join() - - - @app.get("/") async def index_page(request): index = files("cista").joinpath("static", "index.html").read_text() @@ -50,16 +32,6 @@ async def index_page(request): app.static("/files", ROOT, use_content_range=True, stream_large_files=True, directory_view=True) -@app.websocket('/api/watch') -async def watch(request, ws): - try: - q = watching.pubsub[ws] = asyncio.Queue() - await asend(ws, {"root": watching.tree}) - while True: - await asend(ws, await q.get()) - finally: - del watching.pubsub[ws] - @app.websocket('/api/upload') async def upload(request, ws): alink = fileserver.alink @@ -78,24 +50,28 @@ async def upload(request, ws): d = f"{len(data)} bytes" if isinstance(data, bytes) else data raise ValueError(f"Expected {req.end - pos} more bytes, got {d}") # Report success - res = StatusMsg(status="upload", url=url, req=req) + res = StatusMsg(status="ack", req=req) await asend(ws, res) - print(res) - + await ws.drain() except Exception as e: - res = ErrorMsg(error=str(e), url=url, req=req) + res = ErrorMsg(error=str(e), req=req) await asend(ws, res) logger.exception(repr(res), e) return +@app.websocket("/ws") +async def ws(request, ws): + while True: + data = await ws.recv() + await ws.send(data) @app.websocket('/api/download') async def download(request, ws): alink = fileserver.alink - url = request.url_for("download") while True: req = None try: + print("Waiting for download command") text = await ws.recv() if not isinstance(text, str): raise ValueError(f"Expected JSON control, got binary len(data) = {len(text)}") @@ -108,12 +84,14 @@ async def download(request, ws): await asend(ws, data) pos += len(data) # Report success - res = StatusMsg(status="download", url=url, req=req) + res = StatusMsg(status="ack", req=req) await asend(ws, res) + print(ws, dir(ws)) + await ws.drain() print(res) except Exception as e: - res = ErrorMsg(error=str(e), url=url, req=req) + res = ErrorMsg(error=str(e), req=req) await asend(ws, res) logger.exception(repr(res), e) return diff --git a/cista/config.py b/cista/config.py new file mode 100644 index 0000000..d2c7938 --- /dev/null +++ b/cista/config.py @@ -0,0 +1,156 @@ +from __future__ import annotations + +import hmac +import re +import secrets +from functools import wraps +from hashlib import sha256 +from pathlib import Path, PurePath +from time import time +from unicodedata import normalize + +import argon2 +import msgspec + +_argon = argon2.PasswordHasher() +_droppyhash = re.compile(r'^([a-f0-9]{64})\$([a-f0-9]{8})$') + +class Config(msgspec.Struct): + path: Path = Path.cwd() + secret: str = secrets.token_hex(12) + public: bool = False + users: dict[str, User] = {} + sessions: dict[str, Session] = {} + links: dict[str, Link] = {} + +class User(msgspec.Struct, omit_defaults=True): + privileged: bool = False + hash: str = "" + lastSeen: int = 0 + + def set_password(self, password: str): + self.hash = _argon.hash(_pwnorm(password)) + +class Session(msgspec.Struct): + username: str + lastSeen: int + +class Link(msgspec.Struct, omit_defaults=True): + location: str + creator: str = "" + expires: int = 0 + +config = Config() + +def derived_secret(*params, len=8) -> bytes: + """Used to derive secret keys from the main secret""" + # Each part is made the same length by hashing first + combined = b"".join( + sha256( + p if isinstance(p, bytes) else f"{p}".encode() + ).digest() + for p in [config.secret, *params] + ) + # Output a bytes of the desired length + return sha256(combined).digest()[:len] + +def _pwnorm(password): + return normalize('NFC', password).strip().encode() + +def login(username: str, password: str): + un = _pwnorm(username) + pw = _pwnorm(password) + try: + u = config.users[un.decode()] + except KeyError: + raise ValueError("Invalid username") + # Verify password + if not u.hash: + raise ValueError("Account disabled") + if (m := _droppyhash.match(u.hash)) is not None: + h, s = m.groups() + h2 = hmac.digest(pw + s.encode() + un, b"", "sha256").hex() + if not hmac.compare_digest(h, h2): + raise ValueError("Invalid password") + # Droppy hashes are weak, do a hash update + u.set_password(password) + else: + try: + _argon.verify(u.hash, pw) + except Exception: + raise ValueError("Invalid password") + if _argon.check_needs_rehash(u.hash): + u.set_password(password) + # Login successful + now = int(time()) + u.lastSeen = now + sid = secrets.token_urlsafe(12) + config.sessions[sid] = Session(username, now) + return u, sid + +def enc_hook(obj): + if isinstance(obj, PurePath): + return obj.as_posix() + raise TypeError + +def dec_hook(typ, obj): + if typ is Path: + return Path(obj) + raise TypeError + +conffile = Path.cwd() / ".cista.toml" + +def config_update(modify): + global config + tmpname = conffile.with_suffix(".tmp") + try: + f = tmpname.open("xb") + except FileExistsError: + if tmpname.stat().st_mtime < time() - 1: + tmpname.unlink() + return "collision" + try: + # Load, modify and save with atomic replace + try: + old = conffile.read_bytes() + c = msgspec.toml.decode(old, type=Config, dec_hook=dec_hook) + except FileNotFoundError: + old = b"" + c = Config() # Initialize with defaults + c = modify(c) + new = msgspec.toml.encode(c, enc_hook=enc_hook) + if old == new: + f.close() + tmpname.unlink() + config = c + return "read" + f.write(new) + f.close() + tmpname.rename(conffile) # Atomic replace + except: + f.close() + tmpname.unlink() + raise + config = c + return "modified" if old else "created" + +def modifies_config(modify): + """Decorator for functions that modify the config file""" + @wraps(modify) + def wrapper(*args, **kwargs): + m = lambda c: modify(c, *args, **kwargs) + # Retry modification in case of write collision + while (c := config_update(m)) == "collision": + time.sleep(0.01) + return c + return wrapper + +@modifies_config +def droppy_import(config: Config) -> Config: + p = Path.home() / ".droppy/config" + cf = msgspec.json.decode((p / "config.json").read_bytes()) + db = msgspec.json.decode((p / "db.json").read_bytes()) + return msgspec.convert(cf | db, Config) + +# Load/initialize config file +print(conffile, config_update(lambda c: c)) diff --git a/cista/fileio.py b/cista/fileio.py index a369bc4..9f6e050 100644 --- a/cista/fileio.py +++ b/cista/fileio.py @@ -1,20 +1,28 @@ import asyncio import os import unicodedata -from pathlib import Path +from pathlib import Path, PurePosixPath from pathvalidate import sanitize_filepath +from . import config from .asynclink import AsyncLink from .lrucache import LRUCache -ROOT = Path(os.environ.get("STORAGE", Path.cwd())) +ROOT = config.config.path +print("Serving", ROOT) -def sanitize_filename(filename): +def fuid(stat) -> str: + """Unique file ID. Stays the same on renames and modification.""" + return config.derived_secret("filekey-inode", stat.st_dev, stat.st_ino).hex() + +def sanitize_filename(filename: str) -> str: filename = unicodedata.normalize("NFC", filename) + # UNIX filenames can contain backslashes but for compatibility we replace them with dashes + filename = filename.replace("\\", "-") filename = sanitize_filepath(filename) - filename = filename.replace("/", "-") - return filename + filename = filename.strip("/") + return PurePosixPath(filename).as_posix() class File: def __init__(self, filename): diff --git a/cista/protocol.py b/cista/protocol.py index 8d1b8ed..f24966d 100644 --- a/cista/protocol.py +++ b/cista/protocol.py @@ -15,11 +15,9 @@ class FileRange(msgspec.Struct): class ErrorMsg(msgspec.Struct): error: str req: FileRange - url: str class StatusMsg(msgspec.Struct): status: str - url: str req: FileRange @@ -32,7 +30,38 @@ class FileEntry(msgspec.Struct): class DirEntry(msgspec.Struct): size: int mtime: int - dir: Dict[str, Union[FileEntry, DirEntry]] + dir: DirList + + def __getitem__(self, name): + return self.dir[name] + + def __setitem__(self, name, value): + self.dir[name] = value + + def __contains__(self, name): + return name in self.dir + + def __delitem__(self, name): + del self.dir[name] + + @property + def props(self): + return { + k: v + for k, v in self.__struct_fields__ + if k != "dir" + } + +DirList = dict[str, Union[FileEntry, DirEntry]] + + +class UpdateEntry(msgspec.Struct, omit_defaults=True): + """Updates the named entry in the tree. Fields that are set replace old values. A list of entries recurses directories.""" + name: str = "" + deleted: bool = False + size: int | None = None + mtime: int | None = None + dir: DirList | None = None def make_dir_data(root): if len(root) == 2: diff --git a/cista/static/index.html b/cista/static/index.html index 5e63ced..3e76385 100644 --- a/cista/static/index.html +++ b/cista/static/index.html @@ -1,5 +1,26 @@ Storage +

Quick file upload

Uses parallel WebSocket connections for increased bandwidth /api/upload

@@ -8,84 +29,148 @@
-

File downloads (websocket)

+

Files

    -

    File listings

    -

    Plain HTML browser /files/

    - -

    JSON list updated via WebSocket /api/watch:

    - -