Major upgrades, much code and docs rewritten.

This commit is contained in:
Leo Vasanko 2023-10-17 21:33:31 +03:00 committed by Leo Vasanko
parent 27b89d6d38
commit bd680e3668
8 changed files with 455 additions and 131 deletions

View File

@ -1,12 +1,10 @@
# Web File Storage # Web File Storage
Development install: Run directly from repository with Hatch (or use pip install as usual):
```sh ```sh
pip install hatch --break-system-packages hatch run sanic cista --reload --dev
hatch run sanic cista --reload --dev # Runs on localhost:8000
``` ```
Environment variable `STORAGE=<path>` may be used to choose which folder it serves. The default is current directory. Configuration file is created `.cista.toml` in current directory, which is also shared by default. Edit while the server is not running to set share path and other parameters.
No authentication is supported, so implement access control externally or be careful with your files. No authentication yet, so implement access control externally or be careful with your files.

View File

@ -1,13 +1,10 @@
import asyncio import asyncio
from importlib.resources import files from importlib.resources import files
from pathlib import Path
import msgspec import msgspec
from sanic import Sanic from sanic import Sanic
from sanic.log import logger from sanic.log import logger
from sanic.response import html from sanic.response import html
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
from . import watching from . import watching
from .fileio import ROOT, FileServer from .fileio import ROOT, FileServer
@ -15,6 +12,7 @@ from .protocol import ErrorMsg, FileRange, StatusMsg
app = Sanic("cista") app = Sanic("cista")
fileserver = FileServer() fileserver = FileServer()
watching.register(app, "/api/watch")
def asend(ws, msg): def asend(ws, msg):
return ws.send(msg if isinstance(msg, bytes) else msgspec.json.encode(msg).decode()) return ws.send(msg if isinstance(msg, bytes) else msgspec.json.encode(msg).decode())
@ -27,22 +25,6 @@ async def start_fileserver(app, _):
async def stop_fileserver(app, _): async def stop_fileserver(app, _):
await fileserver.stop() await fileserver.stop()
@app.before_server_start
async def start_watcher(app, _):
class Handler(FileSystemEventHandler):
def on_any_event(self, event):
watching.update(Path(event.src_path).relative_to(ROOT))
app.ctx.observer = Observer()
app.ctx.observer.schedule(Handler(), str(ROOT), recursive=True)
app.ctx.observer.start()
@app.after_server_stop
async def stop_watcher(app, _):
app.ctx.observer.stop()
app.ctx.observer.join()
@app.get("/") @app.get("/")
async def index_page(request): async def index_page(request):
index = files("cista").joinpath("static", "index.html").read_text() index = files("cista").joinpath("static", "index.html").read_text()
@ -50,16 +32,6 @@ async def index_page(request):
app.static("/files", ROOT, use_content_range=True, stream_large_files=True, directory_view=True) app.static("/files", ROOT, use_content_range=True, stream_large_files=True, directory_view=True)
@app.websocket('/api/watch')
async def watch(request, ws):
try:
q = watching.pubsub[ws] = asyncio.Queue()
await asend(ws, {"root": watching.tree})
while True:
await asend(ws, await q.get())
finally:
del watching.pubsub[ws]
@app.websocket('/api/upload') @app.websocket('/api/upload')
async def upload(request, ws): async def upload(request, ws):
alink = fileserver.alink alink = fileserver.alink
@ -78,24 +50,28 @@ async def upload(request, ws):
d = f"{len(data)} bytes" if isinstance(data, bytes) else data d = f"{len(data)} bytes" if isinstance(data, bytes) else data
raise ValueError(f"Expected {req.end - pos} more bytes, got {d}") raise ValueError(f"Expected {req.end - pos} more bytes, got {d}")
# Report success # Report success
res = StatusMsg(status="upload", url=url, req=req) res = StatusMsg(status="ack", req=req)
await asend(ws, res) await asend(ws, res)
print(res) await ws.drain()
except Exception as e: except Exception as e:
res = ErrorMsg(error=str(e), url=url, req=req) res = ErrorMsg(error=str(e), req=req)
await asend(ws, res) await asend(ws, res)
logger.exception(repr(res), e) logger.exception(repr(res), e)
return return
@app.websocket("/ws")
async def ws(request, ws):
while True:
data = await ws.recv()
await ws.send(data)
@app.websocket('/api/download') @app.websocket('/api/download')
async def download(request, ws): async def download(request, ws):
alink = fileserver.alink alink = fileserver.alink
url = request.url_for("download")
while True: while True:
req = None req = None
try: try:
print("Waiting for download command")
text = await ws.recv() text = await ws.recv()
if not isinstance(text, str): if not isinstance(text, str):
raise ValueError(f"Expected JSON control, got binary len(data) = {len(text)}") raise ValueError(f"Expected JSON control, got binary len(data) = {len(text)}")
@ -108,12 +84,14 @@ async def download(request, ws):
await asend(ws, data) await asend(ws, data)
pos += len(data) pos += len(data)
# Report success # Report success
res = StatusMsg(status="download", url=url, req=req) res = StatusMsg(status="ack", req=req)
await asend(ws, res) await asend(ws, res)
print(ws, dir(ws))
await ws.drain()
print(res) print(res)
except Exception as e: except Exception as e:
res = ErrorMsg(error=str(e), url=url, req=req) res = ErrorMsg(error=str(e), req=req)
await asend(ws, res) await asend(ws, res)
logger.exception(repr(res), e) logger.exception(repr(res), e)
return return

156
cista/config.py Normal file
View File

@ -0,0 +1,156 @@
from __future__ import annotations
import hmac
import re
import secrets
from functools import wraps
from hashlib import sha256
from pathlib import Path, PurePath
from time import time
from unicodedata import normalize
import argon2
import msgspec
# Password hashing (argon2) and a detector for legacy droppy "hash$salt" hashes
_argon = argon2.PasswordHasher()
_droppyhash = re.compile(r'^([a-f0-9]{64})\$([a-f0-9]{8})$')
class Config(msgspec.Struct):
    """Top-level application configuration, serialized to .cista.toml."""
    # NOTE(review): defaults are evaluated once at class definition time.
    # msgspec copies mutable defaults per instance, so the dicts are safe,
    # but `secret` is generated once per process — confirm a single
    # per-process token is intended rather than one per Config().
    path: Path = Path.cwd()
    secret: str = secrets.token_hex(12)
    public: bool = False
    users: dict[str, User] = {}
    sessions: dict[str, Session] = {}
    links: dict[str, Link] = {}
class User(msgspec.Struct, omit_defaults=True):
    """Account record. An empty hash disables login (see login())."""
    privileged: bool = False  # elevated rights
    hash: str = ""  # argon2 hash, or legacy droppy "hex$salt" format
    lastSeen: int = 0  # unix timestamp, updated on successful login

    def set_password(self, password: str):
        """Store an argon2 hash of the normalized password."""
        self.hash = _argon.hash(_pwnorm(password))
class Session(msgspec.Struct):
    """Server-side login session, keyed by session id in Config.sessions."""
    username: str  # key into Config.users
    lastSeen: int  # unix timestamp set at login
class Link(msgspec.Struct, omit_defaults=True):
    """A shared link to a location in the storage."""
    location: str  # shared path
    creator: str = ""  # username of the sharer, if any
    expires: int = 0  # unix timestamp; 0 presumably means never — TODO confirm
# Module-wide configuration instance; replaced atomically by config_update().
config = Config()
def derived_secret(*params, len=8) -> bytes:
    """Derive a secret key of the given length from the main secret.

    Every input (the configured secret plus *params) is hashed individually
    first, so variable-length parts cannot collide across boundaries, and
    the concatenated digests are hashed once more to produce the output.
    """
    hashed_parts = []
    for p in [config.secret, *params]:
        data = p if isinstance(p, bytes) else f"{p}".encode()
        hashed_parts.append(sha256(data).digest())
    # Truncate the final digest to the requested number of bytes
    return sha256(b"".join(hashed_parts)).digest()[:len]
def _pwnorm(password):
return normalize('NFC', password).strip().encode()
def login(username: str, password: str):
    """Authenticate a user, returning (User, session_id).

    Supports legacy droppy sha256 hashes and transparently upgrades them
    (and outdated argon2 parameters) to current argon2 settings on a
    successful login.

    Raises:
        ValueError: unknown username, disabled account or wrong password.
    """
    # NOTE(review): distinct messages for bad username vs. bad password
    # allow account enumeration; consider a single generic message.
    un = _pwnorm(username)
    pw = _pwnorm(password)
    try:
        u = config.users[un.decode()]
    except KeyError:
        # from None: don't leak the KeyError chain into error reports
        raise ValueError("Invalid username") from None
    # Verify password
    if not u.hash:
        raise ValueError("Account disabled")
    if (m := _droppyhash.match(u.hash)) is not None:
        # Legacy droppy format: hex digest "$" hex salt
        h, s = m.groups()
        h2 = hmac.digest(pw + s.encode() + un, b"", "sha256").hex()
        if not hmac.compare_digest(h, h2):
            raise ValueError("Invalid password")
        # Droppy hashes are weak, do a hash update
        u.set_password(password)
    else:
        try:
            _argon.verify(u.hash, pw)
        except Exception:
            # from None: hide argon2 internals from the traceback
            raise ValueError("Invalid password") from None
        if _argon.check_needs_rehash(u.hash):
            u.set_password(password)
    # Login successful: record activity and create a session
    now = int(time())
    u.lastSeen = now
    sid = secrets.token_urlsafe(12)
    config.sessions[sid] = Session(username, now)
    return u, sid
def enc_hook(obj):
    """msgspec encoder hook: serialize path objects as POSIX strings."""
    if not isinstance(obj, PurePath):
        raise TypeError  # let msgspec report the unsupported type
    return obj.as_posix()
def dec_hook(typ, obj):
    """msgspec decoder hook: parse Path fields from plain strings."""
    if typ is not Path:
        raise TypeError  # let msgspec report the unsupported target type
    return Path(obj)
# Config file lives in the working directory; see config_update() for writes.
conffile = Path.cwd() / ".cista.toml"
def config_update(modify):
    """Atomically update the config file, guarding against parallel writers.

    `modify` receives the current Config (defaults if the file is missing)
    and returns the Config to store. A temp file beside the config acts as
    a write lock; a stale one (older than one second) is removed.

    Returns:
        "collision" - another writer holds the temp file; caller may retry
        "read"      - modify() made no changes; config (re)loaded from disk
        "modified"  - existing config file updated
        "created"   - config file created anew
    """
    global config
    tmpname = conffile.with_suffix(".tmp")
    try:
        f = tmpname.open("xb")
    except FileExistsError:
        # Clean up a temp file left by a crashed writer, still report collision
        if tmpname.stat().st_mtime < time() - 1:
            tmpname.unlink()
        return "collision"
    try:
        # Load, modify and save with atomic replace
        try:
            old = conffile.read_bytes()
            c = msgspec.toml.decode(old, type=Config, dec_hook=dec_hook)
        except FileNotFoundError:
            old = b""
            c = Config()  # Initialize with defaults
        c = modify(c)
        new = msgspec.toml.encode(c, enc_hook=enc_hook)
        if old == new:
            f.close()
            tmpname.unlink()
            config = c
            return "read"
        f.write(new)
        f.close()
        # replace() overwrites atomically on all platforms; rename() would
        # raise FileExistsError on Windows when the config already exists.
        tmpname.replace(conffile)
    except BaseException:  # was a bare except; still re-raised unconditionally
        f.close()
        tmpname.unlink()
        raise
    config = c
    return "modified" if old else "created"
def modifies_config(modify):
    """Decorator for functions that modify the config file.

    The wrapped function receives the current Config as its first argument
    and returns the new Config; the config_update() status string is
    returned to the caller. Retries while another writer holds the lock.
    """
    @wraps(modify)
    def wrapper(*args, **kwargs):
        # BUG FIX: the module does `from time import time`, so the old
        # `time.sleep(0.01)` raised AttributeError (time is a function here).
        from time import sleep

        def m(c):
            return modify(c, *args, **kwargs)

        # Retry modification in case of write collision
        while (c := config_update(m)) == "collision":
            sleep(0.01)
        return c
    return wrapper
@modifies_config
def droppy_import(config: Config) -> Config:
    """Import settings and users from a droppy installation (~/.droppy).

    The passed-in `config` (current settings) is ignored; droppy's
    config.json and db.json are merged and converted into a fresh Config.
    """
    p = Path.home() / ".droppy/config"
    cf = msgspec.json.decode((p / "config.json").read_bytes())
    db = msgspec.json.decode((p / "db.json").read_bytes())
    return msgspec.convert(cf | db, Config)
# Load/initialize config file
# Import-time side effect: creates/loads .cista.toml and prints its path.
print(conffile, config_update(lambda c: c))

View File

@ -1,20 +1,28 @@
import asyncio import asyncio
import os import os
import unicodedata import unicodedata
from pathlib import Path from pathlib import Path, PurePosixPath
from pathvalidate import sanitize_filepath from pathvalidate import sanitize_filepath
from . import config
from .asynclink import AsyncLink from .asynclink import AsyncLink
from .lrucache import LRUCache from .lrucache import LRUCache
ROOT = Path(os.environ.get("STORAGE", Path.cwd())) ROOT = config.config.path
print("Serving", ROOT)
def sanitize_filename(filename): def fuid(stat) -> str:
"""Unique file ID. Stays the same on renames and modification."""
return config.derived_secret("filekey-inode", stat.st_dev, stat.st_ino).hex()
def sanitize_filename(filename: str) -> str:
filename = unicodedata.normalize("NFC", filename) filename = unicodedata.normalize("NFC", filename)
# UNIX filenames can contain backslashes but for compatibility we replace them with dashes
filename = filename.replace("\\", "-")
filename = sanitize_filepath(filename) filename = sanitize_filepath(filename)
filename = filename.replace("/", "-") filename = filename.strip("/")
return filename return PurePosixPath(filename).as_posix()
class File: class File:
def __init__(self, filename): def __init__(self, filename):

View File

@ -15,11 +15,9 @@ class FileRange(msgspec.Struct):
class ErrorMsg(msgspec.Struct): class ErrorMsg(msgspec.Struct):
error: str error: str
req: FileRange req: FileRange
url: str
class StatusMsg(msgspec.Struct): class StatusMsg(msgspec.Struct):
status: str status: str
url: str
req: FileRange req: FileRange
@ -32,7 +30,38 @@ class FileEntry(msgspec.Struct):
class DirEntry(msgspec.Struct): class DirEntry(msgspec.Struct):
size: int size: int
mtime: int mtime: int
dir: Dict[str, Union[FileEntry, DirEntry]] dir: DirList
def __getitem__(self, name):
return self.dir[name]
def __setitem__(self, name, value):
self.dir[name] = value
def __contains__(self, name):
return name in self.dir
def __delitem__(self, name):
del self.dir[name]
@property
def props(self):
return {
k: v
for k, v in self.__struct_fields__
if k != "dir"
}
DirList = dict[str, Union[FileEntry, DirEntry]]
class UpdateEntry(msgspec.Struct, omit_defaults=True):
"""Updates the named entry in the tree. Fields that are set replace old values. A list of entries recurses directories."""
name: str = ""
deleted: bool = False
size: int | None = None
mtime: int | None = None
dir: DirList | None = None
def make_dir_data(root): def make_dir_data(root):
if len(root) == 2: if len(root) == 2:

View File

@ -1,5 +1,26 @@
<!DOCTYPE html> <!DOCTYPE html>
<title>Storage</title> <title>Storage</title>
<style>
body {
font-family: sans-serif;
max-width: 100ch;
margin: 0 auto;
padding: 1em;
background-color: #333;
color: #eee;
}
td {
text-align: right;
padding: .5em;
}
td:first-child {
text-align: left;
}
a {
color: inherit;
text-decoration: none;
}
</style>
<div> <div>
<h2>Quick file upload</h2> <h2>Quick file upload</h2>
<p>Uses parallel WebSocket connections for increased bandwidth /api/upload</p> <p>Uses parallel WebSocket connections for increased bandwidth /api/upload</p>
@ -8,84 +29,148 @@
</div> </div>
<div> <div>
<h2>File downloads (websocket)</h2> <h2>Files</h2>
<ul id=file_list></ul> <ul id=file_list></ul>
</div> </div>
<h2>File listings</h2>
<p>Plain HTML browser <a href=/files/>/files/</a></p>
<p>JSON list updated via WebSocket /api/watch:</p>
<textarea id=list style="padding: 1em; width: 80ch; height: 40ch;"></textarea>
<script> <script>
const list = document.getElementById("list")
let files = {} let files = {}
let flatfiles = {}
function createWatchSocket() { function createWatchSocket() {
const wsurl = new URL("/api/watch", location.href.replace(/^http/, 'ws')) const wsurl = new URL("/api/watch", location.href.replace(/^http/, 'ws'))
const ws = new WebSocket(wsurl) const ws = new WebSocket(wsurl)
ws.onmessage = event => { ws.onmessage = event => {
msg = JSON.parse(event.data) msg = JSON.parse(event.data)
console.log("Watch", msg) if (msg.update) {
if (msg.root) { tree_update(msg.update)
files = msg.root
file_list(files) file_list(files)
} else if (msg.update) { } else {
const {path, data} = msg.update console.log("Unknown message from watch socket", msg)
for (const p of path.split("/")) {
// TODO update files at path with new data
}
} }
list.value = JSON.stringify(files)
} }
} }
function file_list(files) {
const ul = document.getElementById("file_list")
ul.innerHTML = ""
const dir = ""
let ptr = files.dir
console.log(ptr)
for (const name of Object.keys(ptr)) {
if (ptr[name].dir) continue
const {size, mtime} = ptr[name]
const li = document.createElement("li")
const a = document.createElement("a")
ul.appendChild(li)
li.appendChild(a)
a.textContent = name
a.href = name
a.onclick = event => {
event.preventDefault()
download(name, size)
}
}
}
createWatchSocket() createWatchSocket()
async function download(name, size) { function tree_update(msg) {
let node = files
for (const elem of msg) {
if (elem.deleted) {
delete node.dir[elem.name]
delete flatfiles[p]
break
}
if (elem.name !== undefined) node = node.dir[elem.name] ||= {}
if (elem.size !== undefined) node.size = elem.size
if (elem.mtime !== undefined) node.mtime = elem.mtime
if (elem.dir !== undefined) node.dir = elem.dir
}
// Update paths and flatfiles
files.path = "/"
const nodes = [files]
flatfiles = {}
while (node = nodes.pop()) {
flatfiles[node.path] = node
if (node.dir === undefined) continue
for (const name of Object.keys(node.dir)) {
const child = node.dir[name]
child.path = node.path + name + (child.dir === undefined ? "" : "/")
nodes.push(child)
}
}
}
var collator = new Intl.Collator(undefined, {numeric: true, sensitivity: 'base'});
const compare_path = (a, b) => collator.compare(a.path, b.path)
const compare_time = (a, b) => a.mtime > b.mtime
function file_list(files) {
const table = document.getElementById("file_list")
const sorted = Object.values(flatfiles).sort(compare_time)
table.innerHTML = ""
for (const f of sorted) {
const {path, size, mtime} = f
const tr = document.createElement("tr")
const name_td = document.createElement("td")
const size_td = document.createElement("td")
const mtime_td = document.createElement("td")
const a = document.createElement("a")
table.appendChild(tr)
tr.appendChild(name_td)
tr.appendChild(size_td)
tr.appendChild(mtime_td)
name_td.appendChild(a)
size_td.textContent = size
mtime_td.textContent = formatUnixDate(mtime)
a.textContent = path
a.href = `/files${path}`
/*a.onclick = event => {
if (window.showSaveFilePicker) {
event.preventDefault()
download_ws(name, size)
}
}
a.download = ""*/
}
}
function formatUnixDate(t) {
const date = new Date(t * 1000)
const now = new Date()
const diff = date - now
const formatter = new Intl.RelativeTimeFormat('en', { numeric: 'auto' })
if (Math.abs(diff) <= 60000) {
return formatter.format(Math.round(diff / 1000), 'second')
}
if (Math.abs(diff) <= 3600000) {
return formatter.format(Math.round(diff / 60000), 'minute')
}
if (Math.abs(diff) <= 86400000) {
return formatter.format(Math.round(diff / 3600000), 'hour')
}
if (Math.abs(diff) <= 604800000) {
return formatter.format(Math.round(diff / 86400000), 'day')
}
return date.toLocaleDateString()
}
async function download_ws(name, size) {
const fh = await window.showSaveFilePicker({
suggestedName: name,
})
const writer = await fh.createWritable()
writer.truncate(size)
const wsurl = new URL("/api/download", location.href.replace(/^http/, 'ws')) const wsurl = new URL("/api/download", location.href.replace(/^http/, 'ws'))
const ws = new WebSocket(wsurl) const ws = new WebSocket(wsurl)
ws.binaryType = 'arraybuffer' let pos = 0
ws.onopen = () => { ws.onopen = () => {
console.log("Download socket connected") console.log("Downloading over WebSocket", name, size)
ws.send(JSON.stringify({name, start: 0, end: size, size})) ws.send(JSON.stringify({name, start: 0, end: size, size}))
} }
ws.onmessage = event => { ws.onmessage = event => {
const data = event.data if (typeof event.data === 'string') {
console.log("Download", data) const msg = JSON.parse(event.data)
const blob = new Blob([data], {type: "application/octet-stream"}) console.log("Download finished", msg)
const url = URL.createObjectURL(blob) ws.close()
const a = document.createElement("a") return
a.href = url }
a.download = name console.log("Received chunk", name, pos, pos + event.data.size)
a.click() pos += event.data.size
ws.close() writer.write(event.data)
} }
ws.onclose = () => { ws.onclose = () => {
console.log("Download socket disconnected") if (pos < size) {
console.log("Download aborted", name, pos)
writer.truncate(pos)
}
writer.close()
} }
} }
@ -95,7 +180,7 @@ const numConnections = 2
const chunkSize = 1<<20 const chunkSize = 1<<20
const wsConnections = new Set() const wsConnections = new Set()
for (let i = 0; i < numConnections; i++) createUploadWS() //for (let i = 0; i < numConnections; i++) createUploadWS()
function createUploadWS() { function createUploadWS() {
const wsurl = new URL("/api/upload", location.href.replace(/^http/, 'ws')) const wsurl = new URL("/api/upload", location.href.replace(/^http/, 'ws'))

View File

@ -1,19 +1,20 @@
import asyncio
import secrets import secrets
from hashlib import sha256
from pathlib import Path, PurePosixPath from pathlib import Path, PurePosixPath
import threading
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
import msgspec import msgspec
from . import config
from .fileio import ROOT from .fileio import ROOT
from .protocol import DirEntry, FileEntry from .protocol import DirEntry, FileEntry, UpdateEntry
secret = secrets.token_bytes(8) secret = secrets.token_bytes(8)
pubsub = {} pubsub = {}
def fuid(stat): def walk(path: Path = ROOT) -> DirEntry | FileEntry | None:
return sha256((stat.st_dev << 32 | stat.st_ino).to_bytes(8, 'big') + secret).hexdigest()[:16]
def walk(path: Path = ROOT):
try: try:
s = path.stat() s = path.stat()
mtime = int(s.st_mtime) mtime = int(s.st_mtime)
@ -27,32 +28,99 @@ def walk(path: Path = ROOT):
else: else:
size = 0 size = 0
return DirEntry(size, mtime, tree) return DirEntry(size, mtime, tree)
except FileNotFoundError:
return None
except OSError as e: except OSError as e:
print("OS error walking path", path, e) print("OS error walking path", path, e)
return None return None
tree = walk() tree = {"": walk()}
tree_lock = threading.Lock()
def update(relpath: PurePosixPath): def refresh():
ptr = tree.dir root = tree[""]
path = ROOT return msgspec.json.encode({"update": [
name = "" UpdateEntry(size=root.size, mtime=root.mtime, dir=root.dir)
for name in relpath.parts[:-1]: ]}).decode()
path /= name
try: def update(relpath: PurePosixPath, loop):
ptr = ptr[name].dir new = walk(ROOT / relpath)
except KeyError: with tree_lock:
msg = update_internal(relpath, new)
print(msg)
asyncio.run_coroutine_threadsafe(broadcast(msg), loop)
def update_internal(relpath: PurePosixPath, new: DirEntry | FileEntry | None):
path = "", *relpath.parts
old = tree
elems = []
for name in path:
if name not in old:
# File or folder created
old = None
elems.append((name, None))
if len(elems) < len(path):
raise ValueError("Tree out of sync")
break break
new = walk(path) old = old[name]
old = ptr.pop(name, None) elems.append((name, old))
if new is not None:
ptr[name] = new
if old == new: if old == new:
return return # No changes
print("Update", relpath) mt = new.mtime if new else 0
# TODO: update parents size/mtime szdiff = (new.size if new else 0) - (old.size if old else 0)
msg = msgspec.json.encode({"update": { # Update parents
"path": relpath.as_posix(), update = []
"data": new, for name, entry in elems[:-1]:
}}) u = UpdateEntry(name)
for queue in pubsub.values(): queue.put_nowait(msg) if szdiff:
entry.size += szdiff
u.size = entry.size
if mt > entry.mtime:
u.mtime = entry.mtime = mt
update.append(u)
# The last element is the one that changed
print([e[0] for e in elems])
name, entry = elems[-1]
parent = elems[-2][1] if len(elems) > 1 else tree
u = UpdateEntry(name)
if new:
parent[name] = new
if u.size != new.size: u.size = new.size
if u.mtime != new.mtime: u.mtime = new.mtime
if isinstance(new, DirEntry):
if u.dir == new.dir: u.dir = new.dir
else:
del parent[name]
u.deleted = True
update.append(u)
return msgspec.json.encode({"update": update}).decode()
async def broadcast(msg):
for queue in pubsub.values():
await queue.put_nowait(msg)
def register(app, url):
@app.before_server_start
async def start_watcher(app, loop):
class Handler(FileSystemEventHandler):
def on_any_event(self, event):
update(Path(event.src_path).relative_to(ROOT), loop)
app.ctx.observer = Observer()
app.ctx.observer.schedule(Handler(), str(ROOT), recursive=True)
app.ctx.observer.start()
@app.after_server_stop
async def stop_watcher(app, _):
app.ctx.observer.stop()
app.ctx.observer.join()
@app.websocket(url)
async def watch(request, ws):
try:
with tree_lock:
q = pubsub[ws] = asyncio.Queue()
await ws.send(refresh())
while True:
await ws.send(await q.get())
finally:
del pubsub[ws]

View File

@ -14,9 +14,11 @@ authors = [
classifiers = [ classifiers = [
] ]
dependencies = [ dependencies = [
"argon2-cffi",
"msgspec", "msgspec",
"pathvalidate", "pathvalidate",
"sanic", "sanic",
"tomli_w",
"watchdog", "watchdog",
] ]