Major upgrades, much code and docs rewritten.

This commit is contained in:
Leo Vasanko 2023-10-17 21:33:31 +03:00 committed by Leo Vasanko
parent 27b89d6d38
commit bd680e3668
8 changed files with 455 additions and 131 deletions

View File

@ -1,12 +1,10 @@
# Web File Storage
Development install:
Run directly from repository with Hatch (or use pip install as usual):
```sh
pip install hatch --break-system-packages
hatch run sanic cista --reload --dev # Runs on localhost:8000
hatch run sanic cista --reload --dev
```
Environment variable `STORAGE=<path>` may be used to choose which folder it serves. The default is current directory.
A configuration file `.cista.toml` is created in the current directory, which is also shared by default. Edit it while the server is not running to set the share path and other parameters.
No authentication is supported, so implement access control externally or be careful with your files.
No authentication yet, so implement access control externally or be careful with your files.

View File

@ -1,13 +1,10 @@
import asyncio
from importlib.resources import files
from pathlib import Path
import msgspec
from sanic import Sanic
from sanic.log import logger
from sanic.response import html
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
from . import watching
from .fileio import ROOT, FileServer
@ -15,6 +12,7 @@ from .protocol import ErrorMsg, FileRange, StatusMsg
app = Sanic("cista")
fileserver = FileServer()
watching.register(app, "/api/watch")
def asend(ws, msg):
    """Send a message on the websocket, JSON-encoding anything that isn't raw bytes."""
    if isinstance(msg, bytes):
        payload = msg
    else:
        payload = msgspec.json.encode(msg).decode()
    return ws.send(payload)
@ -27,22 +25,6 @@ async def start_fileserver(app, _):
async def stop_fileserver(app, _):
await fileserver.stop()
@app.before_server_start
async def start_watcher(app, _):
class Handler(FileSystemEventHandler):
def on_any_event(self, event):
watching.update(Path(event.src_path).relative_to(ROOT))
app.ctx.observer = Observer()
app.ctx.observer.schedule(Handler(), str(ROOT), recursive=True)
app.ctx.observer.start()
@app.after_server_stop
async def stop_watcher(app, _):
app.ctx.observer.stop()
app.ctx.observer.join()
@app.get("/")
async def index_page(request):
index = files("cista").joinpath("static", "index.html").read_text()
@ -50,16 +32,6 @@ async def index_page(request):
app.static("/files", ROOT, use_content_range=True, stream_large_files=True, directory_view=True)
@app.websocket('/api/watch')
async def watch(request, ws):
try:
q = watching.pubsub[ws] = asyncio.Queue()
await asend(ws, {"root": watching.tree})
while True:
await asend(ws, await q.get())
finally:
del watching.pubsub[ws]
@app.websocket('/api/upload')
async def upload(request, ws):
alink = fileserver.alink
@ -78,24 +50,28 @@ async def upload(request, ws):
d = f"{len(data)} bytes" if isinstance(data, bytes) else data
raise ValueError(f"Expected {req.end - pos} more bytes, got {d}")
# Report success
res = StatusMsg(status="upload", url=url, req=req)
res = StatusMsg(status="ack", req=req)
await asend(ws, res)
print(res)
await ws.drain()
except Exception as e:
res = ErrorMsg(error=str(e), url=url, req=req)
res = ErrorMsg(error=str(e), req=req)
await asend(ws, res)
logger.exception(repr(res), e)
return
@app.websocket("/ws")
async def ws(request, ws):
while True:
data = await ws.recv()
await ws.send(data)
@app.websocket('/api/download')
async def download(request, ws):
alink = fileserver.alink
url = request.url_for("download")
while True:
req = None
try:
print("Waiting for download command")
text = await ws.recv()
if not isinstance(text, str):
raise ValueError(f"Expected JSON control, got binary len(data) = {len(text)}")
@ -108,12 +84,14 @@ async def download(request, ws):
await asend(ws, data)
pos += len(data)
# Report success
res = StatusMsg(status="download", url=url, req=req)
res = StatusMsg(status="ack", req=req)
await asend(ws, res)
print(ws, dir(ws))
await ws.drain()
print(res)
except Exception as e:
res = ErrorMsg(error=str(e), url=url, req=req)
res = ErrorMsg(error=str(e), req=req)
await asend(ws, res)
logger.exception(repr(res), e)
return

156
cista/config.py Normal file
View File

@ -0,0 +1,156 @@
from __future__ import annotations
import hmac
import re
import secrets
from functools import wraps
from hashlib import sha256
from pathlib import Path, PurePath
from time import time
from unicodedata import normalize
import argon2
import msgspec
_argon = argon2.PasswordHasher()
_droppyhash = re.compile(r'^([a-f0-9]{64})\$([a-f0-9]{8})$')
class Config(msgspec.Struct):
    """Top-level application configuration, persisted to the .cista.toml file."""
    # Root directory that is served to clients
    path: Path = Path.cwd()
    # Main secret from which all other keys are derived (see derived_secret)
    # NOTE(review): the default is evaluated once at class-definition time, so
    # every Config() created in the same process shares it — confirm intended.
    secret: str = secrets.token_hex(12)
    public: bool = False
    users: dict[str, User] = {}
    sessions: dict[str, Session] = {}
    links: dict[str, Link] = {}
class User(msgspec.Struct, omit_defaults=True):
    # Elevated permissions flag (presumably admin rights — confirm with callers)
    privileged: bool = False
    # Password hash: argon2 string, or legacy Droppy "64hex$8hex" (see _droppyhash)
    hash: str = ""
    # Unix timestamp, updated on successful login (see login())
    lastSeen: int = 0
    def set_password(self, password: str):
        """Hash the NFC-normalized password with argon2 and store it."""
        self.hash = _argon.hash(_pwnorm(password))
class Session(msgspec.Struct):
    """A login session, keyed in config.sessions by a random session ID."""
    # Key into config.users (set from the name given at login)
    username: str
    # Unix timestamp set when the session was created
    lastSeen: int
class Link(msgspec.Struct, omit_defaults=True):
    """A shared link record. NOTE(review): not used in the visible code — semantics inferred from field names, confirm."""
    # Target location of the share (presumably a path)
    location: str
    # Username of the user who created the link
    creator: str = ""
    # Expiry as a Unix timestamp; 0 presumably means no expiry
    expires: int = 0
config = Config()
def derived_secret(*params, len=8) -> bytes:
    """Used to derive secret keys from the main secret"""
    def chunk(part) -> bytes:
        # Hash each part so every component contributes a fixed-size block
        data = part if isinstance(part, bytes) else f"{part}".encode()
        return sha256(data).digest()
    material = [config.secret, *params]
    combined = b"".join(chunk(part) for part in material)
    # Output a bytes of the desired length
    return sha256(combined).digest()[:len]
def _pwnorm(password):
return normalize('NFC', password).strip().encode()
def login(username: str, password: str):
    """Authenticate a user and create a session.

    Returns (user, session_id). Raises ValueError on any failure.
    NOTE(review): distinct "Invalid username"/"Invalid password" messages
    allow account enumeration — confirm this is acceptable.
    """
    un = _pwnorm(username)
    pw = _pwnorm(password)
    try:
        u = config.users[un.decode()]
    except KeyError:
        raise ValueError("Invalid username")
    # Verify password
    if not u.hash:
        # Empty hash means no password can ever match
        raise ValueError("Account disabled")
    if (m := _droppyhash.match(u.hash)) is not None:
        # Legacy Droppy-format hash: HMAC-less salted sha256 over pw+salt+username
        h, s = m.groups()
        h2 = hmac.digest(pw + s.encode() + un, b"", "sha256").hex()
        if not hmac.compare_digest(h, h2):
            raise ValueError("Invalid password")
        # Droppy hashes are weak, do a hash update
        u.set_password(password)
    else:
        try:
            _argon.verify(u.hash, pw)
        except Exception:
            raise ValueError("Invalid password")
        # Transparently upgrade if argon2 parameters have changed
        if _argon.check_needs_rehash(u.hash):
            u.set_password(password)
    # Login successful
    now = int(time())
    u.lastSeen = now
    sid = secrets.token_urlsafe(12)
    # NOTE(review): stores the caller-supplied username, not the normalized
    # un.decode() key used for lookup — confirm session consumers expect this.
    config.sessions[sid] = Session(username, now)
    return u, sid
def enc_hook(obj):
    """msgspec encoder hook: serialize path objects as POSIX strings."""
    if not isinstance(obj, PurePath):
        raise TypeError
    return obj.as_posix()
def dec_hook(typ, obj):
    """msgspec decoder hook: turn serialized strings back into Path objects."""
    if typ is not Path:
        raise TypeError
    return Path(obj)
conffile = Path.cwd() / ".cista.toml"
def config_update(modify):
    """Atomically update the config file via modify(old_config) -> new_config.

    Uses an exclusively-created .tmp file as a write lock, then atomic rename.
    Returns "collision" (another writer active, caller should retry),
    "read" (no changes), "modified" or "created".
    """
    global config
    tmpname = conffile.with_suffix(".tmp")
    try:
        f = tmpname.open("xb")
    except FileExistsError:
        # Another writer holds the tmp file; remove it if it looks abandoned
        if tmpname.stat().st_mtime < time() - 1:
            tmpname.unlink()
        return "collision"
    try:
        # Load, modify and save with atomic replace
        try:
            old = conffile.read_bytes()
            c = msgspec.toml.decode(old, type=Config, dec_hook=dec_hook)
        except FileNotFoundError:
            old = b""
            c = Config()  # Initialize with defaults
        c = modify(c)
        new = msgspec.toml.encode(c, enc_hook=enc_hook)
        if old == new:
            # Nothing changed: drop the tmp file, just refresh in-memory config
            f.close()
            tmpname.unlink()
            config = c
            return "read"
        f.write(new)
        f.close()
        tmpname.rename(conffile)  # Atomic replace
    except BaseException:  # was a bare except: — keep cleanup, stay idiomatic
        f.close()
        tmpname.unlink()
        raise
    config = c
    return "modified" if old else "created"
def modifies_config(modify):
    """Decorator for functions that modify the config file.

    The wrapped function receives the current Config as its first argument
    and returns the new one; the write is retried on tmp-file collisions.
    """
    @wraps(modify)
    def wrapper(*args, **kwargs):
        # BUGFIX: the module does `from time import time`, so the original
        # `time.sleep(0.01)` raised AttributeError on every collision.
        from time import sleep
        m = lambda c: modify(c, *args, **kwargs)
        # Retry modification in case of write collision
        while (c := config_update(m)) == "collision":
            sleep(0.01)
        return c
    return wrapper
@modifies_config
def droppy_import(config: Config) -> Config:
    """Build a Config from an existing Droppy installation's JSON files."""
    droppy_dir = Path.home() / ".droppy/config"
    settings = msgspec.json.decode((droppy_dir / "config.json").read_bytes())
    database = msgspec.json.decode((droppy_dir / "db.json").read_bytes())
    # Merge the two documents (db.json keys win) and convert to our Config
    return msgspec.convert(settings | database, Config)
# Load/initialize config file
print(conffile, config_update(lambda c: c))

View File

@ -1,20 +1,28 @@
import asyncio
import os
import unicodedata
from pathlib import Path
from pathlib import Path, PurePosixPath
from pathvalidate import sanitize_filepath
from . import config
from .asynclink import AsyncLink
from .lrucache import LRUCache
ROOT = Path(os.environ.get("STORAGE", Path.cwd()))
ROOT = config.config.path
print("Serving", ROOT)
def sanitize_filename(filename):
def fuid(stat) -> str:
    """Unique file ID. Stays the same on renames and modification."""
    # Derived from device and inode numbers, keyed with the server secret
    # via config.derived_secret, so the ID is stable but not plain metadata.
    return config.derived_secret("filekey-inode", stat.st_dev, stat.st_ino).hex()
def sanitize_filename(filename: str) -> str:
filename = unicodedata.normalize("NFC", filename)
# UNIX filenames can contain backslashes but for compatibility we replace them with dashes
filename = filename.replace("\\", "-")
filename = sanitize_filepath(filename)
filename = filename.replace("/", "-")
return filename
filename = filename.strip("/")
return PurePosixPath(filename).as_posix()
class File:
def __init__(self, filename):

View File

@ -15,11 +15,9 @@ class FileRange(msgspec.Struct):
class ErrorMsg(msgspec.Struct):
error: str
req: FileRange
url: str
class StatusMsg(msgspec.Struct):
status: str
url: str
req: FileRange
@ -32,7 +30,38 @@ class FileEntry(msgspec.Struct):
class DirEntry(msgspec.Struct):
size: int
mtime: int
dir: Dict[str, Union[FileEntry, DirEntry]]
dir: DirList
def __getitem__(self, name):
return self.dir[name]
def __setitem__(self, name, value):
self.dir[name] = value
def __contains__(self, name):
return name in self.dir
def __delitem__(self, name):
del self.dir[name]
@property
def props(self):
return {
k: v
for k, v in self.__struct_fields__
if k != "dir"
}
DirList = dict[str, Union[FileEntry, DirEntry]]
class UpdateEntry(msgspec.Struct, omit_defaults=True):
"""Updates the named entry in the tree. Fields that are set replace old values. A list of entries recurses directories."""
name: str = ""
deleted: bool = False
size: int | None = None
mtime: int | None = None
dir: DirList | None = None
def make_dir_data(root):
if len(root) == 2:

View File

@ -1,5 +1,26 @@
<!DOCTYPE html>
<title>Storage</title>
<style>
body {
font-family: sans-serif;
max-width: 100ch;
margin: 0 auto;
padding: 1em;
background-color: #333;
color: #eee;
}
td {
text-align: right;
padding: .5em;
}
td:first-child {
text-align: left;
}
a {
color: inherit;
text-decoration: none;
}
</style>
<div>
<h2>Quick file upload</h2>
<p>Uses parallel WebSocket connections to /api/upload for increased bandwidth</p>
@ -8,84 +29,148 @@
</div>
<div>
<h2>File downloads (websocket)</h2>
<h2>Files</h2>
<ul id=file_list></ul>
</div>
<h2>File listings</h2>
<p>Plain HTML browser <a href=/files/>/files/</a></p>
<p>JSON list updated via WebSocket /api/watch:</p>
<textarea id=list style="padding: 1em; width: 80ch; height: 40ch;"></textarea>
<script>
const list = document.getElementById("list")
let files = {}
let flatfiles = {}
function createWatchSocket() {
const wsurl = new URL("/api/watch", location.href.replace(/^http/, 'ws'))
const ws = new WebSocket(wsurl)
ws.onmessage = event => {
msg = JSON.parse(event.data)
console.log("Watch", msg)
if (msg.root) {
files = msg.root
if (msg.update) {
tree_update(msg.update)
file_list(files)
} else if (msg.update) {
const {path, data} = msg.update
for (const p of path.split("/")) {
// TODO update files at path with new data
}
} else {
console.log("Unkonwn message from watch socket", msg)
}
list.value = JSON.stringify(files)
}
}
function file_list(files) {
const ul = document.getElementById("file_list")
ul.innerHTML = ""
const dir = ""
let ptr = files.dir
console.log(ptr)
for (const name of Object.keys(ptr)) {
if (ptr[name].dir) continue
const {size, mtime} = ptr[name]
const li = document.createElement("li")
const a = document.createElement("a")
ul.appendChild(li)
li.appendChild(a)
a.textContent = name
a.href = name
a.onclick = event => {
event.preventDefault()
download(name, size)
}
}
}
createWatchSocket()
async function download(name, size) {
// Apply a list of UpdateEntry records from the server, walking down from the
// root, then rebuild the path index (flatfiles) from scratch.
function tree_update(msg) {
  let node = files
  for (const elem of msg) {
    if (elem.deleted) {
      delete node.dir[elem.name]
      // BUGFIX: removed `delete flatfiles[p]` — `p` was never defined
      // (ReferenceError); flatfiles is rebuilt in full below anyway.
      break
    }
    if (elem.name !== undefined) node = node.dir[elem.name] ||= {}
    if (elem.size !== undefined) node.size = elem.size
    if (elem.mtime !== undefined) node.mtime = elem.mtime
    if (elem.dir !== undefined) node.dir = elem.dir
  }
  // Update paths and flatfiles
  files.path = "/"
  const nodes = [files]
  flatfiles = {}
  while (node = nodes.pop()) {
    flatfiles[node.path] = node
    if (node.dir === undefined) continue
    for (const name of Object.keys(node.dir)) {
      const child = node.dir[name]
      // Directories get a trailing slash in their path
      child.path = node.path + name + (child.dir === undefined ? "" : "/")
      nodes.push(child)
    }
  }
}
var collator = new Intl.Collator(undefined, {numeric: true, sensitivity: 'base'});
const compare_path = (a, b) => collator.compare(a.path, b.path)
const compare_time = (a, b) => a.mtime > b.mtime
// Rebuild the #file_list table from the flatfiles index, sorted by mtime.
// Note: reads the global flatfiles; the `files` parameter is unused here.
function file_list(files) {
  const table = document.getElementById("file_list")
  const sorted = Object.values(flatfiles).sort(compare_time)
  table.innerHTML = ""
  for (const f of sorted) {
    const {path, size, mtime} = f
    const tr = document.createElement("tr")
    const name_td = document.createElement("td")
    const size_td = document.createElement("td")
    const mtime_td = document.createElement("td")
    const a = document.createElement("a")
    table.appendChild(tr)
    tr.appendChild(name_td)
    tr.appendChild(size_td)
    tr.appendChild(mtime_td)
    name_td.appendChild(a)
    size_td.textContent = size
    mtime_td.textContent = formatUnixDate(mtime)
    a.textContent = path
    // Plain HTTP download served from the /files static route
    a.href = `/files${path}`
    /*a.onclick = event => {
      if (window.showSaveFilePicker) {
        event.preventDefault()
        download_ws(name, size)
      }
    }
    a.download = ""*/
  }
}
// Format a Unix timestamp (seconds) as a relative time string ("5 minutes
// ago") for recent dates, falling back to a locale date beyond one week.
function formatUnixDate(t) {
  const date = new Date(t * 1000)
  const now = new Date()
  const diff = date - now  // milliseconds; negative for past times
  const formatter = new Intl.RelativeTimeFormat('en', { numeric: 'auto' })
  if (Math.abs(diff) <= 60000) {
    return formatter.format(Math.round(diff / 1000), 'second')
  }
  if (Math.abs(diff) <= 3600000) {
    return formatter.format(Math.round(diff / 60000), 'minute')
  }
  if (Math.abs(diff) <= 86400000) {
    return formatter.format(Math.round(diff / 3600000), 'hour')
  }
  if (Math.abs(diff) <= 604800000) {
    return formatter.format(Math.round(diff / 86400000), 'day')
  }
  return date.toLocaleDateString()
}
async function download_ws(name, size) {
const fh = await window.showSaveFilePicker({
suggestedName: name,
})
const writer = await fh.createWritable()
writer.truncate(size)
const wsurl = new URL("/api/download", location.href.replace(/^http/, 'ws'))
const ws = new WebSocket(wsurl)
ws.binaryType = 'arraybuffer'
let pos = 0
ws.onopen = () => {
console.log("Download socket connected")
console.log("Downloading over WebSocket", name, size)
ws.send(JSON.stringify({name, start: 0, end: size, size}))
}
ws.onmessage = event => {
const data = event.data
console.log("Download", data)
const blob = new Blob([data], {type: "application/octet-stream"})
const url = URL.createObjectURL(blob)
const a = document.createElement("a")
a.href = url
a.download = name
a.click()
ws.close()
if (typeof event.data === 'string') {
const msg = JSON.parse(event.data)
console.log("Download finished", msg)
ws.close()
return
}
console.log("Received chunk", name, pos, pos + event.data.size)
pos += event.data.size
writer.write(event.data)
}
ws.onclose = () => {
console.log("Download socket disconnected")
if (pos < size) {
console.log("Download aborted", name, pos)
writer.truncate(pos)
}
writer.close()
}
}
@ -95,7 +180,7 @@ const numConnections = 2
const chunkSize = 1<<20
const wsConnections = new Set()
for (let i = 0; i < numConnections; i++) createUploadWS()
//for (let i = 0; i < numConnections; i++) createUploadWS()
function createUploadWS() {
const wsurl = new URL("/api/upload", location.href.replace(/^http/, 'ws'))

View File

@ -1,19 +1,20 @@
import asyncio
import secrets
from hashlib import sha256
from pathlib import Path, PurePosixPath
import threading
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
import msgspec
from . import config
from .fileio import ROOT
from .protocol import DirEntry, FileEntry
from .protocol import DirEntry, FileEntry, UpdateEntry
secret = secrets.token_bytes(8)
pubsub = {}
def fuid(stat):
return sha256((stat.st_dev << 32 | stat.st_ino).to_bytes(8, 'big') + secret).hexdigest()[:16]
def walk(path: Path = ROOT):
def walk(path: Path = ROOT) -> DirEntry | FileEntry | None:
try:
s = path.stat()
mtime = int(s.st_mtime)
@ -27,32 +28,99 @@ def walk(path: Path = ROOT):
else:
size = 0
return DirEntry(size, mtime, tree)
except FileNotFoundError:
return None
except OSError as e:
print("OS error walking path", path, e)
return None
tree = walk()
tree = {"": walk()}
tree_lock = threading.Lock()
def update(relpath: PurePosixPath):
ptr = tree.dir
path = ROOT
name = ""
for name in relpath.parts[:-1]:
path /= name
try:
ptr = ptr[name].dir
except KeyError:
def refresh():
    """Encode the entire current tree as a JSON "update" message string."""
    # tree[""] holds the root DirEntry; a single unnamed UpdateEntry carrying
    # the whole dir replaces the client's view from the root down.
    root = tree[""]
    return msgspec.json.encode({"update": [
        UpdateEntry(size=root.size, mtime=root.mtime, dir=root.dir)
    ]}).decode()
def update(relpath: PurePosixPath, loop):
    """Rescan relpath and broadcast any changes to watch clients.

    Called from the watchdog observer thread; the broadcast coroutine is
    marshalled onto the server's event loop.
    """
    new = walk(ROOT / relpath)
    with tree_lock:
        msg = update_internal(relpath, new)
    # BUGFIX: update_internal returns None when nothing changed; the old code
    # still scheduled broadcast(None), pushing None to every client queue.
    if msg is None:
        return
    asyncio.run_coroutine_threadsafe(broadcast(msg), loop)
def update_internal(relpath: PurePosixPath, new: DirEntry | FileEntry | None):
path = "", *relpath.parts
old = tree
elems = []
for name in path:
if name not in old:
# File or folder created
old = None
elems.append((name, None))
if len(elems) < len(path):
raise ValueError("Tree out of sync")
break
new = walk(path)
old = ptr.pop(name, None)
if new is not None:
ptr[name] = new
old = old[name]
elems.append((name, old))
if old == new:
return
print("Update", relpath)
# TODO: update parents size/mtime
msg = msgspec.json.encode({"update": {
"path": relpath.as_posix(),
"data": new,
}})
for queue in pubsub.values(): queue.put_nowait(msg)
return # No changes
mt = new.mtime if new else 0
szdiff = (new.size if new else 0) - (old.size if old else 0)
# Update parents
update = []
for name, entry in elems[:-1]:
u = UpdateEntry(name)
if szdiff:
entry.size += szdiff
u.size = entry.size
if mt > entry.mtime:
u.mtime = entry.mtime = mt
update.append(u)
# The last element is the one that changed
print([e[0] for e in elems])
name, entry = elems[-1]
parent = elems[-2][1] if len(elems) > 1 else tree
u = UpdateEntry(name)
if new:
parent[name] = new
if u.size != new.size: u.size = new.size
if u.mtime != new.mtime: u.mtime = new.mtime
if isinstance(new, DirEntry):
if u.dir == new.dir: u.dir = new.dir
else:
del parent[name]
u.deleted = True
update.append(u)
return msgspec.json.encode({"update": update}).decode()
async def broadcast(msg):
    """Deliver msg to every connected watch client's queue."""
    for queue in pubsub.values():
        # BUGFIX: Queue.put_nowait() is a plain method returning None;
        # `await queue.put_nowait(msg)` raised TypeError on every broadcast.
        queue.put_nowait(msg)
def register(app, url):
    """Attach the filesystem watcher and the watch websocket endpoint to app."""
    @app.before_server_start
    async def start_watcher(app, loop):
        class Handler(FileSystemEventHandler):
            def on_any_event(self, event):
                # Runs on the watchdog thread; update() marshals to `loop`
                update(Path(event.src_path).relative_to(ROOT), loop)
        app.ctx.observer = Observer()
        app.ctx.observer.schedule(Handler(), str(ROOT), recursive=True)
        app.ctx.observer.start()

    @app.after_server_stop
    async def stop_watcher(app, _):
        app.ctx.observer.stop()
        app.ctx.observer.join()

    @app.websocket(url)
    async def watch(request, ws):
        try:
            # Register the queue under the lock so updates are not missed
            # between the initial refresh() snapshot and queued deltas.
            # NOTE(review): awaiting while holding a threading.Lock can stall
            # the watchdog thread — confirm send latency is acceptable here.
            with tree_lock:
                q = pubsub[ws] = asyncio.Queue()
                await ws.send(refresh())
            while True:
                await ws.send(await q.get())
        finally:
            del pubsub[ws]

View File

@ -14,9 +14,11 @@ authors = [
classifiers = [
]
dependencies = [
"argon2-cffi",
"msgspec",
"pathvalidate",
"sanic",
"tomli_w",
"watchdog",
]