More efficient flat file list format and various UX improvements (#3)
This is a major upgrade that bundles a number of changes:
- Navigation flows improved: search now appears in URL history and is cleared when navigating to another folder
- More efficient file list format for faster loads
- Efficient updates: the full root is never re-sent after the initial connection
- Large number of watching and file list updates (inotify issues remain)
- File size coloring
- Fixed random glitches in ZIP generation (thread race condition)
- Code refactoring, cleanup, typing fixes
- More tests

Reviewed-on: #3
This commit is contained in:
@@ -1,20 +1,137 @@
|
||||
import asyncio
|
||||
import shutil
|
||||
import stat
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from os import stat_result
|
||||
from pathlib import Path, PurePosixPath
|
||||
|
||||
import msgspec
|
||||
from natsort import humansorted, natsort_keygen, ns
|
||||
from sanic.log import logging
|
||||
|
||||
from cista import config
|
||||
from cista.fileio import fuid
|
||||
from cista.protocol import DirEntry, FileEntry, UpdateEntry
|
||||
from cista.protocol import FileEntry, Space, UpdDel, UpdIns, UpdKeep
|
||||
|
||||
pubsub = {}
|
||||
tree = {"": None}
|
||||
tree_lock = threading.Lock()
|
||||
sortkey = natsort_keygen(alg=ns.LOCALE)
|
||||
|
||||
|
||||
class State:
    """Lock-protected holder of the flat file listing and the space summary.

    ``_listing`` is a flat list of entries in tree order: index 0 is the root
    and every entry carries a ``level`` (depth), a ``name`` and an ``isfile``
    flag. The subtree of a path is the contiguous run that starts at its own
    entry and ends before the next entry whose level is not deeper.
    """

    def __init__(self):
        # Re-entrant, so code that already holds the lock can still use the
        # locking accessors below.
        self.lock = threading.RLock()
        self._space = Space(0, 0, 0, 0)
        self._listing: list[FileEntry] = []

    @property
    def space(self):
        """The current space summary, read under the lock."""
        with self.lock:
            return self._space

    @space.setter
    def space(self, space):
        with self.lock:
            self._space = space

    @property
    def root(self) -> list[FileEntry]:
        """A shallow copy of the complete listing, taken under the lock."""
        with self.lock:
            return self._listing[:]

    @root.setter
    def root(self, listing: list[FileEntry]):
        # The given list is stored as-is (not copied).
        with self.lock:
            self._listing = listing

    def _slice(self, idx: PurePosixPath | tuple[PurePosixPath, int]):
        """Locate the listing range occupied by a path and its descendants.

        ``idx`` is either a relative path or a ``(path, isfile)`` tuple.
        Returns ``slice(begin, end)`` covering the entry and everything below
        it; for the empty path this is the whole listing. When the path is
        not present, an empty slice is returned at the index where the scan
        stopped, which item assignment then uses as the insertion point.

        The lock is not taken here; the item accessors call this while
        holding it.
        """
        relpath, relfile = idx if isinstance(idx, tuple) else (idx, 0)
        begin, end = 0, len(self._listing)
        level = 0
        isfile = 0

        # Special case for root
        if not relpath.parts:
            return slice(begin, end)

        begin += 1  # Skip the root entry itself
        for part in relpath.parts:
            level += 1
            found = False

            while begin < end:
                entry = self._listing[begin]

                # Left the folder being searched
                if entry.level < level:
                    break

                if entry.level == level:
                    if entry.name == part:
                        found = True
                        if level == len(relpath.parts):
                            # NOTE(review): isfile is only updated here, right
                            # before the break, so relfile never influences
                            # the comparison below - confirm this is intended.
                            isfile = relfile
                        else:
                            # Descend: continue the scan inside this folder
                            begin += 1
                        break
                    # A nonzero isfile difference decides first, otherwise the
                    # natural-sort keys; a positive result means this sibling
                    # sorts after the wanted name, so stop scanning here.
                    cmp = entry.isfile - isfile or sortkey(entry.name) > sortkey(part)
                    if cmp > 0:
                        break

                # Deeper entries (level > current) are skipped one by one
                begin += 1

            if not found:
                return slice(begin, begin)

        # Found the starting point, now find the end of the slice
        for end in range(begin + 1, len(self._listing) + 1):
            if end == len(self._listing) or self._listing[end].level <= level:
                break
        return slice(begin, end)

    def __getitem__(self, index: PurePosixPath | tuple[PurePosixPath, int]):
        """Return the entries for ``index`` and its subtree (empty list if absent)."""
        with self.lock:
            return self._listing[self._slice(index)]

    def __setitem__(
        self, index: tuple[PurePosixPath, int], value: list[FileEntry]
    ) -> None:
        """Replace (or insert) the subtree at ``index`` with ``value``.

        Raises:
            ValueError: if the parent folder of the path has no entries in
                the listing.
        """
        rel, _isfile = index
        with self.lock:
            if rel.parts:
                parent = self._slice(rel.parent)
                if parent.start == parent.stop:
                    # Report the parent path, which is what is missing, not
                    # the path being assigned.
                    raise ValueError(
                        f"Parent folder {rel.parent.as_posix()} is missing for {rel.name}"
                    )
            self._listing[self._slice(index)] = value

    def __delitem__(self, relpath: PurePosixPath):
        """Remove the entry at ``relpath`` together with its subtree."""
        with self.lock:
            del self._listing[self._slice(relpath)]

    def _index(self, rel: PurePosixPath):
        # NOTE(review): unfinished - only initialises two locals and
        # implicitly returns None.
        idx = 0
        ret = []

    def _dir(self, idx: int):
        """List the direct children of the entry at ``idx``.

        Returns ``(children, end)`` where ``children`` holds the listing
        indices of entries exactly one level deeper and ``end`` is the index
        just past the last descendant.
        """
        level = self._listing[idx].level + 1
        end = len(self._listing)
        idx += 1
        ret = []
        while idx < end and (r := self._listing[idx]).level >= level:
            if r.level == level:
                ret.append(idx)
            idx += 1  # Advance; without this the loop would never terminate
        return ret, idx

    def update(self, rel: PurePosixPath, value: FileEntry):
        # NOTE(review): unfinished - ``begin`` is never advanced, so this loop
        # does not terminate when the first entry's level is below
        # len(rel.parts), and ``value`` is unused. Intent unclear; left as is.
        begin = 0
        parents = []
        while self._listing[begin].level < len(rel.parts):
            parents.append(begin)
||||
|
||||
|
||||
state = State()
|
||||
rootpath: Path = None # type: ignore
|
||||
quit = False
|
||||
modified_flags = (
|
||||
@@ -26,23 +143,22 @@ modified_flags = (
|
||||
"IN_MOVED_FROM",
|
||||
"IN_MOVED_TO",
|
||||
)
|
||||
disk_usage = None
|
||||
|
||||
|
||||
def watcher_thread(loop):
|
||||
global disk_usage, rootpath
|
||||
global rootpath
|
||||
import inotify.adapters
|
||||
|
||||
while True:
|
||||
rootpath = config.config.path
|
||||
i = inotify.adapters.InotifyTree(rootpath.as_posix())
|
||||
old = format_tree() if tree[""] else None
|
||||
with tree_lock:
|
||||
# Initialize the tree from filesystem
|
||||
tree[""] = walk(rootpath)
|
||||
msg = format_tree()
|
||||
if msg != old:
|
||||
asyncio.run_coroutine_threadsafe(broadcast(msg), loop)
|
||||
# Initialize the tree from filesystem
|
||||
new = walk()
|
||||
with state.lock:
|
||||
old = state.root
|
||||
if old != new:
|
||||
state.root = new
|
||||
broadcast(format_update(old, new), loop)
|
||||
|
||||
# The watching is not entirely reliable, so do a full refresh every minute
|
||||
refreshdl = time.monotonic() + 60.0
|
||||
@@ -52,9 +168,10 @@ def watcher_thread(loop):
|
||||
return
|
||||
# Disk usage update
|
||||
du = shutil.disk_usage(rootpath)
|
||||
if du != disk_usage:
|
||||
disk_usage = du
|
||||
asyncio.run_coroutine_threadsafe(broadcast(format_du()), loop)
|
||||
space = Space(*du, storage=state.root[0].size)
|
||||
if space != state.space:
|
||||
state.space = space
|
||||
broadcast(format_space(space), loop)
|
||||
break
|
||||
# Do a full refresh?
|
||||
if time.monotonic() > refreshdl:
|
||||
@@ -75,144 +192,141 @@ def watcher_thread(loop):
|
||||
|
||||
|
||||
def watcher_thread_poll(loop):
|
||||
global disk_usage, rootpath
|
||||
global rootpath
|
||||
|
||||
while not quit:
|
||||
rootpath = config.config.path
|
||||
old = format_tree() if tree[""] else None
|
||||
with tree_lock:
|
||||
# Initialize the tree from filesystem
|
||||
tree[""] = walk(rootpath)
|
||||
msg = format_tree()
|
||||
if msg != old:
|
||||
asyncio.run_coroutine_threadsafe(broadcast(msg), loop)
|
||||
new = walk()
|
||||
with state.lock:
|
||||
old = state.root
|
||||
if old != new:
|
||||
state.root = new
|
||||
broadcast(format_update(old, new), loop)
|
||||
|
||||
# Disk usage update
|
||||
du = shutil.disk_usage(rootpath)
|
||||
if du != disk_usage:
|
||||
disk_usage = du
|
||||
asyncio.run_coroutine_threadsafe(broadcast(format_du()), loop)
|
||||
space = Space(*du, storage=state.root[0].size)
|
||||
if space != state.space:
|
||||
state.space = space
|
||||
broadcast(format_space(space), loop)
|
||||
|
||||
time.sleep(1.0)
|
||||
time.sleep(2.0)
|
||||
|
||||
|
||||
def format_du():
    """Encode the module-level disk usage and the root size as a JSON "space" message."""
    space = {
        "disk": disk_usage.total,
        "used": disk_usage.used,
        "free": disk_usage.free,
        "storage": tree[""].size,
    }
    encoded = msgspec.json.encode({"space": space})
    return encoded.decode()
|
||||
|
||||
|
||||
def format_tree():
    """Encode the entry stored under the "" key of ``tree`` as a JSON "root" message."""
    encoded = msgspec.json.encode({"root": tree[""]})
    return encoded.decode()
|
||||
|
||||
|
||||
def walk(path: Path) -> DirEntry | FileEntry | None:
|
||||
def walk(rel=PurePosixPath()) -> list[FileEntry]: # noqa: B008
|
||||
path = rootpath / rel
|
||||
try:
|
||||
s = path.stat()
|
||||
key = fuid(s)
|
||||
assert key, repr(key)
|
||||
mtime = int(s.st_mtime)
|
||||
if path.is_file():
|
||||
return FileEntry(key, s.st_size, mtime)
|
||||
st = path.stat()
|
||||
except OSError:
|
||||
return []
|
||||
return _walk(rel, int(not stat.S_ISDIR(st.st_mode)), st)
|
||||
|
||||
tree = {
|
||||
p.name: v
|
||||
for p in path.iterdir()
|
||||
if not p.name.startswith(".")
|
||||
if (v := walk(p)) is not None
|
||||
}
|
||||
if tree:
|
||||
size = sum(v.size for v in tree.values())
|
||||
mtime = max(mtime, *(v.mtime for v in tree.values()))
|
||||
else:
|
||||
size = 0
|
||||
return DirEntry(key, size, mtime, tree)
|
||||
|
||||
def _walk(rel: PurePosixPath, isfile: int, st: stat_result) -> list[FileEntry]:
    """Build the flat listing for ``rel`` and, for a folder, everything below it.

    Args:
        rel: Path relative to ``rootpath``.
        isfile: 1 for anything that is not a directory, 0 for a directory.
        st: The stat result already obtained for the path.

    Returns:
        A list whose first element is the entry for ``rel`` itself, followed
        by the entries of its descendants. A folder's size is the sum of its
        children and its mtime is the newest mtime found below it. The list
        is always returned, even when reading the folder fails part-way, so
        callers can rely on ``result[0]`` existing.
    """
    entry = FileEntry(
        level=len(rel.parts),
        name=rel.name,
        key=fuid(st),
        mtime=int(st.st_mtime),
        size=st.st_size if isfile else 0,
        isfile=isfile,
    )
    if isfile:
        return [entry]
    ret = [entry]
    path = rootpath / rel
    try:
        li = []
        for f in path.iterdir():
            if f.name.startswith("."):
                continue  # No dotfiles
            s = f.stat()
            li.append((int(not stat.S_ISDIR(s.st_mode)), f.name, s))
        # Tuples sort on the isfile flag first (directories are 0), then name
        for [isfile, name, s] in humansorted(li):
            subtree = _walk(rel / name, isfile, s)
            child = subtree[0]
            entry.mtime = max(entry.mtime, child.mtime)
            entry.size += child.size
            ret.extend(subtree)
    except FileNotFoundError:
        pass  # Things may be rapidly in motion
    except OSError as e:
        # Keep whatever was collected so far instead of returning None, which
        # would break the declared return type and the recursion above.
        print("OS error walking path", path, e)
    return ret
|
||||
|
||||
|
||||
def update(relpath: PurePosixPath, loop):
|
||||
"""Called by inotify updates, check the filesystem and broadcast any changes."""
|
||||
if rootpath is None or relpath is None:
|
||||
print("ERROR", rootpath, relpath)
|
||||
new = walk(rootpath / relpath)
|
||||
with tree_lock:
|
||||
update = update_internal(relpath, new)
|
||||
if not update:
|
||||
return # No changes
|
||||
msg = msgspec.json.encode({"update": update}).decode()
|
||||
asyncio.run_coroutine_threadsafe(broadcast(msg), loop)
|
||||
new = walk(relpath)
|
||||
with state.lock:
|
||||
old = state[relpath]
|
||||
if old == new:
|
||||
return
|
||||
old = state.root
|
||||
if new:
|
||||
state[relpath, new[0].isfile] = new
|
||||
else:
|
||||
del state[relpath]
|
||||
broadcast(format_update(old, state.root), loop)
|
||||
|
||||
|
||||
def update_internal(
|
||||
relpath: PurePosixPath,
|
||||
new: DirEntry | FileEntry | None,
|
||||
) -> list[UpdateEntry]:
|
||||
path = "", *relpath.parts
|
||||
old = tree
|
||||
elems = []
|
||||
for name in path:
|
||||
if name not in old:
|
||||
# File or folder created
|
||||
old = None
|
||||
elems.append((name, None))
|
||||
if len(elems) < len(path):
|
||||
# We got a notify for an item whose parent is not in tree
|
||||
print("Tree out of sync DEBUG", relpath)
|
||||
print(elems)
|
||||
print("Current tree:")
|
||||
print(tree[""])
|
||||
print("Walking all:")
|
||||
print(walk(rootpath))
|
||||
raise ValueError("Tree out of sync")
|
||||
break
|
||||
old = old[name]
|
||||
elems.append((name, old))
|
||||
if old == new:
|
||||
return []
|
||||
mt = new.mtime if new else 0
|
||||
szdiff = (new.size if new else 0) - (old.size if old else 0)
|
||||
# Update parents
|
||||
def format_update(old, new):
|
||||
# Make keep/del/insert diff until one of the lists ends
|
||||
oidx, nidx = 0, 0
|
||||
update = []
|
||||
for name, entry in elems[:-1]:
|
||||
u = UpdateEntry(name, entry.key)
|
||||
if szdiff:
|
||||
entry.size += szdiff
|
||||
u.size = entry.size
|
||||
if mt > entry.mtime:
|
||||
u.mtime = entry.mtime = mt
|
||||
update.append(u)
|
||||
# The last element is the one that changed
|
||||
name, entry = elems[-1]
|
||||
parent = elems[-2][1] if len(elems) > 1 else tree
|
||||
u = UpdateEntry(name, new.key if new else entry.key)
|
||||
if new:
|
||||
parent[name] = new
|
||||
if u.size != new.size:
|
||||
u.size = new.size
|
||||
if u.mtime != new.mtime:
|
||||
u.mtime = new.mtime
|
||||
if isinstance(new, DirEntry) and u.dir != new.dir:
|
||||
u.dir = new.dir
|
||||
else:
|
||||
del parent[name]
|
||||
u.deleted = True
|
||||
update.append(u)
|
||||
return update
|
||||
keep_count = 0
|
||||
while oidx < len(old) and nidx < len(new):
|
||||
if old[oidx] == new[nidx]:
|
||||
keep_count += 1
|
||||
oidx += 1
|
||||
nidx += 1
|
||||
continue
|
||||
if keep_count > 0:
|
||||
update.append(UpdKeep(keep_count))
|
||||
keep_count = 0
|
||||
|
||||
del_count = 0
|
||||
rest = new[nidx:]
|
||||
while oidx < len(old) and old[oidx] not in rest:
|
||||
del_count += 1
|
||||
oidx += 1
|
||||
if del_count:
|
||||
update.append(UpdDel(del_count))
|
||||
continue
|
||||
|
||||
insert_items = []
|
||||
rest = old[oidx:]
|
||||
while nidx < len(new) and new[nidx] not in rest:
|
||||
insert_items.append(new[nidx])
|
||||
nidx += 1
|
||||
update.append(UpdIns(insert_items))
|
||||
|
||||
# Diff any remaining
|
||||
if keep_count > 0:
|
||||
update.append(UpdKeep(keep_count))
|
||||
if oidx < len(old):
|
||||
update.append(UpdDel(len(old) - oidx))
|
||||
elif nidx < len(new):
|
||||
update.append(UpdIns(new[nidx:]))
|
||||
|
||||
return msgspec.json.encode({"update": update}).decode()
|
||||
|
||||
|
||||
async def broadcast(msg):
|
||||
def format_space(usage):
    """Encode ``usage`` as a JSON ``{"space": ...}`` message string."""
    encoded = msgspec.json.encode({"space": usage})
    return encoded.decode()
|
||||
|
||||
|
||||
def format_root(root):
    """Encode ``root`` as a JSON ``{"root": ...}`` message string."""
    encoded = msgspec.json.encode({"root": root})
    return encoded.decode()
|
||||
|
||||
|
||||
def broadcast(msg, loop):
    """Schedule ``abroadcast(msg)`` on ``loop`` and wait for its result.

    The coroutine is submitted with ``asyncio.run_coroutine_threadsafe`` and
    this call blocks on the returned future, so whatever ``abroadcast``
    returns (or raises) is passed on to the caller.
    """
    future = asyncio.run_coroutine_threadsafe(abroadcast(msg), loop)
    return future.result()
|
||||
|
||||
|
||||
async def abroadcast(msg):
|
||||
try:
|
||||
for queue in pubsub.values():
|
||||
queue.put_nowait(msg)
|
||||
@@ -223,8 +337,9 @@ async def broadcast(msg):
|
||||
|
||||
async def start(app, loop):
    """Load the configuration and start the watcher thread.

    The thread object is stored on ``app.ctx.watcher`` and is given ``loop``
    as its only argument.
    """
    config.load_config()
    # The leading False forces the polling watcher on every platform (the
    # commit message notes that inotify issues remain).
    use_inotify = False and sys.platform == "linux"
    app.ctx.watcher = threading.Thread(
        # Exactly one target: passing the keyword twice is a SyntaxError.
        target=watcher_thread if use_inotify else watcher_thread_poll,
        args=[loop],
    )
    app.ctx.watcher.start()
|
||||
|
||||
Reference in New Issue
Block a user