Maintenance update (#7)

- Use modern tooling uv and bun
- Various changes to work with latest PyAV and PIL that have changed their API
- Improved image, video and document previews (uses AVIF, renders AVIF/HEIC/videos in HDR, faster processing)
- Fix a server hang in some cases where a folder was moved or renamed
- Log exceptions instead of only returning 500 response to client
- Log timing of preview generation functions
- Default to quality 50 in previews (previously 40)
This commit is contained in:
2025-08-15 18:03:04 +01:00
parent f38bb4bab9
commit 073f1a8707
16 changed files with 369 additions and 203 deletions

View File

@@ -4,12 +4,14 @@ import io
import mimetypes
import urllib.parse
from pathlib import PurePosixPath
from time import perf_counter
from urllib.parse import unquote
from wsgiref.handlers import format_date_time
import av
import av.datasets
import fitz # PyMuPDF
import numpy as np
import pillow_heif
from PIL import Image
from sanic import Blueprint, empty, raw
from sanic.exceptions import NotFound
@@ -18,7 +20,7 @@ from sanic.log import logger
from cista import config
from cista.util.filename import sanitize
DISPLAYMATRIX = av.stream.SideData.DISPLAYMATRIX
pillow_heif.register_heif_opener()
bp = Blueprint("preview", url_prefix="/preview")
@@ -28,20 +30,20 @@ async def preview(req, path):
"""Preview a file"""
maxsize = int(req.args.get("px", 1024))
maxzoom = float(req.args.get("zoom", 2.0))
quality = int(req.args.get("q", 40))
quality = int(req.args.get("q", 60))
rel = PurePosixPath(sanitize(unquote(path)))
path = config.config.path / rel
stat = path.lstat()
etag = config.derived_secret(
"preview", rel, stat.st_mtime_ns, quality, maxsize, maxzoom
).hex()
savename = PurePosixPath(path.name).with_suffix(".webp")
savename = PurePosixPath(path.name).with_suffix(".avif")
headers = {
"etag": etag,
"last-modified": format_date_time(stat.st_mtime),
"cache-control": "max-age=604800, immutable"
+ ("" if config.config.public else ", private"),
"content-type": "image/webp",
"content-type": "image/avif",
"content-disposition": f"inline; filename*=UTF-8''{urllib.parse.quote(savename.as_posix())}",
}
if req.headers.if_none_match == etag:
@@ -60,58 +62,164 @@ async def preview(req, path):
def dispatch(path, quality, maxsize, maxzoom):
if path.suffix.lower() in (".pdf", ".xps", ".epub", ".mobi"):
return process_pdf(path, quality=quality, maxsize=maxsize, maxzoom=maxzoom)
if mimetypes.guess_type(path.name)[0].startswith("video/"):
type, _ = mimetypes.guess_type(path.name)
if type and type.startswith("video/"):
return process_video(path, quality=quality, maxsize=maxsize)
return process_image(path, quality=quality, maxsize=maxsize)
def process_image(path, *, maxsize, quality):
t_load_start = perf_counter()
img = Image.open(path)
w, h = img.size
img.thumbnail((min(w, maxsize), min(h, maxsize)))
# Fix rotation based on EXIF data
try:
rotate_values = {3: 180, 6: 270, 8: 90}
orientation = img._getexif().get(274)
if orientation in rotate_values:
logger.debug(f"Rotating preview {path} by {rotate_values[orientation]}")
img = img.rotate(rotate_values[orientation], expand=True)
except AttributeError:
...
except Exception as e:
logger.error(f"Error rotating preview image: {e}")
# Save as webp
# Force decode to include I/O in load timing
img.load()
t_load_end = perf_counter()
# Resize
orig_w, orig_h = img.size
t_proc_start = perf_counter()
img.thumbnail((min(orig_w, maxsize), min(orig_h, maxsize)))
t_proc_end = perf_counter()
# Save as AVIF
imgdata = io.BytesIO()
img.save(imgdata, format="webp", quality=quality, method=4)
return imgdata.getvalue()
t_save_start = perf_counter()
img.save(imgdata, format="avif", quality=quality, speed=10, max_threads=1)
t_save_end = perf_counter()
ret = imgdata.getvalue()
load_ms = (t_load_end - t_load_start) * 1000
proc_ms = (t_proc_end - t_proc_start) * 1000
save_ms = (t_save_end - t_save_start) * 1000
logger.debug(
"Preview image %s: load=%.1fms process=%.1fms save=%.1fms out=%.1fKB",
path.name,
load_ms,
proc_ms,
save_ms,
len(ret) / 1024,
)
return ret
def process_pdf(path, *, maxsize, maxzoom, quality, page_number=0):
t_load_start = perf_counter()
pdf = fitz.open(path)
page = pdf.load_page(page_number)
w, h = page.rect[2:4]
zoom = min(maxsize / w, maxsize / h, maxzoom)
mat = fitz.Matrix(zoom, zoom)
pix = page.get_pixmap(matrix=mat)
return pix.pil_tobytes(format="webp", quality=quality, method=4)
pix = page.get_pixmap(matrix=mat) # type: ignore[attr-defined]
t_load_end = perf_counter()
t_save_start = perf_counter()
ret = pix.pil_tobytes(format="avif", quality=quality, speed=10, max_threads=1)
t_save_end = perf_counter()
logger.debug(
"Preview pdf %s: load+render=%.1fms save=%.1fms",
path.name,
(t_load_end - t_load_start) * 1000,
(t_save_end - t_save_start) * 1000,
)
return ret
def process_video(path, *, maxsize, quality):
with av.open(str(path)) as container:
stream = container.streams.video[0]
stream.codec_context.skip_frame = "NONKEY"
rot = stream.side_data and stream.side_data.get(DISPLAYMATRIX) or 0
container.seek(container.duration // 8)
img = next(container.decode(stream)).to_image()
del stream
img.thumbnail((maxsize, maxsize))
frame = None
imgdata = io.BytesIO()
if rot:
img = img.rotate(rot, expand=True)
img.save(imgdata, format="webp", quality=quality, method=4)
del img
istream = ostream = icc = occ = frame = None
t_load_start = perf_counter()
# Initialize to avoid "possibly unbound" in static analysis when exceptions occur
t_load_end = t_load_start
t_save_start = t_load_start
t_save_end = t_load_start
with (
av.open(str(path)) as icontainer,
av.open(imgdata, "w", format="avif") as ocontainer,
):
istream = icontainer.streams.video[0]
istream.codec_context.skip_frame = "NONKEY"
icontainer.seek((icontainer.duration or 0) // 8)
for frame in icontainer.decode(istream):
if frame.dts is not None:
break
else:
raise RuntimeError("No frames found in video")
# Resize frame to thumbnail size
if frame.width > maxsize or frame.height > maxsize:
scale_factor = min(maxsize / frame.width, maxsize / frame.height)
new_width = int(frame.width * scale_factor)
new_height = int(frame.height * scale_factor)
frame = frame.reformat(width=new_width, height=new_height)
# Simple rotation detection and logging
if frame.rotation:
try:
fplanes = frame.to_ndarray()
# Split into Y, U, V planes of proper dimensions
planes = [
fplanes[: frame.height],
fplanes[frame.height : frame.height + frame.height // 4].reshape(
frame.height // 2, frame.width // 2
),
fplanes[frame.height + frame.height // 4 :].reshape(
frame.height // 2, frame.width // 2
),
]
# Rotate
planes = [np.rot90(p, frame.rotation // 90) for p in planes]
# Restore PyAV format
planes = np.hstack([p.flat for p in planes]).reshape(
-1, planes[0].shape[1]
)
frame = av.VideoFrame.from_ndarray(planes, format=frame.format.name)
del planes, fplanes
except Exception as e:
if "not yet supported" in str(e):
logger.warning(
f"Not rotating {path.name} preview image by {frame.rotation}°:\n PyAV: {e}"
)
else:
logger.exception(f"Error rotating video frame: {e}")
t_load_end = perf_counter()
t_save_start = perf_counter()
crf = str(int(63 * (1 - quality / 100) ** 2)) # Closely matching PIL quality-%
ostream = ocontainer.add_stream(
"av1",
options={
"crf": crf,
"usage": "realtime",
"cpu-used": "8",
"threads": "1",
},
)
assert isinstance(ostream, av.VideoStream)
ostream.width = frame.width
ostream.height = frame.height
icc = istream.codec_context
occ = ostream.codec_context
# Copy HDR metadata from input video stream
occ.color_primaries = icc.color_primaries
occ.color_trc = icc.color_trc
occ.colorspace = icc.colorspace
occ.color_range = icc.color_range
ocontainer.mux(ostream.encode(frame))
ocontainer.mux(ostream.encode(None)) # Flush the stream
t_save_end = perf_counter()
# Capture frame dimensions before cleanup
ret = imgdata.getvalue()
del imgdata
logger.debug(
"Preview video %s: load+decode=%.1fms save=%.1fms",
path.name,
(t_load_end - t_load_start) * 1000,
(t_save_end - t_save_start) * 1000,
)
del imgdata, istream, ostream, icc, occ, frame
gc.collect()
return ret

View File

@@ -127,8 +127,7 @@ class FileEntry(msgspec.Struct, array_like=True, frozen=True):
return f"{self.name} ({self.size}, {self.mtime})"
class Update(msgspec.Struct, array_like=True):
...
class Update(msgspec.Struct, array_like=True): ...
class UpdKeep(Update, tag="k"):

View File

@@ -1,5 +1,7 @@
import os
import re
import signal
import sys
from pathlib import Path
from sanic import Sanic
@@ -11,6 +13,14 @@ def run(*, dev=False):
"""Run Sanic main process that spawns worker processes to serve HTTP requests."""
from .app import app
# Set up immediate exit on Ctrl+C for faster termination
def signal_handler(signum, frame):
print("\nReceived interrupt signal, exiting immediately...")
os._exit(0)
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)
url, opts = parse_listen(config.config.listen)
# Silence Sanic's warning about running in production rather than debug
os.environ["SANIC_IGNORE_PRODUCTION_WARNING"] = "1"

View File

@@ -29,6 +29,8 @@ async def handle_sanic_exception(request, e):
if not message or not request.app.debug and code == 500:
message = "Internal Server Error"
message = f"⚠️ {message}" if code < 500 else f"🛑 {message}"
if code == 500:
logger.exception(e)
# Non-browsers get JSON errors
if "text/html" not in request.headers.accept:
return jres(

View File

@@ -48,6 +48,7 @@ def treeiter(rootmod):
def treeget(rootmod: list[FileEntry], path: PurePosixPath):
begin = None
ret = []
for i, relpath, entry in treeiter(rootmod):
if begin is None:
if relpath == path:
@@ -57,6 +58,7 @@ def treeget(rootmod: list[FileEntry], path: PurePosixPath):
if entry.level <= len(path.parts):
break
ret.append(entry)
return begin, ret
@@ -77,28 +79,36 @@ def treeinspos(rootmod: list[FileEntry], relpath: PurePosixPath, relfile: int):
# root
level += 1
continue
ename = rel.parts[level - 1]
name = relpath.parts[level - 1]
esort = sortkey(ename)
nsort = sortkey(name)
# Non-leaf are always folders, only use relfile at leaf
isfile = relfile if len(relpath.parts) == level else 0
# First compare by isfile, then by sorting order and if that too matches then case sensitive
cmp = (
entry.isfile - isfile
or (esort > nsort) - (esort < nsort)
or (ename > name) - (ename < name)
)
if cmp > 0:
return i
if cmp < 0:
continue
level += 1
if level > len(relpath.parts):
print("ERROR: insertpos", relpath, i, entry.name, entry.level, level)
logger.error(
f"insertpos level overflow: relpath={relpath}, i={i}, entry.name={entry.name}, entry.level={entry.level}, level={level}"
)
break
else:
i += 1
return i
@@ -179,21 +189,16 @@ def update_path(rootmod: list[FileEntry], relpath: PurePosixPath, loop):
"""Called on FS updates, check the filesystem and broadcast any changes."""
new = walk(relpath)
obegin, old = treeget(rootmod, relpath)
if old == new:
logger.debug(
f"Watch: Event without changes needed {relpath}"
if old
else f"Watch: Event with old and new missing: {relpath}"
)
return
if obegin is not None:
del rootmod[obegin : obegin + len(old)]
if new:
logger.debug(f"Watch: Update {relpath}" if old else f"Watch: Created {relpath}")
i = treeinspos(rootmod, relpath, new[0].isfile)
rootmod[i:i] = new
else:
logger.debug(f"Watch: Removed {relpath}")
def update_space(loop):
@@ -218,17 +223,35 @@ def format_update(old, new):
oremain, nremain = set(old), set(new)
update = []
keep_count = 0
iteration_count = 0
# Precompute index maps to allow deterministic tie-breaking when both
# candidates exist in both sequences but are not equal (rename/move cases)
old_pos = {e: i for i, e in enumerate(old)}
new_pos = {e: i for i, e in enumerate(new)}
while oidx < len(old) and nidx < len(new):
iteration_count += 1
# Emergency brake for potential infinite loops
if iteration_count > 50000:
logger.error(
f"format_update potential infinite loop! iteration={iteration_count}, oidx={oidx}, nidx={nidx}"
)
raise Exception(
f"format_update infinite loop detected at iteration {iteration_count}"
)
modified = False
# Matching entries are kept
if old[oidx] == new[nidx]:
entry = old[oidx]
oremain.remove(entry)
nremain.remove(entry)
oremain.discard(entry)
nremain.discard(entry)
keep_count += 1
oidx += 1
nidx += 1
continue
if keep_count > 0:
modified = True
update.append(UpdKeep(keep_count))
@@ -248,7 +271,7 @@ def format_update(old, new):
insert_items = []
while nidx < len(new) and new[nidx] not in oremain:
entry = new[nidx]
nremain.remove(entry)
nremain.discard(entry)
insert_items.append(entry)
nidx += 1
if insert_items:
@@ -256,9 +279,32 @@ def format_update(old, new):
update.append(UpdIns(insert_items))
if not modified:
raise Exception(
f"Infinite loop in diff {nidx=} {oidx=} {len(old)=} {len(new)=}"
)
# Tie-break: both items exist in both lists but don't match here.
# Decide whether to delete old[oidx] first or insert new[nidx] first
# based on which alignment is closer.
if oidx >= len(old) or nidx >= len(new):
break
cur_old = old[oidx]
cur_new = new[nidx]
pos_old_in_new = new_pos.get(cur_old)
pos_new_in_old = old_pos.get(cur_new)
# Default distances if not present (shouldn't happen if in remain sets)
dist_del = (pos_old_in_new - nidx) if pos_old_in_new is not None else 1
dist_ins = (pos_new_in_old - oidx) if pos_new_in_old is not None else 1
# Prefer the operation with smaller forward distance; tie => delete
if dist_del <= dist_ins:
# Delete current old item
oremain.discard(cur_old)
update.append(UpdDel(1))
oidx += 1
else:
# Insert current new item
nremain.discard(cur_new)
update.append(UpdIns([cur_new]))
nidx += 1
# Diff any remaining
if keep_count > 0:
@@ -311,10 +357,7 @@ def watcher_inotify(loop):
while not quit.is_set():
i = inotify.adapters.InotifyTree(rootpath.as_posix())
# Initialize the tree from filesystem
t0 = time.perf_counter()
update_root(loop)
t1 = time.perf_counter()
logger.debug(f"Root update took {t1 - t0:.1f}s")
trefresh = time.monotonic() + 300.0
tspace = time.monotonic() + 5.0
# Watch for changes (frequent wakeups needed for quiting)
@@ -335,32 +378,52 @@ def watcher_inotify(loop):
if quit.is_set():
return
interesting = any(f in modified_flags for f in event[1])
if event[2] == rootpath.as_posix() and event[3] == "zzz":
logger.debug(f"Watch: {interesting=} {event=}")
if interesting:
# Update modified path
t0 = time.perf_counter()
path = PurePosixPath(event[2]) / event[3]
update_path(rootmod, path.relative_to(rootpath), loop)
t1 = time.perf_counter()
logger.debug(f"Watch: Update {event[3]} took {t1 - t0:.1f}s")
try:
rel_path = path.relative_to(rootpath)
update_path(rootmod, rel_path, loop)
except Exception as e:
logger.error(
f"Error processing inotify event for path {path}: {e}"
)
raise
if not dirty:
t = time.monotonic()
dirty = True
# Wait a maximum of 0.5s to push the updates
if dirty and time.monotonic() >= t + 0.5:
# Wait a maximum of 0.2s to push the updates
if dirty and time.monotonic() >= t + 0.2:
break
if dirty and state.root != rootmod:
t0 = time.perf_counter()
update = format_update(state.root, rootmod)
t1 = time.perf_counter()
with state.lock:
broadcast(update, loop)
state.root = rootmod
t2 = time.perf_counter()
logger.debug(
f"Format update took {t1 - t0:.1f}s, broadcast {t2 - t1:.1f}s"
)
try:
update = format_update(state.root, rootmod)
with state.lock:
broadcast(update, loop)
state.root = rootmod
except Exception:
logger.exception(
"format_update failed; falling back to full rescan"
)
# Fallback: full rescan and try diff again; last resort send full root
try:
fresh = walk(PurePosixPath())
try:
update = format_update(state.root, fresh)
with state.lock:
broadcast(update, loop)
state.root = fresh
except Exception:
logger.exception(
"Fallback diff failed; sending full root snapshot"
)
with state.lock:
broadcast(format_root(fresh), loop)
state.root = fresh
except Exception:
logger.exception(
"Full rescan failed; dropping this batch of updates"
)
del i # Free the inotify object