Maintenance update (#7)
- Use modern tooling: uv and bun
- Adapt to API changes in the latest PyAV and PIL releases
- Improve image, video and document previews (AVIF output; AVIF/HEIC/video previews rendered in HDR; faster processing)
- Fix a server hang that could occur when a folder was moved or renamed
- Log exceptions instead of only returning a 500 response to the client
- Log timing of preview generation functions
- Default to quality 50 in previews (previously 40)
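The video preview path maps a PIL-style quality percentage (0-100, higher is better) onto an AV1 CRF (0-63, lower is better) with a squared falloff. A worked sketch of that mapping (the helper name is ours; the formula is copied verbatim from process_video in the diff below, where the request default is q=60):

def quality_to_crf(quality: int) -> int:
    # Same arithmetic as crf = str(int(63 * (1 - quality / 100) ** 2)) in the diff
    return int(63 * (1 - quality / 100) ** 2)

assert quality_to_crf(100) == 0   # best quality
assert quality_to_crf(60) == 10   # the new q default shown in the diff
assert quality_to_crf(40) == 22   # the previous default
assert quality_to_crf(0) == 63    # worst quality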
Changed: cista/preview.py (184 lines)
@@ -4,12 +4,14 @@ import io
 import mimetypes
 import urllib.parse
 from pathlib import PurePosixPath
+from time import perf_counter
 from urllib.parse import unquote
 from wsgiref.handlers import format_date_time
 
 import av
 import av.datasets
 import fitz  # PyMuPDF
+import numpy as np
 import pillow_heif
 from PIL import Image
 from sanic import Blueprint, empty, raw
 from sanic.exceptions import NotFound
@@ -18,7 +20,7 @@ from sanic.log import logger
 from cista import config
 from cista.util.filename import sanitize
 
-DISPLAYMATRIX = av.stream.SideData.DISPLAYMATRIX
+pillow_heif.register_heif_opener()
 
 bp = Blueprint("preview", url_prefix="/preview")
 
@@ -28,20 +30,20 @@ async def preview(req, path):
     """Preview a file"""
     maxsize = int(req.args.get("px", 1024))
     maxzoom = float(req.args.get("zoom", 2.0))
-    quality = int(req.args.get("q", 40))
+    quality = int(req.args.get("q", 60))
     rel = PurePosixPath(sanitize(unquote(path)))
     path = config.config.path / rel
     stat = path.lstat()
     etag = config.derived_secret(
         "preview", rel, stat.st_mtime_ns, quality, maxsize, maxzoom
     ).hex()
-    savename = PurePosixPath(path.name).with_suffix(".webp")
+    savename = PurePosixPath(path.name).with_suffix(".avif")
     headers = {
         "etag": etag,
         "last-modified": format_date_time(stat.st_mtime),
         "cache-control": "max-age=604800, immutable"
         + ("" if config.config.public else ", private"),
-        "content-type": "image/webp",
+        "content-type": "image/avif",
         "content-disposition": f"inline; filename*=UTF-8''{urllib.parse.quote(savename.as_posix())}",
     }
     if req.headers.if_none_match == etag:
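As a usage sketch, a client fetches a preview with the px and q query parameters read above and can revalidate with the ETag. The host, port and file path here are hypothetical, and the body of the If-None-Match branch falls outside this hunk (it presumably answers 304 Not Modified):

import urllib.error
import urllib.request

url = "http://localhost:8000/preview/photos/cat.heic?px=1024&q=60"  # hypothetical
with urllib.request.urlopen(url) as resp:
    etag = resp.headers["etag"]  # derived_secret(...).hex() from the handler above
    avif = resp.read()           # image/avif bytes

# Revalidation: for an unchanged file the handler matches If-None-Match
# against the etag instead of re-encoding the preview.
req = urllib.request.Request(url, headers={"If-None-Match": etag})
try:
    urllib.request.urlopen(req)
except urllib.error.HTTPError as e:
    assert e.code == 304  # urllib raises HTTPError for 304 responses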
@@ -60,58 +62,164 @@ async def preview(req, path):
 def dispatch(path, quality, maxsize, maxzoom):
     if path.suffix.lower() in (".pdf", ".xps", ".epub", ".mobi"):
         return process_pdf(path, quality=quality, maxsize=maxsize, maxzoom=maxzoom)
-    if mimetypes.guess_type(path.name)[0].startswith("video/"):
+    type, _ = mimetypes.guess_type(path.name)
+    if type and type.startswith("video/"):
         return process_video(path, quality=quality, maxsize=maxsize)
     return process_image(path, quality=quality, maxsize=maxsize)
 
 
 def process_image(path, *, maxsize, quality):
+    t_load_start = perf_counter()
     img = Image.open(path)
-    w, h = img.size
-    img.thumbnail((min(w, maxsize), min(h, maxsize)))
     # Fix rotation based on EXIF data
     try:
         rotate_values = {3: 180, 6: 270, 8: 90}
         orientation = img._getexif().get(274)
         if orientation in rotate_values:
             logger.debug(f"Rotating preview {path} by {rotate_values[orientation]}")
             img = img.rotate(rotate_values[orientation], expand=True)
     except AttributeError:
         ...
     except Exception as e:
         logger.error(f"Error rotating preview image: {e}")
-    # Save as webp
+    # Force decode to include I/O in load timing
+    img.load()
+    t_load_end = perf_counter()
+    # Resize
+    orig_w, orig_h = img.size
+    t_proc_start = perf_counter()
+    img.thumbnail((min(orig_w, maxsize), min(orig_h, maxsize)))
+    t_proc_end = perf_counter()
+    # Save as AVIF
     imgdata = io.BytesIO()
-    img.save(imgdata, format="webp", quality=quality, method=4)
-    return imgdata.getvalue()
+    t_save_start = perf_counter()
+    img.save(imgdata, format="avif", quality=quality, speed=10, max_threads=1)
+    t_save_end = perf_counter()
+
+    ret = imgdata.getvalue()
+
+    load_ms = (t_load_end - t_load_start) * 1000
+    proc_ms = (t_proc_end - t_proc_start) * 1000
+    save_ms = (t_save_end - t_save_start) * 1000
+    logger.debug(
+        "Preview image %s: load=%.1fms process=%.1fms save=%.1fms out=%.1fKB",
+        path.name,
+        load_ms,
+        proc_ms,
+        save_ms,
+        len(ret) / 1024,
+    )
+
+    return ret
 
 
 def process_pdf(path, *, maxsize, maxzoom, quality, page_number=0):
+    t_load_start = perf_counter()
     pdf = fitz.open(path)
     page = pdf.load_page(page_number)
     w, h = page.rect[2:4]
     zoom = min(maxsize / w, maxsize / h, maxzoom)
     mat = fitz.Matrix(zoom, zoom)
-    pix = page.get_pixmap(matrix=mat)
-    return pix.pil_tobytes(format="webp", quality=quality, method=4)
+    pix = page.get_pixmap(matrix=mat)  # type: ignore[attr-defined]
+    t_load_end = perf_counter()
+
+    t_save_start = perf_counter()
+    ret = pix.pil_tobytes(format="avif", quality=quality, speed=10, max_threads=1)
+    t_save_end = perf_counter()
+
+    logger.debug(
+        "Preview pdf %s: load+render=%.1fms save=%.1fms",
+        path.name,
+        (t_load_end - t_load_start) * 1000,
+        (t_save_end - t_save_start) * 1000,
+    )
+    return ret
 
 
 def process_video(path, *, maxsize, quality):
-    with av.open(str(path)) as container:
-        stream = container.streams.video[0]
-        stream.codec_context.skip_frame = "NONKEY"
-        rot = stream.side_data and stream.side_data.get(DISPLAYMATRIX) or 0
-        container.seek(container.duration // 8)
-        img = next(container.decode(stream)).to_image()
-        del stream
-
-    img.thumbnail((maxsize, maxsize))
+    frame = None
     imgdata = io.BytesIO()
-    if rot:
-        img = img.rotate(rot, expand=True)
-    img.save(imgdata, format="webp", quality=quality, method=4)
-    del img
+    istream = ostream = icc = occ = frame = None
+    t_load_start = perf_counter()
+    # Initialize to avoid "possibly unbound" in static analysis when exceptions occur
+    t_load_end = t_load_start
+    t_save_start = t_load_start
+    t_save_end = t_load_start
+    with (
+        av.open(str(path)) as icontainer,
+        av.open(imgdata, "w", format="avif") as ocontainer,
+    ):
+        istream = icontainer.streams.video[0]
+        istream.codec_context.skip_frame = "NONKEY"
+        icontainer.seek((icontainer.duration or 0) // 8)
+        for frame in icontainer.decode(istream):
+            if frame.dts is not None:
+                break
+        else:
+            raise RuntimeError("No frames found in video")
+
+        # Resize frame to thumbnail size
+        if frame.width > maxsize or frame.height > maxsize:
+            scale_factor = min(maxsize / frame.width, maxsize / frame.height)
+            new_width = int(frame.width * scale_factor)
+            new_height = int(frame.height * scale_factor)
+            frame = frame.reformat(width=new_width, height=new_height)
+
+        # Simple rotation detection and logging
+        if frame.rotation:
+            try:
+                fplanes = frame.to_ndarray()
+                # Split into Y, U, V planes of proper dimensions
+                planes = [
+                    fplanes[: frame.height],
+                    fplanes[frame.height : frame.height + frame.height // 4].reshape(
+                        frame.height // 2, frame.width // 2
+                    ),
+                    fplanes[frame.height + frame.height // 4 :].reshape(
+                        frame.height // 2, frame.width // 2
+                    ),
+                ]
+                # Rotate
+                planes = [np.rot90(p, frame.rotation // 90) for p in planes]
+                # Restore PyAV format
+                planes = np.hstack([p.flat for p in planes]).reshape(
+                    -1, planes[0].shape[1]
+                )
+                frame = av.VideoFrame.from_ndarray(planes, format=frame.format.name)
+                del planes, fplanes
+            except Exception as e:
+                if "not yet supported" in str(e):
+                    logger.warning(
+                        f"Not rotating {path.name} preview image by {frame.rotation}°:\n PyAV: {e}"
+                    )
+                else:
+                    logger.exception(f"Error rotating video frame: {e}")
+        t_load_end = perf_counter()
+
+        t_save_start = perf_counter()
+        crf = str(int(63 * (1 - quality / 100) ** 2))  # Closely matching PIL quality-%
+        ostream = ocontainer.add_stream(
+            "av1",
+            options={
+                "crf": crf,
+                "usage": "realtime",
+                "cpu-used": "8",
+                "threads": "1",
+            },
+        )
+        assert isinstance(ostream, av.VideoStream)
+        ostream.width = frame.width
+        ostream.height = frame.height
+        icc = istream.codec_context
+        occ = ostream.codec_context
+
+        # Copy HDR metadata from input video stream
+        occ.color_primaries = icc.color_primaries
+        occ.color_trc = icc.color_trc
+        occ.colorspace = icc.colorspace
+        occ.color_range = icc.color_range
+
+        ocontainer.mux(ostream.encode(frame))
+        ocontainer.mux(ostream.encode(None))  # Flush the stream
+    t_save_end = perf_counter()
+
+    # Capture frame dimensions before cleanup
     ret = imgdata.getvalue()
-    del imgdata
+    logger.debug(
+        "Preview video %s: load+decode=%.1fms save=%.1fms",
+        path.name,
+        (t_load_end - t_load_start) * 1000,
+        (t_save_end - t_save_start) * 1000,
+    )
+    del imgdata, istream, ostream, icc, occ, frame
     gc.collect()
     return ret
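The rotation branch above operates on raw yuv420p data: frame.to_ndarray() yields one (height * 3 // 2, width) uint8 array with the full-resolution Y plane stacked above the quarter-resolution U and V planes, which is why the code slices at height and at height + height // 4. A self-contained sketch of that plane math, with made-up dimensions (even width and height assumed):

import numpy as np

w, h = 8, 4  # hypothetical even frame dimensions
buf = np.zeros((h * 3 // 2, w), dtype=np.uint8)  # yuv420p layout from to_ndarray()

y = buf[:h]                                      # (4, 8) luma plane
u = buf[h : h + h // 4].reshape(h // 2, w // 2)  # (2, 4) chroma plane
v = buf[h + h // 4 :].reshape(h // 2, w // 2)    # (2, 4) chroma plane
assert y.size + u.size + v.size == buf.size

# Rotate each plane 90° and re-stack into the packed layout, as the diff does:
planes = [np.rot90(p, 1) for p in (y, u, v)]
packed = np.hstack([p.flat for p in planes]).reshape(-1, planes[0].shape[1])
assert packed.shape == (w * 3 // 2, h)  # width and height trade places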
@@ -127,8 +127,7 @@ class FileEntry(msgspec.Struct, array_like=True, frozen=True):
         return f"{self.name} ({self.size}, {self.mtime})"
 
 
-class Update(msgspec.Struct, array_like=True):
-    ...
+class Update(msgspec.Struct, array_like=True): ...
 
 
 class UpdKeep(Update, tag="k"):
@@ -1,5 +1,7 @@
 import os
+import re
+import signal
 import sys
 from pathlib import Path
 
 from sanic import Sanic
@@ -11,6 +13,14 @@ def run(*, dev=False):
     """Run Sanic main process that spawns worker processes to serve HTTP requests."""
     from .app import app
 
+    # Set up immediate exit on Ctrl+C for faster termination
+    def signal_handler(signum, frame):
+        print("\nReceived interrupt signal, exiting immediately...")
+        os._exit(0)
+
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGTERM, signal_handler)
+
     url, opts = parse_listen(config.config.listen)
     # Silence Sanic's warning about running in production rather than debug
     os.environ["SANIC_IGNORE_PRODUCTION_WARNING"] = "1"
@@ -29,6 +29,8 @@ async def handle_sanic_exception(request, e):
     if not message or not request.app.debug and code == 500:
         message = "Internal Server Error"
     message = f"⚠️ {message}" if code < 500 else f"🛑 {message}"
+    if code == 500:
+        logger.exception(e)
     # Non-browsers get JSON errors
     if "text/html" not in request.headers.accept:
         return jres(
@@ -48,6 +48,7 @@ def treeiter(rootmod):
 def treeget(rootmod: list[FileEntry], path: PurePosixPath):
     begin = None
     ret = []
+
     for i, relpath, entry in treeiter(rootmod):
         if begin is None:
             if relpath == path:
@@ -57,6 +58,7 @@ def treeget(rootmod: list[FileEntry], path: PurePosixPath):
         if entry.level <= len(path.parts):
             break
         ret.append(entry)
+
     return begin, ret
 
 
@@ -77,28 +79,36 @@ def treeinspos(rootmod: list[FileEntry], relpath: PurePosixPath, relfile: int):
             # root
             level += 1
             continue
 
         ename = rel.parts[level - 1]
         name = relpath.parts[level - 1]
+
         esort = sortkey(ename)
         nsort = sortkey(name)
         # Non-leaf are always folders, only use relfile at leaf
         isfile = relfile if len(relpath.parts) == level else 0
+
         # First compare by isfile, then by sorting order and if that too matches then case sensitive
         cmp = (
             entry.isfile - isfile
             or (esort > nsort) - (esort < nsort)
             or (ename > name) - (ename < name)
         )
+
         if cmp > 0:
             return i
         if cmp < 0:
             continue
+
         level += 1
         if level > len(relpath.parts):
-            print("ERROR: insertpos", relpath, i, entry.name, entry.level, level)
+            logger.error(
+                f"insertpos level overflow: relpath={relpath}, i={i}, entry.name={entry.name}, entry.level={entry.level}, level={level}"
+            )
             break
     else:
         i += 1
 
     return i
@@ -179,21 +189,16 @@ def update_path(rootmod: list[FileEntry], relpath: PurePosixPath, loop):
     """Called on FS updates, check the filesystem and broadcast any changes."""
     new = walk(relpath)
     obegin, old = treeget(rootmod, relpath)
+
     if old == new:
         logger.debug(
             f"Watch: Event without changes needed {relpath}"
             if old
             else f"Watch: Event with old and new missing: {relpath}"
         )
         return
+
     if obegin is not None:
         del rootmod[obegin : obegin + len(old)]
+
     if new:
         logger.debug(f"Watch: Update {relpath}" if old else f"Watch: Created {relpath}")
         i = treeinspos(rootmod, relpath, new[0].isfile)
         rootmod[i:i] = new
     else:
         logger.debug(f"Watch: Removed {relpath}")
 
 
 def update_space(loop):
@@ -218,17 +223,35 @@ def format_update(old, new):
     oremain, nremain = set(old), set(new)
     update = []
     keep_count = 0
+    iteration_count = 0
+    # Precompute index maps to allow deterministic tie-breaking when both
+    # candidates exist in both sequences but are not equal (rename/move cases)
+    old_pos = {e: i for i, e in enumerate(old)}
+    new_pos = {e: i for i, e in enumerate(new)}
+
     while oidx < len(old) and nidx < len(new):
+        iteration_count += 1
+
+        # Emergency brake for potential infinite loops
+        if iteration_count > 50000:
+            logger.error(
+                f"format_update potential infinite loop! iteration={iteration_count}, oidx={oidx}, nidx={nidx}"
+            )
+            raise Exception(
+                f"format_update infinite loop detected at iteration {iteration_count}"
+            )
+
         modified = False
         # Matching entries are kept
         if old[oidx] == new[nidx]:
             entry = old[oidx]
-            oremain.remove(entry)
-            nremain.remove(entry)
+            oremain.discard(entry)
+            nremain.discard(entry)
             keep_count += 1
             oidx += 1
             nidx += 1
             continue
 
         if keep_count > 0:
             modified = True
             update.append(UpdKeep(keep_count))
@@ -248,7 +271,7 @@ def format_update(old, new):
         insert_items = []
         while nidx < len(new) and new[nidx] not in oremain:
             entry = new[nidx]
-            nremain.remove(entry)
+            nremain.discard(entry)
             insert_items.append(entry)
             nidx += 1
         if insert_items:
@@ -256,9 +279,32 @@ def format_update(old, new):
             update.append(UpdIns(insert_items))
 
         if not modified:
-            raise Exception(
-                f"Infinite loop in diff {nidx=} {oidx=} {len(old)=} {len(new)=}"
-            )
+            # Tie-break: both items exist in both lists but don't match here.
+            # Decide whether to delete old[oidx] first or insert new[nidx] first
+            # based on which alignment is closer.
+            if oidx >= len(old) or nidx >= len(new):
+                break
+            cur_old = old[oidx]
+            cur_new = new[nidx]
+
+            pos_old_in_new = new_pos.get(cur_old)
+            pos_new_in_old = old_pos.get(cur_new)
+
+            # Default distances if not present (shouldn't happen if in remain sets)
+            dist_del = (pos_old_in_new - nidx) if pos_old_in_new is not None else 1
+            dist_ins = (pos_new_in_old - oidx) if pos_new_in_old is not None else 1
+
+            # Prefer the operation with smaller forward distance; tie => delete
+            if dist_del <= dist_ins:
+                # Delete current old item
+                oremain.discard(cur_old)
+                update.append(UpdDel(1))
+                oidx += 1
+            else:
+                # Insert current new item
+                nremain.discard(cur_new)
+                update.append(UpdIns([cur_new]))
+                nidx += 1
 
     # Diff any remaining
     if keep_count > 0:
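For context, format_update() produces the UpdKeep/UpdDel/UpdIns stream that clients replay against their copy of the previous listing. A minimal sketch of the consuming side, with the ops simplified to plain tuples instead of the msgspec structs (apply_update is a hypothetical helper, not part of cista):

def apply_update(old, update):
    """Replay a keep/delete/insert stream against the old list."""
    new, i = [], 0
    for kind, arg in update:
        if kind == "k":    # UpdKeep(n): copy n entries unchanged
            new += old[i : i + arg]
            i += arg
        elif kind == "d":  # UpdDel(n): skip n old entries
            i += arg
        elif kind == "i":  # UpdIns(items): splice in new entries
            new += arg
    return new

# "b" deleted, "x" inserted in its place:
assert apply_update(["a", "b", "c"], [("k", 1), ("d", 1), ("i", ["x"]), ("k", 1)]) == ["a", "x", "c"]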
@@ -311,10 +357,7 @@ def watcher_inotify(loop):
     while not quit.is_set():
         i = inotify.adapters.InotifyTree(rootpath.as_posix())
         # Initialize the tree from filesystem
-        t0 = time.perf_counter()
         update_root(loop)
-        t1 = time.perf_counter()
-        logger.debug(f"Root update took {t1 - t0:.1f}s")
         trefresh = time.monotonic() + 300.0
         tspace = time.monotonic() + 5.0
         # Watch for changes (frequent wakeups needed for quiting)
@@ -335,32 +378,52 @@ def watcher_inotify(loop):
             if quit.is_set():
                 return
             interesting = any(f in modified_flags for f in event[1])
             if event[2] == rootpath.as_posix() and event[3] == "zzz":
                 logger.debug(f"Watch: {interesting=} {event=}")
             if interesting:
                 # Update modified path
-                t0 = time.perf_counter()
                 path = PurePosixPath(event[2]) / event[3]
-                update_path(rootmod, path.relative_to(rootpath), loop)
-                t1 = time.perf_counter()
-                logger.debug(f"Watch: Update {event[3]} took {t1 - t0:.1f}s")
+                try:
+                    rel_path = path.relative_to(rootpath)
+                    update_path(rootmod, rel_path, loop)
+                except Exception as e:
+                    logger.error(
+                        f"Error processing inotify event for path {path}: {e}"
+                    )
+                    raise
             if not dirty:
                 t = time.monotonic()
                 dirty = True
-            # Wait a maximum of 0.5s to push the updates
-            if dirty and time.monotonic() >= t + 0.5:
+            # Wait a maximum of 0.2s to push the updates
+            if dirty and time.monotonic() >= t + 0.2:
                 break
         if dirty and state.root != rootmod:
-            t0 = time.perf_counter()
-            update = format_update(state.root, rootmod)
-            t1 = time.perf_counter()
-            with state.lock:
-                broadcast(update, loop)
-                state.root = rootmod
-            t2 = time.perf_counter()
-            logger.debug(
-                f"Format update took {t1 - t0:.1f}s, broadcast {t2 - t1:.1f}s"
-            )
+            try:
+                update = format_update(state.root, rootmod)
+                with state.lock:
+                    broadcast(update, loop)
+                    state.root = rootmod
+            except Exception:
+                logger.exception(
+                    "format_update failed; falling back to full rescan"
+                )
+                # Fallback: full rescan and try diff again; last resort send full root
+                try:
+                    fresh = walk(PurePosixPath())
+                    try:
+                        update = format_update(state.root, fresh)
+                        with state.lock:
+                            broadcast(update, loop)
+                            state.root = fresh
+                    except Exception:
+                        logger.exception(
+                            "Fallback diff failed; sending full root snapshot"
+                        )
+                        with state.lock:
+                            broadcast(format_root(fresh), loop)
+                            state.root = fresh
+                except Exception:
+                    logger.exception(
+                        "Full rescan failed; dropping this batch of updates"
+                    )
 
         del i  # Free the inotify object