From aa1ea03bedd8c8641268af1a8892862ae6fd4614 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Wed, 13 Aug 2025 10:15:22 -0700 Subject: [PATCH 01/43] Fix running with today's Python tooling and av module. --- cista/preview.py | 34 ++++++++++++++++++++++++++++------ pyproject.toml | 15 +++++++++------ 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/cista/preview.py b/cista/preview.py index a595057..c37a4a4 100644 --- a/cista/preview.py +++ b/cista/preview.py @@ -18,8 +18,6 @@ from sanic.log import logger from cista import config from cista.util.filename import sanitize -DISPLAYMATRIX = av.stream.SideData.DISPLAYMATRIX - bp = Blueprint("preview", url_prefix="/preview") @@ -100,15 +98,39 @@ def process_video(path, *, maxsize, quality): with av.open(str(path)) as container: stream = container.streams.video[0] stream.codec_context.skip_frame = "NONKEY" - rot = stream.side_data and stream.side_data.get(DISPLAYMATRIX) or 0 + + # Updated side data access for newer av versions + rot = 0 + try: + # Try newer API first + if hasattr(stream, "side_data") and stream.side_data: + display_matrix = stream.side_data.get("DISPLAYMATRIX") + if display_matrix: + rot = ( + display_matrix.rotation + if hasattr(display_matrix, "rotation") + else 0 + ) + except (AttributeError, KeyError): + # Fallback for older API or missing side data + rot = 0 + container.seek(container.duration // 8) - img = next(container.decode(stream)).to_image() + try: + frame = next(container.decode(stream)) + img = frame.to_image() + except StopIteration: + # If no frame found, try from beginning + container.seek(0) + frame = next(container.decode(stream)) + img = frame.to_image() + del stream img.thumbnail((maxsize, maxsize)) imgdata = io.BytesIO() - if rot: - img = img.rotate(rot, expand=True) + if rot and rot != 0: + img = img.rotate(-rot, expand=True) # Negative rotation for correct orientation img.save(imgdata, format="webp", quality=quality, method=4) del img ret = imgdata.getvalue() diff --git a/pyproject.toml b/pyproject.toml index 64fb3eb..401c9f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ classifiers = [ requires-python = ">=3.11" dependencies = [ "argon2-cffi", + "av", "blake3", "brotli", "docopt", @@ -23,7 +24,6 @@ dependencies = [ "natsort", "pathvalidate", "pillow", - "pyav", "pyjwt", "pymupdf", "sanic", @@ -50,15 +50,15 @@ source = "vcs" [tool.hatch.build] artifacts = ["cista/wwwroot"] targets.sdist.hooks.custom.path = "scripts/build-frontend.py" +targets.sdist.include = [ + "/cista", +] hooks.vcs.version-file = "cista/_version.py" hooks.vcs.template = """ # This file is automatically generated by hatch build. __version__ = {version!r} """ only-packages = true -targets.sdist.include = [ - "/cista", -] [tool.pytest.ini_options] addopts = [ @@ -95,11 +95,14 @@ ignore = [ "TD0", "TRY", ] -show-source = true -show-fixes = true [tool.ruff.isort] known-first-party = ["cista"] [tool.ruff.per-file-ignores] "tests/*" = ["S", "ANN", "D", "INP"] + +[dependency-groups] +dev = [ + "pytest>=8.4.1", +] -- 2.49.0 From cf7d61d11c6ba41fda7d4f8ce175ef5baaaa4ecd Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Wed, 13 Aug 2025 10:15:59 -0700 Subject: [PATCH 02/43] Don't add uv.lock to repo. 
--- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 02868b0..bc3d2d8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ .* +*.lock !.gitignore __pycache__/ *.egg-info/ -- 2.49.0 From 425d7012b70f65adedf4a96899d7847cca8c999b Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Wed, 13 Aug 2025 10:16:54 -0700 Subject: [PATCH 03/43] Additional tests for watching. --- tests/test_watching_directory_rename.py | 1150 +++++++++++++++++++++++ 1 file changed, 1150 insertions(+) create mode 100644 tests/test_watching_directory_rename.py diff --git a/tests/test_watching_directory_rename.py b/tests/test_watching_directory_rename.py new file mode 100644 index 0000000..0abb6fe --- /dev/null +++ b/tests/test_watching_directory_rename.py @@ -0,0 +1,1150 @@ +import asyncio +import queue +import shutil +import signal +import tempfile +import threading +import time +from pathlib import Path, PurePosixPath +from unittest.mock import MagicMock, patch + +import msgspec +import pytest + +from cista import config, watching +from cista.protocol import UpdateMessage + + +@pytest.fixture +def temp_dir(): + """Create a temporary directory for testing.""" + with tempfile.TemporaryDirectory() as tmpdirname: + yield Path(tmpdirname) + + +@pytest.fixture +def setup_watcher(temp_dir): + """Setup the watcher with a temporary directory.""" + # Store original values + original_rootpath = watching.rootpath + original_state = watching.state + original_quit = watching.quit + + # Setup test environment + config.config = config.Config(path=temp_dir, listen=":0") + watching.rootpath = temp_dir + watching.state = watching.State() + watching.quit = threading.Event() + + yield temp_dir + + # Cleanup + watching.quit.set() + watching.rootpath = original_rootpath + watching.state = original_state + watching.quit = original_quit + + +def create_test_structure(base_path: Path): + """Create a test directory structure with subdirectories and files.""" + # Create main subdirectory with files + subdir = base_path / "test_subdir" + subdir.mkdir() + + # Add some files to the subdirectory + (subdir / "file1.txt").write_text("content1") + (subdir / "file2.txt").write_text("content2") + + # Create a nested subdirectory + nested = subdir / "nested" + nested.mkdir() + (nested / "nested_file.txt").write_text("nested content") + + # Create another top-level directory for reference + other_dir = base_path / "other_dir" + other_dir.mkdir() + (other_dir / "other_file.txt").write_text("other content") + + return subdir, nested, other_dir + + +def test_nested_directory_rename_causes_hang(setup_watcher): + """Test renaming deeply nested directories - this is where the hang typically occurs. + + The bug manifests when renaming directories that are nested within other directories, + not just top-level directories. + """ + temp_dir = setup_watcher + + # Create a complex nested structure that mirrors real-world usage + # parent/child/grandchild/target_dir/files... 
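+    # (Depth is the point of this setup: treeinspos() compares one path
+    # component per level while searching for an insertion point, so a rename
+    # four levels down exercises exactly the level-tracking logic implicated
+    # in the reported hang.)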
+ parent = temp_dir / "parent_folder" + parent.mkdir() + + child = parent / "child_folder" + child.mkdir() + + grandchild = child / "grandchild_folder" + grandchild.mkdir() + + # This is the directory we'll rename - it's deeply nested + target_dir = grandchild / "target_to_rename" + target_dir.mkdir() + + # Add files to make the directory scan more complex + for i in range(20): + (target_dir / f"file_{i:03d}.txt").write_text(f"content_{i}") + + # Add another nested level inside target + deep_nested = target_dir / "even_deeper" + deep_nested.mkdir() + for i in range(10): + (deep_nested / f"deep_file_{i}.txt").write_text(f"deep_content_{i}") + + # Initialize watcher state + initial_root = watching.walk(PurePosixPath()) + watching.state.root = initial_root + + # Verify the nested structure exists + target_path = PurePosixPath("parent_folder/child_folder/grandchild_folder/target_to_rename") + initial_begin, initial_entries = watching.treeget(initial_root, target_path) + assert initial_begin is not None, "Target directory should be found in initial state" + assert len(initial_entries) > 1, "Target directory should contain files" + + # Now rename the deeply nested directory + new_target = grandchild / "renamed_target" + target_dir.rename(new_target) + + loop = asyncio.new_event_loop() + working_state = watching.state.root[:] + + # This is where the hang likely occurs - updating a deeply nested path + old_nested_path = PurePosixPath("parent_folder/child_folder/grandchild_folder/target_to_rename") + new_nested_path = PurePosixPath("parent_folder/child_folder/grandchild_folder/renamed_target") + + start_time = time.time() + + # Update the old path (should remove it) + watching.update_path(working_state, old_nested_path, loop) + + # Update the new path (should add it) + watching.update_path(working_state, new_nested_path, loop) + + end_time = time.time() + + # Check for hang - nested operations should still be fast + duration = end_time - start_time + assert duration < 3.0, f"Nested directory rename took too long: {duration}s - possible hang" + + # Verify the old nested path is gone + old_begin, old_entries = watching.treeget(working_state, old_nested_path) + assert old_begin is None, "Old nested directory should be removed from tree" + + # Verify the new nested path exists + new_begin, new_entries = watching.treeget(working_state, new_nested_path) + assert new_begin is not None, "New nested directory should exist in tree" + assert len(new_entries) > 1, "New nested directory should contain all the files" + + +def test_move_directory_across_nested_parents(setup_watcher): + """Test moving a directory from one nested location to another - high hang risk scenario.""" + temp_dir = setup_watcher + + # Create source nested structure + source_parent = temp_dir / "source_area" + source_parent.mkdir() + source_child = source_parent / "source_child" + source_child.mkdir() + + # Create the directory to move + movable_dir = source_child / "movable_directory" + movable_dir.mkdir() + + # Add content to make it more complex + for i in range(15): + (movable_dir / f"file_{i}.txt").write_text(f"movable_content_{i}") + + # Create a subdirectory within the movable directory + sub_movable = movable_dir / "sub_directory" + sub_movable.mkdir() + for i in range(5): + (sub_movable / f"sub_file_{i}.txt").write_text(f"sub_content_{i}") + + # Create destination nested structure + dest_parent = temp_dir / "destination_area" + dest_parent.mkdir() + dest_child = dest_parent / "dest_child" + dest_child.mkdir() + dest_grandchild = 
dest_child / "dest_grandchild" + dest_grandchild.mkdir() + + # Initialize state + watching.state.root = watching.walk(PurePosixPath()) + working_state = watching.state.root[:] + + # Move the directory to the deeply nested destination + dest_movable = dest_grandchild / "moved_directory" + movable_dir.rename(dest_movable) + + loop = asyncio.new_event_loop() + + # These paths represent the complex nested move operation + old_path = PurePosixPath("source_area/source_child/movable_directory") + new_path = PurePosixPath("destination_area/dest_child/dest_grandchild/moved_directory") + + start_time = time.time() + + # This sequence is where hangs typically occur with cross-directory moves + try: + # Remove from old location + watching.update_path(working_state, old_path, loop) + + # Add to new location + watching.update_path(working_state, new_path, loop) + + except Exception as e: + pytest.fail(f"Nested directory move failed: {e}") + + end_time = time.time() + duration = end_time - start_time + + # Should complete without hanging + assert duration < 5.0, f"Cross-nested move took too long: {duration}s" + + # Verify old location is empty + old_begin, old_entries = watching.treeget(working_state, old_path) + assert old_begin is None, "Directory should be removed from old nested location" + + # Verify new location has the directory + new_begin, new_entries = watching.treeget(working_state, new_path) + assert new_begin is not None, "Directory should exist in new nested location" + assert len(new_entries) > 1, "Moved directory should retain all its contents" + + +def test_rapid_nested_directory_operations_cause_corruption(setup_watcher): + """Test rapid operations on nested directories that can cause state corruption.""" + temp_dir = setup_watcher + + # Create multiple nested structures + structures = [] + for i in range(3): + level1 = temp_dir / f"level1_{i}" + level1.mkdir() + level2 = level1 / f"level2_{i}" + level2.mkdir() + level3 = level2 / f"level3_{i}" + level3.mkdir() + target = level3 / f"target_{i}" + target.mkdir() + + # Add files + for j in range(10): + (target / f"file_{j}.txt").write_text(f"content_{i}_{j}") + + structures.append((level1, level2, level3, target)) + + # Initialize state + watching.state.root = watching.walk(PurePosixPath()) + working_state = watching.state.root[:] + + loop = asyncio.new_event_loop() + + # Perform rapid nested operations that can cause race conditions + operations = [] + + for i, (level1, level2, level3, target) in enumerate(structures): + # Rename the deeply nested target + new_target = level3 / f"renamed_target_{i}" + target.rename(new_target) + + old_path = PurePosixPath(f"level1_{i}/level2_{i}/level3_{i}/target_{i}") + new_path = PurePosixPath(f"level1_{i}/level2_{i}/level3_{i}/renamed_target_{i}") + operations.append((old_path, new_path)) + + start_time = time.time() + + # Process all operations rapidly - this can cause state corruption/hangs + for old_path, new_path in operations: + try: + watching.update_path(working_state, old_path, loop) + watching.update_path(working_state, new_path, loop) + except Exception as e: + pytest.fail(f"Rapid nested operations failed for {old_path} -> {new_path}: {e}") + + end_time = time.time() + duration = end_time - start_time + + # Should complete without hanging even with rapid operations + assert duration < 10.0, f"Rapid nested operations took too long: {duration}s" + + # Verify final state consistency + for i, (old_path, new_path) in enumerate(operations): + # Old paths should be gone + old_begin, old_entries = 
watching.treeget(working_state, old_path) + assert old_begin is None, f"Old path {old_path} should be removed" + + # New paths should exist + new_begin, new_entries = watching.treeget(working_state, new_path) + assert new_begin is not None, f"New path {new_path} should exist" + + +def test_nested_directory_treeget_corruption(setup_watcher): + """Test that treeget function handles nested path operations correctly without corruption.""" + temp_dir = setup_watcher + + # Create a complex tree structure + root_dirs = [] + for i in range(3): + root_dir = temp_dir / f"root_{i}" + root_dir.mkdir() + + for j in range(2): + mid_dir = root_dir / f"mid_{j}" + mid_dir.mkdir() + + for k in range(2): + leaf_dir = mid_dir / f"leaf_{k}" + leaf_dir.mkdir() + + # Add files to leaf directories + for l in range(5): + (leaf_dir / f"file_{l}.txt").write_text(f"content_{i}_{j}_{k}_{l}") + + root_dirs.append(root_dir) + + # Initialize state + initial_root = watching.walk(PurePosixPath()) + watching.state.root = initial_root + + # Test treeget with various nested paths + test_paths = [ + PurePosixPath("root_0"), + PurePosixPath("root_0/mid_0"), + PurePosixPath("root_0/mid_0/leaf_0"), + PurePosixPath("root_1/mid_1/leaf_1"), + PurePosixPath("root_2/mid_0/leaf_1"), + ] + + # Verify treeget works correctly for all paths + for path in test_paths: + begin, entries = watching.treeget(initial_root, path) + assert begin is not None, f"treeget should find existing path: {path}" + assert len(entries) >= 1, f"treeget should return entries for: {path}" + + # Now rename a nested directory and test treeget consistency + old_leaf = temp_dir / "root_0" / "mid_0" / "leaf_0" + new_leaf = temp_dir / "root_0" / "mid_0" / "renamed_leaf" + old_leaf.rename(new_leaf) + + # Update the state + loop = asyncio.new_event_loop() + working_state = initial_root[:] + + old_nested_path = PurePosixPath("root_0/mid_0/leaf_0") + new_nested_path = PurePosixPath("root_0/mid_0/renamed_leaf") + + # Update paths + watching.update_path(working_state, old_nested_path, loop) + watching.update_path(working_state, new_nested_path, loop) + + # Verify treeget consistency after the update + old_begin, old_entries = watching.treeget(working_state, old_nested_path) + assert old_begin is None, "Old nested path should not be found after rename" + + new_begin, new_entries = watching.treeget(working_state, new_nested_path) + assert new_begin is not None, "New nested path should be found after rename" + assert len(new_entries) >= 1, "New nested path should have entries" + + # Verify that other paths are still accessible (no corruption) + for path in [PurePosixPath("root_1/mid_1/leaf_1"), PurePosixPath("root_2/mid_0/leaf_1")]: + begin, entries = watching.treeget(working_state, path) + assert begin is not None, f"Other paths should remain accessible: {path}" + + +def test_format_update_infinite_loop_with_complex_nested_changes(setup_watcher): + """Create a scenario that specifically triggers infinite loops in format_update. + + The hang often occurs in format_update when the diff algorithm gets confused + by complex nested directory moves. 
+ """ + temp_dir = setup_watcher + + # Create a complex scenario that can confuse the diff algorithm + # Multiple directories with similar names and nested structures + dirs_data = [] + + for i in range(4): + # Create main directory + main_dir = temp_dir / f"main_{i}" + main_dir.mkdir() + + # Create subdirectories with similar patterns + sub_dir = main_dir / "common_subdir_name" + sub_dir.mkdir() + + # Create files with varying content + for j in range(15): + (sub_dir / f"file_{j:02d}.txt").write_text(f"main_{i}_content_{j}") + + # Add another level of nesting + nested = sub_dir / "nested_level" + nested.mkdir() + for j in range(8): + (nested / f"nested_{j}.txt").write_text(f"nested_{i}_{j}") + + dirs_data.append((main_dir, sub_dir, nested)) + + # Get initial state + old_state = watching.walk(PurePosixPath()) + + # Perform complex renames that can confuse the diff algorithm + # Rename all subdirectories to have even more similar names + for i, (main_dir, sub_dir, nested) in enumerate(dirs_data): + # Rename the subdirectory to a name that's very similar to others + new_sub_name = f"renamed_common_subdir_{i}" + new_sub_dir = main_dir / new_sub_name + sub_dir.rename(new_sub_dir) + + # Also rename some files to create more confusion + for j in range(0, 10, 2): # Rename every other file + old_file = new_sub_dir / f"file_{j:02d}.txt" + new_file = new_sub_dir / f"renamed_file_{j:02d}.txt" + if old_file.exists(): + old_file.rename(new_file) + + # Get new state + new_state = watching.walk(PurePosixPath()) + + # This is the critical test - format_update with complex nested changes + # that have caused infinite loops in the past + start_time = time.time() + + try: + # Set a more aggressive timeout + def timeout_handler(signum, frame): + raise TimeoutError("format_update appears to be hanging") + + # Set a 10-second timeout + signal.signal(signal.SIGALRM, timeout_handler) + signal.alarm(10) + + try: + update_msg = watching.format_update(old_state, new_state) + signal.alarm(0) # Cancel the alarm + + end_time = time.time() + duration = end_time - start_time + + # Even complex diffs should complete quickly + assert duration < 8.0, f"format_update took {duration}s - possible infinite loop" + + # Verify the result is valid + assert update_msg, "format_update should return a message" + decoded = msgspec.json.decode(update_msg, type=UpdateMessage) + assert decoded.update, "Update should contain operations" + + except TimeoutError: + signal.alarm(0) + pytest.fail("format_update hung/infinite loop detected with complex nested changes") + + except Exception as e: + signal.alarm(0) + pytest.fail(f"format_update failed: {e}") + + +def test_update_path_with_corrupted_tree_state(setup_watcher): + """Test update_path when the tree state becomes corrupted by rapid changes.""" + temp_dir = setup_watcher + + # Create a nested structure + parent = temp_dir / "parent" + parent.mkdir() + child = parent / "child" + child.mkdir() + target = child / "target_dir" + target.mkdir() + + # Add many files to make operations slower + for i in range(30): + (target / f"file_{i:03d}.txt").write_text(f"content_{i}") + + # Add nested subdirectories + for i in range(3): + subdir = target / f"subdir_{i}" + subdir.mkdir() + for j in range(10): + (subdir / f"sub_file_{j}.txt").write_text(f"sub_content_{i}_{j}") + + # Initialize state + watching.state.root = watching.walk(PurePosixPath()) + + # Create a working copy that we'll manually corrupt to simulate race conditions + working_state = watching.state.root[:] + + loop = 
asyncio.new_event_loop() + + # Rename the directory + new_target = child / "renamed_target" + target.rename(new_target) + + # Simulate the race condition by manually corrupting the tree state + # This mimics what happens when inotify events arrive out of order + + # First, try to update a path that should exist + old_path = PurePosixPath("parent/child/target_dir") + + # Manually remove an entry to simulate corruption + if len(working_state) > 5: + # Remove a random entry to corrupt the tree structure + del working_state[3] + + start_time = time.time() + + try: + # This should handle corrupted state gracefully + watching.update_path(working_state, old_path, loop) + + # Now add the new path + new_path = PurePosixPath("parent/child/renamed_target") + watching.update_path(working_state, new_path, loop) + + end_time = time.time() + duration = end_time - start_time + + # Should complete without hanging even with corrupted state + assert duration < 5.0, f"update_path with corrupted state took {duration}s" + + except Exception as e: + # Some exceptions are expected with corrupted state, but shouldn't hang + end_time = time.time() + duration = end_time - start_time + assert duration < 5.0, f"update_path hung even when failing: {duration}s" + + +def test_simulate_real_inotify_event_sequence(setup_watcher): + """Simulate the exact inotify event sequence that causes hangs.""" + temp_dir = setup_watcher + + # Create the exact scenario from real usage that triggers the bug + project_dir = temp_dir / "project" + project_dir.mkdir() + + src_dir = project_dir / "src" + src_dir.mkdir() + + components_dir = src_dir / "components" + components_dir.mkdir() + + # This is the directory that will be renamed + old_component = components_dir / "OldComponent" + old_component.mkdir() + + # Add files that exist in real projects + for filename in ["index.tsx", "styles.css", "types.ts", "utils.ts"]: + (old_component / filename).write_text(f"// {filename} content") + + # Add a subdirectory with more files + sub_dir = old_component / "subcomponents" + sub_dir.mkdir() + for i in range(5): + (sub_dir / f"SubComponent{i}.tsx").write_text(f"// SubComponent{i}") + + # Initialize state + watching.state.root = watching.walk(PurePosixPath()) + working_state = watching.state.root[:] + + loop = asyncio.new_event_loop() + + # This is the exact operation that causes hangs in real usage + new_component = components_dir / "NewComponent" + old_component.rename(new_component) + + # Simulate the inotify event sequence that causes problems + # IN_MOVED_FROM event for the old directory + old_path = PurePosixPath("project/src/components/OldComponent") + + # IN_MOVED_TO event for the new directory + new_path = PurePosixPath("project/src/components/NewComponent") + + # Track how long the operations take + start_time = time.time() + + # Set up timeout detection + def timeout_handler(signum, frame): + raise TimeoutError("Simulated inotify sequence hung") + + signal.signal(signal.SIGALRM, timeout_handler) + signal.alarm(15) # 15 second timeout + + try: + # This sequence is where the hang occurs in real usage + watching.update_path(working_state, old_path, loop) + watching.update_path(working_state, new_path, loop) + + # If we get here without hanging, cancel the alarm + signal.alarm(0) + + end_time = time.time() + duration = end_time - start_time + + # Real inotify operations should be fast + assert duration < 10.0, f"Simulated inotify sequence took {duration}s" + + # Verify the final state is correct + old_begin, old_entries = 
watching.treeget(working_state, old_path)
+        assert old_begin is None, "Old component path should be removed"
+
+        new_begin, new_entries = watching.treeget(working_state, new_path)
+        assert new_begin is not None, "New component path should exist"
+        assert len(new_entries) > 1, "New component should contain all files"
+
+    except TimeoutError:
+        signal.alarm(0)
+        pytest.fail("HANG DETECTED: Simulated inotify event sequence hung!")
+
+    except Exception as e:
+        signal.alarm(0)
+        pytest.fail(f"Simulated inotify sequence failed: {e}")
+
+    finally:
+        signal.alarm(0)  # Ensure alarm is cancelled
+
+
+def test_format_update_with_confusing_nested_renames(setup_watcher):
+    """Test format_update with nested directory changes that could cause infinite loops."""
+    temp_dir = setup_watcher
+
+    # Create complex nested structure that has caused issues
+    complex_structure = temp_dir / "complex"
+    complex_structure.mkdir()
+
+    # Create multiple levels with similar names (potential for confusion)
+    level_a = complex_structure / "level_a"
+    level_a.mkdir()
+    sublevel_a = level_a / "sublevel"
+    sublevel_a.mkdir()
+
+    level_b = complex_structure / "level_b"
+    level_b.mkdir()
+    sublevel_b = level_b / "sublevel"
+    sublevel_b.mkdir()
+
+    # Add files to each sublevel
+    for i in range(10):
+        (sublevel_a / f"file_a_{i}.txt").write_text(f"content_a_{i}")
+        (sublevel_b / f"file_b_{i}.txt").write_text(f"content_b_{i}")
+
+    # Get initial state
+    old_state = watching.walk(PurePosixPath())
+
+    # Perform nested directory renames that could confuse the diff algorithm
+    renamed_sublevel_a = level_a / "renamed_sublevel"
+    sublevel_a.rename(renamed_sublevel_a)
+
+    renamed_sublevel_b = level_b / "also_renamed_sublevel"
+    sublevel_b.rename(renamed_sublevel_b)
+
+    # Get new state
+    new_state = watching.walk(PurePosixPath())
+
+    # This is where infinite loops or hangs can occur in format_update
+    start_time = time.time()
+
+    try:
+        update_msg = watching.format_update(old_state, new_state)
+        end_time = time.time()
+
+        duration = end_time - start_time
+        assert duration < 5.0, f"format_update took too long with nested changes: {duration}s"
+
+        # Verify the update message is valid
+        assert update_msg, "format_update should return valid message"
+        decoded = msgspec.json.decode(update_msg, type=UpdateMessage)
+        assert decoded.update, "Update should contain operations"
+
+    except Exception as e:
+        pytest.fail(f"format_update failed or hung with nested directory changes: {e}")
+
+
+def test_directory_rename_race_with_updates(setup_watcher):
+    """Test that reproduces the hang when directory rename events race with updates.
+
+    This test simulates the exact conditions that cause the hang:
+    1. Create a directory with files
+    2. Start monitoring it
+    3. Rename the directory while the watcher is processing events
+    4.
This should cause a hang where old directory names are preserved + """ + temp_dir = setup_watcher + + # Create test structure with many files to increase chance of race conditions + subdir = temp_dir / "original_dir" + subdir.mkdir() + + # Create many files to make the directory scan take longer + for i in range(50): + (subdir / f"file_{i:03d}.txt").write_text(f"content_{i}") + + # Create nested directories + nested = subdir / "nested" + nested.mkdir() + for i in range(20): + (nested / f"nested_file_{i:03d}.txt").write_text(f"nested_content_{i}") + + # Initial scan to populate the state + initial_root = watching.walk(PurePosixPath()) + watching.state.root = initial_root + + # Verify initial structure + initial_names = [entry.name for entry in initial_root] + assert "original_dir" in initial_names + + # Create a mock event loop for testing + loop = asyncio.new_event_loop() + + # Simulate the problematic sequence: + # 1. Start processing the original directory + # 2. Rename it while processing + # 3. Try to update both old and new paths + + # Start by getting the initial state + original_rootmod = watching.state.root[:] + + # Rename the directory + renamed_dir = temp_dir / "renamed_dir" + subdir.rename(renamed_dir) + + # Now simulate what happens in the inotify watcher: + # Multiple rapid updates that can cause race conditions + + # First, try to update the old path (should remove it) + watching.update_path(original_rootmod, PurePosixPath("original_dir"), loop) + + # Then try to update the new path (should add it) + watching.update_path(original_rootmod, PurePosixPath("renamed_dir"), loop) + + # Check if the state is consistent + final_names = [entry.name for entry in original_rootmod] + + # The bug would manifest as: + # 1. Old directory name still present (should be gone) + # 2. New directory name missing (should be there) + # 3. 
Inconsistent state causing hangs + + # This is the expected correct behavior + assert "original_dir" not in final_names, "Old directory name should be removed" + assert "renamed_dir" in final_names, "New directory name should be present" + + # Additional check: verify we can still walk the renamed directory + renamed_walk = watching.walk(PurePosixPath("renamed_dir")) + assert len(renamed_walk) > 1, "Should be able to walk renamed directory" + + +def test_concurrent_inotify_events_simulation(setup_watcher): + """Simulate concurrent inotify events that can cause the hanging bug.""" + temp_dir = setup_watcher + + # Create a complex directory structure + dirs = ["dir_a", "dir_b", "dir_c"] + created_dirs = [] + + for dir_name in dirs: + dir_path = temp_dir / dir_name + dir_path.mkdir() + # Add files to each directory + for i in range(10): + (dir_path / f"file_{i}.txt").write_text(f"content in {dir_name}") + created_dirs.append(dir_path) + + # Initial state + watching.state.root = watching.walk(PurePosixPath()) + original_state = watching.state.root[:] + + loop = asyncio.new_event_loop() + + # Simulate rapid concurrent operations that happen in real usage + # This mimics what happens when multiple filesystem events arrive rapidly + + # Rename all directories simultaneously (as might happen with mv commands) + renamed_paths = [] + for i, dir_path in enumerate(created_dirs): + new_path = temp_dir / f"renamed_{dirs[i]}" + dir_path.rename(new_path) + renamed_paths.append(new_path) + + # Now simulate the inotify event processing that causes issues + # In the real code, these updates happen in rapid succession + # and can cause race conditions + + working_state = original_state[:] + + # Process removal events (IN_MOVED_FROM) + for dir_name in dirs: + try: + watching.update_path(working_state, PurePosixPath(dir_name), loop) + except Exception as e: + # The bug might manifest as exceptions during updates + pytest.fail(f"Update path failed for {dir_name}: {e}") + + # Process addition events (IN_MOVED_TO) + for i, dir_name in enumerate(dirs): + try: + new_name = f"renamed_{dir_name}" + watching.update_path(working_state, PurePosixPath(new_name), loop) + except Exception as e: + pytest.fail(f"Update path failed for {new_name}: {e}") + + # Verify final state is consistent + final_names = [entry.name for entry in working_state] + + # Check that old names are gone + for dir_name in dirs: + assert dir_name not in final_names, f"Old directory {dir_name} should be removed" + + # Check that new names are present + for i, dir_name in enumerate(dirs): + new_name = f"renamed_{dir_name}" + assert new_name in final_names, f"New directory {new_name} should be present" + + +def test_format_update_with_rapid_changes(setup_watcher): + """Test format_update with rapid directory changes that can cause hangs.""" + temp_dir = setup_watcher + + # Create initial structure + initial_dirs = ["test1", "test2", "test3"] + for dir_name in initial_dirs: + dir_path = temp_dir / dir_name + dir_path.mkdir() + (dir_path / "file.txt").write_text("test content") + + # Get initial state + old_state = watching.walk(PurePosixPath()) + + # Perform rapid renames + for i, dir_name in enumerate(initial_dirs): + old_path = temp_dir / dir_name + new_path = temp_dir / f"renamed_{dir_name}" + old_path.rename(new_path) + + # Get new state + new_state = watching.walk(PurePosixPath()) + + # This is where the hang might occur - in format_update + start_time = time.time() + try: + update_msg = watching.format_update(old_state, new_state) + end_time = 
time.time() + + # Should complete quickly + duration = end_time - start_time + assert duration < 5.0, f"format_update took too long: {duration}s" + + # Decode the update to verify it's valid + decoded = msgspec.json.decode(update_msg, type=UpdateMessage) + assert decoded.update, "Update message should contain operations" + + except Exception as e: + pytest.fail(f"format_update failed or hung: {e}") + + +def test_update_path_with_missing_directory(setup_watcher): + """Test update_path when called on a directory that no longer exists. + + This simulates the race condition where update_path is called for a path + that was just moved/deleted. + """ + temp_dir = setup_watcher + + # Create and populate initial state + test_dir = temp_dir / "disappearing_dir" + test_dir.mkdir() + (test_dir / "file.txt").write_text("content") + + initial_state = watching.walk(PurePosixPath()) + watching.state.root = initial_state + working_state = initial_state[:] + + # Remove the directory + shutil.rmtree(test_dir) + + loop = asyncio.new_event_loop() + + # Now try to update the path that no longer exists + # This should handle gracefully without hanging + start_time = time.time() + try: + watching.update_path(working_state, PurePosixPath("disappearing_dir"), loop) + end_time = time.time() + + duration = end_time - start_time + assert duration < 2.0, f"update_path took too long: {duration}s" + + # Verify the directory was removed from the state + final_names = [entry.name for entry in working_state] + assert "disappearing_dir" not in final_names + + except Exception as e: + pytest.fail(f"update_path should handle missing directories gracefully: {e}") + + +def test_threaded_watcher_simulation(setup_watcher): + """Test that simulates the actual threaded watcher behavior with directory renames. + + This test creates a more realistic scenario where the watcher thread + processes events while filesystem operations are happening. 
+ """ + temp_dir = setup_watcher + + # Create test structure + test_dirs = [] + for i in range(5): + dir_path = temp_dir / f"thread_test_dir_{i}" + dir_path.mkdir() + # Add some files + for j in range(5): + (dir_path / f"file_{j}.txt").write_text(f"content_{i}_{j}") + test_dirs.append(dir_path) + + # Initialize state + watching.state.root = watching.walk(PurePosixPath()) + + # Create an event loop for the simulation + loop = asyncio.new_event_loop() + + # Track state changes + state_changes = [] + original_broadcast = watching.broadcast + + def tracking_broadcast(msg, loop_param): + state_changes.append(msg) + return original_broadcast(msg, loop_param) + + # Patch broadcast to track changes + with patch("cista.watching.broadcast", side_effect=tracking_broadcast): + + # Simulate rapid directory operations + start_time = time.time() + + for i, dir_path in enumerate(test_dirs): + # Rename directory + new_path = temp_dir / f"renamed_thread_test_dir_{i}" + dir_path.rename(new_path) + + # Update the watcher state (simulating inotify events) + old_name = f"thread_test_dir_{i}" + new_name = f"renamed_thread_test_dir_{i}" + + # Simulate the race condition: rapid updates + watching.update_path(watching.state.root, PurePosixPath(old_name), loop) + watching.update_path(watching.state.root, PurePosixPath(new_name), loop) + + end_time = time.time() + + # Should complete without hanging + duration = end_time - start_time + assert duration < 10.0, f"Threaded operations took too long: {duration}s" + + # Verify final state is consistent + final_names = [entry.name for entry in watching.state.root] + + # Old names should be gone + for i in range(5): + old_name = f"thread_test_dir_{i}" + assert old_name not in final_names, f"Old directory {old_name} should be removed" + + # New names should be present + for i in range(5): + new_name = f"renamed_thread_test_dir_{i}" + assert new_name in final_names, f"New directory {new_name} should be present" + + +def test_directory_rename_with_nested_structure(setup_watcher): + """Test renaming a directory that contains nested subdirectories.""" + temp_dir = setup_watcher + + # Create a more complex nested structure + main_dir = temp_dir / "main_dir" + main_dir.mkdir() + + # Create multiple levels of nesting + level1 = main_dir / "level1" + level1.mkdir() + (level1 / "l1_file.txt").write_text("level1 content") + + level2 = level1 / "level2" + level2.mkdir() + (level2 / "l2_file.txt").write_text("level2 content") + + level3 = level2 / "level3" + level3.mkdir() + (level3 / "l3_file.txt").write_text("level3 content") + + # Initial scan + initial_root = watching.walk(PurePosixPath()) + watching.state.root = initial_root + + # Rename the main directory + renamed_main = temp_dir / "renamed_main_dir" + main_dir.rename(renamed_main) + + # Update the watching system + loop = asyncio.new_event_loop() + watching.update_path(watching.state.root, PurePosixPath("main_dir"), loop) + watching.update_path(watching.state.root, PurePosixPath("renamed_main_dir"), loop) + + # Verify the entire nested structure is properly updated + updated_root = watching.state.root + updated_names = [entry.name for entry in updated_root] + + assert "main_dir" not in updated_names + assert "renamed_main_dir" in updated_names + + # Verify the nested structure is still intact + renamed_structure = watching.walk(PurePosixPath("renamed_main_dir")) + + # Extract all the names from the renamed structure + all_names = [entry.name for entry in renamed_structure] + + # Should contain the directory itself and all nested 
items + assert "renamed_main_dir" in all_names + assert "level1" in all_names + assert "l1_file.txt" in all_names + assert "level2" in all_names + assert "l2_file.txt" in all_names + assert "level3" in all_names + assert "l3_file.txt" in all_names + + +def test_directory_rename_format_update(setup_watcher): + """Test that format_update correctly handles directory renames.""" + temp_dir = setup_watcher + + # Create test structure + subdir, _, other_dir = create_test_structure(temp_dir) + + # Get initial state + old_root = watching.walk(PurePosixPath()) + + # Rename directory + renamed_subdir = temp_dir / "renamed_subdir" + subdir.rename(renamed_subdir) + + # Get new state + new_root = watching.walk(PurePosixPath()) + + # Generate update message + update_msg = watching.format_update(old_root, new_root) + + # The update should not be empty and should contain proper operations + assert update_msg + assert "update" in update_msg + + # Decode and verify the update contains expected operations + decoded = msgspec.json.decode(update_msg, type=UpdateMessage) + assert decoded.update # Should have update operations + + # The update should reflect the rename operation (delete old, insert new) + operations = decoded.update + assert len(operations) > 0 + + +def test_concurrent_directory_operations(setup_watcher): + """Test behavior when multiple directory operations happen concurrently.""" + temp_dir = setup_watcher + + # Create multiple directories + dirs_to_create = ["dir1", "dir2", "dir3"] + created_dirs = [] + + for dir_name in dirs_to_create: + dir_path = temp_dir / dir_name + dir_path.mkdir() + (dir_path / f"{dir_name}_file.txt").write_text(f"content for {dir_name}") + created_dirs.append(dir_path) + + # Initial scan + initial_root = watching.walk(PurePosixPath()) + watching.state.root = initial_root + + # Rename multiple directories "simultaneously" + renamed_dirs = [] + for i, dir_path in enumerate(created_dirs): + renamed_path = temp_dir / f"renamed_dir{i+1}" + dir_path.rename(renamed_path) + renamed_dirs.append(renamed_path) + + # Update the watching system for all changes + loop = asyncio.new_event_loop() + + # Update for all old paths (should remove them) + for dir_name in dirs_to_create: + watching.update_path(watching.state.root, PurePosixPath(dir_name), loop) + + # Update for all new paths (should add them) + for i in range(len(renamed_dirs)): + watching.update_path(watching.state.root, PurePosixPath(f"renamed_dir{i+1}"), loop) + + # Verify final state + final_root = watching.state.root + final_names = [entry.name for entry in final_root] + + # Old names should be gone + for dir_name in dirs_to_create: + assert dir_name not in final_names + + # New names should be present + for i in range(len(renamed_dirs)): + assert f"renamed_dir{i+1}" in final_names + + +@pytest.mark.slow +def test_watcher_doesnt_hang_on_directory_rename(setup_watcher): + """Test that the watcher doesn't hang when a directory is renamed. + + This test specifically addresses the reported bug where directory renames + cause the system to hang and no more operations go through. 
+ """ + temp_dir = setup_watcher + + # Create test structure + subdir, _, _ = create_test_structure(temp_dir) + + # Initialize the watcher state + watching.state.root = watching.walk(PurePosixPath()) + + # Mock the inotify events to simulate what happens during a rename + # This simulates the problematic scenario described in the bug report + with patch('time.monotonic', side_effect=[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]): + + # Simulate the rename operation + renamed_subdir = temp_dir / "renamed_test_subdir" + subdir.rename(renamed_subdir) + + # Create a simple event loop for testing + loop = asyncio.new_event_loop() + + # This should complete without hanging + start_time = time.time() + + # Update the path - this is where the hang might occur + watching.update_path(watching.state.root, PurePosixPath("test_subdir"), loop) + watching.update_path(watching.state.root, PurePosixPath("renamed_test_subdir"), loop) + + end_time = time.time() + + # The operation should complete quickly (within 5 seconds) + assert end_time - start_time < 5.0, "Directory rename operation took too long, possible hang detected" + + # Verify the state is consistent + final_names = [entry.name for entry in watching.state.root] + assert "test_subdir" not in final_names + assert "renamed_test_subdir" in final_names + + # Verify we can still perform operations after the rename + # This tests that the system isn't in a broken state + another_dir = temp_dir / "post_rename_dir" + another_dir.mkdir() + + # This should work without issues + watching.update_path(watching.state.root, PurePosixPath("post_rename_dir"), loop) + final_names_after = [entry.name for entry in watching.state.root] + assert "post_rename_dir" in final_names_after + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) -- 2.49.0 From 47574675a3b7fb2b6b963a880f839db6bb4ea6ae Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Wed, 13 Aug 2025 10:17:06 -0700 Subject: [PATCH 04/43] Debug printouts for watching. 
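
The counters, timers, and emergency brakes added around treeget(), treeinspos(),
update_path() and format_update() are meant to pinpoint where directory renames
hang the watcher. Everything logs at DEBUG level, so it stays silent in normal
runs. A minimal sketch for surfacing the output while reproducing, assuming the
watcher logs through Sanic's standard "sanic.root" logger:

    import logging

    # Let the watcher's DEBUG records through; basicConfig() attaches a root
    # handler so that the propagated records are actually printed.
    logging.getLogger("sanic.root").setLevel(logging.DEBUG)
    logging.basicConfig()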
--- cista/watching.py | 119 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 2 deletions(-) diff --git a/cista/watching.py b/cista/watching.py index d36c216..b42e82b 100644 --- a/cista/watching.py +++ b/cista/watching.py @@ -46,59 +46,101 @@ def treeiter(rootmod): def treeget(rootmod: list[FileEntry], path: PurePosixPath): + logger.debug(f"DEBUG: treeget ENTRY: path={path}, rootmod_len={len(rootmod)}") begin = None ret = [] + iteration_count = 0 + for i, relpath, entry in treeiter(rootmod): + iteration_count += 1 + if iteration_count % 1000 == 0: # Log every 1000 iterations to detect infinite loops + logger.debug(f"DEBUG: treeget iteration {iteration_count}, i={i}, relpath={relpath}, entry.name={entry.name}") + if begin is None: if relpath == path: + logger.debug(f"DEBUG: treeget FOUND path {path} at index {i}") begin = i ret.append(entry) continue if entry.level <= len(path.parts): + logger.debug(f"DEBUG: treeget BREAK: entry.level={entry.level} <= path.parts_len={len(path.parts)}") break ret.append(entry) + + logger.debug(f"DEBUG: treeget EXIT: path={path}, begin={begin}, ret_len={len(ret)}, iterations={iteration_count}") return begin, ret def treeinspos(rootmod: list[FileEntry], relpath: PurePosixPath, relfile: int): # Find the first entry greater than the new one # precondition: the new entry doesn't exist + logger.debug(f"DEBUG: treeinspos ENTRY: relpath={relpath}, relfile={relfile}, rootmod_len={len(rootmod)}") + isfile = 0 level = 0 i = 0 + iteration_count = 0 + for i, rel, entry in treeiter(rootmod): + iteration_count += 1 + + # Detect potential infinite loops in treeinspos + if iteration_count % 1000 == 0: + logger.debug(f"DEBUG: treeinspos iteration {iteration_count}, i={i}, rel={rel}, entry.name={entry.name}, level={level}, entry.level={entry.level}") + + if iteration_count > 10000: # Emergency brake for infinite loops + logger.error(f"ERROR: treeinspos potential infinite loop! 
iteration={iteration_count}, relpath={relpath}, i={i}, level={level}") + break + if entry.level > level: # We haven't found item at level, skip subdirectories + logger.debug(f"DEBUG: treeinspos SKIP: entry.level={entry.level} > level={level}") continue if entry.level < level: # We have passed the level, so the new item is the first + logger.debug(f"DEBUG: treeinspos RETURN_EARLY: entry.level={entry.level} < level={level}, returning i={i}") return i if level == 0: # root + logger.debug("DEBUG: treeinspos ROOT: incrementing level from 0 to 1") level += 1 continue + ename = rel.parts[level - 1] name = relpath.parts[level - 1] + logger.debug(f"DEBUG: treeinspos COMPARE: ename='{ename}', name='{name}', level={level}") + esort = sortkey(ename) nsort = sortkey(name) # Non-leaf are always folders, only use relfile at leaf isfile = relfile if len(relpath.parts) == level else 0 + logger.debug(f"DEBUG: treeinspos SORT: esort={esort}, nsort={nsort}, isfile={isfile}, entry.isfile={entry.isfile}") + # First compare by isfile, then by sorting order and if that too matches then case sensitive cmp = ( entry.isfile - isfile or (esort > nsort) - (esort < nsort) or (ename > name) - (ename < name) ) + logger.debug(f"DEBUG: treeinspos CMP: cmp={cmp}") + if cmp > 0: + logger.debug(f"DEBUG: treeinspos RETURN: cmp > 0, returning i={i}") return i if cmp < 0: + logger.debug(f"DEBUG: treeinspos CONTINUE: cmp < 0") continue + + logger.debug(f"DEBUG: treeinspos INCREMENT_LEVEL: level {level} -> {level + 1}") level += 1 if level > len(relpath.parts): - print("ERROR: insertpos", relpath, i, entry.name, entry.level, level) + logger.error(f"ERROR: insertpos level overflow: relpath={relpath}, i={i}, entry.name={entry.name}, entry.level={entry.level}, level={level}") break else: + logger.debug(f"DEBUG: treeinspos FOR_ELSE: incrementing i from {i} to {i + 1}") i += 1 + + logger.debug(f"DEBUG: treeinspos EXIT: returning i={i}, iterations={iteration_count}") return i @@ -177,23 +219,51 @@ def update_root(loop): def update_path(rootmod: list[FileEntry], relpath: PurePosixPath, loop): """Called on FS updates, check the filesystem and broadcast any changes.""" + logger.debug(f"DEBUG: update_path ENTRY: path={relpath}, rootmod_len={len(rootmod)}") + + # Add timing for walk operation + walk_start = time.perf_counter() new = walk(relpath) + walk_end = time.perf_counter() + logger.debug(f"DEBUG: walk({relpath}) took {walk_end - walk_start:.4f}s, returned {len(new)} entries") + + # Add timing for treeget operation + treeget_start = time.perf_counter() obegin, old = treeget(rootmod, relpath) + treeget_end = time.perf_counter() + logger.debug(f"DEBUG: treeget({relpath}) took {treeget_end - treeget_start:.4f}s, obegin={obegin}, old_len={len(old) if old else 0}") + if old == new: logger.debug( f"Watch: Event without changes needed {relpath}" if old else f"Watch: Event with old and new missing: {relpath}" ) + logger.debug(f"DEBUG: update_path EARLY_EXIT: no changes for {relpath}") return + + # Debug the deletion operation if obegin is not None: + logger.debug(f"DEBUG: DELETING entries from rootmod[{obegin}:{obegin + len(old)}] for path {relpath}") del rootmod[obegin : obegin + len(old)] + logger.debug(f"DEBUG: DELETED entries, rootmod_len now {len(rootmod)}") + if new: logger.debug(f"Watch: Update {relpath}" if old else f"Watch: Created {relpath}") + + # Add timing for treeinspos operation - this is where hangs might occur + inspos_start = time.perf_counter() i = treeinspos(rootmod, relpath, new[0].isfile) + inspos_end = time.perf_counter() 
+ logger.debug(f"DEBUG: treeinspos({relpath}) took {inspos_end - inspos_start:.4f}s, returned index={i}") + + logger.debug(f"DEBUG: INSERTING {len(new)} entries at position {i} for path {relpath}") rootmod[i:i] = new + logger.debug(f"DEBUG: INSERTED entries, rootmod_len now {len(rootmod)}") else: logger.debug(f"Watch: Removed {relpath}") + + logger.debug(f"DEBUG: update_path EXIT: path={relpath}, final_rootmod_len={len(rootmod)}") def update_space(loop): @@ -213,61 +283,92 @@ def update_space(loop): def format_update(old, new): + logger.debug(f"DEBUG: format_update ENTRY: old_len={len(old)}, new_len={len(new)}") + # Make keep/del/insert diff until one of the lists ends oidx, nidx = 0, 0 oremain, nremain = set(old), set(new) update = [] keep_count = 0 + iteration_count = 0 + while oidx < len(old) and nidx < len(new): + iteration_count += 1 + + # Log every 1000 iterations to detect infinite loops + if iteration_count % 1000 == 0: + logger.debug(f"DEBUG: format_update iteration {iteration_count}, oidx={oidx}/{len(old)}, nidx={nidx}/{len(new)}") + + # Emergency brake for potential infinite loops + if iteration_count > 50000: + logger.error(f"ERROR: format_update potential infinite loop! iteration={iteration_count}, oidx={oidx}, nidx={nidx}") + raise Exception(f"format_update infinite loop detected at iteration {iteration_count}") + modified = False # Matching entries are kept if old[oidx] == new[nidx]: entry = old[oidx] + logger.debug(f"DEBUG: format_update MATCH: entry={entry.name}, oidx={oidx}, nidx={nidx}") oremain.remove(entry) nremain.remove(entry) keep_count += 1 oidx += 1 nidx += 1 continue + if keep_count > 0: + logger.debug(f"DEBUG: format_update KEEP: adding UpdKeep({keep_count})") modified = True update.append(UpdKeep(keep_count)) keep_count = 0 # Items only in old are deleted del_count = 0 + del_start_oidx = oidx while oidx < len(old) and old[oidx] not in nremain: + logger.debug(f"DEBUG: format_update DELETE: removing old[{oidx}]={old[oidx].name}") oremain.remove(old[oidx]) del_count += 1 oidx += 1 if del_count: + logger.debug(f"DEBUG: format_update DEL: adding UpdDel({del_count}), oidx {del_start_oidx}->{oidx}") update.append(UpdDel(del_count)) continue # Items only in new are inserted insert_items = [] + ins_start_nidx = nidx while nidx < len(new) and new[nidx] not in oremain: entry = new[nidx] + logger.debug(f"DEBUG: format_update INSERT: adding new[{nidx}]={entry.name}") nremain.remove(entry) insert_items.append(entry) nidx += 1 if insert_items: + logger.debug(f"DEBUG: format_update INS: adding UpdIns({len(insert_items)} items), nidx {ins_start_nidx}->{nidx}") modified = True update.append(UpdIns(insert_items)) if not modified: + logger.error(f"ERROR: format_update INFINITE_LOOP: nidx={nidx}, oidx={oidx}, old_len={len(old)}, new_len={len(new)}") + logger.error(f"ERROR: old[oidx]={old[oidx].name if oidx < len(old) else 'OUT_OF_BOUNDS'}") + logger.error(f"ERROR: new[nidx]={new[nidx].name if nidx < len(new) else 'OUT_OF_BOUNDS'}") raise Exception( f"Infinite loop in diff {nidx=} {oidx=} {len(old)=} {len(new)=}" ) # Diff any remaining if keep_count > 0: + logger.debug(f"DEBUG: format_update FINAL_KEEP: adding UpdKeep({keep_count})") update.append(UpdKeep(keep_count)) if oremain: + logger.debug(f"DEBUG: format_update FINAL_DEL: adding UpdDel({len(oremain)}) for remaining old items") update.append(UpdDel(len(oremain))) elif nremain: + logger.debug(f"DEBUG: format_update FINAL_INS: adding UpdIns({len(new[nidx:])}) for remaining new items") update.append(UpdIns(new[nidx:])) + 
logger.debug(f"DEBUG: format_update EXIT: generated {len(update)} operations, iterations={iteration_count}") return msgspec.json.encode({"update": update}).decode() @@ -339,9 +440,17 @@ def watcher_inotify(loop): logger.debug(f"Watch: {interesting=} {event=}") if interesting: # Update modified path + logger.debug(f"DEBUG: inotify PROCESSING: event={event}, path={event[2]}/{event[3]}") t0 = time.perf_counter() path = PurePosixPath(event[2]) / event[3] - update_path(rootmod, path.relative_to(rootpath), loop) + try: + rel_path = path.relative_to(rootpath) + logger.debug(f"DEBUG: inotify CALLING update_path: rel_path={rel_path}") + update_path(rootmod, rel_path, loop) + logger.debug(f"DEBUG: inotify update_path COMPLETED: rel_path={rel_path}") + except Exception as e: + logger.error(f"ERROR: inotify update_path FAILED: path={path}, error={e}") + raise t1 = time.perf_counter() logger.debug(f"Watch: Update {event[3]} took {t1 - t0:.1f}s") if not dirty: @@ -349,14 +458,20 @@ def watcher_inotify(loop): dirty = True # Wait a maximum of 0.5s to push the updates if dirty and time.monotonic() >= t + 0.5: + logger.debug("DEBUG: inotify TIMEOUT: breaking due to 0.5s timeout") break if dirty and state.root != rootmod: + logger.debug(f"DEBUG: inotify BATCH_UPDATE: state.root_len={len(state.root)}, rootmod_len={len(rootmod)}") t0 = time.perf_counter() + logger.debug("DEBUG: inotify CALLING format_update") update = format_update(state.root, rootmod) + logger.debug("DEBUG: inotify format_update COMPLETED") t1 = time.perf_counter() with state.lock: + logger.debug("DEBUG: inotify BROADCASTING update") broadcast(update, loop) state.root = rootmod + logger.debug("DEBUG: inotify BROADCAST completed, state updated") t2 = time.perf_counter() logger.debug( f"Format update took {t1 - t0:.1f}s, broadcast {t2 - t1:.1f}s" -- 2.49.0 From 4060a582d61606d75f65ecd4d53b491c5b7fd740 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Wed, 13 Aug 2025 10:18:18 -0700 Subject: [PATCH 05/43] Linter --- cista/watching.py | 212 ++++++--- tests/test_watching_directory_rename.py | 587 +++++++++++++----------- 2 files changed, 456 insertions(+), 343 deletions(-) diff --git a/cista/watching.py b/cista/watching.py index b42e82b..6e03872 100644 --- a/cista/watching.py +++ b/cista/watching.py @@ -50,12 +50,16 @@ def treeget(rootmod: list[FileEntry], path: PurePosixPath): begin = None ret = [] iteration_count = 0 - + for i, relpath, entry in treeiter(rootmod): iteration_count += 1 - if iteration_count % 1000 == 0: # Log every 1000 iterations to detect infinite loops - logger.debug(f"DEBUG: treeget iteration {iteration_count}, i={i}, relpath={relpath}, entry.name={entry.name}") - + if ( + iteration_count % 1000 == 0 + ): # Log every 1000 iterations to detect infinite loops + logger.debug( + f"DEBUG: treeget iteration {iteration_count}, i={i}, relpath={relpath}, entry.name={entry.name}" + ) + if begin is None: if relpath == path: logger.debug(f"DEBUG: treeget FOUND path {path} at index {i}") @@ -63,59 +67,77 @@ def treeget(rootmod: list[FileEntry], path: PurePosixPath): ret.append(entry) continue if entry.level <= len(path.parts): - logger.debug(f"DEBUG: treeget BREAK: entry.level={entry.level} <= path.parts_len={len(path.parts)}") + logger.debug( + f"DEBUG: treeget BREAK: entry.level={entry.level} <= path.parts_len={len(path.parts)}" + ) break ret.append(entry) - - logger.debug(f"DEBUG: treeget EXIT: path={path}, begin={begin}, ret_len={len(ret)}, iterations={iteration_count}") + + logger.debug( + f"DEBUG: treeget EXIT: path={path}, 
begin={begin}, ret_len={len(ret)}, iterations={iteration_count}" + ) return begin, ret def treeinspos(rootmod: list[FileEntry], relpath: PurePosixPath, relfile: int): # Find the first entry greater than the new one # precondition: the new entry doesn't exist - logger.debug(f"DEBUG: treeinspos ENTRY: relpath={relpath}, relfile={relfile}, rootmod_len={len(rootmod)}") - + logger.debug( + f"DEBUG: treeinspos ENTRY: relpath={relpath}, relfile={relfile}, rootmod_len={len(rootmod)}" + ) + isfile = 0 level = 0 i = 0 iteration_count = 0 - + for i, rel, entry in treeiter(rootmod): iteration_count += 1 - + # Detect potential infinite loops in treeinspos if iteration_count % 1000 == 0: - logger.debug(f"DEBUG: treeinspos iteration {iteration_count}, i={i}, rel={rel}, entry.name={entry.name}, level={level}, entry.level={entry.level}") - + logger.debug( + f"DEBUG: treeinspos iteration {iteration_count}, i={i}, rel={rel}, entry.name={entry.name}, level={level}, entry.level={entry.level}" + ) + if iteration_count > 10000: # Emergency brake for infinite loops - logger.error(f"ERROR: treeinspos potential infinite loop! iteration={iteration_count}, relpath={relpath}, i={i}, level={level}") + logger.error( + f"ERROR: treeinspos potential infinite loop! iteration={iteration_count}, relpath={relpath}, i={i}, level={level}" + ) break - + if entry.level > level: # We haven't found item at level, skip subdirectories - logger.debug(f"DEBUG: treeinspos SKIP: entry.level={entry.level} > level={level}") + logger.debug( + f"DEBUG: treeinspos SKIP: entry.level={entry.level} > level={level}" + ) continue if entry.level < level: # We have passed the level, so the new item is the first - logger.debug(f"DEBUG: treeinspos RETURN_EARLY: entry.level={entry.level} < level={level}, returning i={i}") + logger.debug( + f"DEBUG: treeinspos RETURN_EARLY: entry.level={entry.level} < level={level}, returning i={i}" + ) return i if level == 0: # root logger.debug("DEBUG: treeinspos ROOT: incrementing level from 0 to 1") level += 1 continue - + ename = rel.parts[level - 1] name = relpath.parts[level - 1] - logger.debug(f"DEBUG: treeinspos COMPARE: ename='{ename}', name='{name}', level={level}") - + logger.debug( + f"DEBUG: treeinspos COMPARE: ename='{ename}', name='{name}', level={level}" + ) + esort = sortkey(ename) nsort = sortkey(name) # Non-leaf are always folders, only use relfile at leaf isfile = relfile if len(relpath.parts) == level else 0 - logger.debug(f"DEBUG: treeinspos SORT: esort={esort}, nsort={nsort}, isfile={isfile}, entry.isfile={entry.isfile}") - + logger.debug( + f"DEBUG: treeinspos SORT: esort={esort}, nsort={nsort}, isfile={isfile}, entry.isfile={entry.isfile}" + ) + # First compare by isfile, then by sorting order and if that too matches then case sensitive cmp = ( entry.isfile - isfile @@ -123,24 +145,28 @@ def treeinspos(rootmod: list[FileEntry], relpath: PurePosixPath, relfile: int): or (ename > name) - (ename < name) ) logger.debug(f"DEBUG: treeinspos CMP: cmp={cmp}") - + if cmp > 0: logger.debug(f"DEBUG: treeinspos RETURN: cmp > 0, returning i={i}") return i if cmp < 0: logger.debug(f"DEBUG: treeinspos CONTINUE: cmp < 0") continue - + logger.debug(f"DEBUG: treeinspos INCREMENT_LEVEL: level {level} -> {level + 1}") level += 1 if level > len(relpath.parts): - logger.error(f"ERROR: insertpos level overflow: relpath={relpath}, i={i}, entry.name={entry.name}, entry.level={entry.level}, level={level}") + logger.error( + f"ERROR: insertpos level overflow: relpath={relpath}, i={i}, entry.name={entry.name}, 
entry.level={entry.level}, level={level}" + ) break else: logger.debug(f"DEBUG: treeinspos FOR_ELSE: incrementing i from {i} to {i + 1}") i += 1 - - logger.debug(f"DEBUG: treeinspos EXIT: returning i={i}, iterations={iteration_count}") + + logger.debug( + f"DEBUG: treeinspos EXIT: returning i={i}, iterations={iteration_count}" + ) return i @@ -219,20 +245,26 @@ def update_root(loop): def update_path(rootmod: list[FileEntry], relpath: PurePosixPath, loop): """Called on FS updates, check the filesystem and broadcast any changes.""" - logger.debug(f"DEBUG: update_path ENTRY: path={relpath}, rootmod_len={len(rootmod)}") - + logger.debug( + f"DEBUG: update_path ENTRY: path={relpath}, rootmod_len={len(rootmod)}" + ) + # Add timing for walk operation walk_start = time.perf_counter() new = walk(relpath) walk_end = time.perf_counter() - logger.debug(f"DEBUG: walk({relpath}) took {walk_end - walk_start:.4f}s, returned {len(new)} entries") - + logger.debug( + f"DEBUG: walk({relpath}) took {walk_end - walk_start:.4f}s, returned {len(new)} entries" + ) + # Add timing for treeget operation treeget_start = time.perf_counter() obegin, old = treeget(rootmod, relpath) treeget_end = time.perf_counter() - logger.debug(f"DEBUG: treeget({relpath}) took {treeget_end - treeget_start:.4f}s, obegin={obegin}, old_len={len(old) if old else 0}") - + logger.debug( + f"DEBUG: treeget({relpath}) took {treeget_end - treeget_start:.4f}s, obegin={obegin}, old_len={len(old) if old else 0}" + ) + if old == new: logger.debug( f"Watch: Event without changes needed {relpath}" @@ -241,29 +273,37 @@ def update_path(rootmod: list[FileEntry], relpath: PurePosixPath, loop): ) logger.debug(f"DEBUG: update_path EARLY_EXIT: no changes for {relpath}") return - + # Debug the deletion operation if obegin is not None: - logger.debug(f"DEBUG: DELETING entries from rootmod[{obegin}:{obegin + len(old)}] for path {relpath}") + logger.debug( + f"DEBUG: DELETING entries from rootmod[{obegin}:{obegin + len(old)}] for path {relpath}" + ) del rootmod[obegin : obegin + len(old)] logger.debug(f"DEBUG: DELETED entries, rootmod_len now {len(rootmod)}") - + if new: logger.debug(f"Watch: Update {relpath}" if old else f"Watch: Created {relpath}") - + # Add timing for treeinspos operation - this is where hangs might occur inspos_start = time.perf_counter() i = treeinspos(rootmod, relpath, new[0].isfile) inspos_end = time.perf_counter() - logger.debug(f"DEBUG: treeinspos({relpath}) took {inspos_end - inspos_start:.4f}s, returned index={i}") - - logger.debug(f"DEBUG: INSERTING {len(new)} entries at position {i} for path {relpath}") + logger.debug( + f"DEBUG: treeinspos({relpath}) took {inspos_end - inspos_start:.4f}s, returned index={i}" + ) + + logger.debug( + f"DEBUG: INSERTING {len(new)} entries at position {i} for path {relpath}" + ) rootmod[i:i] = new logger.debug(f"DEBUG: INSERTED entries, rootmod_len now {len(rootmod)}") else: logger.debug(f"Watch: Removed {relpath}") - - logger.debug(f"DEBUG: update_path EXIT: path={relpath}, final_rootmod_len={len(rootmod)}") + + logger.debug( + f"DEBUG: update_path EXIT: path={relpath}, final_rootmod_len={len(rootmod)}" + ) def update_space(loop): @@ -284,38 +324,46 @@ def update_space(loop): def format_update(old, new): logger.debug(f"DEBUG: format_update ENTRY: old_len={len(old)}, new_len={len(new)}") - + # Make keep/del/insert diff until one of the lists ends oidx, nidx = 0, 0 oremain, nremain = set(old), set(new) update = [] keep_count = 0 iteration_count = 0 - + while oidx < len(old) and nidx < len(new): 
iteration_count += 1 - + # Log every 1000 iterations to detect infinite loops if iteration_count % 1000 == 0: - logger.debug(f"DEBUG: format_update iteration {iteration_count}, oidx={oidx}/{len(old)}, nidx={nidx}/{len(new)}") - + logger.debug( + f"DEBUG: format_update iteration {iteration_count}, oidx={oidx}/{len(old)}, nidx={nidx}/{len(new)}" + ) + # Emergency brake for potential infinite loops if iteration_count > 50000: - logger.error(f"ERROR: format_update potential infinite loop! iteration={iteration_count}, oidx={oidx}, nidx={nidx}") - raise Exception(f"format_update infinite loop detected at iteration {iteration_count}") - + logger.error( + f"ERROR: format_update potential infinite loop! iteration={iteration_count}, oidx={oidx}, nidx={nidx}" + ) + raise Exception( + f"format_update infinite loop detected at iteration {iteration_count}" + ) + modified = False # Matching entries are kept if old[oidx] == new[nidx]: entry = old[oidx] - logger.debug(f"DEBUG: format_update MATCH: entry={entry.name}, oidx={oidx}, nidx={nidx}") + logger.debug( + f"DEBUG: format_update MATCH: entry={entry.name}, oidx={oidx}, nidx={nidx}" + ) oremain.remove(entry) nremain.remove(entry) keep_count += 1 oidx += 1 nidx += 1 continue - + if keep_count > 0: logger.debug(f"DEBUG: format_update KEEP: adding UpdKeep({keep_count})") modified = True @@ -326,12 +374,16 @@ def format_update(old, new): del_count = 0 del_start_oidx = oidx while oidx < len(old) and old[oidx] not in nremain: - logger.debug(f"DEBUG: format_update DELETE: removing old[{oidx}]={old[oidx].name}") + logger.debug( + f"DEBUG: format_update DELETE: removing old[{oidx}]={old[oidx].name}" + ) oremain.remove(old[oidx]) del_count += 1 oidx += 1 if del_count: - logger.debug(f"DEBUG: format_update DEL: adding UpdDel({del_count}), oidx {del_start_oidx}->{oidx}") + logger.debug( + f"DEBUG: format_update DEL: adding UpdDel({del_count}), oidx {del_start_oidx}->{oidx}" + ) update.append(UpdDel(del_count)) continue @@ -340,19 +392,29 @@ def format_update(old, new): ins_start_nidx = nidx while nidx < len(new) and new[nidx] not in oremain: entry = new[nidx] - logger.debug(f"DEBUG: format_update INSERT: adding new[{nidx}]={entry.name}") + logger.debug( + f"DEBUG: format_update INSERT: adding new[{nidx}]={entry.name}" + ) nremain.remove(entry) insert_items.append(entry) nidx += 1 if insert_items: - logger.debug(f"DEBUG: format_update INS: adding UpdIns({len(insert_items)} items), nidx {ins_start_nidx}->{nidx}") + logger.debug( + f"DEBUG: format_update INS: adding UpdIns({len(insert_items)} items), nidx {ins_start_nidx}->{nidx}" + ) modified = True update.append(UpdIns(insert_items)) if not modified: - logger.error(f"ERROR: format_update INFINITE_LOOP: nidx={nidx}, oidx={oidx}, old_len={len(old)}, new_len={len(new)}") - logger.error(f"ERROR: old[oidx]={old[oidx].name if oidx < len(old) else 'OUT_OF_BOUNDS'}") - logger.error(f"ERROR: new[nidx]={new[nidx].name if nidx < len(new) else 'OUT_OF_BOUNDS'}") + logger.error( + f"ERROR: format_update INFINITE_LOOP: nidx={nidx}, oidx={oidx}, old_len={len(old)}, new_len={len(new)}" + ) + logger.error( + f"ERROR: old[oidx]={old[oidx].name if oidx < len(old) else 'OUT_OF_BOUNDS'}" + ) + logger.error( + f"ERROR: new[nidx]={new[nidx].name if nidx < len(new) else 'OUT_OF_BOUNDS'}" + ) raise Exception( f"Infinite loop in diff {nidx=} {oidx=} {len(old)=} {len(new)=}" ) @@ -362,13 +424,19 @@ def format_update(old, new): logger.debug(f"DEBUG: format_update FINAL_KEEP: adding UpdKeep({keep_count})") update.append(UpdKeep(keep_count)) if 
oremain: - logger.debug(f"DEBUG: format_update FINAL_DEL: adding UpdDel({len(oremain)}) for remaining old items") + logger.debug( + f"DEBUG: format_update FINAL_DEL: adding UpdDel({len(oremain)}) for remaining old items" + ) update.append(UpdDel(len(oremain))) elif nremain: - logger.debug(f"DEBUG: format_update FINAL_INS: adding UpdIns({len(new[nidx:])}) for remaining new items") + logger.debug( + f"DEBUG: format_update FINAL_INS: adding UpdIns({len(new[nidx:])}) for remaining new items" + ) update.append(UpdIns(new[nidx:])) - logger.debug(f"DEBUG: format_update EXIT: generated {len(update)} operations, iterations={iteration_count}") + logger.debug( + f"DEBUG: format_update EXIT: generated {len(update)} operations, iterations={iteration_count}" + ) return msgspec.json.encode({"update": update}).decode() @@ -440,16 +508,24 @@ def watcher_inotify(loop): logger.debug(f"Watch: {interesting=} {event=}") if interesting: # Update modified path - logger.debug(f"DEBUG: inotify PROCESSING: event={event}, path={event[2]}/{event[3]}") + logger.debug( + f"DEBUG: inotify PROCESSING: event={event}, path={event[2]}/{event[3]}" + ) t0 = time.perf_counter() path = PurePosixPath(event[2]) / event[3] try: rel_path = path.relative_to(rootpath) - logger.debug(f"DEBUG: inotify CALLING update_path: rel_path={rel_path}") + logger.debug( + f"DEBUG: inotify CALLING update_path: rel_path={rel_path}" + ) update_path(rootmod, rel_path, loop) - logger.debug(f"DEBUG: inotify update_path COMPLETED: rel_path={rel_path}") + logger.debug( + f"DEBUG: inotify update_path COMPLETED: rel_path={rel_path}" + ) except Exception as e: - logger.error(f"ERROR: inotify update_path FAILED: path={path}, error={e}") + logger.error( + f"ERROR: inotify update_path FAILED: path={path}, error={e}" + ) raise t1 = time.perf_counter() logger.debug(f"Watch: Update {event[3]} took {t1 - t0:.1f}s") @@ -461,7 +537,9 @@ def watcher_inotify(loop): logger.debug("DEBUG: inotify TIMEOUT: breaking due to 0.5s timeout") break if dirty and state.root != rootmod: - logger.debug(f"DEBUG: inotify BATCH_UPDATE: state.root_len={len(state.root)}, rootmod_len={len(rootmod)}") + logger.debug( + f"DEBUG: inotify BATCH_UPDATE: state.root_len={len(state.root)}, rootmod_len={len(rootmod)}" + ) t0 = time.perf_counter() logger.debug("DEBUG: inotify CALLING format_update") update = format_update(state.root, rootmod) diff --git a/tests/test_watching_directory_rename.py b/tests/test_watching_directory_rename.py index 0abb6fe..d297502 100644 --- a/tests/test_watching_directory_rename.py +++ b/tests/test_watching_directory_rename.py @@ -29,15 +29,15 @@ def setup_watcher(temp_dir): original_rootpath = watching.rootpath original_state = watching.state original_quit = watching.quit - + # Setup test environment config.config = config.Config(path=temp_dir, listen=":0") watching.rootpath = temp_dir watching.state = watching.State() watching.quit = threading.Event() - + yield temp_dir - + # Cleanup watching.quit.set() watching.rootpath = original_rootpath @@ -50,96 +50,106 @@ def create_test_structure(base_path: Path): # Create main subdirectory with files subdir = base_path / "test_subdir" subdir.mkdir() - + # Add some files to the subdirectory (subdir / "file1.txt").write_text("content1") (subdir / "file2.txt").write_text("content2") - + # Create a nested subdirectory nested = subdir / "nested" nested.mkdir() (nested / "nested_file.txt").write_text("nested content") - + # Create another top-level directory for reference other_dir = base_path / "other_dir" other_dir.mkdir() 
(other_dir / "other_file.txt").write_text("other content") - + return subdir, nested, other_dir def test_nested_directory_rename_causes_hang(setup_watcher): """Test renaming deeply nested directories - this is where the hang typically occurs. - + The bug manifests when renaming directories that are nested within other directories, not just top-level directories. """ temp_dir = setup_watcher - + # Create a complex nested structure that mirrors real-world usage # parent/child/grandchild/target_dir/files... parent = temp_dir / "parent_folder" parent.mkdir() - + child = parent / "child_folder" child.mkdir() - + grandchild = child / "grandchild_folder" grandchild.mkdir() - + # This is the directory we'll rename - it's deeply nested target_dir = grandchild / "target_to_rename" target_dir.mkdir() - + # Add files to make the directory scan more complex for i in range(20): (target_dir / f"file_{i:03d}.txt").write_text(f"content_{i}") - + # Add another nested level inside target deep_nested = target_dir / "even_deeper" deep_nested.mkdir() for i in range(10): (deep_nested / f"deep_file_{i}.txt").write_text(f"deep_content_{i}") - + # Initialize watcher state initial_root = watching.walk(PurePosixPath()) watching.state.root = initial_root - + # Verify the nested structure exists - target_path = PurePosixPath("parent_folder/child_folder/grandchild_folder/target_to_rename") + target_path = PurePosixPath( + "parent_folder/child_folder/grandchild_folder/target_to_rename" + ) initial_begin, initial_entries = watching.treeget(initial_root, target_path) - assert initial_begin is not None, "Target directory should be found in initial state" + assert initial_begin is not None, ( + "Target directory should be found in initial state" + ) assert len(initial_entries) > 1, "Target directory should contain files" - + # Now rename the deeply nested directory new_target = grandchild / "renamed_target" target_dir.rename(new_target) - + loop = asyncio.new_event_loop() working_state = watching.state.root[:] - + # This is where the hang likely occurs - updating a deeply nested path - old_nested_path = PurePosixPath("parent_folder/child_folder/grandchild_folder/target_to_rename") - new_nested_path = PurePosixPath("parent_folder/child_folder/grandchild_folder/renamed_target") - + old_nested_path = PurePosixPath( + "parent_folder/child_folder/grandchild_folder/target_to_rename" + ) + new_nested_path = PurePosixPath( + "parent_folder/child_folder/grandchild_folder/renamed_target" + ) + start_time = time.time() - + # Update the old path (should remove it) watching.update_path(working_state, old_nested_path, loop) - + # Update the new path (should add it) watching.update_path(working_state, new_nested_path, loop) - + end_time = time.time() - + # Check for hang - nested operations should still be fast duration = end_time - start_time - assert duration < 3.0, f"Nested directory rename took too long: {duration}s - possible hang" - + assert duration < 3.0, ( + f"Nested directory rename took too long: {duration}s - possible hang" + ) + # Verify the old nested path is gone old_begin, old_entries = watching.treeget(working_state, old_nested_path) assert old_begin is None, "Old nested directory should be removed from tree" - + # Verify the new nested path exists new_begin, new_entries = watching.treeget(working_state, new_nested_path) assert new_begin is not None, "New nested directory should exist in tree" @@ -149,27 +159,27 @@ def test_nested_directory_rename_causes_hang(setup_watcher): def 
test_move_directory_across_nested_parents(setup_watcher): """Test moving a directory from one nested location to another - high hang risk scenario.""" temp_dir = setup_watcher - + # Create source nested structure source_parent = temp_dir / "source_area" source_parent.mkdir() source_child = source_parent / "source_child" source_child.mkdir() - + # Create the directory to move movable_dir = source_child / "movable_directory" movable_dir.mkdir() - + # Add content to make it more complex for i in range(15): (movable_dir / f"file_{i}.txt").write_text(f"movable_content_{i}") - + # Create a subdirectory within the movable directory sub_movable = movable_dir / "sub_directory" sub_movable.mkdir() for i in range(5): (sub_movable / f"sub_file_{i}.txt").write_text(f"sub_content_{i}") - + # Create destination nested structure dest_parent = temp_dir / "destination_area" dest_parent.mkdir() @@ -177,44 +187,46 @@ def test_move_directory_across_nested_parents(setup_watcher): dest_child.mkdir() dest_grandchild = dest_child / "dest_grandchild" dest_grandchild.mkdir() - + # Initialize state watching.state.root = watching.walk(PurePosixPath()) working_state = watching.state.root[:] - + # Move the directory to the deeply nested destination dest_movable = dest_grandchild / "moved_directory" movable_dir.rename(dest_movable) - + loop = asyncio.new_event_loop() - + # These paths represent the complex nested move operation old_path = PurePosixPath("source_area/source_child/movable_directory") - new_path = PurePosixPath("destination_area/dest_child/dest_grandchild/moved_directory") - + new_path = PurePosixPath( + "destination_area/dest_child/dest_grandchild/moved_directory" + ) + start_time = time.time() - + # This sequence is where hangs typically occur with cross-directory moves try: # Remove from old location watching.update_path(working_state, old_path, loop) - - # Add to new location + + # Add to new location watching.update_path(working_state, new_path, loop) - + except Exception as e: pytest.fail(f"Nested directory move failed: {e}") - + end_time = time.time() duration = end_time - start_time - + # Should complete without hanging assert duration < 5.0, f"Cross-nested move took too long: {duration}s" - + # Verify old location is empty old_begin, old_entries = watching.treeget(working_state, old_path) assert old_begin is None, "Directory should be removed from old nested location" - + # Verify new location has the directory new_begin, new_entries = watching.treeget(working_state, new_path) assert new_begin is not None, "Directory should exist in new nested location" @@ -224,7 +236,7 @@ def test_move_directory_across_nested_parents(setup_watcher): def test_rapid_nested_directory_operations_cause_corruption(setup_watcher): """Test rapid operations on nested directories that can cause state corruption.""" temp_dir = setup_watcher - + # Create multiple nested structures structures = [] for i in range(3): @@ -236,53 +248,55 @@ def test_rapid_nested_directory_operations_cause_corruption(setup_watcher): level3.mkdir() target = level3 / f"target_{i}" target.mkdir() - + # Add files for j in range(10): (target / f"file_{j}.txt").write_text(f"content_{i}_{j}") - + structures.append((level1, level2, level3, target)) - + # Initialize state watching.state.root = watching.walk(PurePosixPath()) working_state = watching.state.root[:] - + loop = asyncio.new_event_loop() - + # Perform rapid nested operations that can cause race conditions operations = [] - + for i, (level1, level2, level3, target) in enumerate(structures): # 
Rename the deeply nested target new_target = level3 / f"renamed_target_{i}" target.rename(new_target) - + old_path = PurePosixPath(f"level1_{i}/level2_{i}/level3_{i}/target_{i}") new_path = PurePosixPath(f"level1_{i}/level2_{i}/level3_{i}/renamed_target_{i}") operations.append((old_path, new_path)) - + start_time = time.time() - + # Process all operations rapidly - this can cause state corruption/hangs for old_path, new_path in operations: try: watching.update_path(working_state, old_path, loop) watching.update_path(working_state, new_path, loop) except Exception as e: - pytest.fail(f"Rapid nested operations failed for {old_path} -> {new_path}: {e}") - + pytest.fail( + f"Rapid nested operations failed for {old_path} -> {new_path}: {e}" + ) + end_time = time.time() duration = end_time - start_time - + # Should complete without hanging even with rapid operations assert duration < 10.0, f"Rapid nested operations took too long: {duration}s" - + # Verify final state consistency for i, (old_path, new_path) in enumerate(operations): # Old paths should be gone old_begin, old_entries = watching.treeget(working_state, old_path) assert old_begin is None, f"Old path {old_path} should be removed" - + # New paths should exist new_begin, new_entries = watching.treeget(working_state, new_path) assert new_begin is not None, f"New path {new_path} should exist" @@ -291,112 +305,115 @@ def test_rapid_nested_directory_operations_cause_corruption(setup_watcher): def test_nested_directory_treeget_corruption(setup_watcher): """Test that treeget function handles nested path operations correctly without corruption.""" temp_dir = setup_watcher - + # Create a complex tree structure root_dirs = [] for i in range(3): root_dir = temp_dir / f"root_{i}" root_dir.mkdir() - + for j in range(2): mid_dir = root_dir / f"mid_{j}" mid_dir.mkdir() - + for k in range(2): leaf_dir = mid_dir / f"leaf_{k}" leaf_dir.mkdir() - + # Add files to leaf directories for l in range(5): (leaf_dir / f"file_{l}.txt").write_text(f"content_{i}_{j}_{k}_{l}") - + root_dirs.append(root_dir) - + # Initialize state initial_root = watching.walk(PurePosixPath()) watching.state.root = initial_root - + # Test treeget with various nested paths test_paths = [ PurePosixPath("root_0"), - PurePosixPath("root_0/mid_0"), + PurePosixPath("root_0/mid_0"), PurePosixPath("root_0/mid_0/leaf_0"), PurePosixPath("root_1/mid_1/leaf_1"), PurePosixPath("root_2/mid_0/leaf_1"), ] - + # Verify treeget works correctly for all paths for path in test_paths: begin, entries = watching.treeget(initial_root, path) assert begin is not None, f"treeget should find existing path: {path}" assert len(entries) >= 1, f"treeget should return entries for: {path}" - + # Now rename a nested directory and test treeget consistency old_leaf = temp_dir / "root_0" / "mid_0" / "leaf_0" new_leaf = temp_dir / "root_0" / "mid_0" / "renamed_leaf" old_leaf.rename(new_leaf) - + # Update the state loop = asyncio.new_event_loop() working_state = initial_root[:] - + old_nested_path = PurePosixPath("root_0/mid_0/leaf_0") new_nested_path = PurePosixPath("root_0/mid_0/renamed_leaf") - + # Update paths watching.update_path(working_state, old_nested_path, loop) watching.update_path(working_state, new_nested_path, loop) - + # Verify treeget consistency after the update old_begin, old_entries = watching.treeget(working_state, old_nested_path) assert old_begin is None, "Old nested path should not be found after rename" - + new_begin, new_entries = watching.treeget(working_state, new_nested_path) assert new_begin is 
not None, "New nested path should be found after rename" assert len(new_entries) >= 1, "New nested path should have entries" - + # Verify that other paths are still accessible (no corruption) - for path in [PurePosixPath("root_1/mid_1/leaf_1"), PurePosixPath("root_2/mid_0/leaf_1")]: + for path in [ + PurePosixPath("root_1/mid_1/leaf_1"), + PurePosixPath("root_2/mid_0/leaf_1"), + ]: begin, entries = watching.treeget(working_state, path) assert begin is not None, f"Other paths should remain accessible: {path}" def test_format_update_infinite_loop_with_complex_nested_changes(setup_watcher): """Create a scenario that specifically triggers infinite loops in format_update. - + The hang often occurs in format_update when the diff algorithm gets confused by complex nested directory moves. """ temp_dir = setup_watcher - + # Create a complex scenario that can confuse the diff algorithm # Multiple directories with similar names and nested structures dirs_data = [] - + for i in range(4): # Create main directory main_dir = temp_dir / f"main_{i}" main_dir.mkdir() - + # Create subdirectories with similar patterns sub_dir = main_dir / "common_subdir_name" sub_dir.mkdir() - + # Create files with varying content for j in range(15): (sub_dir / f"file_{j:02d}.txt").write_text(f"main_{i}_content_{j}") - + # Add another level of nesting nested = sub_dir / "nested_level" nested.mkdir() for j in range(8): (nested / f"nested_{j}.txt").write_text(f"nested_{i}_{j}") - + dirs_data.append((main_dir, sub_dir, nested)) - + # Get initial state old_state = watching.walk(PurePosixPath()) - + # Perform complex renames that can confuse the diff algorithm # Rename all subdirectories to have even more similar names for i, (main_dir, sub_dir, nested) in enumerate(dirs_data): @@ -404,49 +421,53 @@ def test_format_update_infinite_loop_with_complex_nested_changes(setup_watcher): new_sub_name = f"renamed_common_subdir_{i}" new_sub_dir = main_dir / new_sub_name sub_dir.rename(new_sub_dir) - + # Also rename some files to create more confusion for j in range(0, 10, 2): # Rename every other file old_file = new_sub_dir / f"file_{j:02d}.txt" new_file = new_sub_dir / f"renamed_file_{j:02d}.txt" if old_file.exists(): old_file.rename(new_file) - + # Get new state new_state = watching.walk(PurePosixPath()) - + # This is the critical test - format_update with complex nested changes # that have caused infinite loops in the past start_time = time.time() - + try: # Set a more aggressive timeout def timeout_handler(signum, frame): raise TimeoutError("format_update appears to be hanging") - + # Set a 10-second timeout signal.signal(signal.SIGALRM, timeout_handler) signal.alarm(10) - + try: update_msg = watching.format_update(old_state, new_state) signal.alarm(0) # Cancel the alarm - + end_time = time.time() duration = end_time - start_time - + # Even complex diffs should complete quickly - assert duration < 8.0, f"format_update took {duration}s - possible infinite loop" - + assert duration < 8.0, ( + f"format_update took {duration}s - possible infinite loop" + ) + # Verify the result is valid assert update_msg, "format_update should return a message" decoded = msgspec.json.decode(update_msg, type=UpdateMessage) assert decoded.update, "Update should contain operations" - + except TimeoutError: signal.alarm(0) - pytest.fail("format_update hung/infinite loop detected with complex nested changes") - + pytest.fail( + "format_update hung/infinite loop detected with complex nested changes" + ) + except Exception as e: signal.alarm(0) 
pytest.fail(f"format_update failed: {e}") @@ -455,7 +476,7 @@ def test_format_update_infinite_loop_with_complex_nested_changes(setup_watcher): def test_update_path_with_corrupted_tree_state(setup_watcher): """Test update_path when the tree state becomes corrupted by rapid changes.""" temp_dir = setup_watcher - + # Create a nested structure parent = temp_dir / "parent" parent.mkdir() @@ -463,57 +484,57 @@ def test_update_path_with_corrupted_tree_state(setup_watcher): child.mkdir() target = child / "target_dir" target.mkdir() - + # Add many files to make operations slower for i in range(30): (target / f"file_{i:03d}.txt").write_text(f"content_{i}") - + # Add nested subdirectories for i in range(3): subdir = target / f"subdir_{i}" subdir.mkdir() for j in range(10): (subdir / f"sub_file_{j}.txt").write_text(f"sub_content_{i}_{j}") - + # Initialize state watching.state.root = watching.walk(PurePosixPath()) - + # Create a working copy that we'll manually corrupt to simulate race conditions working_state = watching.state.root[:] - + loop = asyncio.new_event_loop() - + # Rename the directory new_target = child / "renamed_target" target.rename(new_target) - + # Simulate the race condition by manually corrupting the tree state # This mimics what happens when inotify events arrive out of order - + # First, try to update a path that should exist old_path = PurePosixPath("parent/child/target_dir") - + # Manually remove an entry to simulate corruption if len(working_state) > 5: # Remove a random entry to corrupt the tree structure del working_state[3] - + start_time = time.time() - + try: # This should handle corrupted state gracefully watching.update_path(working_state, old_path, loop) - + # Now add the new path new_path = PurePosixPath("parent/child/renamed_target") watching.update_path(working_state, new_path, loop) - + end_time = time.time() duration = end_time - start_time - + # Should complete without hanging even with corrupted state assert duration < 5.0, f"update_path with corrupted state took {duration}s" - + except Exception as e: # Some exceptions are expected with corrupted state, but shouldn't hang end_time = time.time() @@ -524,141 +545,143 @@ def test_update_path_with_corrupted_tree_state(setup_watcher): def test_simulate_real_inotify_event_sequence(setup_watcher): """Simulate the exact inotify event sequence that causes hangs.""" temp_dir = setup_watcher - + # Create the exact scenario from real usage that triggers the bug project_dir = temp_dir / "project" project_dir.mkdir() - + src_dir = project_dir / "src" src_dir.mkdir() - + components_dir = src_dir / "components" components_dir.mkdir() - + # This is the directory that will be renamed old_component = components_dir / "OldComponent" old_component.mkdir() - + # Add files that exist in real projects for filename in ["index.tsx", "styles.css", "types.ts", "utils.ts"]: (old_component / filename).write_text(f"// {filename} content") - + # Add a subdirectory with more files sub_dir = old_component / "subcomponents" sub_dir.mkdir() for i in range(5): (sub_dir / f"SubComponent{i}.tsx").write_text(f"// SubComponent{i}") - + # Initialize state watching.state.root = watching.walk(PurePosixPath()) working_state = watching.state.root[:] - + loop = asyncio.new_event_loop() - + # This is the exact operation that causes hangs in real usage new_component = components_dir / "NewComponent" old_component.rename(new_component) - + # Simulate the inotify event sequence that causes problems # IN_MOVED_FROM event for the old directory old_path = 
PurePosixPath("project/src/components/OldComponent") - - # IN_MOVED_TO event for the new directory + + # IN_MOVED_TO event for the new directory new_path = PurePosixPath("project/src/components/NewComponent") - + # Track how long the operations take start_time = time.time() - + # Set up timeout detection def timeout_handler(signum, frame): raise TimeoutError("Simulated inotify sequence hung") - + signal.signal(signal.SIGALRM, timeout_handler) signal.alarm(15) # 15 second timeout - + try: # This sequence is where the hang occurs in real usage watching.update_path(working_state, old_path, loop) watching.update_path(working_state, new_path, loop) - + # If we get here without hanging, cancel the alarm signal.alarm(0) - + end_time = time.time() duration = end_time - start_time - + # Real inotify operations should be fast assert duration < 10.0, f"Simulated inotify sequence took {duration}s" - + # Verify the final state is correct old_begin, old_entries = watching.treeget(working_state, old_path) assert old_begin is None, "Old component path should be removed" - + new_begin, new_entries = watching.treeget(working_state, new_path) assert new_begin is not None, "New component path should exist" assert len(new_entries) > 1, "New component should contain all files" - + except TimeoutError: signal.alarm(0) pytest.fail("HANG DETECTED: Simulated inotify event sequence hung!") - + except Exception as e: signal.alarm(0) pytest.fail(f"Simulated inotify sequence failed: {e}") - + finally: signal.alarm(0) # Ensure alarm is cancelled """Test format_update with nested directory changes that could cause infinite loops.""" temp_dir = setup_watcher - + # Create complex nested structure that has caused issues complex_structure = temp_dir / "complex" complex_structure.mkdir() - + # Create multiple levels with similar names (potential for confusion) level_a = complex_structure / "level_a" level_a.mkdir() sublevel_a = level_a / "sublevel" sublevel_a.mkdir() - - level_b = complex_structure / "level_b" + + level_b = complex_structure / "level_b" level_b.mkdir() sublevel_b = level_b / "sublevel" sublevel_b.mkdir() - + # Add files to each sublevel for i in range(10): (sublevel_a / f"file_a_{i}.txt").write_text(f"content_a_{i}") (sublevel_b / f"file_b_{i}.txt").write_text(f"content_b_{i}") - + # Get initial state old_state = watching.walk(PurePosixPath()) - + # Perform nested directory renames that could confuse the diff algorithm renamed_sublevel_a = level_a / "renamed_sublevel" sublevel_a.rename(renamed_sublevel_a) - - renamed_sublevel_b = level_b / "also_renamed_sublevel" + + renamed_sublevel_b = level_b / "also_renamed_sublevel" sublevel_b.rename(renamed_sublevel_b) - + # Get new state new_state = watching.walk(PurePosixPath()) - + # This is where infinite loops or hangs can occur in format_update start_time = time.time() - + try: update_msg = watching.format_update(old_state, new_state) end_time = time.time() - + duration = end_time - start_time - assert duration < 5.0, f"format_update took too long with nested changes: {duration}s" - + assert duration < 5.0, ( + f"format_update took too long with nested changes: {duration}s" + ) + # Verify the update message is valid assert update_msg, "format_update should return valid message" decoded = msgspec.json.decode(update_msg, type=UpdateMessage) assert decoded.update, "Update should contain operations" - + except Exception as e: pytest.fail(f"format_update failed or hung with nested directory changes: {e}") """Test that reproduces the hang when directory rename events 
race with updates. @@ -670,65 +693,65 @@ def test_simulate_real_inotify_event_sequence(setup_watcher): 4. This should cause a hang where old directory names are preserved """ temp_dir = setup_watcher - + # Create test structure with many files to increase chance of race conditions subdir = temp_dir / "original_dir" subdir.mkdir() - + # Create many files to make the directory scan take longer for i in range(50): (subdir / f"file_{i:03d}.txt").write_text(f"content_{i}") - + # Create nested directories nested = subdir / "nested" nested.mkdir() for i in range(20): (nested / f"nested_file_{i:03d}.txt").write_text(f"nested_content_{i}") - + # Initial scan to populate the state initial_root = watching.walk(PurePosixPath()) watching.state.root = initial_root - + # Verify initial structure initial_names = [entry.name for entry in initial_root] assert "original_dir" in initial_names - + # Create a mock event loop for testing loop = asyncio.new_event_loop() - + # Simulate the problematic sequence: # 1. Start processing the original directory # 2. Rename it while processing # 3. Try to update both old and new paths - + # Start by getting the initial state original_rootmod = watching.state.root[:] - - # Rename the directory + + # Rename the directory renamed_dir = temp_dir / "renamed_dir" subdir.rename(renamed_dir) - + # Now simulate what happens in the inotify watcher: # Multiple rapid updates that can cause race conditions - + # First, try to update the old path (should remove it) watching.update_path(original_rootmod, PurePosixPath("original_dir"), loop) - + # Then try to update the new path (should add it) watching.update_path(original_rootmod, PurePosixPath("renamed_dir"), loop) - + # Check if the state is consistent final_names = [entry.name for entry in original_rootmod] - + # The bug would manifest as: # 1. Old directory name still present (should be gone) # 2. New directory name missing (should be there) # 3. 
Inconsistent state causing hangs - + # This is the expected correct behavior assert "original_dir" not in final_names, "Old directory name should be removed" assert "renamed_dir" in final_names, "New directory name should be present" - + # Additional check: verify we can still walk the renamed directory renamed_walk = watching.walk(PurePosixPath("renamed_dir")) assert len(renamed_walk) > 1, "Should be able to walk renamed directory" @@ -737,11 +760,11 @@ def test_simulate_real_inotify_event_sequence(setup_watcher): def test_concurrent_inotify_events_simulation(setup_watcher): """Simulate concurrent inotify events that can cause the hanging bug.""" temp_dir = setup_watcher - + # Create a complex directory structure dirs = ["dir_a", "dir_b", "dir_c"] created_dirs = [] - + for dir_name in dirs: dir_path = temp_dir / dir_name dir_path.mkdir() @@ -749,29 +772,29 @@ def test_concurrent_inotify_events_simulation(setup_watcher): for i in range(10): (dir_path / f"file_{i}.txt").write_text(f"content in {dir_name}") created_dirs.append(dir_path) - + # Initial state watching.state.root = watching.walk(PurePosixPath()) original_state = watching.state.root[:] - + loop = asyncio.new_event_loop() - + # Simulate rapid concurrent operations that happen in real usage # This mimics what happens when multiple filesystem events arrive rapidly - + # Rename all directories simultaneously (as might happen with mv commands) renamed_paths = [] for i, dir_path in enumerate(created_dirs): new_path = temp_dir / f"renamed_{dirs[i]}" dir_path.rename(new_path) renamed_paths.append(new_path) - + # Now simulate the inotify event processing that causes issues # In the real code, these updates happen in rapid succession # and can cause race conditions - + working_state = original_state[:] - + # Process removal events (IN_MOVED_FROM) for dir_name in dirs: try: @@ -779,7 +802,7 @@ def test_concurrent_inotify_events_simulation(setup_watcher): except Exception as e: # The bug might manifest as exceptions during updates pytest.fail(f"Update path failed for {dir_name}: {e}") - + # Process addition events (IN_MOVED_TO) for i, dir_name in enumerate(dirs): try: @@ -787,14 +810,16 @@ def test_concurrent_inotify_events_simulation(setup_watcher): watching.update_path(working_state, PurePosixPath(new_name), loop) except Exception as e: pytest.fail(f"Update path failed for {new_name}: {e}") - + # Verify final state is consistent final_names = [entry.name for entry in working_state] - + # Check that old names are gone for dir_name in dirs: - assert dir_name not in final_names, f"Old directory {dir_name} should be removed" - + assert dir_name not in final_names, ( + f"Old directory {dir_name} should be removed" + ) + # Check that new names are present for i, dir_name in enumerate(dirs): new_name = f"renamed_{dir_name}" @@ -804,92 +829,92 @@ def test_concurrent_inotify_events_simulation(setup_watcher): def test_format_update_with_rapid_changes(setup_watcher): """Test format_update with rapid directory changes that can cause hangs.""" temp_dir = setup_watcher - + # Create initial structure initial_dirs = ["test1", "test2", "test3"] for dir_name in initial_dirs: dir_path = temp_dir / dir_name dir_path.mkdir() (dir_path / "file.txt").write_text("test content") - + # Get initial state old_state = watching.walk(PurePosixPath()) - + # Perform rapid renames for i, dir_name in enumerate(initial_dirs): old_path = temp_dir / dir_name new_path = temp_dir / f"renamed_{dir_name}" old_path.rename(new_path) - + # Get new state new_state = 
watching.walk(PurePosixPath()) - + # This is where the hang might occur - in format_update start_time = time.time() try: update_msg = watching.format_update(old_state, new_state) end_time = time.time() - + # Should complete quickly duration = end_time - start_time assert duration < 5.0, f"format_update took too long: {duration}s" - + # Decode the update to verify it's valid decoded = msgspec.json.decode(update_msg, type=UpdateMessage) assert decoded.update, "Update message should contain operations" - + except Exception as e: pytest.fail(f"format_update failed or hung: {e}") def test_update_path_with_missing_directory(setup_watcher): """Test update_path when called on a directory that no longer exists. - + This simulates the race condition where update_path is called for a path that was just moved/deleted. """ temp_dir = setup_watcher - + # Create and populate initial state test_dir = temp_dir / "disappearing_dir" test_dir.mkdir() (test_dir / "file.txt").write_text("content") - + initial_state = watching.walk(PurePosixPath()) watching.state.root = initial_state working_state = initial_state[:] - + # Remove the directory shutil.rmtree(test_dir) - + loop = asyncio.new_event_loop() - + # Now try to update the path that no longer exists # This should handle gracefully without hanging start_time = time.time() try: watching.update_path(working_state, PurePosixPath("disappearing_dir"), loop) end_time = time.time() - + duration = end_time - start_time assert duration < 2.0, f"update_path took too long: {duration}s" - + # Verify the directory was removed from the state final_names = [entry.name for entry in working_state] assert "disappearing_dir" not in final_names - + except Exception as e: pytest.fail(f"update_path should handle missing directories gracefully: {e}") def test_threaded_watcher_simulation(setup_watcher): """Test that simulates the actual threaded watcher behavior with directory renames. - + This test creates a more realistic scenario where the watcher thread processes events while filesystem operations are happening. 
""" temp_dir = setup_watcher - + # Create test structure test_dirs = [] for i in range(5): @@ -899,107 +924,110 @@ def test_threaded_watcher_simulation(setup_watcher): for j in range(5): (dir_path / f"file_{j}.txt").write_text(f"content_{i}_{j}") test_dirs.append(dir_path) - + # Initialize state watching.state.root = watching.walk(PurePosixPath()) - + # Create an event loop for the simulation loop = asyncio.new_event_loop() - + # Track state changes state_changes = [] original_broadcast = watching.broadcast - + def tracking_broadcast(msg, loop_param): state_changes.append(msg) return original_broadcast(msg, loop_param) - + # Patch broadcast to track changes with patch("cista.watching.broadcast", side_effect=tracking_broadcast): - # Simulate rapid directory operations start_time = time.time() - + for i, dir_path in enumerate(test_dirs): # Rename directory new_path = temp_dir / f"renamed_thread_test_dir_{i}" dir_path.rename(new_path) - + # Update the watcher state (simulating inotify events) old_name = f"thread_test_dir_{i}" new_name = f"renamed_thread_test_dir_{i}" - + # Simulate the race condition: rapid updates watching.update_path(watching.state.root, PurePosixPath(old_name), loop) watching.update_path(watching.state.root, PurePosixPath(new_name), loop) - + end_time = time.time() - + # Should complete without hanging duration = end_time - start_time assert duration < 10.0, f"Threaded operations took too long: {duration}s" - + # Verify final state is consistent final_names = [entry.name for entry in watching.state.root] - + # Old names should be gone for i in range(5): old_name = f"thread_test_dir_{i}" - assert old_name not in final_names, f"Old directory {old_name} should be removed" - + assert old_name not in final_names, ( + f"Old directory {old_name} should be removed" + ) + # New names should be present for i in range(5): new_name = f"renamed_thread_test_dir_{i}" - assert new_name in final_names, f"New directory {new_name} should be present" + assert new_name in final_names, ( + f"New directory {new_name} should be present" + ) def test_directory_rename_with_nested_structure(setup_watcher): """Test renaming a directory that contains nested subdirectories.""" temp_dir = setup_watcher - + # Create a more complex nested structure main_dir = temp_dir / "main_dir" main_dir.mkdir() - + # Create multiple levels of nesting level1 = main_dir / "level1" level1.mkdir() (level1 / "l1_file.txt").write_text("level1 content") - + level2 = level1 / "level2" level2.mkdir() (level2 / "l2_file.txt").write_text("level2 content") - + level3 = level2 / "level3" level3.mkdir() (level3 / "l3_file.txt").write_text("level3 content") - + # Initial scan initial_root = watching.walk(PurePosixPath()) watching.state.root = initial_root - + # Rename the main directory renamed_main = temp_dir / "renamed_main_dir" main_dir.rename(renamed_main) - + # Update the watching system loop = asyncio.new_event_loop() watching.update_path(watching.state.root, PurePosixPath("main_dir"), loop) watching.update_path(watching.state.root, PurePosixPath("renamed_main_dir"), loop) - + # Verify the entire nested structure is properly updated updated_root = watching.state.root updated_names = [entry.name for entry in updated_root] - + assert "main_dir" not in updated_names assert "renamed_main_dir" in updated_names - + # Verify the nested structure is still intact renamed_structure = watching.walk(PurePosixPath("renamed_main_dir")) - + # Extract all the names from the renamed structure all_names = [entry.name for entry in 
renamed_structure] - + # Should contain the directory itself and all nested items assert "renamed_main_dir" in all_names assert "level1" in all_names @@ -1013,31 +1041,31 @@ def test_directory_rename_with_nested_structure(setup_watcher): def test_directory_rename_format_update(setup_watcher): """Test that format_update correctly handles directory renames.""" temp_dir = setup_watcher - + # Create test structure subdir, _, other_dir = create_test_structure(temp_dir) - + # Get initial state old_root = watching.walk(PurePosixPath()) - + # Rename directory renamed_subdir = temp_dir / "renamed_subdir" subdir.rename(renamed_subdir) - + # Get new state new_root = watching.walk(PurePosixPath()) - + # Generate update message update_msg = watching.format_update(old_root, new_root) - + # The update should not be empty and should contain proper operations assert update_msg assert "update" in update_msg - + # Decode and verify the update contains expected operations decoded = msgspec.json.decode(update_msg, type=UpdateMessage) assert decoded.update # Should have update operations - + # The update should reflect the rename operation (delete old, insert new) operations = decoded.update assert len(operations) > 0 @@ -1046,102 +1074,109 @@ def test_directory_rename_format_update(setup_watcher): def test_concurrent_directory_operations(setup_watcher): """Test behavior when multiple directory operations happen concurrently.""" temp_dir = setup_watcher - + # Create multiple directories dirs_to_create = ["dir1", "dir2", "dir3"] created_dirs = [] - + for dir_name in dirs_to_create: dir_path = temp_dir / dir_name dir_path.mkdir() (dir_path / f"{dir_name}_file.txt").write_text(f"content for {dir_name}") created_dirs.append(dir_path) - + # Initial scan initial_root = watching.walk(PurePosixPath()) watching.state.root = initial_root - + # Rename multiple directories "simultaneously" renamed_dirs = [] for i, dir_path in enumerate(created_dirs): - renamed_path = temp_dir / f"renamed_dir{i+1}" + renamed_path = temp_dir / f"renamed_dir{i + 1}" dir_path.rename(renamed_path) renamed_dirs.append(renamed_path) - + # Update the watching system for all changes loop = asyncio.new_event_loop() - + # Update for all old paths (should remove them) for dir_name in dirs_to_create: watching.update_path(watching.state.root, PurePosixPath(dir_name), loop) - + # Update for all new paths (should add them) for i in range(len(renamed_dirs)): - watching.update_path(watching.state.root, PurePosixPath(f"renamed_dir{i+1}"), loop) - + watching.update_path( + watching.state.root, PurePosixPath(f"renamed_dir{i + 1}"), loop + ) + # Verify final state final_root = watching.state.root final_names = [entry.name for entry in final_root] - + # Old names should be gone for dir_name in dirs_to_create: assert dir_name not in final_names - + # New names should be present for i in range(len(renamed_dirs)): - assert f"renamed_dir{i+1}" in final_names + assert f"renamed_dir{i + 1}" in final_names @pytest.mark.slow def test_watcher_doesnt_hang_on_directory_rename(setup_watcher): """Test that the watcher doesn't hang when a directory is renamed. - + This test specifically addresses the reported bug where directory renames cause the system to hang and no more operations go through. 
""" temp_dir = setup_watcher - + # Create test structure subdir, _, _ = create_test_structure(temp_dir) - + # Initialize the watcher state watching.state.root = watching.walk(PurePosixPath()) - + # Mock the inotify events to simulate what happens during a rename # This simulates the problematic scenario described in the bug report - with patch('time.monotonic', side_effect=[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]): - + with patch("time.monotonic", side_effect=[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]): # Simulate the rename operation renamed_subdir = temp_dir / "renamed_test_subdir" subdir.rename(renamed_subdir) - + # Create a simple event loop for testing loop = asyncio.new_event_loop() - + # This should complete without hanging start_time = time.time() - + # Update the path - this is where the hang might occur watching.update_path(watching.state.root, PurePosixPath("test_subdir"), loop) - watching.update_path(watching.state.root, PurePosixPath("renamed_test_subdir"), loop) - + watching.update_path( + watching.state.root, PurePosixPath("renamed_test_subdir"), loop + ) + end_time = time.time() - + # The operation should complete quickly (within 5 seconds) - assert end_time - start_time < 5.0, "Directory rename operation took too long, possible hang detected" - + assert end_time - start_time < 5.0, ( + "Directory rename operation took too long, possible hang detected" + ) + # Verify the state is consistent final_names = [entry.name for entry in watching.state.root] assert "test_subdir" not in final_names assert "renamed_test_subdir" in final_names - + # Verify we can still perform operations after the rename # This tests that the system isn't in a broken state another_dir = temp_dir / "post_rename_dir" another_dir.mkdir() - + # This should work without issues - watching.update_path(watching.state.root, PurePosixPath("post_rename_dir"), loop) + watching.update_path( + watching.state.root, PurePosixPath("post_rename_dir"), loop + ) final_names_after = [entry.name for entry in watching.state.root] assert "post_rename_dir" in final_names_after -- 2.49.0 From 3a65277994e2827322a1f62c753da09e2e1633db Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Wed, 13 Aug 2025 10:28:30 -0700 Subject: [PATCH 06/43] Build frontend using bun if found. 
--- scripts/build-frontend.py | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/scripts/build-frontend.py b/scripts/build-frontend.py index 85e52b2..e61d5d5 100644 --- a/scripts/build-frontend.py +++ b/scripts/build-frontend.py @@ -11,17 +11,27 @@ class CustomBuildHook(BuildHookInterface): def initialize(self, version, build_data): super().initialize(version, build_data) stderr.write(">>> Building Cista frontend\n") - npm = shutil.which("npm") - if npm is None: - raise RuntimeError( - "NodeJS `npm` is required for building Cista but it was not found" - ) + npm = None + bun = shutil.which("bun") + if bun is None: + npm = shutil.which("npm") + if npm is None: + raise RuntimeError( + "Bun or NodeJS `npm` is required for building but neither was found" + ) # npm --prefix doesn't work on Windows, so we chdir instead os.chdir("frontend") try: - stderr.write("### npm install\n") - subprocess.run([npm, "install"], check=True) # noqa: S603 - stderr.write("\n### npm run build\n") - subprocess.run([npm, "run", "build"], check=True) # noqa: S603 + if npm: + stderr.write("### npm install\n") + subprocess.run([npm, "install"], check=True) # noqa: S603 + stderr.write("\n### npm run build\n") + subprocess.run([npm, "run", "build"], check=True) # noqa: S603 + else: + assert bun + stderr.write("### bun install\n") + subprocess.run([bun, "install"], check=True) # noqa: S603 + stderr.write("\n### bun run build\n") + subprocess.run([bun, "run", "build"], check=True) # noqa: S603 finally: os.chdir("..") -- 2.49.0 From 05cc823e374d461b604f374f1b7edcfa4b49b816 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Wed, 13 Aug 2025 10:28:49 -0700 Subject: [PATCH 07/43] Support HDR image formats --- cista/preview.py | 3 +++ frontend/src/components/MediaPreview.vue | 2 +- pyproject.toml | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/cista/preview.py b/cista/preview.py index c37a4a4..ad007b1 100644 --- a/cista/preview.py +++ b/cista/preview.py @@ -17,6 +17,9 @@ from sanic.log import logger from cista import config from cista.util.filename import sanitize +import pillow_heif + +pillow_heif.register_heif_opener() bp = Blueprint("preview", url_prefix="/preview") diff --git a/frontend/src/components/MediaPreview.vue b/frontend/src/components/MediaPreview.vue index 2a1e9c0..03cdf18 100644 --- a/frontend/src/components/MediaPreview.vue +++ b/frontend/src/components/MediaPreview.vue @@ -101,7 +101,7 @@ const video = () => ['mkv', 'mp4', 'webm', 'mov', 'avi'].includes(props.doc.ext) const audio = () => ['mp3', 'flac', 'ogg', 'aac'].includes(props.doc.ext) const archive = () => ['zip', 'tar', 'gz', 'bz2', 'xz', '7z', 'rar'].includes(props.doc.ext) const preview = () => ( - ['bmp', 'ico', 'tif', 'tiff', 'pdf'].includes(props.doc.ext) || + ['bmp', 'ico', 'tif', 'tiff', 'heic', 'heif', 'pdf', 'epub', 'mobi'].includes(props.doc.ext) || props.doc.size > 500000 && ['avif', 'webp', 'png', 'jpg', 'jpeg'].includes(props.doc.ext) ) diff --git a/pyproject.toml b/pyproject.toml index 401c9f1..52132f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "natsort", "pathvalidate", "pillow", + "pillow-heif>=1.1.0", "pyjwt", "pymupdf", "sanic", -- 2.49.0 From 20a5c66e77edbe4fa820697f114f959e8b84d171 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Wed, 13 Aug 2025 10:49:23 -0700 Subject: [PATCH 08/43] Point to Bun install instructions. 
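
When neither runtime is installed, the hook now fails with a link to the Bun
install instructions rather than a bare error. A quick pre-flight check on a
build host could look like this (plain shell, illustrative only, not part of
the hook):

    command -v bun || command -v npm || echo "Neither found, see https://bun.com/"
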
--- scripts/build-frontend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build-frontend.py b/scripts/build-frontend.py index e61d5d5..a4b35b3 100644 --- a/scripts/build-frontend.py +++ b/scripts/build-frontend.py @@ -17,7 +17,7 @@ class CustomBuildHook(BuildHookInterface): npm = shutil.which("npm") if npm is None: raise RuntimeError( - "Bun or NodeJS `npm` is required for building but neither was found" + "Bun or NodeJS `npm` is required for building but neither was found\n Visit https://bun.com/" ) # npm --prefix doesn't work on Windows, so we chdir instead os.chdir("frontend") -- 2.49.0 From af4e90357f83b8cd36359020f4d7d879542ec696 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Wed, 13 Aug 2025 11:08:38 -0700 Subject: [PATCH 09/43] More debug on watching --- cista/watching.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/cista/watching.py b/cista/watching.py index 6e03872..95a57df 100644 --- a/cista/watching.py +++ b/cista/watching.py @@ -1,4 +1,5 @@ import asyncio +import os import shutil import sys import threading @@ -7,6 +8,7 @@ from contextlib import suppress from os import stat_result from pathlib import Path, PurePosixPath from stat import S_ISDIR, S_ISREG +import signal import msgspec from natsort import humansorted, natsort_keygen, ns @@ -174,6 +176,9 @@ state = State() rootpath: Path = None # type: ignore quit = threading.Event() +# Keep a reference so the file stays open for faulthandler outputs +_faulthandler_file = None # type: ignore + ## Filesystem scanning @@ -574,6 +579,29 @@ async def start(app, loop): global rootpath config.load_config() rootpath = config.config.path + # Optional: enable SIGUSR1 stack dumps in production for debugging hangs + # Control with env CISTA_STACK_DUMP (default: enabled). Sends all thread + # stacks to a per-process log in /tmp when receiving SIGUSR1. 
+ if os.environ.get("CISTA_STACK_DUMP", "1") == "1": + try: + import faulthandler + + global _faulthandler_file + if _faulthandler_file is None: + log_path = f"/tmp/cista-stacks-{os.getpid()}.log" + # Line-buffered text file so writes appear promptly + _faulthandler_file = open(log_path, "a", buffering=1) + faulthandler.enable(file=_faulthandler_file) + faulthandler.register( + signal.SIGUSR1, file=_faulthandler_file, all_threads=True, chain=True + ) + logger.info( + "Stack dump enabled: send SIGUSR1 to PID %s to write all thread stacks to %s", + os.getpid(), + log_path, + ) + except Exception: + logger.exception("Failed to enable SIGUSR1 stack dump handler") use_inotify = sys.platform == "linux" app.ctx.watcher = threading.Thread( target=watcher_inotify if use_inotify else watcher_poll, -- 2.49.0 From 9cc210140e20ba648a349bb0db92c0b3c2857022 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Wed, 13 Aug 2025 11:21:29 -0700 Subject: [PATCH 10/43] More robust updates --- cista/watching.py | 118 +++++++++++++++++++++++++++++----------------- 1 file changed, 75 insertions(+), 43 deletions(-) diff --git a/cista/watching.py b/cista/watching.py index 95a57df..0825f90 100644 --- a/cista/watching.py +++ b/cista/watching.py @@ -91,22 +91,7 @@ def treeinspos(rootmod: list[FileEntry], relpath: PurePosixPath, relfile: int): isfile = 0 level = 0 i = 0 - iteration_count = 0 - for i, rel, entry in treeiter(rootmod): - iteration_count += 1 - - # Detect potential infinite loops in treeinspos - if iteration_count % 1000 == 0: - logger.debug( - f"DEBUG: treeinspos iteration {iteration_count}, i={i}, rel={rel}, entry.name={entry.name}, level={level}, entry.level={entry.level}" - ) - - if iteration_count > 10000: # Emergency brake for infinite loops - logger.error( - f"ERROR: treeinspos potential infinite loop! 
iteration={iteration_count}, relpath={relpath}, i={i}, level={level}" - ) - break if entry.level > level: # We haven't found item at level, skip subdirectories @@ -152,7 +137,7 @@ def treeinspos(rootmod: list[FileEntry], relpath: PurePosixPath, relfile: int): logger.debug(f"DEBUG: treeinspos RETURN: cmp > 0, returning i={i}") return i if cmp < 0: - logger.debug(f"DEBUG: treeinspos CONTINUE: cmp < 0") + logger.debug("DEBUG: treeinspos CONTINUE: cmp < 0") continue logger.debug(f"DEBUG: treeinspos INCREMENT_LEVEL: level {level} -> {level + 1}") @@ -166,9 +151,7 @@ def treeinspos(rootmod: list[FileEntry], relpath: PurePosixPath, relfile: int): logger.debug(f"DEBUG: treeinspos FOR_ELSE: incrementing i from {i} to {i + 1}") i += 1 - logger.debug( - f"DEBUG: treeinspos EXIT: returning i={i}, iterations={iteration_count}" - ) + logger.debug(f"DEBUG: treeinspos EXIT: returning i={i}") return i @@ -336,6 +319,10 @@ def format_update(old, new): update = [] keep_count = 0 iteration_count = 0 + # Precompute index maps to allow deterministic tie-breaking when both + # candidates exist in both sequences but are not equal (rename/move cases) + old_pos = {e: i for i, e in enumerate(old)} + new_pos = {e: i for i, e in enumerate(new)} while oidx < len(old) and nidx < len(new): iteration_count += 1 @@ -411,18 +398,38 @@ def format_update(old, new): update.append(UpdIns(insert_items)) if not modified: - logger.error( - f"ERROR: format_update INFINITE_LOOP: nidx={nidx}, oidx={oidx}, old_len={len(old)}, new_len={len(new)}" - ) - logger.error( - f"ERROR: old[oidx]={old[oidx].name if oidx < len(old) else 'OUT_OF_BOUNDS'}" - ) - logger.error( - f"ERROR: new[nidx]={new[nidx].name if nidx < len(new) else 'OUT_OF_BOUNDS'}" - ) - raise Exception( - f"Infinite loop in diff {nidx=} {oidx=} {len(old)=} {len(new)=}" - ) + # Tie-break: both items exist in both lists but don't match here. + # Decide whether to delete old[oidx] first or insert new[nidx] first + # based on which alignment is closer. 
+ if oidx >= len(old) or nidx >= len(new): + break + cur_old = old[oidx] + cur_new = new[nidx] + + pos_old_in_new = new_pos.get(cur_old) + pos_new_in_old = old_pos.get(cur_new) + + # Default distances if not present (shouldn't happen if in remain sets) + dist_del = (pos_old_in_new - nidx) if pos_old_in_new is not None else 1 + dist_ins = (pos_new_in_old - oidx) if pos_new_in_old is not None else 1 + + # Prefer the operation with smaller forward distance; tie => delete + if dist_del <= dist_ins: + # Delete current old item + oremain.discard(cur_old) + update.append(UpdDel(1)) + oidx += 1 + logger.debug( + f"DEBUG: format_update TIEBREAK_DEL: oidx->{oidx}, cur_old={cur_old.name}" + ) + else: + # Insert current new item + nremain.discard(cur_new) + update.append(UpdIns([cur_new])) + nidx += 1 + logger.debug( + f"DEBUG: format_update TIEBREAK_INS: nidx->{nidx}, cur_new={cur_new.name}" + ) # Diff any remaining if keep_count > 0: @@ -547,18 +554,43 @@ def watcher_inotify(loop): ) t0 = time.perf_counter() logger.debug("DEBUG: inotify CALLING format_update") - update = format_update(state.root, rootmod) - logger.debug("DEBUG: inotify format_update COMPLETED") - t1 = time.perf_counter() - with state.lock: - logger.debug("DEBUG: inotify BROADCASTING update") - broadcast(update, loop) - state.root = rootmod - logger.debug("DEBUG: inotify BROADCAST completed, state updated") - t2 = time.perf_counter() - logger.debug( - f"Format update took {t1 - t0:.1f}s, broadcast {t2 - t1:.1f}s" - ) + try: + update = format_update(state.root, rootmod) + logger.debug("DEBUG: inotify format_update COMPLETED") + t1 = time.perf_counter() + with state.lock: + logger.debug("DEBUG: inotify BROADCASTING update") + broadcast(update, loop) + state.root = rootmod + logger.debug("DEBUG: inotify BROADCAST completed, state updated") + t2 = time.perf_counter() + logger.debug( + f"Format update took {t1 - t0:.1f}s, broadcast {t2 - t1:.1f}s" + ) + except Exception: + logger.exception( + "format_update failed; falling back to full rescan" + ) + # Fallback: full rescan and try diff again; last resort send full root + try: + fresh = walk(PurePosixPath()) + try: + update = format_update(state.root, fresh) + with state.lock: + broadcast(update, loop) + state.root = fresh + logger.debug("Fallback diff succeeded after full rescan") + except Exception: + logger.exception( + "Fallback diff failed; sending full root snapshot" + ) + with state.lock: + broadcast(format_root(fresh), loop) + state.root = fresh + except Exception: + logger.exception( + "Full rescan failed; dropping this batch of updates" + ) del i # Free the inotify object -- 2.49.0 From 10f7ff29cdc6da539f2374c8875eee21da33cc62 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Wed, 13 Aug 2025 12:23:13 -0700 Subject: [PATCH 11/43] Cleanup --- cista/watching.py | 200 ++-------------------------------------------- 1 file changed, 5 insertions(+), 195 deletions(-) diff --git a/cista/watching.py b/cista/watching.py index 0825f90..b36e165 100644 --- a/cista/watching.py +++ b/cista/watching.py @@ -1,5 +1,4 @@ import asyncio -import os import shutil import sys import threading @@ -8,7 +7,6 @@ from contextlib import suppress from os import stat_result from pathlib import Path, PurePosixPath from stat import S_ISDIR, S_ISREG -import signal import msgspec from natsort import humansorted, natsort_keygen, ns @@ -48,82 +46,47 @@ def treeiter(rootmod): def treeget(rootmod: list[FileEntry], path: PurePosixPath): - logger.debug(f"DEBUG: treeget ENTRY: path={path}, rootmod_len={len(rootmod)}") 
begin = None ret = [] - iteration_count = 0 for i, relpath, entry in treeiter(rootmod): - iteration_count += 1 - if ( - iteration_count % 1000 == 0 - ): # Log every 1000 iterations to detect infinite loops - logger.debug( - f"DEBUG: treeget iteration {iteration_count}, i={i}, relpath={relpath}, entry.name={entry.name}" - ) - if begin is None: if relpath == path: - logger.debug(f"DEBUG: treeget FOUND path {path} at index {i}") begin = i ret.append(entry) continue if entry.level <= len(path.parts): - logger.debug( - f"DEBUG: treeget BREAK: entry.level={entry.level} <= path.parts_len={len(path.parts)}" - ) break ret.append(entry) - logger.debug( - f"DEBUG: treeget EXIT: path={path}, begin={begin}, ret_len={len(ret)}, iterations={iteration_count}" - ) return begin, ret def treeinspos(rootmod: list[FileEntry], relpath: PurePosixPath, relfile: int): # Find the first entry greater than the new one # precondition: the new entry doesn't exist - logger.debug( - f"DEBUG: treeinspos ENTRY: relpath={relpath}, relfile={relfile}, rootmod_len={len(rootmod)}" - ) - isfile = 0 level = 0 i = 0 for i, rel, entry in treeiter(rootmod): - if entry.level > level: # We haven't found item at level, skip subdirectories - logger.debug( - f"DEBUG: treeinspos SKIP: entry.level={entry.level} > level={level}" - ) continue if entry.level < level: # We have passed the level, so the new item is the first - logger.debug( - f"DEBUG: treeinspos RETURN_EARLY: entry.level={entry.level} < level={level}, returning i={i}" - ) return i if level == 0: # root - logger.debug("DEBUG: treeinspos ROOT: incrementing level from 0 to 1") level += 1 continue ename = rel.parts[level - 1] name = relpath.parts[level - 1] - logger.debug( - f"DEBUG: treeinspos COMPARE: ename='{ename}', name='{name}', level={level}" - ) esort = sortkey(ename) nsort = sortkey(name) # Non-leaf are always folders, only use relfile at leaf isfile = relfile if len(relpath.parts) == level else 0 - logger.debug( - f"DEBUG: treeinspos SORT: esort={esort}, nsort={nsort}, isfile={isfile}, entry.isfile={entry.isfile}" - ) # First compare by isfile, then by sorting order and if that too matches then case sensitive cmp = ( @@ -131,27 +94,21 @@ def treeinspos(rootmod: list[FileEntry], relpath: PurePosixPath, relfile: int): or (esort > nsort) - (esort < nsort) or (ename > name) - (ename < name) ) - logger.debug(f"DEBUG: treeinspos CMP: cmp={cmp}") if cmp > 0: - logger.debug(f"DEBUG: treeinspos RETURN: cmp > 0, returning i={i}") return i if cmp < 0: - logger.debug("DEBUG: treeinspos CONTINUE: cmp < 0") continue - logger.debug(f"DEBUG: treeinspos INCREMENT_LEVEL: level {level} -> {level + 1}") level += 1 if level > len(relpath.parts): logger.error( - f"ERROR: insertpos level overflow: relpath={relpath}, i={i}, entry.name={entry.name}, entry.level={entry.level}, level={level}" + f"insertpos level overflow: relpath={relpath}, i={i}, entry.name={entry.name}, entry.level={entry.level}, level={level}" ) break else: - logger.debug(f"DEBUG: treeinspos FOR_ELSE: incrementing i from {i} to {i + 1}") i += 1 - logger.debug(f"DEBUG: treeinspos EXIT: returning i={i}") return i @@ -159,9 +116,6 @@ state = State() rootpath: Path = None # type: ignore quit = threading.Event() -# Keep a reference so the file stays open for faulthandler outputs -_faulthandler_file = None # type: ignore - ## Filesystem scanning @@ -233,65 +187,18 @@ def update_root(loop): def update_path(rootmod: list[FileEntry], relpath: PurePosixPath, loop): """Called on FS updates, check the filesystem and broadcast any 
changes.""" - logger.debug( - f"DEBUG: update_path ENTRY: path={relpath}, rootmod_len={len(rootmod)}" - ) - - # Add timing for walk operation - walk_start = time.perf_counter() new = walk(relpath) - walk_end = time.perf_counter() - logger.debug( - f"DEBUG: walk({relpath}) took {walk_end - walk_start:.4f}s, returned {len(new)} entries" - ) - - # Add timing for treeget operation - treeget_start = time.perf_counter() obegin, old = treeget(rootmod, relpath) - treeget_end = time.perf_counter() - logger.debug( - f"DEBUG: treeget({relpath}) took {treeget_end - treeget_start:.4f}s, obegin={obegin}, old_len={len(old) if old else 0}" - ) if old == new: - logger.debug( - f"Watch: Event without changes needed {relpath}" - if old - else f"Watch: Event with old and new missing: {relpath}" - ) - logger.debug(f"DEBUG: update_path EARLY_EXIT: no changes for {relpath}") return - # Debug the deletion operation if obegin is not None: - logger.debug( - f"DEBUG: DELETING entries from rootmod[{obegin}:{obegin + len(old)}] for path {relpath}" - ) del rootmod[obegin : obegin + len(old)] - logger.debug(f"DEBUG: DELETED entries, rootmod_len now {len(rootmod)}") if new: - logger.debug(f"Watch: Update {relpath}" if old else f"Watch: Created {relpath}") - - # Add timing for treeinspos operation - this is where hangs might occur - inspos_start = time.perf_counter() i = treeinspos(rootmod, relpath, new[0].isfile) - inspos_end = time.perf_counter() - logger.debug( - f"DEBUG: treeinspos({relpath}) took {inspos_end - inspos_start:.4f}s, returned index={i}" - ) - - logger.debug( - f"DEBUG: INSERTING {len(new)} entries at position {i} for path {relpath}" - ) rootmod[i:i] = new - logger.debug(f"DEBUG: INSERTED entries, rootmod_len now {len(rootmod)}") - else: - logger.debug(f"Watch: Removed {relpath}") - - logger.debug( - f"DEBUG: update_path EXIT: path={relpath}, final_rootmod_len={len(rootmod)}" - ) def update_space(loop): @@ -311,8 +218,6 @@ def update_space(loop): def format_update(old, new): - logger.debug(f"DEBUG: format_update ENTRY: old_len={len(old)}, new_len={len(new)}") - # Make keep/del/insert diff until one of the lists ends oidx, nidx = 0, 0 oremain, nremain = set(old), set(new) @@ -327,16 +232,10 @@ def format_update(old, new): while oidx < len(old) and nidx < len(new): iteration_count += 1 - # Log every 1000 iterations to detect infinite loops - if iteration_count % 1000 == 0: - logger.debug( - f"DEBUG: format_update iteration {iteration_count}, oidx={oidx}/{len(old)}, nidx={nidx}/{len(new)}" - ) - # Emergency brake for potential infinite loops if iteration_count > 50000: logger.error( - f"ERROR: format_update potential infinite loop! iteration={iteration_count}, oidx={oidx}, nidx={nidx}" + f"format_update potential infinite loop! 
iteration={iteration_count}, oidx={oidx}, nidx={nidx}" ) raise Exception( f"format_update infinite loop detected at iteration {iteration_count}" @@ -346,54 +245,36 @@ def format_update(old, new): # Matching entries are kept if old[oidx] == new[nidx]: entry = old[oidx] - logger.debug( - f"DEBUG: format_update MATCH: entry={entry.name}, oidx={oidx}, nidx={nidx}" - ) - oremain.remove(entry) - nremain.remove(entry) + oremain.discard(entry) + nremain.discard(entry) keep_count += 1 oidx += 1 nidx += 1 continue if keep_count > 0: - logger.debug(f"DEBUG: format_update KEEP: adding UpdKeep({keep_count})") modified = True update.append(UpdKeep(keep_count)) keep_count = 0 # Items only in old are deleted del_count = 0 - del_start_oidx = oidx while oidx < len(old) and old[oidx] not in nremain: - logger.debug( - f"DEBUG: format_update DELETE: removing old[{oidx}]={old[oidx].name}" - ) oremain.remove(old[oidx]) del_count += 1 oidx += 1 if del_count: - logger.debug( - f"DEBUG: format_update DEL: adding UpdDel({del_count}), oidx {del_start_oidx}->{oidx}" - ) update.append(UpdDel(del_count)) continue # Items only in new are inserted insert_items = [] - ins_start_nidx = nidx while nidx < len(new) and new[nidx] not in oremain: entry = new[nidx] - logger.debug( - f"DEBUG: format_update INSERT: adding new[{nidx}]={entry.name}" - ) nremain.remove(entry) insert_items.append(entry) nidx += 1 if insert_items: - logger.debug( - f"DEBUG: format_update INS: adding UpdIns({len(insert_items)} items), nidx {ins_start_nidx}->{nidx}" - ) modified = True update.append(UpdIns(insert_items)) @@ -419,36 +300,20 @@ def format_update(old, new): oremain.discard(cur_old) update.append(UpdDel(1)) oidx += 1 - logger.debug( - f"DEBUG: format_update TIEBREAK_DEL: oidx->{oidx}, cur_old={cur_old.name}" - ) else: # Insert current new item nremain.discard(cur_new) update.append(UpdIns([cur_new])) nidx += 1 - logger.debug( - f"DEBUG: format_update TIEBREAK_INS: nidx->{nidx}, cur_new={cur_new.name}" - ) # Diff any remaining if keep_count > 0: - logger.debug(f"DEBUG: format_update FINAL_KEEP: adding UpdKeep({keep_count})") update.append(UpdKeep(keep_count)) if oremain: - logger.debug( - f"DEBUG: format_update FINAL_DEL: adding UpdDel({len(oremain)}) for remaining old items" - ) update.append(UpdDel(len(oremain))) elif nremain: - logger.debug( - f"DEBUG: format_update FINAL_INS: adding UpdIns({len(new[nidx:])}) for remaining new items" - ) update.append(UpdIns(new[nidx:])) - logger.debug( - f"DEBUG: format_update EXIT: generated {len(update)} operations, iterations={iteration_count}" - ) return msgspec.json.encode({"update": update}).decode() @@ -492,10 +357,7 @@ def watcher_inotify(loop): while not quit.is_set(): i = inotify.adapters.InotifyTree(rootpath.as_posix()) # Initialize the tree from filesystem - t0 = time.perf_counter() update_root(loop) - t1 = time.perf_counter() - logger.debug(f"Root update took {t1 - t0:.1f}s") trefresh = time.monotonic() + 300.0 tspace = time.monotonic() + 5.0 # Watch for changes (frequent wakeups needed for quiting) @@ -516,57 +378,29 @@ def watcher_inotify(loop): if quit.is_set(): return interesting = any(f in modified_flags for f in event[1]) - if event[2] == rootpath.as_posix() and event[3] == "zzz": - logger.debug(f"Watch: {interesting=} {event=}") if interesting: # Update modified path - logger.debug( - f"DEBUG: inotify PROCESSING: event={event}, path={event[2]}/{event[3]}" - ) - t0 = time.perf_counter() path = PurePosixPath(event[2]) / event[3] try: rel_path = path.relative_to(rootpath) - logger.debug( - 
f"DEBUG: inotify CALLING update_path: rel_path={rel_path}" - ) update_path(rootmod, rel_path, loop) - logger.debug( - f"DEBUG: inotify update_path COMPLETED: rel_path={rel_path}" - ) except Exception as e: logger.error( - f"ERROR: inotify update_path FAILED: path={path}, error={e}" + f"Error processing inotify event for path {path}: {e}" ) raise - t1 = time.perf_counter() - logger.debug(f"Watch: Update {event[3]} took {t1 - t0:.1f}s") if not dirty: t = time.monotonic() dirty = True # Wait a maximum of 0.5s to push the updates if dirty and time.monotonic() >= t + 0.5: - logger.debug("DEBUG: inotify TIMEOUT: breaking due to 0.5s timeout") break if dirty and state.root != rootmod: - logger.debug( - f"DEBUG: inotify BATCH_UPDATE: state.root_len={len(state.root)}, rootmod_len={len(rootmod)}" - ) - t0 = time.perf_counter() - logger.debug("DEBUG: inotify CALLING format_update") try: update = format_update(state.root, rootmod) - logger.debug("DEBUG: inotify format_update COMPLETED") - t1 = time.perf_counter() with state.lock: - logger.debug("DEBUG: inotify BROADCASTING update") broadcast(update, loop) state.root = rootmod - logger.debug("DEBUG: inotify BROADCAST completed, state updated") - t2 = time.perf_counter() - logger.debug( - f"Format update took {t1 - t0:.1f}s, broadcast {t2 - t1:.1f}s" - ) except Exception: logger.exception( "format_update failed; falling back to full rescan" @@ -579,7 +413,6 @@ def watcher_inotify(loop): with state.lock: broadcast(update, loop) state.root = fresh - logger.debug("Fallback diff succeeded after full rescan") except Exception: logger.exception( "Fallback diff failed; sending full root snapshot" @@ -611,29 +444,6 @@ async def start(app, loop): global rootpath config.load_config() rootpath = config.config.path - # Optional: enable SIGUSR1 stack dumps in production for debugging hangs - # Control with env CISTA_STACK_DUMP (default: enabled). Sends all thread - # stacks to a per-process log in /tmp when receiving SIGUSR1. - if os.environ.get("CISTA_STACK_DUMP", "1") == "1": - try: - import faulthandler - - global _faulthandler_file - if _faulthandler_file is None: - log_path = f"/tmp/cista-stacks-{os.getpid()}.log" - # Line-buffered text file so writes appear promptly - _faulthandler_file = open(log_path, "a", buffering=1) - faulthandler.enable(file=_faulthandler_file) - faulthandler.register( - signal.SIGUSR1, file=_faulthandler_file, all_threads=True, chain=True - ) - logger.info( - "Stack dump enabled: send SIGUSR1 to PID %s to write all thread stacks to %s", - os.getpid(), - log_path, - ) - except Exception: - logger.exception("Failed to enable SIGUSR1 stack dump handler") use_inotify = sys.platform == "linux" app.ctx.watcher = threading.Thread( target=watcher_inotify if use_inotify else watcher_poll, -- 2.49.0 From 0e7da1f98db89944ca7968d96bdee33944e3a439 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Wed, 13 Aug 2025 12:34:00 -0700 Subject: [PATCH 12/43] Avoid errors with nremain/oremain processing in case the entry is not found. 
--- cista/watching.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cista/watching.py b/cista/watching.py index b36e165..e9eef85 100644 --- a/cista/watching.py +++ b/cista/watching.py @@ -271,7 +271,7 @@ def format_update(old, new): insert_items = [] while nidx < len(new) and new[nidx] not in oremain: entry = new[nidx] - nremain.remove(entry) + nremain.discard(entry) insert_items.append(entry) nidx += 1 if insert_items: @@ -392,8 +392,8 @@ def watcher_inotify(loop): if not dirty: t = time.monotonic() dirty = True - # Wait a maximum of 0.5s to push the updates - if dirty and time.monotonic() >= t + 0.5: + # Wait a maximum of 0.2s to push the updates + if dirty and time.monotonic() >= t + 0.2: break if dirty and state.root != rootmod: try: -- 2.49.0 From 4fe52c764de611dc292f0ee76f2c5af1ad051b91 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Thu, 14 Aug 2025 10:40:32 -0700 Subject: [PATCH 13/43] Updated README for new version. --- README.md | 55 +++++++++++++++++++++++++++------------------- frontend/README.md | 12 +++++----- 2 files changed, 39 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 56e7f33..016facd 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,8 @@ Cista takes its name from the ancient *cistae*, metal containers used by Greeks This is a cutting-edge **file and document server** designed for speed, efficiency, and unparalleled ease of use. Experience **lightning-fast browsing**, thanks to the file list maintained directly in your browser and updated from server filesystem events, coupled with our highly optimized code. Fully **keyboard-navigable** and with a responsive layout, Cista flawlessly adapts to your devices, providing a seamless experience wherever you are. Our powerful **instant search** means you're always just a few keystrokes away from finding exactly what you need. Press **1/2/3** to switch ordering, navigate with all four arrow keys (+Shift to select). Or click your way around on **breadcrumbs that remember where you were**. +**Built-in document and media previews** let you quickly view files without downloading them. Document previews support various formats, while media previews include advanced **HDR support** for high dynamic range content, ensuring your videos and images are displayed with stunning visual quality. + The Cista project started as an inevitable remake of [Droppy](https://github.com/droppyjs/droppy) which we used and loved despite its numerous bugs. Cista Storage stands out in handling even the most exotic filenames, ensuring a smooth experience where others falter. All of this is wrapped in an intuitive interface with automatic light and dark themes, making Cista Storage the ideal choice for anyone seeking a reliable, versatile, and quick file storage solution. Quickly setup your own Cista where your files are just a click away, safe, and always accessible. @@ -16,36 +18,34 @@ Experience Cista by visiting [Cista Demo](https://drop.zi.fi) for a test run and ## Getting Started ### Installation -To install the cista application, use: +We recommend using [UV](https://docs.astral.sh/uv/getting-started/installation/) to install the package and run it: ```fish -pip install cista +uvx cista ``` -Note: Some Linux distributions might need `--break-system-packages` to install Python packages, which are safely installed in the user's home folder. As an alternative to avoid installation, run it with command `pipx run cista` +Alternatively, you can install with `pip` or `uv pip`. 
This enables using the `cista` command directly without `uvx` or `uv run`. + +```fish +pip install cista --break-system-packages +``` ### Running the Server Create an account: (or run a public server without authentication) ```fish -cista --user yourname --privileged +uvx cista --user yourname --privileged ``` Serve your files at http://localhost:8000: ```fish -cista -l :8000 /path/to/files +uvx cista -l :8000 /path/to/files ``` The server remembers its settings in the config folder (default `~/.local/share/cista/`), including the listen port and directory, for future runs without arguments. ### Internet Access -To use your own TLS certificates, place them in the config folder and run: - -```fish -cista -l cista.example.com -``` - Most admins instead find the [Caddy](https://caddyserver.com/) web server convenient for its auto TLS certificates and all. A proxy also allows running multiple web services or Cista instances on the same IP address but different (sub)domains. `/etc/caddy/Caddyfile`: @@ -56,33 +56,44 @@ cista.example.com { } ``` +Nxing or other proxy may be similarly used, or alternatively you can run `cista -l cista.example.com` with cert and key placed in the cista config dir. + ## Development setup For rapid development, we use the Vite development server for the Vue frontend, while running the backend on port 8000 that Vite proxies backend requests to. Each server live reloads whenever its code or configuration are modified. +Make sure you have git, uv and bun (or npm) installed. + +Backend (Python) – setup and run: + +```fish +git clone https://git.zi.fi/Vasanko/cista-storage.git +cd cista-storage +uv sync --dev +uv run cista --dev -l :8000 /path/to/files +``` + +Frontend (Vue/Vite) – run the dev server in another terminal: + ```fish cd frontend -npm install -npm run dev +bun install +bun run dev ``` -Concurrently, start the backend on another terminal: +Building the package for release (frontend + Python wheel/sdist): ```fish -hatch shell -pip install -e '.[dev]' -cista --dev -l :8000 /path/to/files +uv build ``` -We use `hatch shell` for installing on a virtual environment, to avoid disturbing the rest of the system with our hacking. - -Vue is used to build files in `cista/wwwroot`, included prebuilt in the Python package. Running `hatch build` builds the frontend and creates a NodeJS-independent Python package. +Vue is used to build files in `cista/wwwroot`, included prebuilt in the Python package. `uv build` runs the project build hooks to bundle the frontend and produce a NodeJS-independent Python package. ## System Deployment This setup allows easy addition of storages, each with its own domain, configuration, and files. -Assuming a restricted user account `storage` for serving files and that cista is installed system-wide or on this account (check with `sudo -u storage -s`). Alternatively, use `pipx run cista` or `hatch run cista` as the ExecStart command. +Assuming a restricted user account `storage` for serving files and that UV is installed system-wide or on this account. Only UV is required: this does not use git or bun/npm. 
Create `/etc/systemd/system/cista@.service`: @@ -92,7 +103,7 @@ Description=Cista storage %i [Service] User=storage -ExecStart=cista -c /srv/cista/%i -l /srv/cista/%i/socket /media/storage/%i +ExecStart=uvx cista -c /srv/cista/%i -l /srv/cista/%i/socket /media/storage/%i Restart=always [Install] diff --git a/frontend/README.md b/frontend/README.md index 996a7ac..c76116e 100644 --- a/frontend/README.md +++ b/frontend/README.md @@ -22,25 +22,25 @@ If the standalone TypeScript plugin doesn't feel fast enough to you, Volar has a ### Run the backend ```fish -hatch shell -cista --dev -l :8000 +uv sync --dev +uv run cista --dev -l :8000 ``` ### And the Vite server (in another terminal) ```fish cd frontend -npm install -npm run dev +bun install +bun run dev ``` Browse to Vite, which will proxy API requests to port 8000. Both servers live reload changes. ### Type-Check, Compile and Minify for Production -This is also called by `hatch build` during Python packaging: +This is also called by `uv build` during Python packaging: ```fish -npm run build +bun run build ``` -- 2.49.0 From 54b5c80e315c2b3db0a5dc89ec93bed57b5a6432 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Thu, 14 Aug 2025 10:41:21 -0700 Subject: [PATCH 14/43] Use bun everywhere. --- frontend/vite.config.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts index 834d0fd..c022e45 100644 --- a/frontend/vite.config.ts +++ b/frontend/vite.config.ts @@ -8,7 +8,7 @@ import svgLoader from 'vite-svg-loader' import Components from 'unplugin-vue-components/vite' // Development mode: -// npm run dev # Run frontend that proxies to dev_backend +// bun run dev # Run frontend that proxies to dev_backend // cista -l :8000 --dev # Run backend const dev_backend = { target: "http://localhost:8000", -- 2.49.0 From 562398f82603fb5e2c1286cc8eef0908c4eecfdd Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Thu, 14 Aug 2025 10:52:39 -0700 Subject: [PATCH 15/43] README cleanup --- README.md | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 016facd..6f8815d 100644 --- a/README.md +++ b/README.md @@ -16,22 +16,10 @@ Experience Cista by visiting [Cista Demo](https://drop.zi.fi) for a test run and ## Getting Started -### Installation - -We recommend using [UV](https://docs.astral.sh/uv/getting-started/installation/) to install the package and run it: - -```fish -uvx cista -``` - -Alternatively, you can install with `pip` or `uv pip`. This enables using the `cista` command directly without `uvx` or `uv run`. - -```fish -pip install cista --break-system-packages -``` - ### Running the Server +We recommend using [UV](https://docs.astral.sh/uv/getting-started/installation/) to install the package and run it easily: + Create an account: (or run a public server without authentication) ```fish uvx cista --user yourname --privileged @@ -42,11 +30,17 @@ Serve your files at http://localhost:8000: uvx cista -l :8000 /path/to/files ``` +Alternatively, you can install with `pip` or `uv pip`. This enables using the `cista` command directly without `uvx` or `uv run`. + +```fish +pip install cista --break-system-packages +``` + The server remembers its settings in the config folder (default `~/.local/share/cista/`), including the listen port and directory, for future runs without arguments. ### Internet Access -Most admins instead find the [Caddy](https://caddyserver.com/) web server convenient for its auto TLS certificates and all. 
A proxy also allows running multiple web services or Cista instances on the same IP address but different (sub)domains.
+Most admins find the [Caddy](https://caddyserver.com/) web server convenient for its auto TLS certificates and all. A proxy also allows running multiple web services or Cista instances on the same IP address but different (sub)domains.
 
 `/etc/caddy/Caddyfile`:
-- 
2.49.0


From bef9852a6f372e2f9b4eed995002edc54d615fa8 Mon Sep 17 00:00:00 2001
From: Leo Vasanko
Date: Thu, 14 Aug 2025 10:58:05 -0700
Subject: [PATCH 16/43] Fine tuning README

---
 README.md | 64 +++++++++++++++++++++++++++----------------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/README.md b/README.md
index 6f8815d..fa87336 100644
--- a/README.md
+++ b/README.md
@@ -50,38 +50,7 @@ cista.example.com {
 }
 ```
 
-Nxing or other proxy may be similarly used, or alternatively you can run `cista -l cista.example.com` with cert and key placed in the cista config dir.
-
-## Development setup
-
-For rapid development, we use the Vite development server for the Vue frontend, while running the backend on port 8000 that Vite proxies backend requests to. Each server live reloads whenever its code or configuration are modified.
-
-Make sure you have git, uv and bun (or npm) installed.
-
-Backend (Python) – setup and run:
-
-```fish
-git clone https://git.zi.fi/Vasanko/cista-storage.git
-cd cista-storage
-uv sync --dev
-uv run cista --dev -l :8000 /path/to/files
-```
-
-Frontend (Vue/Vite) – run the dev server in another terminal:
-
-```fish
-cd frontend
-bun install
-bun run dev
-```
-
-Building the package for release (frontend + Python wheel/sdist):
-
-```fish
-uv build
-```
-
-Vue is used to build files in `cista/wwwroot`, included prebuilt in the Python package. `uv build` runs the project build hooks to bundle the frontend and produce a NodeJS-independent Python package.
+Nginx or another proxy may be used similarly, or alternatively you can place the cert and key in the cista config dir and run `cista -l cista.example.com`.
 
 ## System Deployment
 
@@ -121,3 +90,34 @@ foo.example.com, bar.example.com {
     reverse_proxy unix//srv/cista/{host}/socket
 }
 ```
+
+## Development setup
+
+For rapid development, we use the Vite development server for the Vue frontend, while running the backend on port 8000 that Vite proxies backend requests to. Each server live reloads whenever its code or configuration are modified.
+
+Make sure you have git, uv and bun (or npm) installed.
+
+Backend (Python) – setup and run:
+
+```fish
+git clone https://git.zi.fi/Vasanko/cista-storage.git
+cd cista-storage
+uv sync --dev
+uv run cista --dev -l :8000 /path/to/files
+```
+
+Frontend (Vue/Vite) – run the dev server in another terminal:
+
+```fish
+cd frontend
+bun install
+bun run dev
+```
+
+Building the package for release (frontend + Python wheel/sdist):
+
+```fish
+uv build
+```
+
+Vue is used to build files in `cista/wwwroot`, included prebuilt in the Python package. `uv build` runs the project build hooks to bundle the frontend and produce a NodeJS-independent Python package.
-- 
2.49.0


From 9d829e6557c9e5d36d8ab37e4b15b2eeb807650e Mon Sep 17 00:00:00 2001
From: Leo Vasanko
Date: Thu, 14 Aug 2025 11:05:57 -0700
Subject: [PATCH 17/43] ...
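
Spell out what the previews cover: PDF, video and image thumbnails, HDR
support for HEIC and AVIF, the built-in music and video player, and
Gallery mode for viewing previews.
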
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fa87336..d18609d 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Cista takes its name from the ancient *cistae*, metal containers used by Greeks This is a cutting-edge **file and document server** designed for speed, efficiency, and unparalleled ease of use. Experience **lightning-fast browsing**, thanks to the file list maintained directly in your browser and updated from server filesystem events, coupled with our highly optimized code. Fully **keyboard-navigable** and with a responsive layout, Cista flawlessly adapts to your devices, providing a seamless experience wherever you are. Our powerful **instant search** means you're always just a few keystrokes away from finding exactly what you need. Press **1/2/3** to switch ordering, navigate with all four arrow keys (+Shift to select). Or click your way around on **breadcrumbs that remember where you were**. -**Built-in document and media previews** let you quickly view files without downloading them. Document previews support various formats, while media previews include advanced **HDR support** for high dynamic range content, ensuring your videos and images are displayed with stunning visual quality. +**Built-in document and media previews** let you quickly view files without downloading them. Cista shows PDF thumbnails, video and image thumbnails, with **HDR support** for HEIC and AVIF. It also has a player for music and video files. Enable Gallery mode to see previews. The Cista project started as an inevitable remake of [Droppy](https://github.com/droppyjs/droppy) which we used and loved despite its numerous bugs. Cista Storage stands out in handling even the most exotic filenames, ensuring a smooth experience where others falter. -- 2.49.0 From 6d2989472e95811e358dbd3b938e6a90965fd44d Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Thu, 14 Aug 2025 11:07:30 -0700 Subject: [PATCH 18/43] ... --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d18609d..81e8bca 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Cista takes its name from the ancient *cistae*, metal containers used by Greeks This is a cutting-edge **file and document server** designed for speed, efficiency, and unparalleled ease of use. Experience **lightning-fast browsing**, thanks to the file list maintained directly in your browser and updated from server filesystem events, coupled with our highly optimized code. Fully **keyboard-navigable** and with a responsive layout, Cista flawlessly adapts to your devices, providing a seamless experience wherever you are. Our powerful **instant search** means you're always just a few keystrokes away from finding exactly what you need. Press **1/2/3** to switch ordering, navigate with all four arrow keys (+Shift to select). Or click your way around on **breadcrumbs that remember where you were**. -**Built-in document and media previews** let you quickly view files without downloading them. Cista shows PDF thumbnails, video and image thumbnails, with **HDR support** for HEIC and AVIF. It also has a player for music and video files. Enable Gallery mode to see previews. +**Built-in document and media previews** let you quickly view files without downloading them. Cista shows PDF, video and image thumbnails, with **HDR support** for HEIC and AVIF images. It also has a player for music and video files. Enable Gallery mode to see previews. 
The Cista project started as an inevitable remake of [Droppy](https://github.com/droppyjs/droppy) which we used and loved despite its numerous bugs. Cista Storage stands out in handling even the most exotic filenames, ensuring a smooth experience where others falter. -- 2.49.0 From e557bedac1070721e6e8ebd5e10d3653758e121c Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Thu, 14 Aug 2025 11:20:33 -0700 Subject: [PATCH 19/43] Auto select of gallery based on previewable files whenever folder changes, added more file extensions to autodetection. --- frontend/src/repositories/Document.ts | 5 ++++- frontend/src/views/ExplorerView.vue | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/frontend/src/repositories/Document.ts b/frontend/src/repositories/Document.ts index 2b311d8..5c8567a 100644 --- a/frontend/src/repositories/Document.ts +++ b/frontend/src/repositories/Document.ts @@ -44,7 +44,10 @@ export class Doc { if (this.img) return true const ext = this.name.split('.').pop()?.toLowerCase() // Not a comprehensive list, but good enough for now - return ['mp4', 'mkv', 'webm', 'ogg', 'mp3', 'flac', 'aac', 'pdf'].includes(ext || '') + return [ + 'mp4', 'mkv', 'webm', 'ogg', 'mp3', 'flac', 'aac', 'pdf', + 'avif', 'heic', 'heif', 'jpg', 'jpeg', 'png' + ].includes(ext || '') } get previewurl(): string { return this.url.replace(/^\/files/, '/preview') diff --git a/frontend/src/views/ExplorerView.vue b/frontend/src/views/ExplorerView.vue index 6c78bd2..ef93635 100644 --- a/frontend/src/views/ExplorerView.vue +++ b/frontend/src/views/ExplorerView.vue @@ -21,7 +21,7 @@ -- 2.49.0 From e84c665e7cf1f6920f89ed3eff9131e9c7507539 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Thu, 14 Aug 2025 11:44:43 -0700 Subject: [PATCH 23/43] Fix handling of filename extensions when there is no extension and incorrectly trying to display preview images for folders. --- frontend/src/repositories/Document.ts | 15 +++++++++------ frontend/src/utils/index.ts | 14 +++++++------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/frontend/src/repositories/Document.ts b/frontend/src/repositories/Document.ts index b100449..7796bdd 100644 --- a/frontend/src/repositories/Document.ts +++ b/frontend/src/repositories/Document.ts @@ -37,21 +37,24 @@ export class Doc { return this.url.replace(/^\/#/, '') } get img(): boolean { - const ext = this.name.split('.').pop()?.toLowerCase() || '' - return ['jpg', 'jpeg', 'png', 'gif', 'webp', 'avif', 'heic', 'heif', 'svg'].includes(ext) + // Folders cannot be images + if (this.dir) return false + return ['jpg', 'jpeg', 'png', 'gif', 'webp', 'avif', 'heic', 'heif', 'svg'].includes(this.ext) } get previewable(): boolean { + // Folders cannot be previewable + if (this.dir) return false if (this.img) return true - const ext = this.name.split('.').pop()?.toLowerCase() || '' // Not a comprehensive list, but good enough for now - return ['mp4', 'mkv', 'webm', 'ogg', 'mp3', 'flac', 'aac', 'pdf'].includes(ext) + return ['mp4', 'mkv', 'webm', 'ogg', 'mp3', 'flac', 'aac', 'pdf'].includes(this.ext) } get previewurl(): string { return this.url.replace(/^\/files/, '/preview') } get ext(): string { - const ext = this.name.split('.').pop() - return ext ? 
ext.toLowerCase() : '' + const dotIndex = this.name.lastIndexOf('.') + if (dotIndex === -1 || dotIndex === this.name.length - 1) return '' + return this.name.slice(dotIndex + 1).toLowerCase() } } export type errorEvent = { diff --git a/frontend/src/utils/index.ts b/frontend/src/utils/index.ts index 870ffca..4df8b8f 100644 --- a/frontend/src/utils/index.ts +++ b/frontend/src/utils/index.ts @@ -50,12 +50,11 @@ export function formatUnixDate(t: number) { } export function getFileExtension(filename: string) { - const parts = filename.split('.') - if (parts.length > 1) { - return parts[parts.length - 1] - } else { - return '' // No hay extensión + const dotIndex = filename.lastIndexOf('.') + if (dotIndex === -1 || dotIndex === filename.length - 1) { + return '' // No extension } + return filename.slice(dotIndex + 1) } interface FileTypes { [key: string]: string[] @@ -68,8 +67,9 @@ const filetypes: FileTypes = { } export function getFileType(name: string): string { - const ext = name.split('.').pop()?.toLowerCase() - if (!ext || ext.length === name.length) return 'unknown' + const dotIndex = name.lastIndexOf('.') + if (dotIndex === -1 || dotIndex === name.length - 1) return 'unknown' + const ext = name.slice(dotIndex + 1).toLowerCase() return Object.keys(filetypes).find(type => filetypes[type].includes(ext)) || 'unknown' } -- 2.49.0 From b1763a610e868dc0317a6d4a3fa04648183f6ef1 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Thu, 14 Aug 2025 11:48:18 -0700 Subject: [PATCH 24/43] README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 81e8bca..14b61bc 100644 --- a/README.md +++ b/README.md @@ -18,9 +18,9 @@ Experience Cista by visiting [Cista Demo](https://drop.zi.fi) for a test run and ## Getting Started ### Running the Server -We recommend using [UV](https://docs.astral.sh/uv/getting-started/installation/) to install the package and run it easily: +We recommend using [UV](https://docs.astral.sh/uv/getting-started/installation/) to directly run Cista: -Create an account: (or run a public server without authentication) +Create an account: (otherwise the server is public for all) ```fish uvx cista --user yourname --privileged ``` -- 2.49.0 From 2bce21a5abe566127b1373773ed7d88d6af84d57 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Thu, 14 Aug 2025 15:26:44 -0700 Subject: [PATCH 25/43] Use AVIF for all previews (now has good browser support). Support HDR for both images and video previews. 
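WebP previews were limited to 8-bit SDR. AVIF (a single AV1 frame in an
ISOBMFF container) can carry 10-bit data plus the color metadata needed
for HDR. For video thumbnails the color properties (primaries, transfer,
matrix, range) are copied from the source stream so HDR signaling survives
re-encoding. A rough verification sketch, assuming the local FFmpeg build
can demux AVIF (illustration only, helper name is ours, not part of this
change):

```python
import io

import av


def preview_color_info(avif_bytes: bytes):
    # Inspect the color metadata FFmpeg reports for an encoded preview;
    # an HDR10 source should yield bt2020 primaries and smpte2084 (PQ).
    with av.open(io.BytesIO(avif_bytes)) as container:
        cc = container.streams.video[0].codec_context
        return cc.color_primaries, cc.color_trc, cc.colorspace, cc.color_range
```
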
--- cista/preview.py | 83 ++++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 42 deletions(-) diff --git a/cista/preview.py b/cista/preview.py index ad007b1..bc65f2a 100644 --- a/cista/preview.py +++ b/cista/preview.py @@ -6,7 +6,6 @@ import urllib.parse from pathlib import PurePosixPath from urllib.parse import unquote from wsgiref.handlers import format_date_time - import av import av.datasets import fitz # PyMuPDF @@ -36,13 +35,13 @@ async def preview(req, path): etag = config.derived_secret( "preview", rel, stat.st_mtime_ns, quality, maxsize, maxzoom ).hex() - savename = PurePosixPath(path.name).with_suffix(".webp") + savename = PurePosixPath(path.name).with_suffix(".avif") headers = { "etag": etag, "last-modified": format_date_time(stat.st_mtime), "cache-control": "max-age=604800, immutable" + ("" if config.config.public else ", private"), - "content-type": "image/webp", + "content-type": "image/avif", "content-disposition": f"inline; filename*=UTF-8''{urllib.parse.quote(savename.as_posix())}", } if req.headers.if_none_match == etag: @@ -83,7 +82,7 @@ def process_image(path, *, maxsize, quality): logger.error(f"Error rotating preview image: {e}") # Save as webp imgdata = io.BytesIO() - img.save(imgdata, format="webp", quality=quality, method=4) + img.save(imgdata, format="avif", quality=quality, method=4) return imgdata.getvalue() @@ -94,48 +93,48 @@ def process_pdf(path, *, maxsize, maxzoom, quality, page_number=0): zoom = min(maxsize / w, maxsize / h, maxzoom) mat = fitz.Matrix(zoom, zoom) pix = page.get_pixmap(matrix=mat) - return pix.pil_tobytes(format="webp", quality=quality, method=4) + return pix.pil_tobytes(format="avif", quality=quality, method=4) def process_video(path, *, maxsize, quality): - with av.open(str(path)) as container: - stream = container.streams.video[0] - stream.codec_context.skip_frame = "NONKEY" - - # Updated side data access for newer av versions - rot = 0 - try: - # Try newer API first - if hasattr(stream, "side_data") and stream.side_data: - display_matrix = stream.side_data.get("DISPLAYMATRIX") - if display_matrix: - rot = ( - display_matrix.rotation - if hasattr(display_matrix, "rotation") - else 0 - ) - except (AttributeError, KeyError): - # Fallback for older API or missing side data - rot = 0 - - container.seek(container.duration // 8) - try: - frame = next(container.decode(stream)) - img = frame.to_image() - except StopIteration: - # If no frame found, try from beginning - container.seek(0) - frame = next(container.decode(stream)) - img = frame.to_image() - - del stream - - img.thumbnail((maxsize, maxsize)) + frame = None imgdata = io.BytesIO() - if rot and rot != 0: - img = img.rotate(-rot, expand=True) # Negative rotation for correct orientation - img.save(imgdata, format="webp", quality=quality, method=4) - del img + with ( + av.open(str(path)) as container, + av.open(imgdata, "w", format="avif") as ocontainer, + ): + istream = container.streams.video[0] + istream.codec_context.skip_frame = "NONKEY" + container.seek((container.duration or 0) // 8) + for frame in container.decode(istream): + if frame.dts is not None: + break + else: + raise RuntimeError("No frames found in video") + + # Resize frame to thumbnail size + if frame.width > maxsize or frame.height > maxsize: + scale_factor = min(maxsize / frame.width, maxsize / frame.height) + new_width = int(frame.width * scale_factor) + new_height = int(frame.height * scale_factor) + frame = frame.reformat(width=new_width, height=new_height) + + ostream = 
ocontainer.add_stream("av1", options={"quality": str(quality)}) + assert isinstance(ostream, av.VideoStream) + ostream.width = frame.width + ostream.height = frame.height + icc = istream.codec_context + occ = ostream.codec_context + + # Copy HDR metadata from input video stream + occ.color_primaries = icc.color_primaries + occ.color_trc = icc.color_trc + occ.colorspace = icc.colorspace + occ.color_range = icc.color_range + + ocontainer.mux(ostream.encode(frame)) + ocontainer.mux(ostream.encode(None)) # Flush the stream + ret = imgdata.getvalue() del imgdata gc.collect() -- 2.49.0 From 16c1dcd7f959a15e1f3f900db519b51e32172b36 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Thu, 14 Aug 2025 15:28:03 -0700 Subject: [PATCH 26/43] README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 14b61bc..ad51f4f 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Cista takes its name from the ancient *cistae*, metal containers used by Greeks This is a cutting-edge **file and document server** designed for speed, efficiency, and unparalleled ease of use. Experience **lightning-fast browsing**, thanks to the file list maintained directly in your browser and updated from server filesystem events, coupled with our highly optimized code. Fully **keyboard-navigable** and with a responsive layout, Cista flawlessly adapts to your devices, providing a seamless experience wherever you are. Our powerful **instant search** means you're always just a few keystrokes away from finding exactly what you need. Press **1/2/3** to switch ordering, navigate with all four arrow keys (+Shift to select). Or click your way around on **breadcrumbs that remember where you were**. -**Built-in document and media previews** let you quickly view files without downloading them. Cista shows PDF, video and image thumbnails, with **HDR support** for HEIC and AVIF images. It also has a player for music and video files. Enable Gallery mode to see previews. +**Built-in document and media previews** let you quickly view files without downloading them. Cista shows PDF and other documents, video and image thumbnails, with **HDR10 support** video previews and image formats, including HEIC and AVIF. It also has a player for music and video files. The Cista project started as an inevitable remake of [Droppy](https://github.com/droppyjs/droppy) which we used and loved despite its numerous bugs. Cista Storage stands out in handling even the most exotic filenames, ensuring a smooth experience where others falter. -- 2.49.0 From a2664790276ae7a2533a538d0c00d34857d935e9 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Thu, 14 Aug 2025 15:28:27 -0700 Subject: [PATCH 27/43] Log traceback on 500 errors. --- cista/util/apphelpers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cista/util/apphelpers.py b/cista/util/apphelpers.py index 52b07a6..fd6b63b 100644 --- a/cista/util/apphelpers.py +++ b/cista/util/apphelpers.py @@ -29,6 +29,8 @@ async def handle_sanic_exception(request, e): if not message or not request.app.debug and code == 500: message = "Internal Server Error" message = f"⚠️ {message}" if code < 500 else f"🛑 {message}" + if code == 500: + logger.exception(e) # Non-browsers get JSON errors if "text/html" not in request.headers.accept: return jres( -- 2.49.0 From 7d55a43119c2898dfd9b4f8ff87b4298977dcca9 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Thu, 14 Aug 2025 15:28:40 -0700 Subject: [PATCH 28/43] Use ruff default options. 
--- pyproject.toml | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 52132f3..05bcb46 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,32 +71,6 @@ testpaths = [ "tests", ] -[tool.ruff] -select = ["ALL"] -ignore = [ - "A0", - "ARG001", - "ANN", - "B018", - "BLE001", - "C901", - "COM812", # conflicts with ruff format - "D", - "E501", - "EM1", - "FIX002", - "ISC001", # conflicts with ruff format - "PGH003", - "PLR0912", - "PLR2004", - "PLW0603", - "S101", - "SLF001", - "T201", - "TD0", - "TRY", -] - [tool.ruff.isort] known-first-party = ["cista"] -- 2.49.0 From 0de8b99c025e3ac690b064eddd95bd018f3f1d3f Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Thu, 14 Aug 2025 15:41:15 -0700 Subject: [PATCH 29/43] Try to leak less memory. --- cista/preview.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cista/preview.py b/cista/preview.py index bc65f2a..d8de724 100644 --- a/cista/preview.py +++ b/cista/preview.py @@ -99,14 +99,15 @@ def process_pdf(path, *, maxsize, maxzoom, quality, page_number=0): def process_video(path, *, maxsize, quality): frame = None imgdata = io.BytesIO() + istream = ostream = icontainer = ocontainer = icc = occ = frame = None with ( - av.open(str(path)) as container, + av.open(str(path)) as icontainer, av.open(imgdata, "w", format="avif") as ocontainer, ): - istream = container.streams.video[0] + istream = icontainer.streams.video[0] istream.codec_context.skip_frame = "NONKEY" - container.seek((container.duration or 0) // 8) - for frame in container.decode(istream): + icontainer.seek((icontainer.duration or 0) // 8) + for frame in icontainer.decode(istream): if frame.dts is not None: break else: @@ -136,6 +137,6 @@ def process_video(path, *, maxsize, quality): ocontainer.mux(ostream.encode(None)) # Flush the stream ret = imgdata.getvalue() - del imgdata + del imgdata, istream, ostream, icc, occ, frame gc.collect() return ret -- 2.49.0 From c47ff317c32943b53b5a69310b679ce09aeab0ba Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Thu, 14 Aug 2025 15:41:45 -0700 Subject: [PATCH 30/43] Cleanup --- cista/preview.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cista/preview.py b/cista/preview.py index d8de724..d8b4f2b 100644 --- a/cista/preview.py +++ b/cista/preview.py @@ -99,7 +99,7 @@ def process_pdf(path, *, maxsize, maxzoom, quality, page_number=0): def process_video(path, *, maxsize, quality): frame = None imgdata = io.BytesIO() - istream = ostream = icontainer = ocontainer = icc = occ = frame = None + istream = ostream = icc = occ = frame = None with ( av.open(str(path)) as icontainer, av.open(imgdata, "w", format="avif") as ocontainer, -- 2.49.0 From 44428eec71ece005e4eec7491d2761fce6aca184 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Fri, 15 Aug 2025 08:25:29 -0600 Subject: [PATCH 31/43] Fix video preview rotation and quality. 
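Rotation metadata from the source is now applied by rotating the decoded
YUV420 planes with numpy (each plane turned in 90° steps, then repacked),
so the preview frame itself is upright. Quality maps from a PIL-style
percentage to a libaom CRF; the mapping, worked out with the same formula
as in the code below (function name is illustrative only):

```python
def quality_to_crf(quality: int) -> int:
    # libaom CRF: 0 is best, 63 is worst; squaring keeps mid-range
    # percentages close to PIL's perceived quality scale.
    return int(63 * (1 - quality / 100) ** 2)


assert quality_to_crf(100) == 0
assert quality_to_crf(60) == 10  # the default after the later bump to q=60
assert quality_to_crf(40) == 22  # the default at this point in the series
```
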
--- cista/preview.py | 48 ++++++++++++++++++++++++++++++++++++++++-------- pyproject.toml | 9 ++++----- 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/cista/preview.py b/cista/preview.py index d8b4f2b..f902dbd 100644 --- a/cista/preview.py +++ b/cista/preview.py @@ -6,9 +6,11 @@ import urllib.parse from pathlib import PurePosixPath from urllib.parse import unquote from wsgiref.handlers import format_date_time + import av -import av.datasets import fitz # PyMuPDF +import numpy as np +import pillow_heif from PIL import Image from sanic import Blueprint, empty, raw from sanic.exceptions import NotFound @@ -16,7 +18,6 @@ from sanic.log import logger from cista import config from cista.util.filename import sanitize -import pillow_heif pillow_heif.register_heif_opener() @@ -60,7 +61,8 @@ async def preview(req, path): def dispatch(path, quality, maxsize, maxzoom): if path.suffix.lower() in (".pdf", ".xps", ".epub", ".mobi"): return process_pdf(path, quality=quality, maxsize=maxsize, maxzoom=maxzoom) - if mimetypes.guess_type(path.name)[0].startswith("video/"): + type, _ = mimetypes.guess_type(path.name) + if type and type.startswith("video/"): return process_video(path, quality=quality, maxsize=maxsize) return process_image(path, quality=quality, maxsize=maxsize) @@ -72,17 +74,16 @@ def process_image(path, *, maxsize, quality): # Fix rotation based on EXIF data try: rotate_values = {3: 180, 6: 270, 8: 90} - orientation = img._getexif().get(274) + orientation = img.getexif().get(274) if orientation in rotate_values: logger.debug(f"Rotating preview {path} by {rotate_values[orientation]}") img = img.rotate(rotate_values[orientation], expand=True) - except AttributeError: - ... except Exception as e: logger.error(f"Error rotating preview image: {e}") # Save as webp imgdata = io.BytesIO() - img.save(imgdata, format="avif", quality=quality, method=4) + print("Image quality", quality) + img.save(imgdata, format="avif", quality=quality) return imgdata.getvalue() @@ -120,7 +121,38 @@ def process_video(path, *, maxsize, quality): new_height = int(frame.height * scale_factor) frame = frame.reformat(width=new_width, height=new_height) - ostream = ocontainer.add_stream("av1", options={"quality": str(quality)}) + # Simple rotation detection and logging + if frame.rotation: + try: + fplanes = frame.to_ndarray() + # Split into Y, U, V planes of proper dimensions + planes = [ + fplanes[: frame.height], + fplanes[frame.height : frame.height + frame.height // 4].reshape( + frame.height // 2, frame.width // 2 + ), + fplanes[frame.height + frame.height // 4 :].reshape( + frame.height // 2, frame.width // 2 + ), + ] + # Rotate + planes = [np.rot90(p, frame.rotation // 90) for p in planes] + # Restore PyAV format + planes = np.hstack([p.flat for p in planes]).reshape( + -1, planes[0].shape[1] + ) + frame = av.VideoFrame.from_ndarray(planes, format=frame.format.name) + del planes, fplanes + except Exception as e: + if "not yet supported" in str(e): + logger.warning( + f"Not rotating {path.name} preview image by {frame.rotation}°:\n PyAV: {e}" + ) + else: + logger.exception(f"Error rotating video frame: {e}") + + crf = str(int(63 * (1 - quality / 100) ** 2)) # Closely matching PIL quality-% + ostream = ocontainer.add_stream("av1", options={"crf": crf}) assert isinstance(ostream, av.VideoStream) ostream.width = frame.width ostream.height = frame.height diff --git a/pyproject.toml b/pyproject.toml index 05bcb46..be704aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ dependencies = [ 
"inotify", "msgspec", "natsort", + "numpy>=2.3.2", "pathvalidate", "pillow", "pillow-heif>=1.1.0", @@ -71,11 +72,9 @@ testpaths = [ "tests", ] -[tool.ruff.isort] -known-first-party = ["cista"] - -[tool.ruff.per-file-ignores] -"tests/*" = ["S", "ANN", "D", "INP"] +[tool.ruff.lint] +isort.known-first-party = ["cista"] +per-file-ignores."tests/*" = ["S", "ANN", "D", "INP"] [dependency-groups] dev = [ -- 2.49.0 From 65c6ed6a174f2c43030e33270d03845c0ce73064 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Fri, 15 Aug 2025 08:25:38 -0600 Subject: [PATCH 32/43] Linter --- cista/protocol.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cista/protocol.py b/cista/protocol.py index 8a0c975..c47e729 100644 --- a/cista/protocol.py +++ b/cista/protocol.py @@ -127,8 +127,7 @@ class FileEntry(msgspec.Struct, array_like=True, frozen=True): return f"{self.name} ({self.size}, {self.mtime})" -class Update(msgspec.Struct, array_like=True): - ... +class Update(msgspec.Struct, array_like=True): ... class UpdKeep(Update, tag="k"): -- 2.49.0 From 35d20dedb1173faab7c61611206ae162b090bf47 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Fri, 15 Aug 2025 09:42:33 -0600 Subject: [PATCH 33/43] Faster video preview processing, added profiling debug logging. --- cista/preview.py | 88 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 79 insertions(+), 9 deletions(-) diff --git a/cista/preview.py b/cista/preview.py index f902dbd..1ca5a20 100644 --- a/cista/preview.py +++ b/cista/preview.py @@ -4,6 +4,7 @@ import io import mimetypes import urllib.parse from pathlib import PurePosixPath +from time import perf_counter from urllib.parse import unquote from wsgiref.handlers import format_date_time @@ -68,10 +69,16 @@ def dispatch(path, quality, maxsize, maxzoom): def process_image(path, *, maxsize, quality): + t_load_start = perf_counter() img = Image.open(path) - w, h = img.size - img.thumbnail((min(w, maxsize), min(h, maxsize))) - # Fix rotation based on EXIF data + # Force decode to include I/O in load timing + img.load() + t_load_end = perf_counter() + + # Resize and orientation fix (processing) + orig_w, orig_h = img.size + t_proc_start = perf_counter() + img.thumbnail((min(orig_w, maxsize), min(orig_h, maxsize))) try: rotate_values = {3: 180, 6: 270, 8: 90} orientation = img.getexif().get(274) @@ -80,27 +87,72 @@ def process_image(path, *, maxsize, quality): img = img.rotate(rotate_values[orientation], expand=True) except Exception as e: logger.error(f"Error rotating preview image: {e}") - # Save as webp + t_proc_end = perf_counter() + + # Save as AVIF imgdata = io.BytesIO() - print("Image quality", quality) + t_save_start = perf_counter() img.save(imgdata, format="avif", quality=quality) - return imgdata.getvalue() + t_save_end = perf_counter() + + ret = imgdata.getvalue() + + load_ms = (t_load_end - t_load_start) * 1000 + proc_ms = (t_proc_end - t_proc_start) * 1000 + save_ms = (t_save_end - t_save_start) * 1000 + logger.debug( + "Preview image %s: load=%.1fms process=%.1fms save=%.1fms out=%.1fKB %dx%d -> %dx%d q=%d", + path.name, + load_ms, + proc_ms, + save_ms, + len(ret) / 1024, + orig_w, + orig_h, + getattr(img, "width", 0), + getattr(img, "height", 0), + quality, + ) + + return ret def process_pdf(path, *, maxsize, maxzoom, quality, page_number=0): + t_load_start = perf_counter() pdf = fitz.open(path) page = pdf.load_page(page_number) w, h = page.rect[2:4] zoom = min(maxsize / w, maxsize / h, maxzoom) mat = fitz.Matrix(zoom, zoom) - pix = page.get_pixmap(matrix=mat) - return 
pix.pil_tobytes(format="avif", quality=quality, method=4) + pix = page.get_pixmap(matrix=mat) # type: ignore[attr-defined] + t_load_end = perf_counter() + + t_save_start = perf_counter() + ret = pix.pil_tobytes(format="avif", quality=quality, method=4) + t_save_end = perf_counter() + + logger.debug( + "Preview pdf %s: load+render=%.1fms save=%.1fms out=%.1fKB page=%d zoom=%.2f", + path.name, + (t_load_end - t_load_start) * 1000, + (t_save_end - t_save_start) * 1000, + len(ret) / 1024, + page_number, + zoom, + ) + + return ret def process_video(path, *, maxsize, quality): frame = None imgdata = io.BytesIO() istream = ostream = icc = occ = frame = None + t_load_start = perf_counter() + # Initialize to avoid "possibly unbound" in static analysis when exceptions occur + t_load_end = t_load_start + t_save_start = t_load_start + t_save_end = t_load_start with ( av.open(str(path)) as icontainer, av.open(imgdata, "w", format="avif") as ocontainer, @@ -150,9 +202,13 @@ def process_video(path, *, maxsize, quality): ) else: logger.exception(f"Error rotating video frame: {e}") + t_load_end = perf_counter() + t_save_start = perf_counter() crf = str(int(63 * (1 - quality / 100) ** 2)) # Closely matching PIL quality-% - ostream = ocontainer.add_stream("av1", options={"crf": crf}) + ostream = ocontainer.add_stream( + "av1", options={"crf": crf, "usage": "realtime"} + ) assert isinstance(ostream, av.VideoStream) ostream.width = frame.width ostream.height = frame.height @@ -167,8 +223,22 @@ def process_video(path, *, maxsize, quality): ocontainer.mux(ostream.encode(frame)) ocontainer.mux(ostream.encode(None)) # Flush the stream + t_save_end = perf_counter() + # Capture frame dimensions before cleanup + fw = getattr(frame, "width", 0) if frame else 0 + fh = getattr(frame, "height", 0) if frame else 0 ret = imgdata.getvalue() + logger.debug( + "Preview video %s: load+decode=%.1fms save=%.1fms out=%.1fKB dims=%dx%d q=%d", + path.name, + (t_load_end - t_load_start) * 1000, + (t_save_end - t_save_start) * 1000, + len(ret) / 1024, + fw, + fh, + quality, + ) del imgdata, istream, ostream, icc, occ, frame gc.collect() return ret -- 2.49.0 From 80bb84aaed5a00a72b37cc1f88022bae8c960152 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Fri, 15 Aug 2025 09:51:40 -0600 Subject: [PATCH 34/43] Need info loglevel. 
--- cista/preview.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cista/preview.py b/cista/preview.py index 1ca5a20..633aef0 100644 --- a/cista/preview.py +++ b/cista/preview.py @@ -100,7 +100,7 @@ def process_image(path, *, maxsize, quality): load_ms = (t_load_end - t_load_start) * 1000 proc_ms = (t_proc_end - t_proc_start) * 1000 save_ms = (t_save_end - t_save_start) * 1000 - logger.debug( + logger.info( "Preview image %s: load=%.1fms process=%.1fms save=%.1fms out=%.1fKB %dx%d -> %dx%d q=%d", path.name, load_ms, @@ -131,7 +131,7 @@ def process_pdf(path, *, maxsize, maxzoom, quality, page_number=0): ret = pix.pil_tobytes(format="avif", quality=quality, method=4) t_save_end = perf_counter() - logger.debug( + logger.info( "Preview pdf %s: load+render=%.1fms save=%.1fms out=%.1fKB page=%d zoom=%.2f", path.name, (t_load_end - t_load_start) * 1000, @@ -229,7 +229,7 @@ def process_video(path, *, maxsize, quality): fw = getattr(frame, "width", 0) if frame else 0 fh = getattr(frame, "height", 0) if frame else 0 ret = imgdata.getvalue() - logger.debug( + logger.info( "Preview video %s: load+decode=%.1fms save=%.1fms out=%.1fKB dims=%dx%d q=%d", path.name, (t_load_end - t_load_start) * 1000, -- 2.49.0 From 112b9b4da540922c34b6ecd96747983f8b545050 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Fri, 15 Aug 2025 09:57:48 -0600 Subject: [PATCH 35/43] Dieharder --- cista/serve.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cista/serve.py b/cista/serve.py index 280ba31..fd7fc9b 100644 --- a/cista/serve.py +++ b/cista/serve.py @@ -1,5 +1,7 @@ import os import re +import signal +import sys from pathlib import Path from sanic import Sanic @@ -11,6 +13,14 @@ def run(*, dev=False): """Run Sanic main process that spawns worker processes to serve HTTP requests.""" from .app import app + # Set up immediate exit on Ctrl+C for faster termination + def signal_handler(signum, frame): + print("\nReceived interrupt signal, exiting immediately...") + os._exit(0) + + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + url, opts = parse_listen(config.config.listen) # Silence Sanic's warning about running in production rather than debug os.environ["SANIC_IGNORE_PRODUCTION_WARNING"] = "1" -- 2.49.0 From fcb95e9154f6c92651149598fdbf98faea076a27 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Fri, 15 Aug 2025 10:11:20 -0600 Subject: [PATCH 36/43] Faster image previews --- cista/preview.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cista/preview.py b/cista/preview.py index 633aef0..0520a4b 100644 --- a/cista/preview.py +++ b/cista/preview.py @@ -92,7 +92,7 @@ def process_image(path, *, maxsize, quality): # Save as AVIF imgdata = io.BytesIO() t_save_start = perf_counter() - img.save(imgdata, format="avif", quality=quality) + img.save(imgdata, format="avif", quality=quality, speed=10) t_save_end = perf_counter() ret = imgdata.getvalue() -- 2.49.0 From 97353efffbaa8f3a6683898f227e3a8eec1e3247 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Fri, 15 Aug 2025 10:13:12 -0600 Subject: [PATCH 37/43] Default higher quality. 
--- cista/preview.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cista/preview.py b/cista/preview.py index 0520a4b..2878d25 100644 --- a/cista/preview.py +++ b/cista/preview.py @@ -30,7 +30,7 @@ async def preview(req, path): """Preview a file""" maxsize = int(req.args.get("px", 1024)) maxzoom = float(req.args.get("zoom", 2.0)) - quality = int(req.args.get("q", 40)) + quality = int(req.args.get("q", 60)) rel = PurePosixPath(sanitize(unquote(path))) path = config.config.path / rel stat = path.lstat() -- 2.49.0 From 21352daf112550c4bf5c6b4a0b653db9f6199953 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Fri, 15 Aug 2025 10:17:48 -0600 Subject: [PATCH 38/43] Limit number of threads. --- cista/preview.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cista/preview.py b/cista/preview.py index 2878d25..fcaecb7 100644 --- a/cista/preview.py +++ b/cista/preview.py @@ -92,7 +92,7 @@ def process_image(path, *, maxsize, quality): # Save as AVIF imgdata = io.BytesIO() t_save_start = perf_counter() - img.save(imgdata, format="avif", quality=quality, speed=10) + img.save(imgdata, format="avif", quality=quality, speed=10, max_threads=1) t_save_end = perf_counter() ret = imgdata.getvalue() -- 2.49.0 From 5084d1e0ed2182a6a456784fff6a70d6a150ff5e Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Fri, 15 Aug 2025 10:19:29 -0600 Subject: [PATCH 39/43] Also for PDF --- cista/preview.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cista/preview.py b/cista/preview.py index fcaecb7..906c64a 100644 --- a/cista/preview.py +++ b/cista/preview.py @@ -128,7 +128,7 @@ def process_pdf(path, *, maxsize, maxzoom, quality, page_number=0): t_load_end = perf_counter() t_save_start = perf_counter() - ret = pix.pil_tobytes(format="avif", quality=quality, method=4) + ret = pix.pil_tobytes(format="avif", quality=quality, speed=10, max_threads=1) t_save_end = perf_counter() logger.info( -- 2.49.0 From fff23617ba15ab22dec89937c48d131ff6654906 Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Fri, 15 Aug 2025 10:34:30 -0600 Subject: [PATCH 40/43] Gotta go faster --- cista/preview.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cista/preview.py b/cista/preview.py index 906c64a..2c9a2fb 100644 --- a/cista/preview.py +++ b/cista/preview.py @@ -207,7 +207,13 @@ def process_video(path, *, maxsize, quality): t_save_start = perf_counter() crf = str(int(63 * (1 - quality / 100) ** 2)) # Closely matching PIL quality-% ostream = ocontainer.add_stream( - "av1", options={"crf": crf, "usage": "realtime"} + "av1", + options={ + "crf": crf, + "usage": "realtime", + "cpu-used": "8", + "threads": "1", + }, ) assert isinstance(ostream, av.VideoStream) ostream.width = frame.width -- 2.49.0 From 67389466368f00f56c4b5266f7a59fa757a12ebd Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Fri, 15 Aug 2025 10:38:08 -0600 Subject: [PATCH 41/43] Remove image rotation (no longer needed with modern PIL?) 
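The block removed below mapped EXIF orientation tag 274 values {3: 180, 6: 270,
8: 90} to rotations by hand. Should sideways previews reappear, the usual
one-call fix would be Pillow's ImageOps.exif_transpose rather than reinstating
that mapping; a sketch with a hypothetical thumb() helper, not code from this
repository:

    from PIL import Image, ImageOps

    def thumb(path, maxsize: int) -> Image.Image:
        img = Image.open(path)
        # Applies the EXIF orientation tag and strips it from the result,
        # covering all orientation values, not just 3/6/8.
        img = ImageOps.exif_transpose(img)
        img.thumbnail((maxsize, maxsize))
        return img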
--- cista/preview.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/cista/preview.py b/cista/preview.py index 2c9a2fb..5d09332 100644 --- a/cista/preview.py +++ b/cista/preview.py @@ -74,21 +74,11 @@ def process_image(path, *, maxsize, quality): # Force decode to include I/O in load timing img.load() t_load_end = perf_counter() - - # Resize and orientation fix (processing) + # Resize orig_w, orig_h = img.size t_proc_start = perf_counter() img.thumbnail((min(orig_w, maxsize), min(orig_h, maxsize))) - try: - rotate_values = {3: 180, 6: 270, 8: 90} - orientation = img.getexif().get(274) - if orientation in rotate_values: - logger.debug(f"Rotating preview {path} by {rotate_values[orientation]}") - img = img.rotate(rotate_values[orientation], expand=True) - except Exception as e: - logger.error(f"Error rotating preview image: {e}") t_proc_end = perf_counter() - # Save as AVIF imgdata = io.BytesIO() t_save_start = perf_counter() -- 2.49.0 From 1d41a98756904da9a7954c5fbae8dca3c4a1d8db Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Fri, 15 Aug 2025 10:45:25 -0600 Subject: [PATCH 42/43] Cleanup logging, timing as debug. --- cista/preview.py | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/cista/preview.py b/cista/preview.py index 5d09332..cb5dda3 100644 --- a/cista/preview.py +++ b/cista/preview.py @@ -90,18 +90,13 @@ def process_image(path, *, maxsize, quality): load_ms = (t_load_end - t_load_start) * 1000 proc_ms = (t_proc_end - t_proc_start) * 1000 save_ms = (t_save_end - t_save_start) * 1000 - logger.info( - "Preview image %s: load=%.1fms process=%.1fms save=%.1fms out=%.1fKB %dx%d -> %dx%d q=%d", + logger.debug( + "Preview image %s: load=%.1fms process=%.1fms save=%.1fms out=%.1fKB", path.name, load_ms, proc_ms, save_ms, len(ret) / 1024, - orig_w, - orig_h, - getattr(img, "width", 0), - getattr(img, "height", 0), - quality, ) return ret @@ -121,16 +116,12 @@ def process_pdf(path, *, maxsize, maxzoom, quality, page_number=0): ret = pix.pil_tobytes(format="avif", quality=quality, speed=10, max_threads=1) t_save_end = perf_counter() - logger.info( - "Preview pdf %s: load+render=%.1fms save=%.1fms out=%.1fKB page=%d zoom=%.2f", + logger.debug( + "Preview pdf %s: load+render=%.1fms save=%.1fms", path.name, (t_load_end - t_load_start) * 1000, (t_save_end - t_save_start) * 1000, - len(ret) / 1024, - page_number, - zoom, ) - return ret @@ -222,18 +213,12 @@ def process_video(path, *, maxsize, quality): t_save_end = perf_counter() # Capture frame dimensions before cleanup - fw = getattr(frame, "width", 0) if frame else 0 - fh = getattr(frame, "height", 0) if frame else 0 ret = imgdata.getvalue() - logger.info( - "Preview video %s: load+decode=%.1fms save=%.1fms out=%.1fKB dims=%dx%d q=%d", + logger.debug( + "Preview video %s: load+decode=%.1fms save=%.1fms", path.name, (t_load_end - t_load_start) * 1000, (t_save_end - t_save_start) * 1000, - len(ret) / 1024, - fw, - fh, - quality, ) del imgdata, istream, ostream, icc, occ, frame gc.collect() -- 2.49.0 From 1ae22b215972c14ed74ee3464f16f039687c54ad Mon Sep 17 00:00:00 2001 From: Leo Vasanko Date: Fri, 15 Aug 2025 10:58:01 -0600 Subject: [PATCH 43/43] Remove test for directory rename because it didn't trigger the bug. 
--- tests/test_watching_directory_rename.py | 1185 ----------------------- 1 file changed, 1185 deletions(-) delete mode 100644 tests/test_watching_directory_rename.py diff --git a/tests/test_watching_directory_rename.py b/tests/test_watching_directory_rename.py deleted file mode 100644 index d297502..0000000 --- a/tests/test_watching_directory_rename.py +++ /dev/null @@ -1,1185 +0,0 @@ -import asyncio -import queue -import shutil -import signal -import tempfile -import threading -import time -from pathlib import Path, PurePosixPath -from unittest.mock import MagicMock, patch - -import msgspec -import pytest - -from cista import config, watching -from cista.protocol import UpdateMessage - - -@pytest.fixture -def temp_dir(): - """Create a temporary directory for testing.""" - with tempfile.TemporaryDirectory() as tmpdirname: - yield Path(tmpdirname) - - -@pytest.fixture -def setup_watcher(temp_dir): - """Setup the watcher with a temporary directory.""" - # Store original values - original_rootpath = watching.rootpath - original_state = watching.state - original_quit = watching.quit - - # Setup test environment - config.config = config.Config(path=temp_dir, listen=":0") - watching.rootpath = temp_dir - watching.state = watching.State() - watching.quit = threading.Event() - - yield temp_dir - - # Cleanup - watching.quit.set() - watching.rootpath = original_rootpath - watching.state = original_state - watching.quit = original_quit - - -def create_test_structure(base_path: Path): - """Create a test directory structure with subdirectories and files.""" - # Create main subdirectory with files - subdir = base_path / "test_subdir" - subdir.mkdir() - - # Add some files to the subdirectory - (subdir / "file1.txt").write_text("content1") - (subdir / "file2.txt").write_text("content2") - - # Create a nested subdirectory - nested = subdir / "nested" - nested.mkdir() - (nested / "nested_file.txt").write_text("nested content") - - # Create another top-level directory for reference - other_dir = base_path / "other_dir" - other_dir.mkdir() - (other_dir / "other_file.txt").write_text("other content") - - return subdir, nested, other_dir - - -def test_nested_directory_rename_causes_hang(setup_watcher): - """Test renaming deeply nested directories - this is where the hang typically occurs. - - The bug manifests when renaming directories that are nested within other directories, - not just top-level directories. - """ - temp_dir = setup_watcher - - # Create a complex nested structure that mirrors real-world usage - # parent/child/grandchild/target_dir/files... 
- parent = temp_dir / "parent_folder" - parent.mkdir() - - child = parent / "child_folder" - child.mkdir() - - grandchild = child / "grandchild_folder" - grandchild.mkdir() - - # This is the directory we'll rename - it's deeply nested - target_dir = grandchild / "target_to_rename" - target_dir.mkdir() - - # Add files to make the directory scan more complex - for i in range(20): - (target_dir / f"file_{i:03d}.txt").write_text(f"content_{i}") - - # Add another nested level inside target - deep_nested = target_dir / "even_deeper" - deep_nested.mkdir() - for i in range(10): - (deep_nested / f"deep_file_{i}.txt").write_text(f"deep_content_{i}") - - # Initialize watcher state - initial_root = watching.walk(PurePosixPath()) - watching.state.root = initial_root - - # Verify the nested structure exists - target_path = PurePosixPath( - "parent_folder/child_folder/grandchild_folder/target_to_rename" - ) - initial_begin, initial_entries = watching.treeget(initial_root, target_path) - assert initial_begin is not None, ( - "Target directory should be found in initial state" - ) - assert len(initial_entries) > 1, "Target directory should contain files" - - # Now rename the deeply nested directory - new_target = grandchild / "renamed_target" - target_dir.rename(new_target) - - loop = asyncio.new_event_loop() - working_state = watching.state.root[:] - - # This is where the hang likely occurs - updating a deeply nested path - old_nested_path = PurePosixPath( - "parent_folder/child_folder/grandchild_folder/target_to_rename" - ) - new_nested_path = PurePosixPath( - "parent_folder/child_folder/grandchild_folder/renamed_target" - ) - - start_time = time.time() - - # Update the old path (should remove it) - watching.update_path(working_state, old_nested_path, loop) - - # Update the new path (should add it) - watching.update_path(working_state, new_nested_path, loop) - - end_time = time.time() - - # Check for hang - nested operations should still be fast - duration = end_time - start_time - assert duration < 3.0, ( - f"Nested directory rename took too long: {duration}s - possible hang" - ) - - # Verify the old nested path is gone - old_begin, old_entries = watching.treeget(working_state, old_nested_path) - assert old_begin is None, "Old nested directory should be removed from tree" - - # Verify the new nested path exists - new_begin, new_entries = watching.treeget(working_state, new_nested_path) - assert new_begin is not None, "New nested directory should exist in tree" - assert len(new_entries) > 1, "New nested directory should contain all the files" - - -def test_move_directory_across_nested_parents(setup_watcher): - """Test moving a directory from one nested location to another - high hang risk scenario.""" - temp_dir = setup_watcher - - # Create source nested structure - source_parent = temp_dir / "source_area" - source_parent.mkdir() - source_child = source_parent / "source_child" - source_child.mkdir() - - # Create the directory to move - movable_dir = source_child / "movable_directory" - movable_dir.mkdir() - - # Add content to make it more complex - for i in range(15): - (movable_dir / f"file_{i}.txt").write_text(f"movable_content_{i}") - - # Create a subdirectory within the movable directory - sub_movable = movable_dir / "sub_directory" - sub_movable.mkdir() - for i in range(5): - (sub_movable / f"sub_file_{i}.txt").write_text(f"sub_content_{i}") - - # Create destination nested structure - dest_parent = temp_dir / "destination_area" - dest_parent.mkdir() - dest_child = dest_parent / "dest_child" - 
dest_child.mkdir() - dest_grandchild = dest_child / "dest_grandchild" - dest_grandchild.mkdir() - - # Initialize state - watching.state.root = watching.walk(PurePosixPath()) - working_state = watching.state.root[:] - - # Move the directory to the deeply nested destination - dest_movable = dest_grandchild / "moved_directory" - movable_dir.rename(dest_movable) - - loop = asyncio.new_event_loop() - - # These paths represent the complex nested move operation - old_path = PurePosixPath("source_area/source_child/movable_directory") - new_path = PurePosixPath( - "destination_area/dest_child/dest_grandchild/moved_directory" - ) - - start_time = time.time() - - # This sequence is where hangs typically occur with cross-directory moves - try: - # Remove from old location - watching.update_path(working_state, old_path, loop) - - # Add to new location - watching.update_path(working_state, new_path, loop) - - except Exception as e: - pytest.fail(f"Nested directory move failed: {e}") - - end_time = time.time() - duration = end_time - start_time - - # Should complete without hanging - assert duration < 5.0, f"Cross-nested move took too long: {duration}s" - - # Verify old location is empty - old_begin, old_entries = watching.treeget(working_state, old_path) - assert old_begin is None, "Directory should be removed from old nested location" - - # Verify new location has the directory - new_begin, new_entries = watching.treeget(working_state, new_path) - assert new_begin is not None, "Directory should exist in new nested location" - assert len(new_entries) > 1, "Moved directory should retain all its contents" - - -def test_rapid_nested_directory_operations_cause_corruption(setup_watcher): - """Test rapid operations on nested directories that can cause state corruption.""" - temp_dir = setup_watcher - - # Create multiple nested structures - structures = [] - for i in range(3): - level1 = temp_dir / f"level1_{i}" - level1.mkdir() - level2 = level1 / f"level2_{i}" - level2.mkdir() - level3 = level2 / f"level3_{i}" - level3.mkdir() - target = level3 / f"target_{i}" - target.mkdir() - - # Add files - for j in range(10): - (target / f"file_{j}.txt").write_text(f"content_{i}_{j}") - - structures.append((level1, level2, level3, target)) - - # Initialize state - watching.state.root = watching.walk(PurePosixPath()) - working_state = watching.state.root[:] - - loop = asyncio.new_event_loop() - - # Perform rapid nested operations that can cause race conditions - operations = [] - - for i, (level1, level2, level3, target) in enumerate(structures): - # Rename the deeply nested target - new_target = level3 / f"renamed_target_{i}" - target.rename(new_target) - - old_path = PurePosixPath(f"level1_{i}/level2_{i}/level3_{i}/target_{i}") - new_path = PurePosixPath(f"level1_{i}/level2_{i}/level3_{i}/renamed_target_{i}") - operations.append((old_path, new_path)) - - start_time = time.time() - - # Process all operations rapidly - this can cause state corruption/hangs - for old_path, new_path in operations: - try: - watching.update_path(working_state, old_path, loop) - watching.update_path(working_state, new_path, loop) - except Exception as e: - pytest.fail( - f"Rapid nested operations failed for {old_path} -> {new_path}: {e}" - ) - - end_time = time.time() - duration = end_time - start_time - - # Should complete without hanging even with rapid operations - assert duration < 10.0, f"Rapid nested operations took too long: {duration}s" - - # Verify final state consistency - for i, (old_path, new_path) in enumerate(operations): - # 
Old paths should be gone - old_begin, old_entries = watching.treeget(working_state, old_path) - assert old_begin is None, f"Old path {old_path} should be removed" - - # New paths should exist - new_begin, new_entries = watching.treeget(working_state, new_path) - assert new_begin is not None, f"New path {new_path} should exist" - - -def test_nested_directory_treeget_corruption(setup_watcher): - """Test that treeget function handles nested path operations correctly without corruption.""" - temp_dir = setup_watcher - - # Create a complex tree structure - root_dirs = [] - for i in range(3): - root_dir = temp_dir / f"root_{i}" - root_dir.mkdir() - - for j in range(2): - mid_dir = root_dir / f"mid_{j}" - mid_dir.mkdir() - - for k in range(2): - leaf_dir = mid_dir / f"leaf_{k}" - leaf_dir.mkdir() - - # Add files to leaf directories - for l in range(5): - (leaf_dir / f"file_{l}.txt").write_text(f"content_{i}_{j}_{k}_{l}") - - root_dirs.append(root_dir) - - # Initialize state - initial_root = watching.walk(PurePosixPath()) - watching.state.root = initial_root - - # Test treeget with various nested paths - test_paths = [ - PurePosixPath("root_0"), - PurePosixPath("root_0/mid_0"), - PurePosixPath("root_0/mid_0/leaf_0"), - PurePosixPath("root_1/mid_1/leaf_1"), - PurePosixPath("root_2/mid_0/leaf_1"), - ] - - # Verify treeget works correctly for all paths - for path in test_paths: - begin, entries = watching.treeget(initial_root, path) - assert begin is not None, f"treeget should find existing path: {path}" - assert len(entries) >= 1, f"treeget should return entries for: {path}" - - # Now rename a nested directory and test treeget consistency - old_leaf = temp_dir / "root_0" / "mid_0" / "leaf_0" - new_leaf = temp_dir / "root_0" / "mid_0" / "renamed_leaf" - old_leaf.rename(new_leaf) - - # Update the state - loop = asyncio.new_event_loop() - working_state = initial_root[:] - - old_nested_path = PurePosixPath("root_0/mid_0/leaf_0") - new_nested_path = PurePosixPath("root_0/mid_0/renamed_leaf") - - # Update paths - watching.update_path(working_state, old_nested_path, loop) - watching.update_path(working_state, new_nested_path, loop) - - # Verify treeget consistency after the update - old_begin, old_entries = watching.treeget(working_state, old_nested_path) - assert old_begin is None, "Old nested path should not be found after rename" - - new_begin, new_entries = watching.treeget(working_state, new_nested_path) - assert new_begin is not None, "New nested path should be found after rename" - assert len(new_entries) >= 1, "New nested path should have entries" - - # Verify that other paths are still accessible (no corruption) - for path in [ - PurePosixPath("root_1/mid_1/leaf_1"), - PurePosixPath("root_2/mid_0/leaf_1"), - ]: - begin, entries = watching.treeget(working_state, path) - assert begin is not None, f"Other paths should remain accessible: {path}" - - -def test_format_update_infinite_loop_with_complex_nested_changes(setup_watcher): - """Create a scenario that specifically triggers infinite loops in format_update. - - The hang often occurs in format_update when the diff algorithm gets confused - by complex nested directory moves. 
- """ - temp_dir = setup_watcher - - # Create a complex scenario that can confuse the diff algorithm - # Multiple directories with similar names and nested structures - dirs_data = [] - - for i in range(4): - # Create main directory - main_dir = temp_dir / f"main_{i}" - main_dir.mkdir() - - # Create subdirectories with similar patterns - sub_dir = main_dir / "common_subdir_name" - sub_dir.mkdir() - - # Create files with varying content - for j in range(15): - (sub_dir / f"file_{j:02d}.txt").write_text(f"main_{i}_content_{j}") - - # Add another level of nesting - nested = sub_dir / "nested_level" - nested.mkdir() - for j in range(8): - (nested / f"nested_{j}.txt").write_text(f"nested_{i}_{j}") - - dirs_data.append((main_dir, sub_dir, nested)) - - # Get initial state - old_state = watching.walk(PurePosixPath()) - - # Perform complex renames that can confuse the diff algorithm - # Rename all subdirectories to have even more similar names - for i, (main_dir, sub_dir, nested) in enumerate(dirs_data): - # Rename the subdirectory to a name that's very similar to others - new_sub_name = f"renamed_common_subdir_{i}" - new_sub_dir = main_dir / new_sub_name - sub_dir.rename(new_sub_dir) - - # Also rename some files to create more confusion - for j in range(0, 10, 2): # Rename every other file - old_file = new_sub_dir / f"file_{j:02d}.txt" - new_file = new_sub_dir / f"renamed_file_{j:02d}.txt" - if old_file.exists(): - old_file.rename(new_file) - - # Get new state - new_state = watching.walk(PurePosixPath()) - - # This is the critical test - format_update with complex nested changes - # that have caused infinite loops in the past - start_time = time.time() - - try: - # Set a more aggressive timeout - def timeout_handler(signum, frame): - raise TimeoutError("format_update appears to be hanging") - - # Set a 10-second timeout - signal.signal(signal.SIGALRM, timeout_handler) - signal.alarm(10) - - try: - update_msg = watching.format_update(old_state, new_state) - signal.alarm(0) # Cancel the alarm - - end_time = time.time() - duration = end_time - start_time - - # Even complex diffs should complete quickly - assert duration < 8.0, ( - f"format_update took {duration}s - possible infinite loop" - ) - - # Verify the result is valid - assert update_msg, "format_update should return a message" - decoded = msgspec.json.decode(update_msg, type=UpdateMessage) - assert decoded.update, "Update should contain operations" - - except TimeoutError: - signal.alarm(0) - pytest.fail( - "format_update hung/infinite loop detected with complex nested changes" - ) - - except Exception as e: - signal.alarm(0) - pytest.fail(f"format_update failed: {e}") - - -def test_update_path_with_corrupted_tree_state(setup_watcher): - """Test update_path when the tree state becomes corrupted by rapid changes.""" - temp_dir = setup_watcher - - # Create a nested structure - parent = temp_dir / "parent" - parent.mkdir() - child = parent / "child" - child.mkdir() - target = child / "target_dir" - target.mkdir() - - # Add many files to make operations slower - for i in range(30): - (target / f"file_{i:03d}.txt").write_text(f"content_{i}") - - # Add nested subdirectories - for i in range(3): - subdir = target / f"subdir_{i}" - subdir.mkdir() - for j in range(10): - (subdir / f"sub_file_{j}.txt").write_text(f"sub_content_{i}_{j}") - - # Initialize state - watching.state.root = watching.walk(PurePosixPath()) - - # Create a working copy that we'll manually corrupt to simulate race conditions - working_state = watching.state.root[:] - - loop = 
asyncio.new_event_loop() - - # Rename the directory - new_target = child / "renamed_target" - target.rename(new_target) - - # Simulate the race condition by manually corrupting the tree state - # This mimics what happens when inotify events arrive out of order - - # First, try to update a path that should exist - old_path = PurePosixPath("parent/child/target_dir") - - # Manually remove an entry to simulate corruption - if len(working_state) > 5: - # Remove a random entry to corrupt the tree structure - del working_state[3] - - start_time = time.time() - - try: - # This should handle corrupted state gracefully - watching.update_path(working_state, old_path, loop) - - # Now add the new path - new_path = PurePosixPath("parent/child/renamed_target") - watching.update_path(working_state, new_path, loop) - - end_time = time.time() - duration = end_time - start_time - - # Should complete without hanging even with corrupted state - assert duration < 5.0, f"update_path with corrupted state took {duration}s" - - except Exception as e: - # Some exceptions are expected with corrupted state, but shouldn't hang - end_time = time.time() - duration = end_time - start_time - assert duration < 5.0, f"update_path hung even when failing: {duration}s" - - -def test_simulate_real_inotify_event_sequence(setup_watcher): - """Simulate the exact inotify event sequence that causes hangs.""" - temp_dir = setup_watcher - - # Create the exact scenario from real usage that triggers the bug - project_dir = temp_dir / "project" - project_dir.mkdir() - - src_dir = project_dir / "src" - src_dir.mkdir() - - components_dir = src_dir / "components" - components_dir.mkdir() - - # This is the directory that will be renamed - old_component = components_dir / "OldComponent" - old_component.mkdir() - - # Add files that exist in real projects - for filename in ["index.tsx", "styles.css", "types.ts", "utils.ts"]: - (old_component / filename).write_text(f"// {filename} content") - - # Add a subdirectory with more files - sub_dir = old_component / "subcomponents" - sub_dir.mkdir() - for i in range(5): - (sub_dir / f"SubComponent{i}.tsx").write_text(f"// SubComponent{i}") - - # Initialize state - watching.state.root = watching.walk(PurePosixPath()) - working_state = watching.state.root[:] - - loop = asyncio.new_event_loop() - - # This is the exact operation that causes hangs in real usage - new_component = components_dir / "NewComponent" - old_component.rename(new_component) - - # Simulate the inotify event sequence that causes problems - # IN_MOVED_FROM event for the old directory - old_path = PurePosixPath("project/src/components/OldComponent") - - # IN_MOVED_TO event for the new directory - new_path = PurePosixPath("project/src/components/NewComponent") - - # Track how long the operations take - start_time = time.time() - - # Set up timeout detection - def timeout_handler(signum, frame): - raise TimeoutError("Simulated inotify sequence hung") - - signal.signal(signal.SIGALRM, timeout_handler) - signal.alarm(15) # 15 second timeout - - try: - # This sequence is where the hang occurs in real usage - watching.update_path(working_state, old_path, loop) - watching.update_path(working_state, new_path, loop) - - # If we get here without hanging, cancel the alarm - signal.alarm(0) - - end_time = time.time() - duration = end_time - start_time - - # Real inotify operations should be fast - assert duration < 10.0, f"Simulated inotify sequence took {duration}s" - - # Verify the final state is correct - old_begin, old_entries = 
watching.treeget(working_state, old_path) - assert old_begin is None, "Old component path should be removed" - - new_begin, new_entries = watching.treeget(working_state, new_path) - assert new_begin is not None, "New component path should exist" - assert len(new_entries) > 1, "New component should contain all files" - - except TimeoutError: - signal.alarm(0) - pytest.fail("HANG DETECTED: Simulated inotify event sequence hung!") - - except Exception as e: - signal.alarm(0) - pytest.fail(f"Simulated inotify sequence failed: {e}") - - finally: - signal.alarm(0) # Ensure alarm is cancelled - """Test format_update with nested directory changes that could cause infinite loops.""" - temp_dir = setup_watcher - - # Create complex nested structure that has caused issues - complex_structure = temp_dir / "complex" - complex_structure.mkdir() - - # Create multiple levels with similar names (potential for confusion) - level_a = complex_structure / "level_a" - level_a.mkdir() - sublevel_a = level_a / "sublevel" - sublevel_a.mkdir() - - level_b = complex_structure / "level_b" - level_b.mkdir() - sublevel_b = level_b / "sublevel" - sublevel_b.mkdir() - - # Add files to each sublevel - for i in range(10): - (sublevel_a / f"file_a_{i}.txt").write_text(f"content_a_{i}") - (sublevel_b / f"file_b_{i}.txt").write_text(f"content_b_{i}") - - # Get initial state - old_state = watching.walk(PurePosixPath()) - - # Perform nested directory renames that could confuse the diff algorithm - renamed_sublevel_a = level_a / "renamed_sublevel" - sublevel_a.rename(renamed_sublevel_a) - - renamed_sublevel_b = level_b / "also_renamed_sublevel" - sublevel_b.rename(renamed_sublevel_b) - - # Get new state - new_state = watching.walk(PurePosixPath()) - - # This is where infinite loops or hangs can occur in format_update - start_time = time.time() - - try: - update_msg = watching.format_update(old_state, new_state) - end_time = time.time() - - duration = end_time - start_time - assert duration < 5.0, ( - f"format_update took too long with nested changes: {duration}s" - ) - - # Verify the update message is valid - assert update_msg, "format_update should return valid message" - decoded = msgspec.json.decode(update_msg, type=UpdateMessage) - assert decoded.update, "Update should contain operations" - - except Exception as e: - pytest.fail(f"format_update failed or hung with nested directory changes: {e}") - """Test that reproduces the hang when directory rename events race with updates. - - This test simulates the exact conditions that cause the hang: - 1. Create a directory with files - 2. Start monitoring it - 3. Rename the directory while the watcher is processing events - 4. 
This should cause a hang where old directory names are preserved - """ - temp_dir = setup_watcher - - # Create test structure with many files to increase chance of race conditions - subdir = temp_dir / "original_dir" - subdir.mkdir() - - # Create many files to make the directory scan take longer - for i in range(50): - (subdir / f"file_{i:03d}.txt").write_text(f"content_{i}") - - # Create nested directories - nested = subdir / "nested" - nested.mkdir() - for i in range(20): - (nested / f"nested_file_{i:03d}.txt").write_text(f"nested_content_{i}") - - # Initial scan to populate the state - initial_root = watching.walk(PurePosixPath()) - watching.state.root = initial_root - - # Verify initial structure - initial_names = [entry.name for entry in initial_root] - assert "original_dir" in initial_names - - # Create a mock event loop for testing - loop = asyncio.new_event_loop() - - # Simulate the problematic sequence: - # 1. Start processing the original directory - # 2. Rename it while processing - # 3. Try to update both old and new paths - - # Start by getting the initial state - original_rootmod = watching.state.root[:] - - # Rename the directory - renamed_dir = temp_dir / "renamed_dir" - subdir.rename(renamed_dir) - - # Now simulate what happens in the inotify watcher: - # Multiple rapid updates that can cause race conditions - - # First, try to update the old path (should remove it) - watching.update_path(original_rootmod, PurePosixPath("original_dir"), loop) - - # Then try to update the new path (should add it) - watching.update_path(original_rootmod, PurePosixPath("renamed_dir"), loop) - - # Check if the state is consistent - final_names = [entry.name for entry in original_rootmod] - - # The bug would manifest as: - # 1. Old directory name still present (should be gone) - # 2. New directory name missing (should be there) - # 3. 
Inconsistent state causing hangs - - # This is the expected correct behavior - assert "original_dir" not in final_names, "Old directory name should be removed" - assert "renamed_dir" in final_names, "New directory name should be present" - - # Additional check: verify we can still walk the renamed directory - renamed_walk = watching.walk(PurePosixPath("renamed_dir")) - assert len(renamed_walk) > 1, "Should be able to walk renamed directory" - - -def test_concurrent_inotify_events_simulation(setup_watcher): - """Simulate concurrent inotify events that can cause the hanging bug.""" - temp_dir = setup_watcher - - # Create a complex directory structure - dirs = ["dir_a", "dir_b", "dir_c"] - created_dirs = [] - - for dir_name in dirs: - dir_path = temp_dir / dir_name - dir_path.mkdir() - # Add files to each directory - for i in range(10): - (dir_path / f"file_{i}.txt").write_text(f"content in {dir_name}") - created_dirs.append(dir_path) - - # Initial state - watching.state.root = watching.walk(PurePosixPath()) - original_state = watching.state.root[:] - - loop = asyncio.new_event_loop() - - # Simulate rapid concurrent operations that happen in real usage - # This mimics what happens when multiple filesystem events arrive rapidly - - # Rename all directories simultaneously (as might happen with mv commands) - renamed_paths = [] - for i, dir_path in enumerate(created_dirs): - new_path = temp_dir / f"renamed_{dirs[i]}" - dir_path.rename(new_path) - renamed_paths.append(new_path) - - # Now simulate the inotify event processing that causes issues - # In the real code, these updates happen in rapid succession - # and can cause race conditions - - working_state = original_state[:] - - # Process removal events (IN_MOVED_FROM) - for dir_name in dirs: - try: - watching.update_path(working_state, PurePosixPath(dir_name), loop) - except Exception as e: - # The bug might manifest as exceptions during updates - pytest.fail(f"Update path failed for {dir_name}: {e}") - - # Process addition events (IN_MOVED_TO) - for i, dir_name in enumerate(dirs): - try: - new_name = f"renamed_{dir_name}" - watching.update_path(working_state, PurePosixPath(new_name), loop) - except Exception as e: - pytest.fail(f"Update path failed for {new_name}: {e}") - - # Verify final state is consistent - final_names = [entry.name for entry in working_state] - - # Check that old names are gone - for dir_name in dirs: - assert dir_name not in final_names, ( - f"Old directory {dir_name} should be removed" - ) - - # Check that new names are present - for i, dir_name in enumerate(dirs): - new_name = f"renamed_{dir_name}" - assert new_name in final_names, f"New directory {new_name} should be present" - - -def test_format_update_with_rapid_changes(setup_watcher): - """Test format_update with rapid directory changes that can cause hangs.""" - temp_dir = setup_watcher - - # Create initial structure - initial_dirs = ["test1", "test2", "test3"] - for dir_name in initial_dirs: - dir_path = temp_dir / dir_name - dir_path.mkdir() - (dir_path / "file.txt").write_text("test content") - - # Get initial state - old_state = watching.walk(PurePosixPath()) - - # Perform rapid renames - for i, dir_name in enumerate(initial_dirs): - old_path = temp_dir / dir_name - new_path = temp_dir / f"renamed_{dir_name}" - old_path.rename(new_path) - - # Get new state - new_state = watching.walk(PurePosixPath()) - - # This is where the hang might occur - in format_update - start_time = time.time() - try: - update_msg = watching.format_update(old_state, new_state) - 
end_time = time.time() - - # Should complete quickly - duration = end_time - start_time - assert duration < 5.0, f"format_update took too long: {duration}s" - - # Decode the update to verify it's valid - decoded = msgspec.json.decode(update_msg, type=UpdateMessage) - assert decoded.update, "Update message should contain operations" - - except Exception as e: - pytest.fail(f"format_update failed or hung: {e}") - - -def test_update_path_with_missing_directory(setup_watcher): - """Test update_path when called on a directory that no longer exists. - - This simulates the race condition where update_path is called for a path - that was just moved/deleted. - """ - temp_dir = setup_watcher - - # Create and populate initial state - test_dir = temp_dir / "disappearing_dir" - test_dir.mkdir() - (test_dir / "file.txt").write_text("content") - - initial_state = watching.walk(PurePosixPath()) - watching.state.root = initial_state - working_state = initial_state[:] - - # Remove the directory - shutil.rmtree(test_dir) - - loop = asyncio.new_event_loop() - - # Now try to update the path that no longer exists - # This should handle gracefully without hanging - start_time = time.time() - try: - watching.update_path(working_state, PurePosixPath("disappearing_dir"), loop) - end_time = time.time() - - duration = end_time - start_time - assert duration < 2.0, f"update_path took too long: {duration}s" - - # Verify the directory was removed from the state - final_names = [entry.name for entry in working_state] - assert "disappearing_dir" not in final_names - - except Exception as e: - pytest.fail(f"update_path should handle missing directories gracefully: {e}") - - -def test_threaded_watcher_simulation(setup_watcher): - """Test that simulates the actual threaded watcher behavior with directory renames. - - This test creates a more realistic scenario where the watcher thread - processes events while filesystem operations are happening. 
- """ - temp_dir = setup_watcher - - # Create test structure - test_dirs = [] - for i in range(5): - dir_path = temp_dir / f"thread_test_dir_{i}" - dir_path.mkdir() - # Add some files - for j in range(5): - (dir_path / f"file_{j}.txt").write_text(f"content_{i}_{j}") - test_dirs.append(dir_path) - - # Initialize state - watching.state.root = watching.walk(PurePosixPath()) - - # Create an event loop for the simulation - loop = asyncio.new_event_loop() - - # Track state changes - state_changes = [] - original_broadcast = watching.broadcast - - def tracking_broadcast(msg, loop_param): - state_changes.append(msg) - return original_broadcast(msg, loop_param) - - # Patch broadcast to track changes - with patch("cista.watching.broadcast", side_effect=tracking_broadcast): - # Simulate rapid directory operations - start_time = time.time() - - for i, dir_path in enumerate(test_dirs): - # Rename directory - new_path = temp_dir / f"renamed_thread_test_dir_{i}" - dir_path.rename(new_path) - - # Update the watcher state (simulating inotify events) - old_name = f"thread_test_dir_{i}" - new_name = f"renamed_thread_test_dir_{i}" - - # Simulate the race condition: rapid updates - watching.update_path(watching.state.root, PurePosixPath(old_name), loop) - watching.update_path(watching.state.root, PurePosixPath(new_name), loop) - - end_time = time.time() - - # Should complete without hanging - duration = end_time - start_time - assert duration < 10.0, f"Threaded operations took too long: {duration}s" - - # Verify final state is consistent - final_names = [entry.name for entry in watching.state.root] - - # Old names should be gone - for i in range(5): - old_name = f"thread_test_dir_{i}" - assert old_name not in final_names, ( - f"Old directory {old_name} should be removed" - ) - - # New names should be present - for i in range(5): - new_name = f"renamed_thread_test_dir_{i}" - assert new_name in final_names, ( - f"New directory {new_name} should be present" - ) - - -def test_directory_rename_with_nested_structure(setup_watcher): - """Test renaming a directory that contains nested subdirectories.""" - temp_dir = setup_watcher - - # Create a more complex nested structure - main_dir = temp_dir / "main_dir" - main_dir.mkdir() - - # Create multiple levels of nesting - level1 = main_dir / "level1" - level1.mkdir() - (level1 / "l1_file.txt").write_text("level1 content") - - level2 = level1 / "level2" - level2.mkdir() - (level2 / "l2_file.txt").write_text("level2 content") - - level3 = level2 / "level3" - level3.mkdir() - (level3 / "l3_file.txt").write_text("level3 content") - - # Initial scan - initial_root = watching.walk(PurePosixPath()) - watching.state.root = initial_root - - # Rename the main directory - renamed_main = temp_dir / "renamed_main_dir" - main_dir.rename(renamed_main) - - # Update the watching system - loop = asyncio.new_event_loop() - watching.update_path(watching.state.root, PurePosixPath("main_dir"), loop) - watching.update_path(watching.state.root, PurePosixPath("renamed_main_dir"), loop) - - # Verify the entire nested structure is properly updated - updated_root = watching.state.root - updated_names = [entry.name for entry in updated_root] - - assert "main_dir" not in updated_names - assert "renamed_main_dir" in updated_names - - # Verify the nested structure is still intact - renamed_structure = watching.walk(PurePosixPath("renamed_main_dir")) - - # Extract all the names from the renamed structure - all_names = [entry.name for entry in renamed_structure] - - # Should contain the directory itself 
and all nested items - assert "renamed_main_dir" in all_names - assert "level1" in all_names - assert "l1_file.txt" in all_names - assert "level2" in all_names - assert "l2_file.txt" in all_names - assert "level3" in all_names - assert "l3_file.txt" in all_names - - -def test_directory_rename_format_update(setup_watcher): - """Test that format_update correctly handles directory renames.""" - temp_dir = setup_watcher - - # Create test structure - subdir, _, other_dir = create_test_structure(temp_dir) - - # Get initial state - old_root = watching.walk(PurePosixPath()) - - # Rename directory - renamed_subdir = temp_dir / "renamed_subdir" - subdir.rename(renamed_subdir) - - # Get new state - new_root = watching.walk(PurePosixPath()) - - # Generate update message - update_msg = watching.format_update(old_root, new_root) - - # The update should not be empty and should contain proper operations - assert update_msg - assert "update" in update_msg - - # Decode and verify the update contains expected operations - decoded = msgspec.json.decode(update_msg, type=UpdateMessage) - assert decoded.update # Should have update operations - - # The update should reflect the rename operation (delete old, insert new) - operations = decoded.update - assert len(operations) > 0 - - -def test_concurrent_directory_operations(setup_watcher): - """Test behavior when multiple directory operations happen concurrently.""" - temp_dir = setup_watcher - - # Create multiple directories - dirs_to_create = ["dir1", "dir2", "dir3"] - created_dirs = [] - - for dir_name in dirs_to_create: - dir_path = temp_dir / dir_name - dir_path.mkdir() - (dir_path / f"{dir_name}_file.txt").write_text(f"content for {dir_name}") - created_dirs.append(dir_path) - - # Initial scan - initial_root = watching.walk(PurePosixPath()) - watching.state.root = initial_root - - # Rename multiple directories "simultaneously" - renamed_dirs = [] - for i, dir_path in enumerate(created_dirs): - renamed_path = temp_dir / f"renamed_dir{i + 1}" - dir_path.rename(renamed_path) - renamed_dirs.append(renamed_path) - - # Update the watching system for all changes - loop = asyncio.new_event_loop() - - # Update for all old paths (should remove them) - for dir_name in dirs_to_create: - watching.update_path(watching.state.root, PurePosixPath(dir_name), loop) - - # Update for all new paths (should add them) - for i in range(len(renamed_dirs)): - watching.update_path( - watching.state.root, PurePosixPath(f"renamed_dir{i + 1}"), loop - ) - - # Verify final state - final_root = watching.state.root - final_names = [entry.name for entry in final_root] - - # Old names should be gone - for dir_name in dirs_to_create: - assert dir_name not in final_names - - # New names should be present - for i in range(len(renamed_dirs)): - assert f"renamed_dir{i + 1}" in final_names - - -@pytest.mark.slow -def test_watcher_doesnt_hang_on_directory_rename(setup_watcher): - """Test that the watcher doesn't hang when a directory is renamed. - - This test specifically addresses the reported bug where directory renames - cause the system to hang and no more operations go through. 
- """ - temp_dir = setup_watcher - - # Create test structure - subdir, _, _ = create_test_structure(temp_dir) - - # Initialize the watcher state - watching.state.root = watching.walk(PurePosixPath()) - - # Mock the inotify events to simulate what happens during a rename - # This simulates the problematic scenario described in the bug report - with patch("time.monotonic", side_effect=[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]): - # Simulate the rename operation - renamed_subdir = temp_dir / "renamed_test_subdir" - subdir.rename(renamed_subdir) - - # Create a simple event loop for testing - loop = asyncio.new_event_loop() - - # This should complete without hanging - start_time = time.time() - - # Update the path - this is where the hang might occur - watching.update_path(watching.state.root, PurePosixPath("test_subdir"), loop) - watching.update_path( - watching.state.root, PurePosixPath("renamed_test_subdir"), loop - ) - - end_time = time.time() - - # The operation should complete quickly (within 5 seconds) - assert end_time - start_time < 5.0, ( - "Directory rename operation took too long, possible hang detected" - ) - - # Verify the state is consistent - final_names = [entry.name for entry in watching.state.root] - assert "test_subdir" not in final_names - assert "renamed_test_subdir" in final_names - - # Verify we can still perform operations after the rename - # This tests that the system isn't in a broken state - another_dir = temp_dir / "post_rename_dir" - another_dir.mkdir() - - # This should work without issues - watching.update_path( - watching.state.root, PurePosixPath("post_rename_dir"), loop - ) - final_names_after = [entry.name for entry in watching.state.root] - assert "post_rename_dir" in final_names_after - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) -- 2.49.0