Initial commit
commit dd3d0c63c4

README.md (new file)
@@ -0,0 +1,19 @@

# Data Compression Benchmark

This project benchmarks the performance of various compression algorithms (zstd, brotli, gzip) at different compression levels on all files in a given directory.

You can easily see which algorithm and setting is best for your use case by benchmarking on the actual files you'll be compressing.

## Demo on a NodeJS project's static client files

![Compression](compression.png)
![Decompression](decompression.png)

## How to Run

The recommended way is to use [uv](https://github.com/astral-sh/uv) to run from git:

```fish
uvx --from git+https://git.zi.fi/LeoVasanko/compression-benchmark.git compression-benchmark /path/to/folder
```
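The entry point also accepts `--rounds` (number of timing rounds, default 5) and `--outdir` (where the PNG plots are written), as defined in compression_benchmark/bench.py below. The same benchmark can be driven from Python as well; a minimal sketch, assuming the package and its dependencies are installed and using a placeholder folder path:

```python
# Minimal sketch (not part of this commit): programmatic use of the benchmark.
from pathlib import Path

from compression_benchmark.bench import run_benchmark

# run_benchmark reads every file under the folder, times each algorithm/level
# combination, and returns the per-level results plus the loaded files and
# their total size in bytes.
bench_points, files, orig_total = run_benchmark(Path("/path/to/folder"), rounds=3)

for bp in bench_points:
    compressed = sum(len(b) for b in bp.compdata)
    print(f"{bp.name} level {bp.lvl}: {100 * compressed / orig_total:.1f}% of original")
```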
bench.ipynb (new file)
@@ -0,0 +1,329 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1883540a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# /// script\n",
    "# dependencies = [\n",
    "#     \"zstandard\",\n",
    "#     \"brotli\",\n",
    "#     \"matplotlib\",\n",
    "#     \"tqdm\",\n",
    "#     \"numpy\",\n",
    "# ]\n",
    "# ///\n",
    "\n",
    "import gc\n",
    "import gzip\n",
    "import time\n",
    "from dataclasses import dataclass\n",
    "from pathlib import Path\n",
    "from typing import Callable\n",
    "\n",
    "import brotli\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "from tqdm.auto import tqdm\n",
    "from zstandard import ZstdCompressor, ZstdDecompressor\n",
    "\n",
    "# What should we benchmark?\n",
    "FOLDER = Path(\".\")\n",
    "ROUNDS = 5\n",
    "ALGS = {\n",
    "    \"zstd\": {\n",
    "        \"levels\": [1, 2, 3, 4, 5, 6, 7, 9, 10, 12, 14, 16, 22],\n",
    "        \"c\": lambda b, q: ZstdCompressor(level=q).compress(b),\n",
    "        \"d\": lambda b: ZstdDecompressor().decompress(b),\n",
    "    },\n",
    "    \"brotli\": {\n",
    "        \"levels\": [1, 2, 3, 4, 5, 7, 9, 11],\n",
    "        \"c\": lambda b, q: brotli.compress(b, quality=q),\n",
    "        \"d\": lambda b: brotli.decompress(b),\n",
    "    },\n",
    "    \"gzip\": {\n",
    "        \"levels\": [1, 3, 4, 5, 9],\n",
    "        \"c\": lambda b, q: gzip.compress(b, compresslevel=q),\n",
    "        \"d\": lambda b: gzip.decompress(b),\n",
    "    },\n",
    "}\n",
    "\n",
    "# In case you were wondering:\n",
    "# Sharing the ZstdCompressor across operations or using threads makes only a little difference (with small files)\n",
    "\n",
    "files = [p.read_bytes() for p in FOLDER.rglob(\"*\") if p.is_file()]\n",
    "orig_total = sum(len(b) for b in files)\n",
    "print(f\"Loaded {len(files)} files, total {orig_total / 1e6:.2f} MB\")\n",
    "assert files, \"No files found in the specified folder\"\n",
    "\n",
    "\n",
    "@dataclass\n",
    "class BenchPoint:\n",
    "    name: str\n",
    "    lvl: int\n",
    "    compress: Callable\n",
    "    decompress: Callable\n",
    "    compdata: list[bytes]\n",
    "    comp_time_ns: int = 0  # median compression time (ns)\n",
    "    decomp_time_ns: int = 0  # median decompression time (ns)\n",
    "\n",
    "\n",
    "bench_points = [\n",
    "    BenchPoint(alg, lvl, spec[\"c\"], spec[\"d\"], [])\n",
    "    for alg, spec in ALGS.items()\n",
    "    for lvl in spec[\"levels\"]\n",
    "]\n",
    "P = len(bench_points)\n",
    "C_ns = np.zeros((P, ROUNDS), dtype=np.int64)  # compression time (ns)\n",
    "D_ns = np.zeros((P, ROUNDS), dtype=np.int64)  # decompression time (ns)\n",
    "\n",
    "# Benchmark\n",
    "gc.disable()\n",
    "time.time = lambda: 0  # make gzip deterministic (timestamp header)\n",
    "progress = tqdm(range(ROUNDS), desc=\"Rounds\")\n",
    "for r in progress:\n",
    "    for i, bp in enumerate(bench_points):\n",
    "        progress.set_postfix_str(f\"Compress {bp.name}/{bp.lvl}\")\n",
    "        for f, data in enumerate(files):\n",
    "            t0 = time.perf_counter_ns()\n",
    "            out = bp.compress(data, bp.lvl)\n",
    "            t1 = time.perf_counter_ns()\n",
    "            C_ns[i, r] += t1 - t0\n",
    "            if r == 0:\n",
    "                bp.compdata.append(out)\n",
    "            else:\n",
    "                assert out == bp.compdata[f], (\n",
    "                    f\"Compressed data changed between rounds for {bp.name}, level={bp.lvl}\"\n",
    "                )\n",
    "        progress.set_postfix_str(f\"Decompress {bp.name}/{bp.lvl}\")\n",
    "        for file in bp.compdata:\n",
    "            t0 = time.perf_counter_ns()\n",
    "            _ = bp.decompress(file)\n",
    "            t1 = time.perf_counter_ns()\n",
    "            D_ns[i, r] += t1 - t0\n",
    "    progress.set_postfix_str(\"Benchmark Done\")\n",
    "gc.enable()\n",
    "\n",
    "# Store median times directly in bench points\n",
    "Cns, Dns = np.median(C_ns, 1), np.median(D_ns, 1)\n",
    "for i, bp in enumerate(bench_points):\n",
    "    bp.comp_time_ns = int(Cns[i])\n",
    "    bp.decomp_time_ns = int(Dns[i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "12a0cf49",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_xy(bench_points, time_attr, title, xlabel, ylabel, ylabel2):\n",
    "    fig, ax1 = plt.subplots(figsize=(9, 5), dpi=300)\n",
    "\n",
    "    # Collect all y values (Mbit/s) and their corresponding x values for overlap detection\n",
    "    all_data_points = []\n",
    "\n",
    "    for alg_name in ALGS:\n",
    "        alg_points = [bp for bp in bench_points if bp.name == alg_name]\n",
    "        comp_totals = np.array([sum(len(b) for b in bp.compdata) for bp in alg_points])\n",
    "        times_ns = np.array([getattr(bp, time_attr) for bp in alg_points])\n",
    "        levels = [bp.lvl for bp in alg_points]\n",
    "\n",
    "        x = 100.0 * comp_totals / orig_total  # compression percentages\n",
    "        y_mbps = orig_total * 8000.0 / times_ns  # mbps - this is what we plot\n",
    "\n",
    "        # Store all data points for overlap detection\n",
    "        for xx, yy in zip(x, y_mbps):\n",
    "            all_data_points.append((xx, yy))\n",
    "\n",
    "        # Plot using Mbit/s (so faster operations are at the top)\n",
    "        ax1.plot(x, y_mbps, marker=\"s\", linewidth=1.25, label=alg_name, zorder=2)\n",
    "        levelstyle = {\n",
    "            \"ha\": \"center\",\n",
    "            \"va\": \"center\",\n",
    "            \"fontsize\": 7,\n",
    "            \"zorder\": 3,\n",
    "            \"bbox\": dict(\n",
    "                boxstyle=\"round,pad=0.2\",\n",
    "                facecolor=\"white\",\n",
    "                edgecolor=\"black\",\n",
    "                lw=0.4,\n",
    "            ),\n",
    "        }\n",
    "        for xx, yy, L in zip(x, y_mbps, levels):\n",
    "            ax1.text(xx, yy, str(L), **levelstyle)\n",
    "\n",
    "    # Set up the axis\n",
    "    ax1.set_xlabel(xlabel)\n",
    "    ax1.set_ylabel(ylabel)  # Compression/Decompression speed (Mbit/s)\n",
    "\n",
    "    # Get plot dimensions\n",
    "    y_min, y_max = ax1.get_ylim()\n",
    "    x_min, x_max = ax1.get_xlim()\n",
    "\n",
    "    # Extract unique y values and sort them\n",
    "    unique_y_values = sorted(set(point[1] for point in all_data_points))\n",
    "\n",
    "    # Filter to y values within the plot range\n",
    "    visible_y_values = [y for y in unique_y_values if y_min <= y <= y_max]\n",
    "\n",
    "    # Determine minimum spacing to avoid overlap (as a percentage of the y-range)\n",
    "    y_range = y_max - y_min\n",
    "    min_spacing = y_range * 0.05  # 5% of the y-range as minimum spacing\n",
    "\n",
    "    # Select y values with sufficient spacing\n",
    "    selected_y_values = []\n",
    "    for y in visible_y_values:\n",
    "        # Check if this y value is far enough from already selected ones\n",
    "        if not selected_y_values or all(\n",
    "            abs(y - selected) >= min_spacing for selected in selected_y_values\n",
    "        ):\n",
    "            selected_y_values.append(y)\n",
    "\n",
    "    # Function to format time labels\n",
    "    def format_time_label(ms):\n",
    "        if ms < 10:\n",
    "            return f\"{ms:.1f} ms\"  # One decimal for <10 ms\n",
    "        elif ms < 1000:\n",
    "            return f\"{ms:.0f} ms\"  # No decimals for <1000 ms\n",
    "        else:\n",
    "            seconds = ms / 1000\n",
    "            return f\"{seconds:.1f} s\"  # Seconds with one decimal above 1000 ms\n",
    "\n",
    "    # Draw black lines from data points to the right margin with labels\n",
    "    for y_mbps in selected_y_values:\n",
    "        # Find the rightmost x position for this y value\n",
    "        rightmost_x = max(\n",
    "            [point[0] for point in all_data_points if abs(point[1] - y_mbps) < 1e-6],\n",
    "            default=x_min,\n",
    "        )\n",
    "\n",
    "        # Convert Mbit/s to milliseconds for the label\n",
    "        ms = orig_total * 8000.0 / (y_mbps * 1e6)\n",
    "\n",
    "        # Draw a black line from the rightmost data point to the right margin\n",
    "        ax1.plot(\n",
    "            [rightmost_x, x_max],\n",
    "            [y_mbps, y_mbps],\n",
    "            color=\"black\",\n",
    "            linestyle=\"-\",\n",
    "            alpha=0.3,\n",
    "            linewidth=0.8,\n",
    "            zorder=1,\n",
    "        )\n",
    "\n",
    "        # Add a text label on the right side with improved formatting\n",
    "        ms_label = format_time_label(ms)\n",
    "\n",
    "        ax1.text(\n",
    "            x_max,\n",
    "            y_mbps,\n",
    "            ms_label,\n",
    "            verticalalignment=\"center\",\n",
    "            horizontalalignment=\"left\",\n",
    "            fontsize=7,\n",
    "            color=\"black\",\n",
    "            alpha=0.8,\n",
    "            bbox=dict(\n",
    "                boxstyle=\"round,pad=0.1\", facecolor=\"white\", alpha=0.9, edgecolor=\"none\"\n",
    "            ),\n",
    "        )\n",
    "\n",
    "    # Add the original fixed millisecond reference lines (only if they don't conflict)\n",
    "    ms_reference_values = [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]\n",
    "\n",
    "    for ms in ms_reference_values:\n",
    "        # Convert milliseconds to Mbit/s: mbps = orig_total * 8000.0 / (ms * 1e6)\n",
    "        mbps = orig_total * 8000.0 / (ms * 1e6)\n",
    "\n",
    "        # Only draw the line if it falls within the current y-axis range and isn't too close to data points\n",
    "        if y_min <= mbps <= y_max:\n",
    "            # Check if this reference line is far enough from data points\n",
    "            if not any(abs(mbps - y) < min_spacing for y in selected_y_values):\n",
    "                ax1.axhline(\n",
    "                    y=mbps,\n",
    "                    color=\"gray\",\n",
    "                    linestyle=\"--\",\n",
    "                    alpha=0.3,\n",
    "                    linewidth=0.5,\n",
    "                    zorder=1,\n",
    "                )\n",
    "                # Add a text label on the right side with improved formatting\n",
    "                ms_label = format_time_label(ms)\n",
    "                ax1.text(\n",
    "                    x_max,\n",
    "                    mbps,\n",
    "                    ms_label,\n",
    "                    verticalalignment=\"center\",\n",
    "                    horizontalalignment=\"left\",\n",
    "                    fontsize=7,\n",
    "                    color=\"gray\",\n",
    "                    alpha=0.6,\n",
    "                    bbox=dict(\n",
    "                        boxstyle=\"round,pad=0.1\",\n",
    "                        facecolor=\"white\",\n",
    "                        alpha=0.8,\n",
    "                        edgecolor=\"none\",\n",
    "                    ),\n",
    "                )\n",
    "\n",
    "    ax1.set_title(title)\n",
    "    ax1.grid(True, linestyle=\":\", linewidth=0.7, alpha=0.6, zorder=0)\n",
    "    ax1.legend()\n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
    "\n",
    "\n",
    "plot_xy(\n",
    "    bench_points,\n",
    "    \"comp_time_ns\",\n",
    "    f\"Compression — {len(files)} files, total {orig_total / 1_048_576:.2f} MiB (median of {ROUNDS} rounds)\",\n",
    "    \"Compressed size (% of original)\",\n",
    "    \"Compression speed (Mbit/s)\",\n",
    "    \"Time (ms)\",\n",
    ")\n",
    "plot_xy(\n",
    "    bench_points,\n",
    "    \"decomp_time_ns\",\n",
    "    f\"Decompression — {len(files)} files, total {orig_total / 1_048_576:.2f} MiB (median of {ROUNDS} rounds)\",\n",
    "    \"Compressed size (% of original)\",\n",
    "    \"Decompression speed (Mbit/s)\",\n",
    "    \"Time (ms)\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a8a2bdcd",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
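The plotting cell converts total input bytes and summed nanoseconds into Mbit/s through the constant 8000 (8 bits per byte times 1e9 ns per second, divided by 1e6 bits per Mbit), and inverts the same factor to place the millisecond labels on the right margin. A quick sanity check with made-up numbers:

```python
# Sanity check of the Mbit/s conversion used in plot_xy (illustrative numbers only).
orig_total = 10_000_000  # 10 MB of input data
time_ns = 50_000_000     # processed in 50 ms

mbps = orig_total * 8000.0 / time_ns                 # shortcut used in the notebook
bits_per_second = orig_total * 8 / (time_ns / 1e9)   # the long way around

print(mbps)                                # 1600.0
print(bits_per_second / 1e6)               # 1600.0 Mbit/s, same value
print(orig_total * 8000.0 / (mbps * 1e6))  # 50.0 ms, the inverse used for the labels
```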
compression.png (new binary file, 234 KiB, not shown)
compression_benchmark/__main__.py (new file)
@@ -0,0 +1,4 @@
from .bench import main

if __name__ == "__main__":
    main()
compression_benchmark/bench.py (new file)
@@ -0,0 +1,245 @@
import argparse
import gc
import gzip
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Callable

import brotli
import matplotlib.pyplot as plt
import numpy as np
from tqdm.auto import tqdm
from zstandard import ZstdCompressor, ZstdDecompressor

ALGS = {
    "zstd": {
        "levels": [1, 2, 3, 4, 5, 6, 7, 9, 10, 12, 14, 16, 22],
        "c": lambda b, q: ZstdCompressor(level=q).compress(b),
        "d": lambda b: ZstdDecompressor().decompress(b),
    },
    "brotli": {
        "levels": [1, 2, 3, 4, 5, 7, 9, 11],
        "c": lambda b, q: brotli.compress(b, quality=q),
        "d": lambda b: brotli.decompress(b),
    },
    "gzip": {
        "levels": [1, 3, 4, 5, 9],
        "c": lambda b, q: gzip.compress(b, compresslevel=q),
        "d": lambda b: gzip.decompress(b),
    },
}


@dataclass
class BenchPoint:
    name: str
    lvl: int
    compress: Callable
    decompress: Callable
    compdata: list
    comp_time_ns: int = 0
    decomp_time_ns: int = 0


def plot_xy(
    bench_points, time_attr, title, xlabel, ylabel, ylabel2, orig_total, out_path
):
    fig, ax1 = plt.subplots(figsize=(9, 5), dpi=300)
    all_data_points = []
    for alg_name in ALGS:
        alg_points = [bp for bp in bench_points if bp.name == alg_name]
        comp_totals = np.array([sum(len(b) for b in bp.compdata) for bp in alg_points])
        times_ns = np.array([getattr(bp, time_attr) for bp in alg_points])
        levels = [bp.lvl for bp in alg_points]
        x = 100.0 * comp_totals / orig_total
        y_mbps = orig_total * 8000.0 / times_ns
        for xx, yy in zip(x, y_mbps):
            all_data_points.append((xx, yy))
        ax1.plot(x, y_mbps, marker="s", linewidth=1.25, label=alg_name, zorder=2)
        levelstyle = {
            "ha": "center",
            "va": "center",
            "fontsize": 7,
            "zorder": 3,
            "bbox": dict(
                boxstyle="round,pad=0.2",
                facecolor="white",
                edgecolor="black",
                lw=0.4,
            ),
        }
        for xx, yy, L in zip(x, y_mbps, levels):
            ax1.text(xx, yy, str(L), **levelstyle)
    ax1.set_xlabel(xlabel)
    ax1.set_ylabel(ylabel)
    y_min, y_max = ax1.get_ylim()
    x_min, x_max = ax1.get_xlim()
    unique_y_values = sorted(set(point[1] for point in all_data_points))
    visible_y_values = [y for y in unique_y_values if y_min <= y <= y_max]
    y_range = y_max - y_min
    min_spacing = y_range * 0.05
    selected_y_values = []
    for y in visible_y_values:
        if not selected_y_values or all(
            abs(y - selected) >= min_spacing for selected in selected_y_values
        ):
            selected_y_values.append(y)

    def format_time_label(ms):
        if ms < 10:
            return f"{ms:.1f} ms"
        elif ms < 1000:
            return f"{ms:.0f} ms"
        else:
            seconds = ms / 1000
            return f"{seconds:.1f} s"

    for y_mbps in selected_y_values:
        rightmost_x = max(
            [point[0] for point in all_data_points if abs(point[1] - y_mbps) < 1e-6],
            default=x_min,
        )
        ms = orig_total * 8000.0 / (y_mbps * 1e6)
        ax1.plot(
            [rightmost_x, x_max],
            [y_mbps, y_mbps],
            color="black",
            linestyle="-",
            alpha=0.3,
            linewidth=0.8,
            zorder=1,
        )
        ms_label = format_time_label(ms)
        ax1.text(
            x_max,
            y_mbps,
            ms_label,
            verticalalignment="center",
            horizontalalignment="left",
            fontsize=7,
            color="black",
            alpha=0.8,
            bbox=dict(
                boxstyle="round,pad=0.1", facecolor="white", alpha=0.9, edgecolor="none"
            ),
        )
    ms_reference_values = [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]
    for ms in ms_reference_values:
        mbps = orig_total * 8000.0 / (ms * 1e6)
        if y_min <= mbps <= y_max:
            if not any(abs(mbps - y) < min_spacing for y in selected_y_values):
                ax1.axhline(
                    y=mbps,
                    color="gray",
                    linestyle="--",
                    alpha=0.3,
                    linewidth=0.5,
                    zorder=1,
                )
                ms_label = format_time_label(ms)
                ax1.text(
                    x_max,
                    mbps,
                    ms_label,
                    verticalalignment="center",
                    horizontalalignment="left",
                    fontsize=7,
                    color="gray",
                    alpha=0.6,
                    bbox=dict(
                        boxstyle="round,pad=0.1",
                        facecolor="white",
                        alpha=0.8,
                        edgecolor="none",
                    ),
                )
    ax1.set_title(title)
    ax1.grid(True, linestyle=":", linewidth=0.7, alpha=0.6, zorder=0)
    ax1.legend()
    plt.tight_layout()
    plt.savefig(out_path)
    plt.close(fig)


def run_benchmark(folder: Path, rounds: int):
    files = [p.read_bytes() for p in folder.rglob("*") if p.is_file()]
    orig_total = sum(len(b) for b in files)
    print(f"Loaded {len(files)} files, total {orig_total / 1e6:.2f} MB")
    assert files, "No files found in the specified folder"
    bench_points = [
        BenchPoint(alg, lvl, spec["c"], spec["d"], [])
        for alg, spec in ALGS.items()
        for lvl in spec["levels"]
    ]
    P = len(bench_points)
    C_ns = np.zeros((P, rounds), dtype=np.int64)
    D_ns = np.zeros((P, rounds), dtype=np.int64)
    gc.disable()
    time.time = lambda: 0
    progress = tqdm(range(rounds), desc="Rounds")
    for r in progress:
        for i, bp in enumerate(bench_points):
            progress.set_postfix_str(f"Compress {bp.name}/{bp.lvl}")
            for f, data in enumerate(files):
                t0 = time.perf_counter_ns()
                out = bp.compress(data, bp.lvl)
                t1 = time.perf_counter_ns()
                C_ns[i, r] += t1 - t0
                if r == 0:
                    bp.compdata.append(out)
                else:
                    assert out == bp.compdata[f], (
                        f"Compressed data changed between rounds for {bp.name}, level={bp.lvl}"
                    )
            progress.set_postfix_str(f"Decompress {bp.name}/{bp.lvl}")
            for file in bp.compdata:
                t0 = time.perf_counter_ns()
                _ = bp.decompress(file)
                t1 = time.perf_counter_ns()
                D_ns[i, r] += t1 - t0
        progress.set_postfix_str("Benchmark Done")
    gc.enable()
    Cns, Dns = np.median(C_ns, 1), np.median(D_ns, 1)
    for i, bp in enumerate(bench_points):
        bp.comp_time_ns = int(Cns[i])
        bp.decomp_time_ns = int(Dns[i])
    return bench_points, files, orig_total


def main():
    parser = argparse.ArgumentParser(description="Compression benchmark.")
    parser.add_argument("folder", type=str, help="Folder to benchmark")
    parser.add_argument("--rounds", type=int, default=5, help="Number of rounds")
    parser.add_argument(
        "--outdir", type=str, default=".", help="Output directory for PNGs"
    )
    args = parser.parse_args()
    folder = Path(args.folder)
    rounds = args.rounds
    outdir = Path(args.outdir)
    outdir.mkdir(parents=True, exist_ok=True)

    bench_points, files, orig_total = run_benchmark(folder, rounds)

    plot_xy(
        bench_points,
        "comp_time_ns",
        f"Compression — {len(files)} files, total {orig_total / 1_048_576:.2f} MiB (median of {rounds} rounds)",
        "Compressed size (% of original)",
        "Compression speed (Mbit/s)",
        "Time (ms)",
        orig_total,
        outdir / "compression.png",
    )
    plot_xy(
        bench_points,
        "decomp_time_ns",
        f"Decompression — {len(files)} files, total {orig_total / 1_048_576:.2f} MiB (median of {rounds} rounds)",
        "Compressed size (% of original)",
        "Decompression speed (Mbit/s)",
        "Time (ms)",
        orig_total,
        outdir / "decompression.png",
    )
    print(f"Saved plots to {outdir}")
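Since ALGS maps each algorithm name to a list of levels and a pair of compress/decompress callables with a fixed signature, another codec can be benchmarked by simply adding an entry. A hedged sketch using the standard library's lzma module (not part of this commit):

```python
# Hypothetical extension (not in this commit): benchmark xz/lzma as well.
import lzma

from compression_benchmark.bench import ALGS

ALGS["lzma"] = {
    "levels": [0, 1, 3, 6, 9],                     # lzma presets range from 0 to 9
    "c": lambda b, q: lzma.compress(b, preset=q),  # same (bytes, level) -> bytes shape
    "d": lambda b: lzma.decompress(b),
}
```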
decompression.png (new binary file, 216 KiB, not shown)
pyproject.toml (new file)
@@ -0,0 +1,19 @@
[project]
name = "compression-benchmark"
version = "0.1.0"
description = "Benchmark compression algorithms (zstd, brotli, gzip) on your own files."
authors = [
    { name = "Leo Vasanko" }
]
license = { text = "MIT" }
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
    "zstandard",
    "brotli",
    "matplotlib",
    "tqdm",
    "numpy"
]
[project.scripts]
compression-benchmark = "compression_benchmark.bench:main"