Combine the two generator scripts into one that also reads ALIGNMENT and RATE from C sources.

This commit is contained in:
Leo Vasanko
2025-11-08 13:20:15 -06:00
parent d4f8be69ed
commit 751a929836
11 changed files with 266 additions and 314 deletions

View File

@@ -74,16 +74,10 @@ This creates files in the `dist/` directory.
## Code Generation
The Python modules are generated from templates. If you modify the core implementation in `pyaegis/aegis256x4.py`, regenerate the other variants:
The Python modules and CFFI definitions are generated from C sources and templates. If you modify the core implementation in `pyaegis/aegis256x4.py` or update libaegis headers, regenerate all files:
```fish
python tools/gen_modules.py
```
If you update libaegis headers, regenerate the CFFI definitions:
```fish
python tools/gen_cdef.py
python tools/generate.py
```
## Troubleshooting

View File

@@ -1,5 +1,5 @@
"""AEGIS-128L"""
# All modules are generated from aegis128l.py by tools/gen_modules.py!
# All modules are generated from aegis256x4.py by tools/generate.py!
# DO NOT EDIT OTHER ALGORITHM FILES MANUALLY!
import errno

View File

@@ -1,5 +1,5 @@
"""AEGIS-128X2"""
# All modules are generated from aegis128x2.py by tools/gen_modules.py!
# All modules are generated from aegis256x4.py by tools/generate.py!
# DO NOT EDIT OTHER ALGORITHM FILES MANUALLY!
import errno

View File

@@ -1,5 +1,5 @@
"""AEGIS-128X4"""
# All modules are generated from aegis128x4.py by tools/gen_modules.py!
# All modules are generated from aegis256x4.py by tools/generate.py!
# DO NOT EDIT OTHER ALGORITHM FILES MANUALLY!
import errno

View File

@@ -1,5 +1,5 @@
"""AEGIS-256"""
# All modules are generated from aegis256.py by tools/gen_modules.py!
# All modules are generated from aegis256x4.py by tools/generate.py!
# DO NOT EDIT OTHER ALGORITHM FILES MANUALLY!
import errno

View File

@@ -1,5 +1,5 @@
"""AEGIS-256X2"""
# All modules are generated from aegis256x2.py by tools/gen_modules.py!
# All modules are generated from aegis256x4.py by tools/generate.py!
# DO NOT EDIT OTHER ALGORITHM FILES MANUALLY!
import errno

View File

@@ -1,5 +1,5 @@
"""AEGIS-256X4"""
# All modules are generated from aegis256x4.py by tools/gen_modules.py!
# All modules are generated from aegis256x4.py by tools/generate.py!
# DO NOT EDIT OTHER ALGORITHM FILES MANUALLY!
import errno

View File

@@ -1,4 +1,4 @@
/* This file is generated with tools/gen_cdef.py. Do not edit. */
/* This file is generated with tools/generate.py. Do not edit. */
typedef unsigned char uint8_t;
typedef unsigned long size_t;

View File

@@ -1,168 +0,0 @@
#!/usr/bin/env python3
"""Generate CFFI cdef string from libaegis headers.
This script parses the C header files and extracts function declarations,
typedefs, and struct definitions to generate the cdef() string needed by CFFI.
"""
import pathlib
import re
import sys
def preprocess_content(content: str) -> str:
    """Strip everything from a C header that is not a declaration.

    Removes, in this order: block comments, line comments, preprocessor
    directives (including backslash-continued ones), the opening of any
    ``extern "C" {`` block, and lines that consist solely of ``}``.

    Args:
        content: Raw text of a C header file.

    Returns:
        The header text with only declarations and whitespace left.
    """
    # A block comment becomes a single space so that the tokens on either
    # side of it cannot fuse together.
    content = re.sub(r"/\*.*?\*/", " ", content, flags=re.DOTALL)
    content = re.sub(r"//.*$", "", content, flags=re.MULTILINE)
    # A directive may continue over several physical lines through a
    # trailing backslash.  Consume all of them; otherwise the body of a
    # multi-line macro would survive and be mistaken for declarations.
    content = re.sub(
        r"^\s*#(?:[^\n]*\\\r?\n)*[^\n]*$", "", content, flags=re.MULTILINE
    )
    content = re.sub(r'extern\s+"C"\s*\{', "", content)
    # A line holding nothing but "}" is treated as the end of the
    # extern "C" block.  A struct whose closing brace shares its line with
    # the typedef name is not affected.
    content = re.sub(r"(?:^|\n)\s*\}\s*(?:\n|$)", "\n", content, flags=re.MULTILINE)
    return content
def clean_declaration(text: str) -> str:
    """Reduce one C declaration to a form suitable for a CFFI cdef.

    Three things happen:

    * every ``__attribute__(...)`` annotation is removed, including ones
      whose arguments contain their own parentheses;
    * inside a ``typedef struct`` the ``CRYPTO_ALIGN(N) uint8_t opaque[M];``
      field is replaced by ``...;``; everywhere else ``CRYPTO_ALIGN(N)`` is
      simply dropped;
    * all runs of whitespace, newlines included, collapse to one space.

    Args:
        text: Source text of a single declaration.

    Returns:
        The cleaned declaration on a single line.
    """
    # Match parentheses by depth so that __attribute__((aligned(16))) is
    # removed as a whole.  A flat "[^()]*" pattern cannot cross the inner
    # pair and would leave such an attribute in the output.
    marker = re.compile(r"__attribute__\s*\(")
    kept = []
    cursor = 0
    while True:
        found = marker.search(text, cursor)
        if found is None:
            break
        depth = 1
        end = found.end()
        while depth and end < len(text):
            if text[end] == "(":
                depth += 1
            elif text[end] == ")":
                depth -= 1
            end += 1
        if depth:
            # Unbalanced parentheses: keep the remainder untouched rather
            # than guessing where the attribute ends.
            break
        kept.append(text[cursor:found.start()])
        cursor = end
    kept.append(text[cursor:])
    text = "".join(kept)

    if "CRYPTO_ALIGN" in text and "typedef struct" in text:
        # "...;" marks the struct as partially declared, so CFFI takes the
        # layout from the C compiler instead of computing it itself.
        text = re.sub(
            r"CRYPTO_ALIGN\s*\(\s*\d+\s*\)\s+uint8_t\s+opaque\[\d+\];", "...;", text
        )
    else:
        text = re.sub(r"CRYPTO_ALIGN\s*\(\s*\d+\s*\)", "", text)

    # Splitting on any whitespace and re-joining normalizes each line and
    # joins the non-empty ones with a single space.
    return " ".join(text.split())
def extract_declarations(header_path: pathlib.Path) -> list[str]:
    """Collect struct typedefs and aegis function declarations from a header.

    The header is read as UTF-8 and preprocessed first.  Struct typedefs are
    listed before functions.  A function is kept only when its cleaned text
    contains "aegis" (case-insensitive).

    Args:
        header_path: Path of the C header to scan.

    Returns:
        Cleaned declarations in the order described above.
    """
    source = preprocess_content(header_path.read_text(encoding="utf-8"))

    struct_re = re.compile(
        r"typedef\s+struct\s+\w+\s*\{[^}]+\}\s*\w+\s*;", re.DOTALL
    )
    # Only functions returning int, void or size_t are recognized.
    function_re = re.compile(
        r"((?:const\s+)?(?:int|void|size_t)\s+\w+\s*\([^;]+?\)\s*;)", re.DOTALL
    )

    structs = [clean_declaration(hit.group(0)) for hit in struct_re.finditer(source)]
    functions = [
        clean_declaration(hit.group(0)) for hit in function_re.finditer(source)
    ]

    found = [item for item in structs if item]
    found += [item for item in functions if item and "aegis" in item.lower()]
    return found
def format_declaration(decl: str, max_width: int = 100) -> str:
    """Wrap an over-long function declaration at its parameter boundaries.

    A declaration that fits within ``max_width`` is returned unchanged.  A
    longer one of the form ``prefix(params)suffix`` with at least two
    comma-separated parameters is rewritten with one parameter per line,
    continuation lines being indented to the column after the opening
    parenthesis.  Anything else is returned unchanged.

    Args:
        decl: A single-line declaration.
        max_width: Longest line accepted without wrapping.

    Returns:
        The declaration, possibly spread over several lines.
    """
    if len(decl) <= max_width:
        return decl

    # The declaration is already known to be too wide at this point.  No
    # further length test is made: one that looks only at the prefix and
    # the parameters would let a line escape wrapping when just the suffix
    # pushes it past the limit.
    match = re.match(r"(.*?\s+\w+\s*)\((.*)\)(.*)", decl)
    if match is None:
        return decl

    prefix, params, suffix = match.groups()
    param_list = [p.strip() for p in params.split(",")]
    if len(param_list) < 2:
        # Nothing to break on.
        return decl

    joiner = ",\n" + " " * (len(prefix) + 1)
    return f"{prefix}({joiner.join(param_list)}){suffix}"
def generate_cdef(include_dir: pathlib.Path) -> str:
    """Build the text of the CFFI cdef header from the libaegis headers.

    The output starts with a "generated" banner and the two base typedefs.
    Each header that exists then contributes a ``/* name */`` comment, its
    formatted declarations and a blank line.  A missing header produces a
    warning on stderr and is otherwise ignored.

    Args:
        include_dir: Directory holding the ``aegis*.h`` headers.

    Returns:
        The complete cdef text, lines joined with ``\\n``.
    """
    # Headers are processed in exactly this order; aegis.h comes first.
    header_names = (
        "aegis.h",
        "aegis128l.h",
        "aegis128x2.h",
        "aegis128x4.h",
        "aegis256.h",
        "aegis256x2.h",
        "aegis256x4.h",
    )
    out = [
        "/* This file is generated with tools/gen_cdef.py. Do not edit. */",
        "",
        "typedef unsigned char uint8_t;",
        "typedef unsigned long size_t;",
        "",
    ]
    for name in header_names:
        path = include_dir / name
        if path.exists():
            out.append(f"/* {name} */")
            out.extend(format_declaration(d) for d in extract_declarations(path))
            out.append("")
        else:
            print(f"Warning: {name} not found", file=sys.stderr)
    return "\n".join(out)
def main() -> int:
    """Write ``pyaegis/aegis_cdef.h`` from the headers under ``libaegis``.

    Paths are resolved relative to the parent of this script's directory.

    Returns:
        0 on success, 1 when the include directory does not exist.
    """
    project_root = pathlib.Path(__file__).parent.parent
    headers = project_root / "libaegis" / "src" / "include"
    if headers.exists():
        text = generate_cdef(headers)
        package_dir = project_root / "pyaegis"
        package_dir.mkdir(exist_ok=True)
        target = package_dir / "aegis_cdef.h"
        target.write_text(text, encoding="utf-8")
        print(f"Generated: {target}", file=sys.stderr)
        return 0

    print(f"Include directory not found: {headers}", file=sys.stderr)
    return 1


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -1,131 +0,0 @@
#!/usr/bin/env python3
"""
Regenerate aegis*.py modules from the canonical template aegis256x4.py.
Changes per variant:
- Replace module name (aegis256x4 -> target)
- Replace label (AEGIS-256X4 -> target label like AEGIS-128L)
- Replace only the ALIGNMENT = <int> value
- Replace only the RATE = <int> value
We do not touch alloc_aligned(...) calls or any code formatting. Blank lines
after ALIGNMENT are preserved.
"""
import pathlib
import re
import sys
# Identifiers that mark the template inside its own source text.
TEMPLATE_NAME = "aegis256x4"
TEMPLATE_LABEL = "AEGIS-256X4"

# Filesystem layout, resolved from the parent of this script's directory.
ROOT = pathlib.Path(__file__).parent.parent
AEGIS_DIR = ROOT / "pyaegis"
TEMPLATE = AEGIS_DIR / f"{TEMPLATE_NAME}.py"

# ALIGNMENT value per variant.  The template variant is listed as well;
# the generation loop skips it by name.
VARIANT_ALIGN = dict(
    aegis256=16,
    aegis256x2=32,
    aegis256x4=64,
    aegis128l=32,
    aegis128x2=64,
    aegis128x4=64,
)

# RATE value per variant.
VARIANT_RATE = dict(
    aegis256=16,
    aegis256x2=32,
    aegis256x4=64,
    aegis128l=32,
    aegis128x2=64,
    aegis128x4=128,
)

# Each pattern matches a whole "NAME = <digits>" line and captures the text
# before the number, the number itself, and the trailing whitespace.
ALIGNMENT_LINE_RE = re.compile(r"^(ALIGNMENT\s*=\s*)(\d+)(\s*)$", re.MULTILINE)
RATE_LINE_RE = re.compile(r"^(RATE\s*=\s*)(\d+)(\s*)$", re.MULTILINE)
def set_alignment_only(text: str, value: int) -> str:
    """Swap the number on every ``ALIGNMENT = <int>`` line for ``value``.

    The text before the number and the whitespace captured after it are
    re-emitted unchanged, so the surrounding layout is preserved.

    Args:
        text: Module source to patch.
        value: New ALIGNMENT value.

    Returns:
        The patched source.
    """
    return ALIGNMENT_LINE_RE.sub(
        lambda hit: hit.group(1) + f"{value}" + hit.group(3), text
    )
def set_rate_only(text: str, value: int) -> str:
    """Swap the number on every ``RATE = <int>`` line for ``value``.

    The text before the number and the whitespace captured after it are
    re-emitted unchanged, so the surrounding layout is preserved.

    Args:
        text: Module source to patch.
        value: New RATE value.

    Returns:
        The patched source.
    """
    return RATE_LINE_RE.sub(
        lambda hit: hit.group(1) + f"{value}" + hit.group(3), text
    )
def algo_label(name: str) -> str:
    """Turn a module name such as ``aegis256x4`` into ``AEGIS-256X4``.

    Args:
        name: Module name; must begin with ``aegis``.

    Returns:
        ``"AEGIS-"`` followed by the upper-cased remainder of the name.

    Raises:
        ValueError: If ``name`` does not begin with ``aegis``.
    """
    prefix = "aegis"
    if name[: len(prefix)] != prefix:
        raise ValueError(f"Unexpected algorithm name: {name}")
    tail = name[len(prefix):]
    return f"AEGIS-{tail.upper()}"
def generate_variant(template_src: str, variant: str) -> str:
    """Derive the source of one variant module from the template source.

    Args:
        template_src: Full text of the template module.
        variant: Module name of the variant to produce, e.g. ``aegis128l``.

    Returns:
        The template text with the module name, the label, ALIGNMENT and
        RATE replaced by the variant's values.
    """
    # 1) lowercase module name, 2) uppercase label
    s = template_src.replace(TEMPLATE_NAME, variant)
    s = s.replace(TEMPLATE_LABEL, algo_label(variant))
    # The blanket rename in step 1 also hits the "generated from" banner,
    # which would then claim each variant was generated from itself.
    # Point the banner back at the real template.
    s = re.sub(
        r"^(# All modules are generated from )\w+(\.py by tools/gen_modules\.py!)",
        rf"\g<1>{TEMPLATE_NAME}\g<2>",
        s,
        flags=re.MULTILINE,
    )
    # 3) and 4): per-variant constants; 64 is used for a variant missing
    # from the tables.
    s = set_alignment_only(s, VARIANT_ALIGN.get(variant, 64))
    s = set_rate_only(s, VARIANT_RATE.get(variant, 64))
    return s
def main() -> int:
    """Regenerate every variant module from the template.

    Returns:
        0 on success, 2 when the template file is missing, 3 when the
        template lacks the identifiers the rename relies on.
    """
    if not TEMPLATE.exists():
        print(f"Template not found: {TEMPLATE}", file=sys.stderr)
        return 2

    template_src = TEMPLATE.read_text(encoding="utf-8")

    # Refuse to run on a template that does not carry both identifiers.
    has_identifiers = TEMPLATE_NAME in template_src and TEMPLATE_LABEL in template_src
    if not has_identifiers:
        print(
            "Template file does not contain expected identifiers; aborting.",
            file=sys.stderr,
        )
        return 3

    # The template itself is never overwritten.
    targets = [name for name in VARIANT_ALIGN if name != TEMPLATE_NAME]
    written = []
    for variant in targets:
        destination = AEGIS_DIR / f"{variant}.py"
        destination.write_text(
            generate_variant(template_src, variant), encoding="utf-8"
        )
        written.append(destination.relative_to(ROOT))

    print("Generated modules:")
    for rel in written:
        print(" -", rel)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

257
tools/generate.py Normal file
View File

@@ -0,0 +1,257 @@
"""Generate CFFI cdef and Python modules from libaegis C sources."""
import pathlib
import re
import sys
from typing import Dict, Tuple
def preprocess_content(content: str) -> str:
    """Strip everything from a C header that is not a declaration.

    Removes, in this order: block comments, line comments, preprocessor
    directives (including backslash-continued ones), the opening of any
    ``extern "C" {`` block, and lines that consist solely of ``}``.

    Args:
        content: Raw text of a C header file.

    Returns:
        The header text with only declarations and whitespace left.
    """
    # Block comments turn into one space so neighbouring tokens stay apart.
    content = re.sub(r"/\*.*?\*/", " ", content, flags=re.DOTALL)
    content = re.sub(r"//.*$", "", content, flags=re.MULTILINE)
    # Follow trailing-backslash continuations so that the body of a
    # multi-line macro is removed together with its "#define" line.
    content = re.sub(
        r"^\s*#(?:[^\n]*\\\r?\n)*[^\n]*$", "", content, flags=re.MULTILINE
    )
    content = re.sub(r'extern\s+"C"\s*\{', "", content)
    # A line containing only "}" is taken to close the extern "C" block.
    content = re.sub(r"(?:^|\n)\s*\}\s*(?:\n|$)", "\n", content, flags=re.MULTILINE)
    return content
def clean_declaration(text: str) -> str:
    """Reduce one C declaration to a form suitable for a CFFI cdef.

    * Every ``__attribute__(...)`` annotation is removed, including ones
      whose arguments contain their own parentheses.
    * Inside a ``typedef struct`` the ``CRYPTO_ALIGN(N) uint8_t opaque[M];``
      field becomes ``...;``; elsewhere ``CRYPTO_ALIGN(N)`` is dropped.
    * All whitespace, newlines included, collapses to single spaces.

    Args:
        text: Source text of a single declaration.

    Returns:
        The cleaned declaration on a single line.
    """
    # Track parenthesis depth so that __attribute__((aligned(16))) goes
    # away entirely.  A "[^()]*" regex stops at the inner pair and would
    # leave that attribute behind.
    marker = re.compile(r"__attribute__\s*\(")
    kept = []
    cursor = 0
    while (found := marker.search(text, cursor)) is not None:
        depth = 1
        end = found.end()
        while depth and end < len(text):
            if text[end] == "(":
                depth += 1
            elif text[end] == ")":
                depth -= 1
            end += 1
        if depth:
            # Unbalanced parentheses: leave the rest as it is.
            break
        kept.append(text[cursor:found.start()])
        cursor = end
    kept.append(text[cursor:])
    text = "".join(kept)

    if "CRYPTO_ALIGN" in text and "typedef struct" in text:
        # "...;" declares the struct as partial, leaving its real layout
        # to the C compiler.
        text = re.sub(
            r"CRYPTO_ALIGN\s*\(\s*\d+\s*\)\s+uint8_t\s+opaque\[\d+\];", "...;", text
        )
    else:
        text = re.sub(r"CRYPTO_ALIGN\s*\(\s*\d+\s*\)", "", text)

    return " ".join(text.split())
def extract_declarations(header_path: pathlib.Path) -> list[str]:
    """Collect struct typedefs and aegis function declarations from a header.

    Struct typedefs come first, then functions.  A function is kept only
    when its cleaned text contains "aegis" (case-insensitive).

    Args:
        header_path: Path of the C header, read as UTF-8.

    Returns:
        Cleaned declarations in the order described above.
    """
    raw = header_path.read_text(encoding="utf-8")
    source = preprocess_content(raw)
    found = []

    struct_re = re.compile(
        r"typedef\s+struct\s+\w+\s*\{[^}]+\}\s*\w+\s*;", re.DOTALL
    )
    for hit in struct_re.finditer(source):
        cleaned = clean_declaration(hit.group(0))
        if cleaned:
            found.append(cleaned)

    # Only functions returning int, void or size_t are recognized.
    function_re = re.compile(
        r"((?:const\s+)?(?:int|void|size_t)\s+\w+\s*\([^;]+?\)\s*;)", re.DOTALL
    )
    for hit in function_re.finditer(source):
        cleaned = clean_declaration(hit.group(0))
        if not cleaned:
            continue
        if "aegis" in cleaned.lower():
            found.append(cleaned)

    return found
def format_declaration(decl: str, max_width: int = 100) -> str:
    """Wrap an over-long function declaration at its parameter boundaries.

    A declaration that fits within ``max_width`` is returned unchanged.  A
    longer one of the form ``prefix(params)suffix`` with at least two
    comma-separated parameters gets one parameter per line, continuation
    lines being indented to the column after the opening parenthesis.
    Anything else is returned unchanged.

    Args:
        decl: A single-line declaration.
        max_width: Longest line accepted without wrapping.

    Returns:
        The declaration, possibly spread over several lines.
    """
    if len(decl) <= max_width:
        return decl

    # The line is known to be too wide here.  A second length test on the
    # prefix and parameters alone would ignore the suffix and let a
    # declaration that is only just over the limit through unwrapped.
    if not (match := re.match(r"(.*?\s+\w+\s*)\((.*)\)(.*)", decl)):
        return decl

    prefix, params, suffix = match.groups()
    param_list = [p.strip() for p in params.split(",")]
    if len(param_list) < 2:
        # A single parameter offers no place to break.
        return decl

    joiner = ",\n" + " " * (len(prefix) + 1)
    return f"{prefix}({joiner.join(param_list)}){suffix}"
def generate_cdef(include_dir: pathlib.Path) -> str:
    """Build the text of the CFFI cdef header from the libaegis headers.

    The output starts with a "generated" banner and the two base typedefs.
    Each header that exists then contributes a ``/* name */`` comment, its
    formatted declarations and a blank line.  A missing header produces a
    warning on stderr and is otherwise ignored.

    Args:
        include_dir: Directory holding the ``aegis*.h`` headers.

    Returns:
        The complete cdef text, lines joined with ``\\n``.
    """
    header_names = (
        "aegis.h",
        "aegis128l.h",
        "aegis128x2.h",
        "aegis128x4.h",
        "aegis256.h",
        "aegis256x2.h",
        "aegis256x4.h",
    )
    out = [
        "/* This file is generated with tools/generate.py. Do not edit. */",
        "",
        "typedef unsigned char uint8_t;",
        "typedef unsigned long size_t;",
        "",
    ]
    for name in header_names:
        path = include_dir / name
        if path.exists():
            out.append(f"/* {name} */")
            out.extend(format_declaration(d) for d in extract_declarations(path))
            out.append("")
        else:
            print(f"Warning: {name} not found", file=sys.stderr)
    return "\n".join(out)
def extract_constants(common_h_path: pathlib.Path) -> Tuple[int, int]:
    """Read the ALIGNMENT and RATE macro values from a C header.

    Both ``#define NAME 32`` and the indented-directive spelling
    ``#    define NAME 32`` are recognized.  Only decimal literals are
    accepted as values.

    Args:
        common_h_path: Path of the header that defines both macros; it is
            read as UTF-8.

    Returns:
        ``(alignment, rate)`` as integers.

    Raises:
        ValueError: If either macro cannot be found.
    """
    content = common_h_path.read_text(encoding="utf-8")
    # "[ \t]*" between "#" and "define" admits whitespace after the hash,
    # which C allows and which a plain "#define" pattern would reject.
    align_match = re.search(
        r"^\s*#[ \t]*define\s+ALIGNMENT\s+(\d+)", content, re.MULTILINE
    )
    rate_match = re.search(r"^\s*#[ \t]*define\s+RATE\s+(\d+)", content, re.MULTILINE)
    if not align_match or not rate_match:
        raise ValueError(
            f"Could not extract ALIGNMENT and/or RATE from {common_h_path}"
        )
    return int(align_match.group(1)), int(rate_match.group(1))
def extract_all_constants(libaegis_src_dir: pathlib.Path) -> Dict[str, Tuple[int, int]]:
    """Gather ``(ALIGNMENT, RATE)`` for every AEGIS variant.

    For each variant the file ``<variant>/<variant>_common.h`` below
    ``libaegis_src_dir`` is consulted.  A variant whose header is missing,
    or whose constants cannot be extracted, is reported on stderr and left
    out of the result.

    Args:
        libaegis_src_dir: The libaegis ``src`` directory.

    Returns:
        Mapping of variant name to ``(alignment, rate)``.
    """
    found = {}
    for variant in (
        "aegis128l",
        "aegis128x2",
        "aegis128x4",
        "aegis256",
        "aegis256x2",
        "aegis256x4",
    ):
        header = libaegis_src_dir / variant / f"{variant}_common.h"
        if header.exists():
            # Best effort: one bad header must not stop the other variants.
            try:
                alignment, rate = extract_constants(header)
                found[variant] = (alignment, rate)
            except Exception as e:
                print(
                    f"Error extracting constants from {variant}: {e}", file=sys.stderr
                )
        else:
            print(f"Warning: {header} not found, skipping {variant}", file=sys.stderr)
    return found
# Each pattern matches a whole "NAME = <digits>" line and captures the text
# before the number, the number itself, and the trailing whitespace.
ALIGNMENT_RE = re.compile(r"^(ALIGNMENT\s*=\s*)(\d+)(\s*)$", re.MULTILINE)
RATE_RE = re.compile(r"^(RATE\s*=\s*)(\d+)(\s*)$", re.MULTILINE)


def replace_constant(pattern: re.Pattern, text: str, value: int) -> str:
    """Replace the number captured by ``pattern`` with ``value``.

    Args:
        pattern: A regex with three groups, the second being the number.
        text: Source text to patch.
        value: The new number.

    Returns:
        ``text`` with group 2 of every match swapped for ``value``; groups
        1 and 3 are re-emitted unchanged.
    """

    def _swap_number(hit: re.Match) -> str:
        before, after = hit.group(1), hit.group(3)
        return f"{before}{value}{after}"

    return pattern.sub(_swap_number, text)
def algo_label(name: str) -> str:
    """Return ``"AEGIS-"`` plus the upper-cased name minus its first five characters.

    For a module name such as ``aegis128l`` this yields ``AEGIS-128L``.
    The prefix itself is not checked.
    """
    tail = name[len("aegis"):]
    return f"AEGIS-{tail.upper()}"
def generate_variant(template_src: str, variant: str, alignment: int, rate: int) -> str:
    """Derive the source of one variant module from the template source.

    Args:
        template_src: Full text of the ``aegis256x4`` template module.
        variant: Module name of the variant, e.g. ``aegis128l``.
        alignment: Value to write on the ``ALIGNMENT = ...`` line.
        rate: Value to write on the ``RATE = ...`` line.

    Returns:
        The variant's module source.
    """
    renamed = template_src.replace("aegis256x4", variant)
    result = renamed.replace("AEGIS-256X4", algo_label(variant))

    # The rename above also touched the banner; make it name the template
    # again rather than the variant.
    banner = re.compile(
        r"# All modules are generated from \w+\.py by tools/generate\.py!"
    )
    result = banner.sub(
        "# All modules are generated from aegis256x4.py by tools/generate.py!",
        result,
    )

    for pattern, value in ((ALIGNMENT_RE, alignment), (RATE_RE, rate)):
        result = replace_constant(pattern, result, value)
    return result
def generate_python_modules(
    template_path: pathlib.Path,
    output_dir: pathlib.Path,
    constants: Dict[str, Tuple[int, int]],
) -> Tuple[list[pathlib.Path], list[pathlib.Path]]:
    """Write one module per variant, touching only files whose text changes.

    The ``aegis256x4`` entry is handled specially: the template text only
    has its ALIGNMENT and RATE lines refreshed.  Every other variant goes
    through ``generate_variant``.

    Args:
        template_path: Path of the template module.
        output_dir: Directory receiving ``<variant>.py`` files.
        constants: ``(alignment, rate)`` per variant name.

    Returns:
        ``(updated, unchanged)`` lists of destination paths.

    Raises:
        FileNotFoundError: If the template does not exist.
        ValueError: If the template lacks the expected identifiers.
    """
    if not template_path.exists():
        raise FileNotFoundError(f"Template not found: {template_path}")

    template_src = template_path.read_text(encoding="utf-8")
    if not ("aegis256x4" in template_src and "AEGIS-256X4" in template_src):
        raise ValueError("Template file does not contain expected identifiers")

    updated, unchanged = [], []
    for variant, (alignment, rate) in constants.items():
        dst = output_dir / f"{variant}.py"
        if variant != "aegis256x4":
            new_content = generate_variant(template_src, variant, alignment, rate)
        else:
            new_content = replace_constant(
                RATE_RE, replace_constant(ALIGNMENT_RE, template_src, alignment), rate
            )

        # Compare first so an up-to-date file is not rewritten.
        is_current = dst.exists() and dst.read_text(encoding="utf-8") == new_content
        if is_current:
            unchanged.append(dst)
            continue
        dst.write_text(new_content, encoding="utf-8")
        updated.append(dst)

    return updated, unchanged
def main() -> int:
    """Run the three generation steps and report progress on stderr.

    Step 1 extracts ALIGNMENT/RATE from the C sources, step 2 writes
    ``pyaegis/aegis_cdef.h`` if its content changed, step 3 regenerates the
    Python modules.  Paths are resolved relative to the parent of this
    script's directory.

    Returns:
        0 on success, 1 if a required directory is missing, no constants
        could be extracted, or module generation raised.
    """
    root = pathlib.Path(__file__).parent.parent
    libaegis_src_dir = root / "libaegis" / "src"
    include_dir = libaegis_src_dir / "include"
    pyaegis_dir = root / "pyaegis"

    # Test the parent directory first.  With the include directory checked
    # first, a missing source directory could never be reported, because
    # its child would already have failed the test.
    if not libaegis_src_dir.exists():
        print(f"Source directory not found: {libaegis_src_dir}", file=sys.stderr)
        return 1
    if not include_dir.exists():
        print(f"Include directory not found: {include_dir}", file=sys.stderr)
        return 1

    print("Step 1: Extracting constants from C sources...", file=sys.stderr)
    constants = extract_all_constants(libaegis_src_dir)
    if not constants:
        print("Error: No constants extracted", file=sys.stderr)
        return 1

    print("Step 2: Generating CFFI cdef header...", file=sys.stderr)
    pyaegis_dir.mkdir(exist_ok=True)
    cdef_path = pyaegis_dir / "aegis_cdef.h"
    cdef_content = generate_cdef(include_dir)
    if cdef_path.exists() and cdef_path.read_text(encoding="utf-8") == cdef_content:
        print(f" - No changes to {cdef_path}", file=sys.stderr)
    else:
        cdef_path.write_text(cdef_content, encoding="utf-8")
        print(f" - Updated {cdef_path}", file=sys.stderr)

    print("Step 3: Generating Python modules...", file=sys.stderr)
    try:
        updated, unchanged = generate_python_modules(
            pyaegis_dir / "aegis256x4.py", pyaegis_dir, constants
        )
    except Exception as e:
        # Top-level boundary: report the failure and signal it through the
        # exit status instead of a traceback.
        print(f"Error generating Python modules: {e}", file=sys.stderr)
        return 1

    for p in updated:
        print(f" - {p.relative_to(root)}", file=sys.stderr)
    if unchanged:
        # A single unchanged module is shown relative to the root, the same
        # way updated modules are.
        print(
            " - No changes to",
            f"{len(unchanged)} modules"
            if len(unchanged) > 1
            else unchanged[0].relative_to(root),
            file=sys.stderr,
        )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())