# Source code for uclchem.advanced.generate_metadata
"""Utility to regenerate the ``fortran_parameters`` section of ``fortran_metadata.yaml``.
Parses all Fortran source files in ``src/fortran_src/`` and extracts module-scope
``PARAMETER`` declarations, then writes the result back into the YAML file. The
``internal_parameters`` and ``file_path_parameters`` sections are left untouched as
they require manual curation.
Usage::

    uclchem-generate-metadata            # update YAML in-place
    uclchem-generate-metadata --dry-run  # print diff, do not write
    uclchem-generate-metadata --check    # exit 1 if YAML would change (CI use)
"""
from __future__ import annotations
import argparse
import difflib
import re
import sys
from pathlib import Path
import yaml
from uclchem.advanced.worker_state import _MODULE_NAMES
from uclchem.utils import UCLCHEM_ROOT_DIR
# Fortran sources live in a ``fortran_src`` directory next to ``UCLCHEM_ROOT_DIR``.
_FORTRAN_SRC = UCLCHEM_ROOT_DIR.parent.joinpath("fortran_src")
# The metadata YAML is stored in the same directory as this module.
_METADATA_PATH = Path(__file__).with_name("fortran_metadata.yaml")
# Fortran types that can have PARAMETER attribute
_TYPE_RE = re.compile(
r"^\s*(?:INTEGER|REAL|LOGICAL|CHARACTER|COMPLEX|DOUBLE\s+PRECISION)"
r"[^:]*,\s*PARAMETER\s*::\s*(.+)",
re.IGNORECASE,
)
# Lines that increase nesting depth (we only want module-scope PARAMETERs)
_NEST_OPEN = re.compile(r"^\s*(?:SUBROUTINE|FUNCTION|CONTAINS)\b", re.IGNORECASE)
_NEST_CLOSE = re.compile(r"^\s*END\s+(?:SUBROUTINE|FUNCTION)\b", re.IGNORECASE)
# Fortran MODULE declaration
_MODULE_RE = re.compile(r"^\s*MODULE\s+(\w+)\s*$", re.IGNORECASE)
def _strip_comment(line: str) -> str:
"""Remove Fortran inline comment (everything from ``!`` onward).
Parameters
----------
line : str
A single line of Fortran source code.
Returns
-------
str
Line with comments stripped, respecting character literals.
"""
# Respect character literals by scanning manually
in_str = False
quote = ""
for i, ch in enumerate(line):
if in_str:
if ch == quote:
in_str = False
elif ch in {"'", '"'}:
in_str = True
quote = ch
elif ch == "!":
return line[:i]
return line
def _extract_param_names(rhs: str) -> list[str]:
"""Extract variable names from the RHS of a ``PARAMETER ::`` declaration.
Handles comma-separated names with optional array dimensions and initializers::
a = 1.0, b(10) = (/.../) -> ["a", "b"]
Parameters
----------
rhs : str
Right-hand side of a Fortran parameter assignment.
Returns
-------
list[str]
List of parameter names in lowercase.
"""
names: list[str] = []
# Split on commas that are not inside parentheses
depth = 0
current: list[str] = []
for ch in rhs:
if ch == "(":
depth += 1
current.append(ch)
elif ch == ")":
depth -= 1
current.append(ch)
elif ch == "," and depth == 0:
names.append("".join(current).strip())
current = []
else:
current.append(ch)
if current:
names.append("".join(current).strip())
result: list[str] = []
for tok in names:
# Take the part before '(' (array dim) or '=' (initializer)
name = re.split(r"[=(]", tok)[0].strip()
if re.match(r"^\w+$", name):
result.append(name.lower())
return result
# [docs]  (Sphinx viewcode link artifact, not part of the module)
def parse_fortran_parameters(src_dir: Path) -> dict[str, list[str]]:
    """Parse all ``.f90`` files in *src_dir* and return module-scope PARAMETERs.

    Continuation lines are joined before matching: a statement ending in ``&``
    is merged with the following line, whose optional leading ``&`` (with or
    without indentation) is dropped. Blank and comment-only lines inside a
    continued statement are skipped, as Fortran permits them there.

    Only the first ``MODULE`` statement per file whose name is listed in
    ``_MODULE_NAMES`` is used; every PARAMETER found at nesting depth 0 after
    it is attributed to that module.

    Parameters
    ----------
    src_dir : Path
        Path to the directory containing Fortran source files.

    Returns
    -------
    dict[str, list[str]]
        Mapping of f2py module name (lowercase) to sorted list of PARAMETER
        names. Modules without any detected PARAMETER are omitted.
    """
    known_modules = set(_MODULE_NAMES)
    result: dict[str, list[str]] = {}
    for f90 in sorted(src_dir.glob("*.f90")):
        module_name: str | None = None
        params: list[str] = []
        depth = 0  # nesting level; 0 = module scope
        pending: str | None = None  # text of a statement awaiting continuation
        with f90.open(encoding="utf-8", errors="replace") as fh:
            for raw in fh:
                line = _strip_comment(raw).rstrip()
                if pending is not None:
                    fragment = line.lstrip()
                    if not fragment:
                        # Blank/comment-only line inside a continued statement:
                        # keep waiting for the actual continuation.
                        continue
                    # Strip indentation first, then the optional leading '&'.
                    line = pending + fragment.lstrip("&").lstrip()
                    pending = None
                if line.endswith("&"):
                    # This line continues on the next; accumulate and wait.
                    pending = line[:-1].rstrip()
                    continue
                # Look for the MODULE statement until a known module is found.
                if module_name is None:
                    m = _MODULE_RE.match(line)
                    if m:
                        candidate = m.group(1).lower()
                        if candidate in known_modules:
                            module_name = candidate
                if module_name is None:
                    continue
                # Track nesting so we only grab module-scope PARAMETERs
                if _NEST_CLOSE.match(line):
                    depth = max(0, depth - 1)
                elif _NEST_OPEN.match(line):
                    depth += 1
                if depth > 0:
                    continue
                m = _TYPE_RE.match(line)
                if m:
                    params.extend(_extract_param_names(m.group(1)))
        if module_name and params:
            result[module_name] = sorted(set(params))
    return result
def _load_yaml(path: Path) -> dict:
    """Load the YAML document stored at *path*.

    The file is read as UTF-8 rather than the platform's locale encoding, so
    the result does not depend on the machine the tool runs on.

    Parameters
    ----------
    path : Path
        Location of the YAML file.

    Returns
    -------
    dict
        Parsed document, or an empty dict when the file is empty.
    """
    with Path(path).open(encoding="utf-8") as f:
        return yaml.safe_load(f) or {}
def _dump_yaml(data: dict) -> str:
    """Serialize *data* to YAML text.

    Output uses block style, keeps the mapping's key order and emits
    non-ASCII characters unescaped.
    """
    dump_options = {
        "default_flow_style": False,
        "sort_keys": False,
        "allow_unicode": True,
    }
    return yaml.dump(data, **dump_options)
def _merge(existing: dict, detected: dict[str, list[str]]) -> dict:
"""Merge *detected* into the ``fortran_parameters`` section of *existing*.
The ``global`` key and any other hand-maintained keys not present in
*detected* are left untouched. Auto-detected module keys are replaced.
Parameters
----------
existing : dict
Existing metadata dict.
detected : dict[str, list[str]]
Newly detected metadata dict from source parsing.
Returns
-------
dict
New merged dictionary.
"""
merged = dict(existing)
fp: dict = dict(merged.get("fortran_parameters", {}))
fp.update(detected)
merged["fortran_parameters"] = fp
return merged
# [docs]  (Sphinx viewcode link artifact, not part of the module)
def main(argv: list[str] | None = None) -> None:
    """Entry point for ``uclchem-generate-metadata``.

    Parses the Fortran sources, merges the detected PARAMETER names into the
    metadata YAML and, depending on the mode, writes the file, prints a
    unified diff (``--dry-run``), or prints the diff and exits with status 1
    (``--check``). When nothing would change, a message is printed and the
    function returns normally in every mode.

    Parameters
    ----------
    argv : list[str] | None
        Command-line arguments; ``None`` lets ``argparse`` read ``sys.argv``.

    Raises
    ------
    SystemExit
        If the Fortran source directory cannot be located, or with status 1
        in ``--check`` mode when the YAML would change.
    """
    parser = argparse.ArgumentParser(
        description="Regenerate the fortran_parameters section of fortran_metadata.yaml."
    )
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument(
        "--dry-run",
        action="store_true",
        help="Print a unified diff of the changes without writing.",
    )
    mode.add_argument(
        "--check",
        action="store_true",
        help="Exit with status 1 if the YAML would change (useful in CI).",
    )
    args = parser.parse_args(argv)

    # Resolve source tree paths. For editable installs _FORTRAN_SRC already
    # points into the source tree. For non-editable installs it points into
    # site-packages where no .f90 files exist, so fall back to CWD-relative
    # paths so the user can run the CLI from the repo root.
    fortran_src = _FORTRAN_SRC
    metadata_path = _METADATA_PATH
    if not fortran_src.is_dir():
        cwd = Path.cwd()
        cwd_fortran_src = cwd / "src" / "fortran_src"
        cwd_metadata = cwd / "src" / "uclchem" / "advanced" / "fortran_metadata.yaml"
        if cwd_fortran_src.is_dir() and cwd_metadata.exists():
            fortran_src = cwd_fortran_src
            metadata_path = cwd_metadata
        else:
            sys.exit(
                f"ERROR: Fortran source directory not found: {fortran_src}\n"
                "Run this command from the repo root (the directory containing src/),\n"
                "or use an editable install (pip install -e .)."
            )

    detected = parse_fortran_parameters(fortran_src)
    existing = _load_yaml(metadata_path)
    merged = _merge(existing, detected)

    # Compare normalized dumps so formatting differences in the file on disk
    # do not count as changes.
    old_text = _dump_yaml(existing)
    new_text = _dump_yaml(merged)
    if old_text == new_text:
        print("fortran_metadata.yaml is already up to date.")
        return

    if args.dry_run or args.check:
        diff = difflib.unified_diff(
            old_text.splitlines(keepends=True),
            new_text.splitlines(keepends=True),
            fromfile="fortran_metadata.yaml (current)",
            tofile="fortran_metadata.yaml (updated)",
        )
        sys.stdout.writelines(diff)
        if args.check:
            sys.exit(1)
        return

    # The YAML is dumped with allow_unicode=True, so write it as UTF-8
    # explicitly instead of relying on the platform's locale encoding.
    with Path(metadata_path).open("w", encoding="utf-8") as f:
        f.write(new_text)
    print(f"Updated {metadata_path}")
    for mod, names in sorted(detected.items()):
        print(f" {mod}: {names}")
# Run the CLI when this module is executed directly as a script.
if __name__ == "__main__":
    main()