Source code for uclchem.advanced.generate_metadata

"""Utility to regenerate the ``fortran_parameters`` section of ``fortran_metadata.yaml``.

Parses all Fortran source files in ``src/fortran_src/`` and extracts module-scope
``PARAMETER`` declarations, then writes the result back into the YAML file.  The
``internal_parameters`` and ``file_path_parameters`` sections are left untouched as
they require manual curation.

Usage::

    uclchem-generate-metadata            # update YAML in-place
    uclchem-generate-metadata --dry-run  # print diff, do not write
    uclchem-generate-metadata --check    # exit 1 if YAML would change (CI use)

"""

from __future__ import annotations

import argparse
import difflib
import re
import sys
from pathlib import Path

import yaml

from uclchem.advanced.worker_state import _MODULE_NAMES
from uclchem.utils import UCLCHEM_ROOT_DIR

# Default location of the Fortran sources: a ``fortran_src`` directory that is a
# sibling of ``UCLCHEM_ROOT_DIR``.  ``main`` checks whether this directory exists
# and falls back to a CWD-relative path when it does not.
_FORTRAN_SRC = UCLCHEM_ROOT_DIR.parent / "fortran_src"
# The metadata YAML file sits in the same directory as this module.
_METADATA_PATH = Path(__file__).parent / "fortran_metadata.yaml"

# Fortran types that can have PARAMETER attribute
_TYPE_RE = re.compile(
    r"^\s*(?:INTEGER|REAL|LOGICAL|CHARACTER|COMPLEX|DOUBLE\s+PRECISION)"
    r"[^:]*,\s*PARAMETER\s*::\s*(.+)",
    re.IGNORECASE,
)

# Lines that increase nesting depth (we only want module-scope PARAMETERs)
_NEST_OPEN = re.compile(r"^\s*(?:SUBROUTINE|FUNCTION|CONTAINS)\b", re.IGNORECASE)
_NEST_CLOSE = re.compile(r"^\s*END\s+(?:SUBROUTINE|FUNCTION)\b", re.IGNORECASE)

# Fortran MODULE declaration
_MODULE_RE = re.compile(r"^\s*MODULE\s+(\w+)\s*$", re.IGNORECASE)


def _strip_comment(line: str) -> str:
    """Remove Fortran inline comment (everything from ``!`` onward).

    Parameters
    ----------
    line : str
        A single line of Fortran source code.

    Returns
    -------
    str
        Line with comments stripped, respecting character literals.

    """
    # Respect character literals by scanning manually
    in_str = False
    quote = ""
    for i, ch in enumerate(line):
        if in_str:
            if ch == quote:
                in_str = False
        elif ch in {"'", '"'}:
            in_str = True
            quote = ch
        elif ch == "!":
            return line[:i]
    return line


def _extract_param_names(rhs: str) -> list[str]:
    """Extract variable names from the RHS of a ``PARAMETER ::`` declaration.

    Handles comma-separated names with optional array dimensions and initializers::

        a = 1.0, b(10) = (/.../)  ->  ["a", "b"]

    Parameters
    ----------
    rhs : str
        Right-hand side of a Fortran parameter assignment.

    Returns
    -------
    list[str]
        List of parameter names in lowercase.

    """
    names: list[str] = []
    # Split on commas that are not inside parentheses
    depth = 0
    current: list[str] = []
    for ch in rhs:
        if ch == "(":
            depth += 1
            current.append(ch)
        elif ch == ")":
            depth -= 1
            current.append(ch)
        elif ch == "," and depth == 0:
            names.append("".join(current).strip())
            current = []
        else:
            current.append(ch)
    if current:
        names.append("".join(current).strip())

    result: list[str] = []
    for tok in names:
        # Take the part before '(' (array dim) or '=' (initializer)
        name = re.split(r"[=(]", tok)[0].strip()
        if re.match(r"^\w+$", name):
            result.append(name.lower())
    return result


def parse_fortran_parameters(src_dir: Path) -> dict[str, list[str]]:
    """Parse all ``.f90`` files in *src_dir* and return module-scope PARAMETERs.

    Each file is scanned line by line.  Comments are stripped, continued
    statements are joined into one logical line, and only declarations seen
    while the nesting depth is zero are recorded.

    Parameters
    ----------
    src_dir : Path
        Path to the directory containing Fortran source files.

    Returns
    -------
    dict[str, list[str]]
        Mapping of f2py module name (lowercase) to sorted list of PARAMETER
        names.  Files with no known module or no parameters add no entry.

    Notes
    -----
    Continuation handling follows free-form Fortran: a statement ending in
    ``&`` continues on the next non-comment line, which may itself start with
    an optional ``&`` after any indentation.  Blank and comment-only lines
    between the pieces are skipped.

    Only the first ``MODULE`` in each file whose name appears in
    ``_MODULE_NAMES`` is considered; all parameters found in that file are
    attributed to it.

    """
    known_modules = set(_MODULE_NAMES)
    result: dict[str, list[str]] = {}

    for f90 in sorted(src_dir.glob("*.f90")):
        module_name: str | None = None
        params: list[str] = []
        depth = 0  # nesting level; 0 = module scope
        continuation = ""  # text of the statement accumulated so far

        with f90.open(encoding="utf-8", errors="replace") as fh:
            for raw in fh:
                line = _strip_comment(raw).rstrip()

                if continuation:
                    tail = line.lstrip()
                    if not tail:
                        # Blank or comment-only line inside a continued
                        # statement: keep waiting for the real continuation.
                        continue
                    if tail.startswith("&"):
                        # Optional leading '&' marks where the statement resumes.
                        tail = tail[1:].lstrip()
                    line = continuation + tail
                    continuation = ""

                if line.endswith("&"):
                    # Statement continues on a later line; accumulate and wait.
                    continuation = line[:-1].rstrip()
                    continue

                # Detect the MODULE declaration this file contributes to.
                if module_name is None:
                    m = _MODULE_RE.match(line)
                    if m:
                        candidate = m.group(1).lower()
                        if candidate in known_modules:
                            module_name = candidate

                if module_name is None:
                    continue

                # Track nesting so we only grab module-scope PARAMETERs.
                if _NEST_CLOSE.match(line):
                    depth = max(0, depth - 1)
                elif _NEST_OPEN.match(line):
                    depth += 1

                if depth > 0:
                    continue

                m = _TYPE_RE.match(line)
                if m:
                    params.extend(_extract_param_names(m.group(1)))

        if module_name and params:
            result[module_name] = sorted(set(params))

    return result
def _load_yaml(path: Path) -> dict: with Path(path).open() as f: return yaml.safe_load(f) or {} def _dump_yaml(data: dict) -> str: return yaml.dump(data, default_flow_style=False, sort_keys=False, allow_unicode=True) def _merge(existing: dict, detected: dict[str, list[str]]) -> dict: """Merge *detected* into the ``fortran_parameters`` section of *existing*. The ``global`` key and any other hand-maintained keys not present in *detected* are left untouched. Auto-detected module keys are replaced. Parameters ---------- existing : dict Existing metadata dict. detected : dict[str, list[str]] Newly detected metadata dict from source parsing. Returns ------- dict New merged dictionary. """ merged = dict(existing) fp: dict = dict(merged.get("fortran_parameters", {})) fp.update(detected) merged["fortran_parameters"] = fp return merged
def main(argv: list[str] | None = None) -> None:
    """Entry point for ``uclchem-generate-metadata``.

    Parses the Fortran sources, merges the detected PARAMETER names into the
    metadata loaded from the YAML file, and then either reports that nothing
    changed, prints a unified diff (``--dry-run`` / ``--check``), or rewrites
    the YAML file.

    Parameters
    ----------
    argv : list[str] | None
        Command-line arguments; ``None`` lets ``argparse`` read ``sys.argv``.

    Raises
    ------
    SystemExit
        With an error message when no Fortran source directory can be found,
        or with status 1 when ``--check`` is given and the YAML would change.

    """
    parser = argparse.ArgumentParser(
        description="Regenerate the fortran_parameters section of fortran_metadata.yaml."
    )
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument(
        "--dry-run",
        action="store_true",
        help="Print a unified diff of the changes without writing.",
    )
    mode.add_argument(
        "--check",
        action="store_true",
        help="Exit with status 1 if the YAML would change (useful in CI).",
    )
    args = parser.parse_args(argv)

    # Resolve source tree paths.  For editable installs _FORTRAN_SRC already
    # points into the source tree.  For non-editable installs it points into
    # site-packages where no .f90 files exist, so fall back to CWD-relative
    # paths so the user can run the CLI from the repo root.
    fortran_src = _FORTRAN_SRC
    metadata_path = _METADATA_PATH
    if not fortran_src.is_dir():
        cwd = Path.cwd()
        cwd_fortran_src = cwd / "src" / "fortran_src"
        cwd_metadata = cwd / "src" / "uclchem" / "advanced" / "fortran_metadata.yaml"
        if cwd_fortran_src.is_dir() and cwd_metadata.exists():
            fortran_src = cwd_fortran_src
            metadata_path = cwd_metadata
        else:
            sys.exit(
                f"ERROR: Fortran source directory not found: {fortran_src}\n"
                "Run this command from the repo root (the directory containing src/),\n"
                "or use an editable install (pip install -e .)."
            )

    detected = parse_fortran_parameters(fortran_src)
    existing = _load_yaml(metadata_path)
    merged = _merge(existing, detected)

    # Compare re-serialized data rather than raw file text, so differences in
    # formatting alone never count as a change.
    old_text = _dump_yaml(existing)
    new_text = _dump_yaml(merged)

    if old_text == new_text:
        print("fortran_metadata.yaml is already up to date.")
        return

    if args.dry_run or args.check:
        diff = difflib.unified_diff(
            old_text.splitlines(keepends=True),
            new_text.splitlines(keepends=True),
            fromfile="fortran_metadata.yaml (current)",
            tofile="fortran_metadata.yaml (updated)",
        )
        sys.stdout.writelines(diff)
        if args.check:
            sys.exit(1)
        return

    # NOTE: the file is rewritten wholesale from the dumped data.  Write it as
    # UTF-8 explicitly: the dump may contain unescaped non-ASCII characters
    # (``allow_unicode=True``), which a locale-dependent encoding could fail
    # to encode.
    Path(metadata_path).write_text(new_text, encoding="utf-8")
    print(f"Updated {metadata_path}")
    for mod, names in sorted(detected.items()):
        print(f"  {mod}: {names}")
# Run the command-line entry point when this module is executed as a script.
if __name__ == "__main__":
    main()