Source code for ufp.workflows.onebody

"""Reusable workflow helpers for examples and small supervised UFP studies."""

from __future__ import annotations

from typing import Sequence

import ase
import numpy as np
import torch

from ufp.terms import ElementOneBodyTerm, UFPModel
from ufp.training import ASEAtomsDataset


[docs] def onebody_reference_energy(model: UFPModel) -> float: """Return the scalar value from a single ``ElementOneBodyTerm`` model.""" if len(model.onebody_terms) != 1: raise ValueError("expected exactly one one-body term") term = model.onebody_terms[0] if not isinstance(term, ElementOneBodyTerm): raise TypeError("expected the one-body term to be ElementOneBodyTerm") return float(term.values.detach().cpu().reshape(-1)[0].item())
[docs] def onebody_reference_energies(model: UFPModel) -> dict[int, float]: """Return all element one-body values keyed by atomic number.""" values_by_type: dict[int, float] = {} for term in model.onebody_terms: if not isinstance(term, ElementOneBodyTerm): raise TypeError("expected one-body terms to be ElementOneBodyTerm") values = term.values.detach().cpu().reshape(-1) if values.numel() != len(term.atomic_types): raise ValueError("one-body values do not match declared atomic types") for atomic_type, value in zip(term.atomic_types, values, strict=True): if int(atomic_type) in values_by_type: raise ValueError(f"duplicate one-body value for Z={atomic_type}") values_by_type[int(atomic_type)] = float(value.item()) return values_by_type
def _target_energy_from_atoms( atoms: ase.Atoms, *, energy_key: str | None, ) -> float: """Read one total-energy target from an ASE object.""" if energy_key is not None: if energy_key in atoms.info: return float(atoms.info[energy_key]) if energy_key in atoms.arrays: value = np.asarray(atoms.arrays[energy_key], dtype=float) if value.size != 1: raise ValueError(f"`{energy_key}` must contain one scalar energy") return float(value.reshape(-1)[0]) raise KeyError(f"ASE atoms object is missing energy target `{energy_key}`") return float(atoms.get_potential_energy())
[docs] def fit_element_onebody_energies( atoms_list: Sequence[ase.Atoms], energies: Sequence[float] | None = None, *, atomic_types: Sequence[int] | None = None, energy_key: str | None = None, rcond: float | None = None, ) -> dict[int, float]: """ Fit composition-only per-element reference energies by least squares. Args: atoms_list: ASE structures whose elemental compositions define the linear system. energies: Optional total energies, one per structure. If omitted, energies are read with ``atoms.get_potential_energy()`` unless ``energy_key`` is set. atomic_types: Atomic numbers to fit. When omitted, all elements observed in ``atoms_list`` are fitted. energy_key: Optional key in ``atoms.info`` or ``atoms.arrays`` used when ``energies`` is omitted. rcond: Cutoff passed to ``numpy.linalg.lstsq``. Returns: Mapping from atomic number to fitted reference energy in eV. Raises: KeyError: If ``energy_key`` is set but missing from a structure. TypeError: If ``atoms_list`` contains non-ASE objects. ValueError: If the inputs are empty or have inconsistent lengths. """ structures = list(atoms_list) if not structures: raise ValueError("`atoms_list` must contain at least one ASE structure") for atoms in structures: if not isinstance(atoms, ase.Atoms): raise TypeError(f"`atoms_list` must contain ase.Atoms, got {type(atoms)}") if atomic_types is None: fitted_types = tuple( sorted({int(number) for atoms in structures for number in atoms.numbers}) ) else: fitted_types = tuple(sorted(set(int(number) for number in atomic_types))) if not fitted_types: raise ValueError("at least one atomic type must be fitted") if energies is None: targets = np.asarray( [ _target_energy_from_atoms(atoms, energy_key=energy_key) for atoms in structures ], dtype=float, ) else: targets = np.asarray(list(energies), dtype=float) if targets.shape != (len(structures),): raise ValueError( "`energies` must contain exactly one scalar energy per structure" ) column_by_type = { atomic_type: column for column, atomic_type in enumerate(fitted_types) } counts = np.zeros((len(structures), len(fitted_types)), dtype=float) for row, atoms in enumerate(structures): atomic_numbers, atomic_counts = np.unique( np.asarray(atoms.numbers, dtype=int), return_counts=True, ) for atomic_number, atomic_count in zip( atomic_numbers, atomic_counts, strict=True, ): column = column_by_type.get(int(atomic_number)) if column is not None: counts[row, column] = float(atomic_count) values, *_ = np.linalg.lstsq(counts, targets, rcond=rcond) return { atomic_type: float(values[index]) for index, atomic_type in enumerate(fitted_types) }
[docs] def initialize_onebody_terms_from_atoms( model: UFPModel, atoms_list: Sequence[ase.Atoms], energies: Sequence[float] | None = None, *, energy_key: str | None = None, rcond: float | None = None, ) -> dict[int, float]: """Initialize trainable element one-body terms from composition least squares.""" trainable_terms = [ term for term in model.onebody_terms if isinstance(term, ElementOneBodyTerm) and term.values.requires_grad ] if not trainable_terms: return {} fitted = fit_element_onebody_energies( atoms_list, energies, energy_key=energy_key, rcond=rcond, ) missing_types = sorted( set(fitted).difference( int(atomic_type) for term in trainable_terms for atomic_type in term.atomic_types ) ) if missing_types: raise ValueError( "trainable ElementOneBodyTerm coverage is missing observed atomic " f"types {missing_types}" ) unobserved_types = sorted( { int(atomic_type) for term in trainable_terms for atomic_type in term.atomic_types }.difference(fitted) ) if unobserved_types: raise ValueError( "can not initialize one-body terms for unobserved atomic types " f"{unobserved_types}" ) with torch.no_grad(): for term in trainable_terms: values = torch.tensor( [fitted[int(atomic_type)] for atomic_type in term.atomic_types], dtype=term.values.dtype, device=term.values.device, ) term.values.copy_(values) return { int(atomic_type): float(value) for term in trainable_terms for atomic_type, value in zip( term.atomic_types, term.values.detach().cpu().reshape(-1).tolist(), strict=True, ) }
[docs] def initialize_onebody_terms_from_dataset( model: UFPModel, dataset: ASEAtomsDataset, *, rcond: float | None = None, ) -> dict[int, float]: """Initialize trainable element one-body terms from an ASE training dataset.""" atoms_list = [sample.atoms for sample in dataset] # type: ignore[attr-defined] energies = [sample.energy for sample in dataset] # type: ignore[attr-defined] if any(energy is None for energy in energies): return {} return initialize_onebody_terms_from_atoms( model, atoms_list, [float(energy) for energy in energies if energy is not None], rcond=rcond, )