Source code for endf_parserpy.tape.endf_file

############################################################
#
# Author(s):       Georg Schnabel
# Email:           g.schnabel@iaea.org
# Creation date:   2026/05/15
# Last modified:   2026/05/18
# License:         MIT
# Copyright (c) 2026 International Atomic Energy Agency (IAEA)
#
############################################################

"""Lazy, memory-bounded, editable access to a multi-material ENDF file.

:class:`EndfFile` indexes a tape on construction and reads and parses
individual sections from disk only when they are accessed, keeping the
results in bounded caches (see :mod:`endf_parserpy.tape.cache`). A
single section is parsed by wrapping its records in a minimal
single-material tape and handing that to an ordinary parser, so the
parsing engine is used unchanged.

Each material is represented by a :class:`_MaterialSlot` that carries an
edit overlay. Sections can be replaced, added or deleted and materials
can be deleted, appended or reordered; :meth:`EndfFile.export` writes
the edited tape back out and :meth:`EndfFile.to_string` returns it as
text. Untouched sections keep their data records verbatim from disk;
the SEND/FEND/MEND framing and the column 76-80 sequence numbers are
regenerated, so an unedited round trip preserves every data field
byte-for-byte but is not necessarily byte-identical overall -- the same
guarantee the ordinary writer gives.
"""

import os
from contextlib import contextmanager
from collections.abc import Mapping

from ..endf_parser_factory import EndfParserFactory
from ..endf_parser_base import EndfParserBase
from .address import (
    EndfMaterialPath,
    parse_index_spec,
    parse_section_path,
    section_has,
    walk_section,
)
from .cache import _RawCache, _SectionCache, _Section
from .errors import (
    AmbiguousMaterialError,
    SectionParseError,
    SectionRenderError,
    StaleSourceError,
    TapeStructureError,
)
from .index import TapeIndex
from .material import MaterialView, _MaterialSlot
from .operations import write_tape, write_tape_file, _VALID_ON_ERROR, _FailedUnit
from .records import _control_numbers, _control_line, _strip_send, TEND_LINE
from .views import (
    _FrozenMapping,
    _FrozenSequence,
    _LiveMapping,
    _LiveSequence,
    _SectionView,
    _navigate,
    _plain,
)


_VALID_MODES = ("index", "load_raw", "parse_all")
_VALID_CHECK_EDITS = ("eager", "deferred")
# _VALID_ON_ERROR is shared with operations.py -- the on_error policy is
# the same concept for EndfFile and for the parse_tape functions.

# sentinel distinguishing "no value given" from an explicit value of None
_UNSET = object()


def _value_match(field, value, tol):
    if tol and isinstance(field, (int, float)) and isinstance(value, (int, float)):
        return abs(field - value) <= tol
    return field == value


class _CurrentMaterials:
    """Adapter over the current slot list for material-selector resolution.

    Exposes the ``__len__`` and ``by_mat`` that
    :meth:`EndfMaterialPath.resolve_material` needs, so a material
    selector resolves against the *current* (possibly edited) material
    order rather than the on-disk index.
    """

    def __init__(self, slots):
        self._slots = slots

    def __len__(self):
        return len(self._slots)

    def by_mat(self, mat):
        return [i for i, s in enumerate(self._slots) if s.mat == mat]


[docs] class FailedSection(_FailedUnit): """Internal placeholder for a section that could not be parsed. When the :class:`EndfFile` was opened with ``on_error="mark"`` a section that fails to parse is kept as a :class:`FailedSection` so that the bulk operations (:meth:`~EndfFile.query`, :meth:`~EndfFile.build_index`, :meth:`~EndfFile.export`) can skip it or write it back verbatim instead of aborting. Accessing such a section directly (``endf_file[path]`` or ``material[mf, mt]``) raises :class:`SectionParseError`, with this object's :attr:`exception` kept as the cause; a :class:`FailedSection` is therefore never handed back to the caller. Attributes ---------- exception : Exception The exception raised while parsing the section. raw_lines : list[str] The raw text of the section. position : int Position of the material the section belongs to. mf, mt : int The MF/MT numbers of the section. """ def __init__(self, exception, raw_lines, position, mf, mt): super().__init__(exception, raw_lines) self.position = position self.mf = mf self.mt = mt def __repr__(self): return ( f"FailedSection(position={self.position}, MF={self.mf}, " f"MT={self.mt}, exception={self.exception!r})" )
[docs] class EndfFile: """Lazy, memory-bounded, editable view of a multi-material ENDF file. The file is indexed on construction (see :class:`TapeIndex`). Section data is read from disk and parsed only on access and is then held in bounded caches. Materials are addressed by zero-based position:: with EndfFile("tape.endf") as endf_file: material = endf_file[0] # a MaterialView section = material[3, 2] # parsed MF=3/MT=2 section material[3, 2] = section # edit it back in endf_file.export("edited.endf") Parameters ---------- filename : str or os.PathLike Path to the ENDF file. parser : EndfParserBase, optional Engine used to parse and write sections. Defaults to ``EndfParserFactory.create(select="fastest")``. mode : {"index", "load_raw", "parse_all"} ``"index"`` (default) only builds the index. ``"load_raw"`` also pre-reads section text into the raw cache; ``"parse_all"`` also parses every section. The cache budgets still apply, so these modes pre-warm the caches rather than guarantee residency. parsed_cache_bytes, raw_cache_bytes : int Budgets, in raw-text-equivalent bytes, for the parsed-section and raw-text caches. on_error : {"raise", "mark"} Whether a section that fails to parse raises :class:`SectionParseError` or is returned as a :class:`FailedSection`. check_edits : {"eager", "deferred"} When the recipe-conformity of an edited section is checked. ``"eager"`` (the default) renders every edited section through the parser's writer immediately, so a malformed edit raises at the offending assignment, and a retrieved section is a read-only (frozen) view. ``"deferred"`` accepts every edit, marking the section dirty, and checks conformity only at :meth:`export` / :meth:`to_string` or :meth:`invalid_edits`; a retrieved section is then a live write-through view. verify_source : bool If true, the file's size and mtime are checked against the index before every disk read; a change raises :class:`StaleSourceError`. Notes ----- An :class:`EndfFile` is not safe for concurrent use from several threads: its caches, material list and edit overlays are plain, unguarded state, so any access racing with another is undefined. Use one :class:`EndfFile` per thread. """ def __init__( self, filename, *, parser=None, mode="index", parsed_cache_bytes=64 << 20, raw_cache_bytes=64 << 20, on_error="mark", check_edits="eager", verify_source=False, ): if mode not in _VALID_MODES: raise ValueError(f"mode must be one of {_VALID_MODES}, got {mode!r}") if on_error not in _VALID_ON_ERROR: raise ValueError( f"on_error must be one of {_VALID_ON_ERROR}, got {on_error!r}" ) if check_edits not in _VALID_CHECK_EDITS: raise ValueError( f"check_edits must be one of {_VALID_CHECK_EDITS}, got " f"{check_edits!r}" ) self._path = os.fspath(filename) self._parser = parser or EndfParserFactory.create(select="fastest") if not isinstance(self._parser, EndfParserBase): raise TypeError( f"parser must be an EndfParserBase instance (as created by " f"EndfParserFactory.create()), got {type(self._parser).__name__}" ) self._on_error = on_error self._check_edits = check_edits self._verify_source = verify_source self._index = TapeIndex.from_file(self._path) self._materials = [ _MaterialSlot(e.position, e.mat, e.za, e.awr) for e in self._index ] self._raw_cache = _RawCache(raw_cache_bytes) self._section_cache = _SectionCache(parsed_cache_bytes) self._material_views = {} self._secondary_indexes = {} self._read_fh = None self._invalidated = False if mode == "load_raw": self._preload(parse=False) elif mode == "parse_all": self._preload(parse=True) def _ensure_valid(self): """Raise if the object was invalidated by an export onto its source. After :meth:`export` overwrites the file the :class:`EndfFile` was opened from, the structural index no longer matches the bytes on disk, so lazily reading any untouched section would return garbage. The object is therefore invalidated; every data operation raises until the file is re-opened. """ if self._invalidated: raise StaleSourceError( f"this EndfFile was invalidated when export() overwrote its " f"source file {self._path!r}; its structural index no longer " f"matches the file on disk -- re-open it with " f"EndfFile({self._path!r})" ) def _preload(self, parse): # a whole-tape operation: read every section through a single # held file handle instead of reopening the file per section with self._read_session(): for entry in self._index: for mf, mt in list(entry.sections): if parse: self._get_section(entry.position, mf, mt) else: self._get_raw(entry.position, mf, mt, entry.sections[(mf, mt)]) # -- polymorphic item protocol ------------------------------------- # # ``[]``, ``[]=``, ``del`` and ``in`` accept either an integer # material position or an EndfMaterialPath (string or object). The # path may stop at a material, a section or a field; see the design # note docs/design/endf_file_path_addressing.md. def __len__(self): self._ensure_valid() return len(self._materials) def _material_view(self, slot): """Return the (cached) :class:`MaterialView` of a slot.""" view = self._material_views.get(slot) if view is None: view = MaterialView(self, slot) self._material_views[slot] = view return view def _remove_material(self, position): """Delete the material at ``position`` and drop its cached view. Dropping the :class:`MaterialView` keeps ``_material_views`` from accumulating entries for materials that no longer exist; an external reference to the view stays valid as an *invalid* view (its :attr:`~MaterialView.position` then raises). The named secondary indexes are dropped too -- see :meth:`_invalidate_indexes`. """ slot = self._materials.pop(position) slot.removed = True self._material_views.pop(slot, None) self._invalidate_indexes() def _invalidate_indexes(self): """Drop the cached secondary indexes after a structural edit. :meth:`build_index` keys its result by tape position, so adding, removing or reordering materials would silently leave a stored index pointing at the wrong materials. Rather than let it go stale, any structural edit clears it; rebuild it afterwards. """ self._secondary_indexes.clear() def _resolve_key(self, key): """Resolve a path key to ``(position, mf, mt, subpath)``. ``mf`` is ``None`` for a material-depth path; ``subpath`` is ``None`` unless the path reaches into a section. """ mp = key if isinstance(key, EndfMaterialPath) else EndfMaterialPath(key) if mp.mf is not None and mp.mt is None: raise ValueError( f"{mp!r} addresses a whole MF file; MF-level addressing is " "not supported -- address a section as material/MF/MT" ) position = mp.resolve_material(_CurrentMaterials(self._materials)) return position, mp.mf, mp.mt, mp.subpath def __getitem__(self, key): """Return the material, section or field addressed by ``key``. ``key`` is an integer material position or an :class:`EndfMaterialPath` (string or object). A material-depth path yields a :class:`MaterialView`, a section-depth path a section view and a field-depth path the value at that field. """ self._ensure_valid() if isinstance(key, int): return self._material_view(self._materials[key]) if not isinstance(key, (str, EndfMaterialPath)): raise TypeError( "EndfFile is indexed by an integer material position or an " "EndfMaterialPath (string or object); use by_mat() or " "by_za() for other lookups" ) position, mf, mt, subpath = self._resolve_key(key) slot = self._materials[position] if mf is None: return self._material_view(slot) section = self._get_slot_section(slot, mf, mt) return self._view(slot, mf, mt, section, subpath) def __setitem__(self, key, value): """Assign the section or field addressed by an :class:`EndfMaterialPath`. A section-depth path replaces or adds a whole section; a field-depth path edits one field within it. Whole materials cannot be assigned -- use :meth:`append_material`. """ self._ensure_valid() if isinstance(key, int): raise ValueError( "a whole material cannot be assigned by position; use " "append_material() to add a material" ) if not isinstance(key, (str, EndfMaterialPath)): raise TypeError( "EndfFile is indexed by an integer material position or an " "EndfMaterialPath (string or object)" ) position, mf, mt, subpath = self._resolve_key(key) slot = self._materials[position] if mf is None: raise ValueError( "a whole material cannot be assigned; use append_material() " "to add a material" ) if subpath is None: self._set_slot_section(slot, mf, mt, value) else: self._set_slot_field(slot, mf, mt, subpath, value) def __delitem__(self, key): """Delete the material, section or field addressed by ``key``.""" self._ensure_valid() if isinstance(key, int): self._remove_material(key) return if not isinstance(key, (str, EndfMaterialPath)): raise TypeError( "EndfFile is indexed by an integer material position or an " "EndfMaterialPath (string or object)" ) position, mf, mt, subpath = self._resolve_key(key) slot = self._materials[position] if mf is None: self._remove_material(position) elif subpath is None: self._delete_slot_section(slot, mf, mt) else: self._delete_slot_field(slot, mf, mt, subpath) def __contains__(self, key): """Whether ``key`` resolves to a present material/section/field. An ``int`` is tested as a material position. A malformed path or an ambiguous bare-MAT selector is genuinely ill-posed and propagates its :class:`ValueError` / :class:`AmbiguousMaterialError` rather than being answered ``False``. A field-depth path whose section cannot be parsed answers ``False`` -- the field is not reachable -- regardless of the ``on_error`` mode. """ self._ensure_valid() if isinstance(key, int): return -len(self._materials) <= key < len(self._materials) if not isinstance(key, (str, EndfMaterialPath)): return False try: position, mf, mt, subpath = self._resolve_key(key) except (KeyError, IndexError): return False slot = self._materials[position] if mf is None: return True if (mf, mt) not in self._slot_section_keys_set(slot): return False if subpath is None: return True try: section = self._get_slot_section(slot, mf, mt) except (KeyError, SectionParseError): return False if not isinstance(section, Mapping): return False # an unparsable (FailedSection) or raw section return section_has(section, subpath) def __iter__(self): """Iterate over the materials as :class:`MaterialView` objects.""" self._ensure_valid() for position in range(len(self._materials)): yield self._material_view(self._materials[position])
[docs] def materials(self): """Return all materials as a list of :class:`MaterialView` objects.""" return list(self)
def _position_of(self, slot): try: return self._materials.index(slot) except ValueError: raise RuntimeError("this material has been deleted from the tape") from None # -- editing -------------------------------------------------------
[docs] def append_material(self, material, *, mat, za=None, awr=None): """Append a new material to the tape. Parameters ---------- material : Mapping A nested ``{MF: {MT: section}}`` mapping, as returned by an ordinary ``parsefile``. The ``MF=0`` tape-head entry, if present, is ignored. mat : int ENDF MAT number of the new material. za, awr : optional Identifiers, used by :meth:`by_za` and the index. Returns ------- MaterialView A view of the appended material. Notes ----- Under ``check_edits="eager"`` every section of the appended material is render-checked immediately, exactly as a section assignment is, so a malformed section is rejected here rather than at :meth:`export` time. The ``mat`` argument must agree with the MAT number the material carries in its own records (the ``'MAT'`` key of a parsed section, or the control field of a raw section's first line); a mismatch is rejected, since the records, not the argument, are what gets written to the tape. """ self._ensure_valid() mat = int(mat) slot = _MaterialSlot( original_position=None, mat=mat, za=None if za is None else int(za), awr=None if awr is None else float(awr), ) for mf, mtdic in material.items(): mf_i = int(mf) if mf_i == 0: continue # the MF=0 tape-head entry, if present, is ignored for mt, section in mtdic.items(): mf_mt = (mf_i, int(mt)) self._check_section_key(*mf_mt) if isinstance(section, _SectionView): section = section.detach() if not isinstance(section, (Mapping, list)): raise TypeError( f"section MF={mf_mt[0]}/MT={mf_mt[1]} of the appended " "material must be a mapping (parsed) or a list of " "strings (raw)" ) section_mat = self._section_mat(section) if section_mat is not None and section_mat != mat: raise ValueError( f"mat={mat} was given, but section MF={mf_mt[0]}/" f"MT={mf_mt[1]} of the material carries MAT=" f"{section_mat}; the two must agree" ) if self._check_edits == "eager": self._check_section(*mf_mt, section) slot.overlay[mf_mt] = section self._materials.append(slot) self._invalidate_indexes() return self[len(self._materials) - 1]
[docs] def reorder(self, order): """Reorder the materials of the tape. ``order`` is a permutation of ``range(len(self))``: the material currently at ``order[i]`` moves to position ``i``. """ self._ensure_valid() order = list(order) if sorted(order) != list(range(len(self._materials))): raise ValueError("order must be a permutation of range(len(self))") self._materials = [self._materials[i] for i in order] self._invalidate_indexes()
# -- secondary lookups --------------------------------------------- def _positions(self, *, mat=_UNSET, za=_UNSET): """Tape positions of the materials matching every given criterion. A criterion left as ``_UNSET`` is not applied; ``None`` is treated as an ordinary value to match, so ``by_za(None)`` selects the materials whose ZA is unknown. This is the single structural-filter loop behind :meth:`by_mat`, :meth:`by_za` and :meth:`find`. """ return [ i for i, s in enumerate(self._materials) if (mat is _UNSET or s.mat == mat) and (za is _UNSET or s.za == za) ]
[docs] def by_mat(self, mat, *, occurrence=None): """Return the material with the given MAT number. ``occurrence`` (zero-based) selects among several materials that share a MAT number, as on a PENDF tape. Without it, a MAT number that is not unique raises :class:`AmbiguousMaterialError`. """ self._ensure_valid() positions = self._positions(mat=mat) if not positions: raise KeyError(f"no material with MAT={mat}") if occurrence is not None: return self[positions[occurrence]] if len(positions) > 1: raise AmbiguousMaterialError( f"MAT={mat} matches {len(positions)} materials at positions " f"{positions}; pass occurrence=0..{len(positions) - 1}" ) return self[positions[0]]
[docs] def by_za(self, za): """Return a list of materials with the given ZA identifier.""" self._ensure_valid() return [self[i] for i in self._positions(za=za)]
[docs] def find(self, *, mat=None, za=None): """Return a list of materials matching every given criterion. This is the structural lookup; a criterion left as ``None`` is not applied. For lookups by a parsed section field, see :meth:`query`. """ self._ensure_valid() criteria = {} if mat is not None: criteria["mat"] = mat if za is not None: criteria["za"] = za return [self[i] for i in self._positions(**criteria)]
# -- path-based queries --------------------------------------------
[docs] def get(self, path): """Return the material, section or field addressed by ``path``. ``path`` is an :class:`EndfMaterialPath` or a string of the form ``material[/MF/MT[/field...]]``. This is the explicit-method synonym of ``endf_file[path]``: a material-depth path yields a :class:`MaterialView`, a section-depth path a section view (see :mod:`endf_parserpy.tape.views`) and a field-depth path the value at that field. If the addressed section cannot be parsed a :class:`SectionParseError` is raised regardless of ``on_error``. """ return self[path]
[docs] def build_index(self, section_path, *, name=None): """Build a secondary index over one or several section fields. With a single section path -- a string ``"MF/MT[/field...]"`` -- this parses that section of every material that has it, reads the value at the field path and returns a dict ``{value: [positions]}``. With a list (or tuple) of section paths it builds a *composite* index instead: the key is the tuple of the values at the respective paths, in the order given, so the result is a dict ``{(value0, value1, ...): [positions]}``. A material is indexed only if *every* path resolves for it; one that lacks any of the addressed sections or fields is skipped. Paths that share an ``MF/MT`` section have it parsed only once per material. The key shape follows the *argument type*: a one-element list still yields one-element-tuple keys. One section is parsed per material per distinct ``MF/MT``, so the cost grows with the number of materials. With ``name`` the result is also stored and reachable via :attr:`secondary_indexes`; because the index is keyed by tape position, a stored index is dropped whenever a material is appended, removed or reordered, and must then be rebuilt. """ self._ensure_valid() specs, is_multi = parse_index_spec(section_path) mapping = {} for position, slot in enumerate(self._materials): values = self._collect_index_values(slot, specs) if values is None: continue key = tuple(values) if is_multi else values[0] try: mapping.setdefault(key, []).append(position) except TypeError: raise ValueError( f"section path {section_path!r} resolves to a " "non-hashable value; build_index needs scalar field(s)" ) from None if name is not None: self._secondary_indexes[name] = mapping return mapping
def _resolve_query_field(self, slot, mf, mt, subpath): """Resolve one section field of a material for the bulk lookups. Returns ``(True, value)`` for a field that is present, and ``(False, None)`` when the material lacks the section or the field, or the section failed to parse under ``on_error="mark"`` (under ``on_error="raise"`` the failing parse propagates). The section is read through the cache, so addressing the same ``MF/MT`` more than once does not re-parse it. Shared by :meth:`query` and :meth:`build_index`. """ if (mf, mt) not in self._slot_section_keys_set(slot): return False, None section = self._get_slot_section(slot, mf, mt) if isinstance(section, FailedSection): return False, None if not section_has(section, subpath): return False, None return True, walk_section(section, subpath) def _collect_index_values(self, slot, specs): """Return the field values for ``build_index``, or ``None`` to skip. ``specs`` is a list of ``(mf, mt, subpath)``. The material is skipped (``None`` is returned) when it lacks any of the addressed sections or fields, or when a needed section failed to parse under ``on_error="mark"``. """ values = [] for mf, mt, subpath in specs: found, value = self._resolve_query_field(slot, mf, mt, subpath) if not found: return None values.append(value) return values
[docs] def query(self, section_path, value=_UNSET, *, predicate=None, tol=0.0): """Return the materials whose section field matches. Pass exactly one of ``value`` (equality, within ``tol`` for numbers) or ``predicate`` (a callable applied to the field). Returns a list of :class:`MaterialView`. """ self._ensure_valid() if (value is _UNSET) == (predicate is None): raise ValueError("pass exactly one of value or predicate") mf, mt, subpath = parse_section_path(section_path) matches = [] for position, slot in enumerate(self._materials): found, field = self._resolve_query_field(slot, mf, mt, subpath) if not found: continue if predicate is not None: matched = bool(predicate(field)) else: matched = _value_match(field, value, tol) if matched: matches.append(self[position]) return matches
@property def secondary_indexes(self): """The named secondary indexes built by :meth:`build_index`. Emptied whenever a material is appended, removed or reordered, since the indexes are keyed by tape position. """ return self._secondary_indexes # -- per-material section access (slot-aware) ---------------------- def _slot_section_keys_set(self, slot): if slot.original_position is not None: keys = set(self._index[slot.original_position].sections) else: keys = set() keys -= slot.deleted keys |= set(slot.overlay) return keys def _slot_section_keys(self, slot): return sorted(self._slot_section_keys_set(slot)) def _get_slot_section(self, slot, mf, mt): self._ensure_valid() key = (mf, mt) if key in slot.overlay: return slot.overlay[key] if key in slot.deleted: raise KeyError(f"this material has no MF={mf}/MT={mt} section") if ( slot.original_position is None or key not in self._index[slot.original_position].sections ): raise KeyError(f"this material has no MF={mf}/MT={mt} section") return self._get_section(slot.original_position, mf, mt) @staticmethod def _check_section_key(mf, mt): """Reject an ``(MF, MT)`` that cannot name a material section. A material section has ``MF >= 1`` and ``MT >= 1``; ``MF 0`` is the tape-head slot and ``MT 0`` marks a SEND record, so assigning a section to either would corrupt the written tape. """ if mf < 1 or mt < 1: raise ValueError( f"MF={mf}/MT={mt} is not a valid section key; a material " "section has MF >= 1 and MT >= 1" ) @staticmethod def _section_mat(section): """The MAT number a material section carries, or ``None``. A parsed section is a mapping with a ``'MAT'`` key; a raw section is a list of ENDF lines whose first line names the MAT in its control field. ``None`` is returned when neither yields a usable (positive) value. """ if isinstance(section, Mapping): mat = section.get("MAT") return None if mat is None else int(mat) if isinstance(section, list) and section: mat = _control_numbers(section[0])[0] return mat if mat > 0 else None return None def _set_slot_section(self, slot, mf, mt, value): self._ensure_valid() self._check_section_key(mf, mt) if isinstance(value, _SectionView): value = value.detach() if not isinstance(value, (Mapping, list)): raise TypeError( "a section must be a mapping (parsed) or a list of strings (raw)" ) if self._check_edits == "eager": self._check_section(mf, mt, value) slot.overlay[(mf, mt)] = value slot.deleted.discard((mf, mt)) def _set_slot_field(self, slot, mf, mt, subpath, value): """Read-modify-write a single field within a section. Under ``check_edits="eager"`` a deep copy of the section is edited and render-checked before it is committed, so a malformed result leaves the canonical section untouched. Under ``"deferred"`` the canonical section is edited in place and marked dirty. """ if isinstance(value, _SectionView): value = value.detach() section = self._get_slot_section(slot, mf, mt) self._require_mapping_section(section, mf, mt) if self._check_edits == "eager": work = _plain(section) self._set_at(work, subpath, value) self._check_section(mf, mt, work) slot.overlay[(mf, mt)] = work slot.deleted.discard((mf, mt)) else: self._set_at(section, subpath, value) slot.overlay[(mf, mt)] = section slot.deleted.discard((mf, mt)) def _delete_slot_section(self, slot, mf, mt): self._ensure_valid() if (mf, mt) not in self._slot_section_keys_set(slot): raise KeyError(f"this material has no MF={mf}/MT={mt} section") slot.overlay.pop((mf, mt), None) if ( slot.original_position is not None and (mf, mt) in self._index[slot.original_position].sections ): slot.deleted.add((mf, mt)) def _delete_slot_field(self, slot, mf, mt, subpath): """Delete a single field within a section (deferred mode only).""" if self._check_edits == "eager": raise ValueError( "deleting a section field is rejected in check_edits='eager' " "mode because the resulting section no longer conforms to " "its ENDF recipe; open the EndfFile with " "check_edits='deferred', or assign a whole edited section" ) section = self._get_slot_section(slot, mf, mt) self._require_mapping_section(section, mf, mt) self._del_at(section, subpath) slot.overlay[(mf, mt)] = section slot.deleted.discard((mf, mt)) def _require_mapping_section(self, section, mf, mt): """Raise unless ``section`` is a parsed (mapping) section.""" if isinstance(section, FailedSection): raise SectionParseError( f"MF={mf}/MT={mt} of the material at position " f"{section.position} failed to parse" ) from section.exception if not isinstance(section, Mapping): raise TypeError( f"MF={mf}/MT={mt} is a recipe-less (raw) section; it has no " "addressable fields" ) @staticmethod def _set_at(container, subpath, value): parent, last = _navigate(container, subpath) parent[last] = value @staticmethod def _del_at(container, subpath): parent, last = _navigate(container, subpath) del parent[last] def _view(self, slot, mf, mt, section, subpath=None): """Wrap a canonical section in the mode-dependent view. ``check_edits="eager"`` yields a frozen (read-only) view, ``"deferred"`` a live write-through view. A :class:`FailedSection` raises :class:`SectionParseError`. With a ``subpath`` the view is navigated to that field, returning a nested view or a bare scalar. """ if isinstance(section, FailedSection): raise SectionParseError( f"MF={mf}/MT={mt} of the material at position " f"{section.position} failed to parse" ) from section.exception if self._check_edits == "deferred": def touch(_slot=slot, _mf=mf, _mt=mt, _section=section): _slot.overlay[(_mf, _mt)] = _section _slot.deleted.discard((_mf, _mt)) if isinstance(section, Mapping): view = _LiveMapping(section, touch) else: view = _LiveSequence(section, touch) else: if isinstance(section, Mapping): view = _FrozenMapping(section) else: view = _FrozenSequence(section) if subpath is None: return view return view[subpath] def _check_section(self, mf, mt, section): """Render a section through the writer to check recipe conformity. A render failure -- the section does not conform to its ENDF recipe -- propagates. Only mapping sections are checked; a recipe-less raw section is written verbatim and has no recipe to violate. """ if not isinstance(section, Mapping): return try: self._parser.write({0: {0: [self._index.tpid_line]}, mf: {mt: section}}) except Exception as exc: raise SectionRenderError( f"the edited MF={mf}/MT={mt} section does not render to " f"valid ENDF-6 text: {exc}" ) from exc
[docs] def invalid_edits(self): """Return the edited sections that do not conform to their recipe. Renders every edited section through the parser's writer and returns a list of ``(position, MF, MT, exception)`` tuples, one per edited section that fails to render; an empty list means every edit is conformant, so ``if not endf_file.invalid_edits()`` reads as "every edit is valid". Untouched sections are written verbatim and are not checked. Under ``check_edits="deferred"`` this is the explicit conformity check that :meth:`export` and :meth:`to_string` perform implicitly; under ``"eager"`` every edit was already checked at write time, so it is a near no-op but remains harmless to call. """ report = [] for position, slot in enumerate(self._materials): for (mf, mt), section in list(slot.overlay.items()): if not isinstance(section, Mapping): continue try: self._check_section(mf, mt, section) except SectionRenderError as exc: report.append((position, mf, mt, exc)) return report
# -- the lazy access path ------------------------------------------ def _get_raw(self, position, mf, mt, sec_entry): key = (position, mf, mt) cached = self._raw_cache.get(key) if cached is not None: return cached raw = self._read_span(sec_entry.offset, sec_entry.length) self._raw_cache.put(key, raw, sec_entry.length) return raw def _get_section(self, position, mf, mt): key = (position, mf, mt) cached = self._section_cache.get(key) if cached is not None: return cached entry = self._index[position] sec_entry = entry.sections.get((mf, mt)) if sec_entry is None: raise KeyError( f"material at position {position} (MAT={entry.mat}) has " f"no MF={mf}/MT={mt} section" ) raw = self._get_raw(position, mf, mt, sec_entry) section = self._parse_section(entry, mf, mt, raw) self._section_cache.put(key, section, sec_entry.length) return section def _parse_section(self, entry, mf, mt, raw_lines): # wrap the section in a minimal single-material tape so the # ordinary parser can be used unchanged mini_tape = ( [self._index.tpid_line] + list(raw_lines) + [ _control_line(entry.mat, 0, 0), # FEND _control_line(0, 0, 0), # MEND TEND_LINE, # TEND ] ) try: result = self._parser.parse(mini_tape) section = result[mf][mt] except Exception as exc: if self._on_error == "raise": raise SectionParseError( f"failed to parse MF={mf}/MT={mt} of the material at " f"position {entry.position} (MAT={entry.mat})" ) from exc return FailedSection(exc, raw_lines, entry.position, mf, mt) if isinstance(section, Mapping): return _Section(section) return section # a section without a recipe stays a list of strings @contextmanager def _read_session(self): """Hold one file handle open for the duration of the block. Disk reads performed inside the block reuse a single handle instead of reopening the file per section. Used for whole-tape operations; outside such a block every read opens and closes the file on its own, which keeps interactive use simple and never pins the file open. """ with open(self._path, "rb") as fh: self._read_fh = fh try: yield finally: self._read_fh = None def _read_span(self, offset, length): if self._verify_source: self._check_source() fh = self._read_fh if fh is None: with open(self._path, "rb") as fh: fh.seek(offset) data = fh.read(length) else: fh.seek(offset) data = fh.read(length) return data.decode("latin-1").splitlines() def _check_source(self): stat = os.stat(self._path) if ( stat.st_size != self._index.source_size or stat.st_mtime_ns != self._index.source_mtime_ns ): raise StaleSourceError( f"the source file {self._path!r} changed after it was indexed" ) # -- write-back ---------------------------------------------------- def _assemble(self, slot): """Build a ``{MF: {MT: section}}`` dict ready for the writer. Untouched sections are taken verbatim from disk; edited or added sections come from the overlay. """ material = {0: {0: [self._index.tpid_line]}} for mf, mt in self._slot_section_keys(slot): if (mf, mt) in slot.overlay: section = slot.overlay[(mf, mt)] # an overlay section is always a parsed mapping or a raw # list of lines; a raw list is stripped of any trailing # SEND so the writer can re-emit it if isinstance(section, list): section = _strip_send(section) else: sec_entry = self._index[slot.original_position].sections[(mf, mt)] raw = self._get_raw(slot.original_position, mf, mt, sec_entry) section = _strip_send(raw) material.setdefault(mf, {})[mt] = section return material def _check_deferred_edits(self): """Render-check the edits before output (deferred mode only). Under ``check_edits="deferred"`` a non-conformant edited section raises :class:`SectionRenderError` here, before any output is produced; under ``"eager"`` every edit was already checked when it was made, so this is a no-op. """ if self._check_edits != "deferred": return report = self.invalid_edits() if report: position, mf, mt, exc = report[0] raise SectionRenderError( f"the edited MF={mf}/MT={mt} section of the material at " f"position {position} does not render to valid ENDF-6 text " f"({len(report)} edited section(s) failed to render); call " "invalid_edits() for the full report" ) from exc.__cause__ def _check_materials_have_sections(self): """Reject, before output, a material left with no sections. A sectionless material is a legal *transient* state -- e.g. while every section is deleted to rebuild a material -- but it is not valid ENDF: on re-indexing such a material is silently dropped. It is therefore rejected at :meth:`export` / :meth:`to_string` time, naming the offending position. """ for position, slot in enumerate(self._materials): if not self._slot_section_keys_set(slot): raise TapeStructureError( f"the material at position {position} has no sections; " "a material must have at least one section to be " "written -- use 'del endf_file[position]' to remove a " "material entirely" ) def _empty_tape_text(self): """The ENDF-6 text of this tape once every material is removed. A tape with no materials is just its tape head (TPID) followed by the tape end (TEND). The original TPID is kept, so an emptied tape retains its identity. """ return self._index.tpid_line + "\n" + TEND_LINE + "\n" def _output_materials(self): """Yield each material assembled and ready for :func:`write_tape`. Each material is produced as a ``{MF: {MT: section}}`` dict, one at a time, so a streaming consumer (:func:`write_tape_file`) never holds the whole tape in memory. An untouched section is assembled from its raw on-disk text -- it is not parsed -- so only sections that were actually edited were ever parsed. """ for slot in self._materials: yield self._assemble(slot)
[docs] def to_string(self): """Return the (possibly edited) tape as an ENDF-6 formatted string. Untouched sections keep their data records verbatim from disk and edited or added sections are rendered by the parser; in both cases the SEND/FEND/MEND framing and the column 76-80 sequence numbers are regenerated, so every data field is preserved byte-for-byte but the tape is not necessarily byte-identical to the input. The result ends with a newline; use :meth:`str.splitlines` if a list of lines is needed. A tape from which every material has been deleted is written as its tape head (TPID) followed by the tape end (TEND). This necessarily builds the whole tape in memory; for a large tape, write it to a file with :meth:`export`, which is memory-bounded. """ self._ensure_valid() self._check_deferred_edits() self._check_materials_have_sections() if not self._materials: return self._empty_tape_text() with self._read_session(): return write_tape(self._output_materials(), parser=self._parser)
[docs] def export(self, path, *, overwrite=False): """Write the (possibly edited) tape to a file. The tape is written one material at a time via a temporary file and an atomic replace, so peak memory stays bounded by a single material regardless of the tape size. Untouched sections keep their data records verbatim from disk (they are not parsed) and edited or added sections are rendered by the parser; the SEND/FEND/MEND framing and the column 76-80 sequence numbers are regenerated either way, preserving every data field byte-for-byte without making the tape byte-identical. An existing file is only overwritten when ``overwrite=True``. A tape from which every material has been deleted is written as its tape head (TPID) followed by the tape end (TEND). Exporting onto the file the :class:`EndfFile` was opened from is permitted, but it leaves the in-memory structural index stale (the byte offsets of untouched sections have moved). The object is therefore *invalidated*: every subsequent operation raises :class:`StaleSourceError`, and the file must be re-opened with a new :class:`EndfFile` to continue. Exporting to any other path leaves the object usable. """ self._ensure_valid() self._check_deferred_edits() self._check_materials_have_sections() path = os.fspath(path) if os.path.exists(path) and not overwrite: raise FileExistsError( f"file {path} already exists; pass overwrite=True to replace it" ) onto_source = os.path.realpath(path) == os.path.realpath(self._path) tmp = path + ".endfparserpy-tmp" try: if self._materials: with self._read_session(): write_tape_file( self._output_materials(), tmp, parser=self._parser, overwrite=True, ) else: # every material was deleted: a valid TPID + TEND tape # (newline="" keeps the LF terminators verbatim on Windows) with open(tmp, "w", newline="") as fh: fh.write(self._empty_tape_text()) os.replace(tmp, path) except BaseException: # a failed or interrupted write must not leave the temporary # file behind (os.replace has consumed it on success); a # cleanup failure must not mask the original error try: os.remove(tmp) except OSError: pass raise if onto_source: # the file the index describes has just been rewritten; the # offsets of untouched sections no longer match -- this object # can no longer read from disk safely (see _ensure_valid) self._invalidated = True
# -- memory management ---------------------------------------------
[docs] def unload(self, position=None): """Drop cached raw text and parsed sections. Edits held in the material overlays are not affected. With no argument the whole cache is cleared; given a material position, only that material's cached data is dropped. """ if position is None: self._raw_cache.clear() self._section_cache.clear() return original = self._materials[position].original_position if original is not None: self._raw_cache.drop_material(original) self._section_cache.drop_material(original)
@property def cache_nbytes(self): """The current ``(raw, parsed)`` cache sizes in bytes.""" return self._raw_cache.nbytes, self._section_cache.nbytes @property def index(self): """The underlying :class:`TapeIndex` (describes the file on disk).""" return self._index @property def parser(self): """The parser engine used for sections.""" return self._parser # -- context manager ----------------------------------------------- def __enter__(self): return self def __exit__(self, *exc): self.unload() return False def __repr__(self): state = " (invalidated)" if self._invalidated else "" return f"<EndfFile {self._path!r}: {len(self._materials)} " f"materials{state}>" # -- pickling ------------------------------------------------------ # # The index, the material slots (which carry any edits) and the # parser are pickled; the caches and named secondary indexes are # not. The parser pickles by recipe (see EndfParserBase), so its # construction options are preserved across pickling. Any secondary # indexes must be rebuilt with build_index() afterwards. def __getstate__(self): return { "path": self._path, "parser": self._parser, "on_error": self._on_error, "check_edits": self._check_edits, "invalidated": self._invalidated, "verify_source": self._verify_source, "raw_cache_bytes": self._raw_cache.max_bytes, "parsed_cache_bytes": self._section_cache.max_bytes, "index": self._index, "materials": self._materials, } def __setstate__(self, state): self._path = state["path"] self._parser = state["parser"] self._on_error = state["on_error"] self._check_edits = state.get("check_edits", "eager") self._invalidated = state.get("invalidated", False) self._verify_source = state["verify_source"] self._index = state["index"] self._materials = state["materials"] self._raw_cache = _RawCache(state["raw_cache_bytes"]) self._section_cache = _SectionCache(state["parsed_cache_bytes"]) self._material_views = {} self._secondary_indexes = {} self._read_fh = None