# Source code for snapm.fsdiff.fsdiffer

# Copyright Red Hat
#
# snapm/fsdiff/fsdiffer.py - Snapshot Manager file system differ
#
# This file is part of the snapm project.
#
# SPDX-License-Identifier: Apache-2.0
"""
Top-level fsdiff interface.
"""
from typing import Optional, TYPE_CHECKING
import logging
import pickle
import lzma


try:
    import magic
except ModuleNotFoundError:
    _HAVE_MAGIC = False
else:
    _HAVE_MAGIC = True

    # This is horrible and shouldn't be necessary:
    # https://github.com/snapshotmanager/snapm/issues/858
    # https://bugzilla.redhat.com/show_bug.cgi?id=2419719
    def close(theself):
        """
        Work around Magic destructor breakage.

        The module-level ``_close`` hook is only invoked while it is
        still set to a truthy value.

        :param theself: The ``magic.Magic`` instance.
        :type theself: ``magic.Magic``
        """
        closer = magic._close  # pylint: disable=protected-access
        if closer:
            closer(theself._magic_t)  # pylint: disable=protected-access

    # Install the guarded implementation in place of the library's own.
    magic.Magic.close = close

try:
    import zstandard as zstd
except ModuleNotFoundError:
    # Only lzma failures can occur when zstandard is not installed.
    compress_errors = (lzma.LZMAError,)
else:
    compress_errors = (
        lzma.LZMAError,
        zstd.ZstdError,
    )

from snapm import (
    get_current_rss,
    get_total_memory,
    SnapmError,
    SnapmSystemError,
    SnapmNotFoundError,
)
from snapm.progress import TermControl

from .cache import load_cache, save_cache
from .engine import DiffEngine, FsDiffResults
from .options import DiffOptions
from .treewalk import TreeWalker

if TYPE_CHECKING:
    from snapm.manager import Manager
    from snapm.manager._mounts import Mount

_log = logging.getLogger(__name__)

_log_debug = _log.debug
_log_info = _log.info
_log_warn = _log.warning
_log_error = _log.error

#: Maximum fraction of memory used to attempt diffing after building trees
_MAX_RSS_FRACTION = 0.333
def _should_diff(options: DiffOptions) -> bool:
    """
    Decide whether computing the diff is safe given memory constraints.

    The memory check is skipped, and diffing allowed, when content diffs
    are not requested or when ``options.no_mem_check`` is set. Diffing is
    also allowed (with a warning) if either the total system memory or
    the current process RSS cannot be determined.

    :param options: The effective options for this run.
    :type options: ``DiffOptions``
    :returns: ``True`` if diffing should be attempted or ``False``
              otherwise.
    :rtype: ``bool``
    """
    check_required = options.include_content_diffs and not options.no_mem_check
    if not check_required:
        return True

    total_memory = get_total_memory()
    current_rss = get_current_rss()

    if not total_memory:
        _log_warn("Cannot determine available system memory!")
        return True

    if not current_rss:
        _log_warn("Cannot determine current process memory use!")
        return True

    used_fraction = current_rss / total_memory
    if used_fraction > _MAX_RSS_FRACTION:
        used_percent = round(used_fraction * 100)
        limit_percent = round(_MAX_RSS_FRACTION * 100)
        _log_error(
            "Cannot compute diff: current RSS exceeds safe threshold "
            "of system memory (%d%% > %d%%)",
            used_percent,
            limit_percent,
        )
        _log_error("Try again with -C / --no-content-diff")
        return False

    return True
class FsDiffer:
    """
    Top-level interface for generating file system comparisons.
    """

    def __init__(
        self,
        manager: "Manager",
        options: Optional[DiffOptions] = None,
        cache: bool = True,
        cache_expires: int = -1,
        color: str = "auto",
        term_control: Optional[TermControl] = None,
    ):
        """
        Initialise a new ``FsDiffer`` to compute file system differences.

        :param manager: The manager context for snapshot operations (used
                        by future methods).
        :type manager: ``Manager``
        :param options: Options to control this ``FsDiffer`` instance. A
                        default ``DiffOptions`` is used if none is given.
        :type options: ``DiffOptions``
        :param cache: Use diff cache if available.
        :type cache: ``bool``
        :param cache_expires: Cache expiry in seconds. Use 0 to disable
                              cache expiry or -1 to use the cache default
                              (15m).
        :type cache_expires: ``int``
        :param color: A string to control color tree rendering: "auto",
                      "always", or "never".
        :type color: ``str``
        :param term_control: An optional ``TermControl`` instance to use
                             for formatting. The supplied instance
                             overrides any ``color`` argument if set.
        :type term_control: ``Optional[TermControl]``
        :raises SnapmNotFoundError: If ``options.use_magic_file_type`` is
                                    set but the ``magic`` module could not
                                    be imported.
        """
        if not options:
            options = DiffOptions()

        magic_missing = options.use_magic_file_type and not _HAVE_MAGIC
        if magic_missing:
            raise SnapmNotFoundError(
                "python-file-magic is not installed "
                "(required for --file-type/use_magic_file_type)"
            )

        if options.no_mem_check:
            _log_warn("RSS memory pressure safety checks disabled")

        #: Manager context for snapshot operations (used by future methods)
        self.manager: "Manager" = manager
        self.options: DiffOptions = options
        self.cache = cache
        self.cache_expires = cache_expires
        self.tree_walker: TreeWalker = TreeWalker(options)
        self.diff_engine: DiffEngine = DiffEngine()

        # A caller-supplied TermControl takes precedence over ``color``.
        if not term_control:
            term_control = TermControl(color=color)
        self._term_control: Optional[TermControl] = term_control

    def compare_roots(self, mount_a: "Mount", mount_b: "Mount") -> FsDiffResults:
        """
        Compare two mounted snapshot sets and return diff results.

        Cached results are returned if caching is enabled and a cache
        entry loads successfully. Otherwise both trees are walked, the
        diff is computed and, if caching is enabled, saved. Failures to
        load or save the cache are logged and do not abort the comparison.

        :param mount_a: The first (left hand) mount to compare.
        :type mount_a: ``Mount``
        :param mount_b: The second (right hand) mount to compare.
        :type mount_b: ``Mount``
        :returns: The diff results for the comparison.
        :rtype: ``FsDiffResults``
        :raises SnapmSystemError: If the memory safety check fails after
                                  the trees have been built.
        """
        try:
            _log_debug(
                "Attempting to load diff cache for %s/%s (cache=%s, expiry=%d)",
                mount_a.name,
                mount_b.name,
                self.cache,
                self.cache_expires,
            )
            cached_results = None
            if self.cache:
                cached_results = load_cache(
                    mount_a,
                    mount_b,
                    self.options,
                    expires=self.cache_expires,
                    quiet=self.options.quiet,
                    term_control=self._term_control,
                )
            if cached_results is not None:
                _log_debug(
                    "Loaded diff cache for %s/%s: %s",
                    mount_a.name,
                    mount_b.name,
                    cached_results,
                )
                return cached_results
        except SnapmNotFoundError:
            _log_debug("No diff cache found for %s/%s", mount_a.name, mount_b.name)
        except SnapmError as err:
            _log_info(
                "Failed to load diff cache for %s/%s: %s",
                mount_a.name,
                mount_b.name,
                err,
            )

        def _walk(mount):
            # Strip the mount root from paths unless it is "/" itself.
            prefix = mount.root if mount.root != "/" else ""
            return self.tree_walker.walk_tree(
                mount,
                strip_prefix=prefix,
                quiet=self.options.quiet,
                term_control=self._term_control,
            )

        tree_a = _walk(mount_a)
        tree_b = _walk(mount_b)

        if not _should_diff(self.options):
            raise SnapmSystemError("RSS limit exceeded after tree construction")

        results = self.diff_engine.compute_diff(
            tree_a, tree_b, self.options, self._term_control
        )

        # "We don't need that anymore." -- Praga Khan, 1993.
        del tree_a, tree_b

        if not self.cache:
            return results

        try:
            save_cache(
                mount_a,
                mount_b,
                results,
                quiet=self.options.quiet,
                term_control=self._term_control,
            )
        except (OSError, pickle.PicklingError, *compress_errors) as err:
            _log_info(
                "Failed to save diff cache for %s/%s: %s",
                mount_a.name,
                mount_b.name,
                err,
            )
        return results