123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560 |
- #-----------------------------------------------------------------------------
- # Copyright (c) 2005-2021, PyInstaller Development Team.
- #
- # Distributed under the terms of the GNU General Public License (version 2
- # or later) with exception for distributing the bootloader.
- #
- # The full license is in the file COPYING.txt, distributed with this software.
- #
- # SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
- #-----------------------------------------------------------------------------
- """
- Code related to processing of import hooks.
- """
- import glob
- import os.path
- import sys
- import weakref
- from PyInstaller import log as logging
- from PyInstaller.building.utils import format_binaries_and_datas
- from PyInstaller.compat import expand_path, importlib_load_source
- from PyInstaller.depend.imphookapi import PostGraphAPI
- from PyInstaller.exceptions import ImportErrorWhenRunningHook
- logger = logging.getLogger(__name__)
- # Safety check: Hook module names need to be unique. Duplicate names might occur if the cached PyuModuleGraph has an
- # issue.
- HOOKS_MODULE_NAMES = set()
- class ModuleHookCache(dict):
- """
- Cache of lazily loadable hook script objects.
- This cache is implemented as a `dict` subclass mapping from the fully-qualified names of all modules with at
- least one hook script to lists of `ModuleHook` instances encapsulating these scripts. As a `dict` subclass,
- all cached module names and hook scripts are accessible via standard dictionary operations.
- Attributes
- ----------
- module_graph : ModuleGraph
- Current module graph.
- _hook_module_name_prefix : str
- String prefixing the names of all in-memory modules lazily loaded from cached hook scripts. See also the
- `hook_module_name_prefix` parameter passed to the `ModuleHook.__init__()` method.
- """
- _cache_id_next = 0
- """
- 0-based identifier unique to the next `ModuleHookCache` to be instantiated.
- This identifier is incremented on each instantiation of a new `ModuleHookCache` to isolate in-memory modules of
- lazily loaded hook scripts in that cache to the same cache-specific namespace, preventing edge-case collisions
- with existing in-memory modules in other caches.
- """
- def __init__(self, module_graph, hook_dirs):
- """
- Cache all hook scripts in the passed directories.
- **Order of caching is significant** with respect to hooks for the same module, as the values of this
- dictionary are lists. Hooks for the same module will be run in the order in which they are cached. Previously
- cached hooks are always preserved rather than overidden.
- By default, official hooks are cached _before_ user-defined hooks. For modules with both official and
- user-defined hooks, this implies that the former take priority over and hence will be loaded _before_ the
- latter.
- Parameters
- ----------
- module_graph : ModuleGraph
- Current module graph.
- hook_dirs : list
- List of the absolute or relative paths of all directories containing **hook scripts** (i.e.,
- Python scripts with filenames matching `hook-{module_name}.py`, where `{module_name}` is the module
- hooked by that script) to be cached.
- """
- super().__init__()
- # To avoid circular references and hence increased memory consumption, a weak rather than strong reference is
- # stored to the passed graph. Since this graph is guaranteed to live longer than this cache,
- # this is guaranteed to be safe.
- self.module_graph = weakref.proxy(module_graph)
- # String unique to this cache prefixing the names of all in-memory modules lazily loaded from cached hook
- # scripts, privatized for safety.
- self._hook_module_name_prefix = '__PyInstaller_hooks_{}_'.format(ModuleHookCache._cache_id_next)
- ModuleHookCache._cache_id_next += 1
- # Cache all hook scripts in the passed directories.
- self._cache_hook_dirs(hook_dirs)
- def _cache_hook_dirs(self, hook_dirs):
- """
- Cache all hook scripts in the passed directories.
- Parameters
- ----------
- hook_dirs : list
- List of the absolute or relative paths of all directories containing hook scripts to be cached.
- """
- for hook_dir in hook_dirs:
- # Canonicalize this directory's path and validate its existence.
- hook_dir = os.path.abspath(expand_path(hook_dir))
- if not os.path.isdir(hook_dir):
- raise FileNotFoundError('Hook directory "{}" not found.'.format(hook_dir))
- # For each hook script in this directory...
- hook_filenames = glob.glob(os.path.join(hook_dir, 'hook-*.py'))
- for hook_filename in hook_filenames:
- # Fully-qualified name of this hook's corresponding module, constructed by removing the "hook-" prefix
- # and ".py" suffix.
- module_name = os.path.basename(hook_filename)[5:-3]
- if module_name in self:
- logger.warning(
- "Several hooks defined for module %r. Please take care they do not conflict.", module_name
- )
- # Lazily loadable hook object.
- module_hook = ModuleHook(
- module_graph=self.module_graph,
- module_name=module_name,
- hook_filename=hook_filename,
- hook_module_name_prefix=self._hook_module_name_prefix,
- )
- # Add this hook to this module's list of hooks.
- module_hooks = self.setdefault(module_name, [])
- module_hooks.append(module_hook)
- def remove_modules(self, *module_names):
- """
- Remove the passed modules and all hook scripts cached for these modules from this cache.
- Parameters
- ----------
- module_names : list
- List of all fully-qualified module names to be removed.
- """
- for module_name in module_names:
- # Unload this module's hook script modules from memory. Since these are top-level pure-Python modules cached
- # only in the "sys.modules" dictionary, popping these modules from this dictionary suffices to garbage
- # collect these modules.
- module_hooks = self.get(module_name, [])
- for module_hook in module_hooks:
- sys.modules.pop(module_hook.hook_module_name, None)
- # Remove this module and its hook script objects from this cache.
- self.pop(module_name, None)
- # Dictionary mapping the names of magic attributes required by the "ModuleHook" class to 2-tuples "(default_type,
- # sanitizer_func)", where:
- #
- # * "default_type" is the type to which that attribute will be initialized when that hook is lazily loaded.
- # * "sanitizer_func" is the callable sanitizing the original value of that attribute defined by that hook into a
- # safer value consumable by "ModuleHook" callers if any or "None" if the original value requires no sanitization.
- #
- # To avoid subtleties in the ModuleHook.__getattr__() method, this dictionary is declared as a module rather than a
- # class attribute. If declared as a class attribute and then undefined (...for whatever reason), attempting to access
- # this attribute from that method would produce infinite recursion.
- _MAGIC_MODULE_HOOK_ATTRS = {
- # Collections in which order is insignificant. This includes:
- #
- # * "datas", sanitized from hook-style 2-tuple lists defined by hooks into TOC-style 2-tuple sets consumable by
- # "ModuleHook" callers.
- # * "binaries", sanitized in the same way.
- 'datas': (set, format_binaries_and_datas),
- 'binaries': (set, format_binaries_and_datas),
- 'excludedimports': (set, None),
- # Collections in which order is significant. This includes:
- #
- # * "hiddenimports", as order of importation is significant. On module importation, hook scripts are loaded and hook
- # functions declared by these scripts are called. As these scripts and functions can have side effects dependent
- # on module importation order, module importation itself can have side effects dependent on this order!
- 'hiddenimports': (list, None),
- }
- class ModuleHook(object):
- """
- Cached object encapsulating a lazy loadable hook script.
- This object exposes public attributes (e.g., `datas`) of the underlying hook script as attributes of the same
- name of this object. On the first access of any such attribute, this hook script is lazily loaded into an
- in-memory private module reused on subsequent accesses. These dynamic attributes are referred to as "magic." All
- other static attributes of this object (e.g., `hook_module_name`) are referred to as "non-magic."
- Attributes (Magic)
- ----------
- datas : set
- Set of `TOC`-style 2-tuples `(target_file, source_file)` for all external non-executable files required by
- the module being hooked, converted from the `datas` list of hook-style 2-tuples `(source_dir_or_glob,
- target_dir)` defined by this hook script.
- binaries : set
- Set of `TOC`-style 2-tuples `(target_file, source_file)` for all external executable files required by the
- module being hooked, converted from the `binaries` list of hook-style 2-tuples `(source_dir_or_glob,
- target_dir)` defined by this hook script.
- excludedimports : set
- Set of the fully-qualified names of all modules imported by the module being hooked to be ignored rather than
- imported from that module, converted from the `excludedimports` list defined by this hook script. These
- modules will only be "locally" rather than "globally" ignored. These modules will remain importable from all
- modules other than the module being hooked.
- hiddenimports : set
- Set of the fully-qualified names of all modules imported by the module being hooked that are _not_
- automatically detectable by PyInstaller (usually due to being dynamically imported in that module),
- converted from the `hiddenimports` list defined by this hook script.
- Attributes (Non-magic)
- ----------
- module_graph : ModuleGraph
- Current module graph.
- module_name : str
- Name of the module hooked by this hook script.
- hook_filename : str
- Absolute or relative path of this hook script.
- hook_module_name : str
- Name of the in-memory module of this hook script's interpreted contents.
- _hook_module : module
- In-memory module of this hook script's interpreted contents, lazily loaded on the first call to the
- `_load_hook_module()` method _or_ `None` if this method has yet to be accessed.
- """
- #-- Magic --
- def __init__(self, module_graph, module_name, hook_filename, hook_module_name_prefix):
- """
- Initialize this metadata.
- Parameters
- ----------
- module_graph : ModuleGraph
- Current module graph.
- module_name : str
- Name of the module hooked by this hook script.
- hook_filename : str
- Absolute or relative path of this hook script.
- hook_module_name_prefix : str
- String prefixing the name of the in-memory module for this hook script. To avoid namespace clashes with
- similar modules created by other `ModuleHook` objects in other `ModuleHookCache` containers, this string
- _must_ be unique to the `ModuleHookCache` container containing this `ModuleHook` object. If this string
- is non-unique, an existing in-memory module will be erroneously reused when lazily loading this hook
- script, thus erroneously resanitizing previously sanitized hook script attributes (e.g., `datas`) with
- the `format_binaries_and_datas()` helper.
- """
- # Note that the passed module graph is already a weak reference, avoiding circular reference issues. See
- # ModuleHookCache.__init__(). TODO: Add a failure message
- assert isinstance(module_graph, weakref.ProxyTypes)
- self.module_graph = module_graph
- self.module_name = module_name
- self.hook_filename = hook_filename
- # Name of the in-memory module fabricated to refer to this hook script.
- self.hook_module_name = hook_module_name_prefix + self.module_name.replace('.', '_')
- # Safety check, see above
- global HOOKS_MODULE_NAMES
- if self.hook_module_name in HOOKS_MODULE_NAMES:
- # When self._shallow is true, this class never loads the hook and sets the attributes to empty values
- self._shallow = True
- else:
- self._shallow = False
- HOOKS_MODULE_NAMES.add(self.hook_module_name)
- # Attributes subsequently defined by the _load_hook_module() method.
- self._hook_module = None
- def __getattr__(self, attr_name):
- """
- Get the magic attribute with the passed name (e.g., `datas`) from this lazily loaded hook script if any _or_
- raise `AttributeError` otherwise.
- This special method is called only for attributes _not_ already defined by this object. This includes
- undefined attributes and the first attempt to access magic attributes.
- This special method is _not_ called for subsequent attempts to access magic attributes. The first attempt to
- access magic attributes defines corresponding instance variables accessible via the `self.__dict__` instance
- dictionary (e.g., as `self.datas`) without calling this method. This approach also allows magic attributes to
- be deleted from this object _without_ defining the `__delattr__()` special method.
- See Also
- ----------
- Class docstring for supported magic attributes.
- """
- # If this is a magic attribute, initialize this attribute by lazy loading this hook script and then return
- # this attribute. To avoid recursion, the superclass method rather than getattr() is called.
- if attr_name in _MAGIC_MODULE_HOOK_ATTRS:
- self._load_hook_module()
- return super().__getattr__(attr_name)
- # Else, this is an undefined attribute. Raise an exception.
- else:
- raise AttributeError(attr_name)
- def __setattr__(self, attr_name, attr_value):
- """
- Set the attribute with the passed name to the passed value.
- If this is a magic attribute, this hook script will be lazily loaded before setting this attribute. Unlike
- `__getattr__()`, this special method is called to set _any_ attribute -- including magic, non-magic,
- and undefined attributes.
- See Also
- ----------
- Class docstring for supported magic attributes.
- """
- # If this is a magic attribute, initialize this attribute by lazy loading this hook script before overwriting
- # this attribute.
- if attr_name in _MAGIC_MODULE_HOOK_ATTRS:
- self._load_hook_module()
- # Set this attribute to the passed value. To avoid recursion, the superclass method rather than setattr() is
- # called.
- return super().__setattr__(attr_name, attr_value)
- #-- Loading --
- def _load_hook_module(self):
- """
- Lazily load this hook script into an in-memory private module.
- This method (and, indeed, this class) preserves all attributes and functions defined by this hook script as
- is, ensuring sane behaviour in hook functions _not_ expecting unplanned external modification. Instead,
- this method copies public attributes defined by this hook script (e.g., `binaries`) into private attributes
- of this object, which the special `__getattr__()` and `__setattr__()` methods safely expose to external
- callers. For public attributes _not_ defined by this hook script, the corresponding private attributes will
- be assigned sane defaults. For some public attributes defined by this hook script, the corresponding private
- attributes will be transformed into objects more readily and safely consumed elsewhere by external callers.
- See Also
- ----------
- Class docstring for supported attributes.
- """
- # If this hook script module has already been loaded, or we are _shallow, noop.
- if self._hook_module is not None or self._shallow:
- if self._shallow:
- self._hook_module = True # Not None
- # Inform the user
- logger.debug(
- 'Skipping module hook %r from %r because a hook for %s has already been loaded.',
- *os.path.split(self.hook_filename)[::-1], self.module_name
- )
- # Set the default attributes to empty instances of the type.
- for attr_name, (attr_type, _) in _MAGIC_MODULE_HOOK_ATTRS.items():
- super().__setattr__(attr_name, attr_type())
- return
- # Load and execute the hook script. Even if mechanisms from the import machinery are used, this does not import
- # the hook as the module.
- head, tail = os.path.split(self.hook_filename)
- logger.info('Loading module hook %r from %r...', tail, head)
- try:
- self._hook_module = importlib_load_source(self.hook_module_name, self.hook_filename)
- except ImportError:
- logger.debug("Hook failed with:", exc_info=True)
- raise ImportErrorWhenRunningHook(self.hook_module_name, self.hook_filename)
- # Copy hook script attributes into magic attributes exposed as instance variables of the current "ModuleHook"
- # instance.
- for attr_name, (default_type, sanitizer_func) in _MAGIC_MODULE_HOOK_ATTRS.items():
- # Unsanitized value of this attribute.
- attr_value = getattr(self._hook_module, attr_name, None)
- # If this attribute is undefined, expose a sane default instead.
- if attr_value is None:
- attr_value = default_type()
- # Else if this attribute requires sanitization, do so.
- elif sanitizer_func is not None:
- attr_value = sanitizer_func(attr_value)
- # Else, expose the unsanitized value of this attribute.
- # Expose this attribute as an instance variable of the same name.
- setattr(self, attr_name, attr_value)
- #-- Hooks --
- def post_graph(self, analysis):
- """
- Call the **post-graph hook** (i.e., `hook()` function) defined by this hook script, if any.
- Parameters
- ----------
- analysis: build_main.Analysis
- Analysis that calls the hook
- This method is intended to be called _after_ the module graph for this application is constructed.
- """
- # Lazily load this hook script into an in-memory module.
- self._load_hook_module()
- # Call this hook script's hook() function, which modifies attributes accessed by subsequent methods and hence
- # must be called first.
- self._process_hook_func(analysis)
- # Order is insignificant here.
- self._process_hidden_imports()
- self._process_excluded_imports()
- def _process_hook_func(self, analysis):
- """
- Call this hook's `hook()` function if defined.
- Parameters
- ----------
- analysis: build_main.Analysis
- Analysis that calls the hook
- """
- # If this hook script defines no hook() function, noop.
- if not hasattr(self._hook_module, 'hook'):
- return
- # Call this hook() function.
- hook_api = PostGraphAPI(module_name=self.module_name, module_graph=self.module_graph, analysis=analysis)
- try:
- self._hook_module.hook(hook_api)
- except ImportError:
- logger.debug("Hook failed with:", exc_info=True)
- raise ImportErrorWhenRunningHook(self.hook_module_name, self.hook_filename)
- # Update all magic attributes modified by the prior call.
- self.datas.update(set(hook_api._added_datas))
- self.binaries.update(set(hook_api._added_binaries))
- self.hiddenimports.extend(hook_api._added_imports)
- # FIXME: Deleted imports should be appended to self.excludedimports rather than handled here. However, see the
- # _process_excluded_imports() FIXME below for a sensible alternative.
- for deleted_module_name in hook_api._deleted_imports:
- # Remove the graph link between the hooked module and item. This removes the 'item' node from the graph if
- # no other links go to it (no other modules import it)
- self.module_graph.removeReference(hook_api.node, deleted_module_name)
- def _process_hidden_imports(self):
- """
- Add all imports listed in this hook script's `hiddenimports` attribute to the module graph as if directly
- imported by this hooked module.
- These imports are typically _not_ implicitly detectable by PyInstaller and hence must be explicitly defined
- by hook scripts.
- """
- # For each hidden import required by the module being hooked...
- for import_module_name in self.hiddenimports:
- try:
- # Graph node for this module. Do not implicitly create namespace packages for non-existent packages.
- caller = self.module_graph.find_node(self.module_name, create_nspkg=False)
- # Manually import this hidden import from this module.
- self.module_graph.import_hook(import_module_name, caller)
- # If this hidden import is unimportable, print a non-fatal warning. Hidden imports often become
- # desynchronized from upstream packages and hence are only "soft" recommendations.
- except ImportError:
- logger.warning('Hidden import "%s" not found!', import_module_name)
- # FIXME: This is pretty... intense. Attempting to cleanly "undo" prior module graph operations is a recipe for
- # subtle edge cases and difficult-to-debug issues. It would be both safer and simpler to prevent these
- # imports from being added to the graph in the first place. To do so:
- #
- # * Remove the _process_excluded_imports() method below.
- # * Remove the PostGraphAPI.del_imports() method, which cannot reasonably be supported by the following solution,
- # appears to be currently broken, and (in any case) is not called anywhere in the PyInstaller codebase.
- # * Override the ModuleGraph._safe_import_hook() superclass method with a new PyiModuleGraph._safe_import_hook()
- # subclass method resembling:
- #
- # def _safe_import_hook(
- # self, target_module_name, source_module, fromlist,
- # level=DEFAULT_IMPORT_LEVEL, attr=None):
- #
- # if source_module.identifier in self._module_hook_cache:
- # for module_hook in self._module_hook_cache[
- # source_module.identifier]:
- # if target_module_name in module_hook.excludedimports:
- # return []
- #
- # return super()._safe_import_hook(
- # target_module_name, source_module, fromlist,
- # level=level, attr=attr)
- def _process_excluded_imports(self):
- """
- 'excludedimports' is a list of Python module names that PyInstaller should not detect as dependency of this
- module name.
- So remove all import-edges from the current module (and it's submodules) to the given `excludedimports` (end
- their submodules).
- """
- def find_all_package_nodes(name):
- mods = [name]
- name += '.'
- for subnode in self.module_graph.nodes():
- if subnode.identifier.startswith(name):
- mods.append(subnode.identifier)
- return mods
- # If this hook excludes no imports, noop.
- if not self.excludedimports:
- return
- # Collect all submodules of this module.
- hooked_mods = find_all_package_nodes(self.module_name)
- # Collect all dependencies and their submodules
- # TODO: Optimize this by using a pattern and walking the graph only once.
- for item in set(self.excludedimports):
- excluded_node = self.module_graph.find_node(item, create_nspkg=False)
- if excluded_node is None:
- logger.info("Import to be excluded not found: %r", item)
- continue
- imports_to_remove = set(find_all_package_nodes(item))
- # Remove references between module nodes, as though they would not be imported from 'name'. Note: Doing this
- # in a nested loop is less efficient than collecting all import to remove first, but log messages are easier
- # to understand since related to the "Excluding ..." message above.
- for src in hooked_mods:
- # modules, this `src` does import
- references = set(node.identifier for node in self.module_graph.outgoing(src))
- # Remove all of these imports which are also in "imports_to_remove".
- for dest in imports_to_remove & references:
- self.module_graph.removeReference(src, dest)
- logger.debug("Excluding import of %s from module %s", dest, src)
- class AdditionalFilesCache(object):
- """
- Cache for storing what binaries and datas were pushed by what modules when import hooks were processed.
- """
- def __init__(self):
- self._binaries = {}
- self._datas = {}
- def add(self, modname, binaries, datas):
- self._binaries.setdefault(modname, [])
- self._binaries[modname].extend(binaries or [])
- self._datas.setdefault(modname, [])
- self._datas[modname].extend(datas or [])
- def __contains__(self, name):
- return name in self._binaries or name in self._datas
- def binaries(self, modname):
- """
- Return list of binaries for given module name.
- """
- return self._binaries[modname]
- def datas(self, modname):
- """
- Return list of datas for given module name.
- """
- return self._datas[modname]
|