# hashes.py
  import hashlib
  from typing import TYPE_CHECKING, BinaryIO, Dict, Iterator, List, Optional

  from pip._internal.exceptions import HashMismatch, HashMissing, InstallationError
  from pip._internal.utils.misc import read_chunks

  if TYPE_CHECKING:
      from hashlib import _Hash

      # NoReturn introduced in 3.6.2; imported only for type checking to maintain
      # pip compatibility with older patch versions of Python 3.6
      from typing import NoReturn
  # The recommended hash algo of the moment. Change this whenever the state of
  # the art changes; it won't hurt backward compatibility.
  FAVORITE_HASH = "sha256"

  # Names of hashlib algorithms allowed by the --hash option and ``pip hash``.
  # Currently, those are the ones at least as collision-resistant as sha256.
  STRONG_HASHES = ["sha256", "sha384", "sha512"]
  class Hashes:
      """A wrapper that builds multiple hashes at once and checks them against
      known-good values.

      Allowed digests are kept per algorithm name as sorted lists of
      lower-case hex strings. An instance with no algorithms at all is falsy.
      """

      def __init__(self, hashes=None):
          # type: (Optional[Dict[str, List[str]]]) -> None
          """
          :param hashes: A dict of algorithm names pointing to lists of allowed
              hex digests. ``None`` means no hashes are known.
          """
          allowed = {}
          if hashes is not None:
              for alg, keys in hashes.items():
                  # Hex digests are case-insensitive, and the digests computed
                  # in check_against_chunks are compared as lower-case strings,
                  # so normalise before storing. Sorting keeps equality checks
                  # independent of the order the digests were given in.
                  allowed[alg] = sorted(key.lower() for key in keys)
          self._allowed = allowed

      def __and__(self, other):
          # type: (Hashes) -> Hashes
          """Return the hashes allowed by both ``self`` and ``other``.

          If either operand is empty, the other operand itself is returned
          (not a copy). Returns ``NotImplemented`` for non-``Hashes`` operands.
          """
          if not isinstance(other, Hashes):
              return NotImplemented

          # If either of the Hashes object is entirely empty (i.e. no hash
          # specified at all), all hashes from the other object are allowed.
          if not other:
              return self
          if not self:
              return other

          # Otherwise only hashes that are present in both objects are allowed.
          new = {}
          for alg, values in other._allowed.items():
              if alg not in self._allowed:
                  continue
              # Build the lookup set once per algorithm instead of scanning
              # the list for every candidate digest.
              mine = set(self._allowed[alg])
              new[alg] = [value for value in values if value in mine]
          return Hashes(new)

      @property
      def digest_count(self):
          # type: () -> int
          """Return the total number of allowed digests over all algorithms."""
          return sum(len(digests) for digests in self._allowed.values())

      def is_hash_allowed(
          self,
          hash_name,  # type: str
          hex_digest,  # type: str
      ):
          # type: (...) -> bool
          """Return whether the given hex digest is allowed.

          The comparison ignores the case of ``hex_digest``.
          """
          return hex_digest.lower() in self._allowed.get(hash_name, [])

      def check_against_chunks(self, chunks):
          # type: (Iterator[bytes]) -> None
          """Check good hashes against ones built from iterable of chunks of
          data.

          Raise HashMismatch if none match. Raise InstallationError if an
          algorithm name is not accepted by ``hashlib.new``.
          """
          gots = {}
          for hash_name in self._allowed:
              try:
                  gots[hash_name] = hashlib.new(hash_name)
              except (ValueError, TypeError):
                  raise InstallationError(f"Unknown hash name: {hash_name}")

          # Feed every chunk to every hasher so the data is consumed only once.
          for chunk in chunks:
              for hasher in gots.values():
                  hasher.update(chunk)

          # A single matching digest, for any algorithm, is enough.
          for hash_name, got in gots.items():
              if got.hexdigest() in self._allowed[hash_name]:
                  return
          self._raise(gots)

      def _raise(self, gots):
          # type: (Dict[str, _Hash]) -> NoReturn
          """Raise HashMismatch carrying the allowed and the computed hashes."""
          raise HashMismatch(self._allowed, gots)

      def check_against_file(self, file):
          # type: (BinaryIO) -> None
          """Check good hashes against a file-like object

          Raise HashMismatch if none match.
          """
          return self.check_against_chunks(read_chunks(file))

      def check_against_path(self, path):
          # type: (str) -> None
          """Open ``path`` in binary mode and check its content against the
          good hashes.
          """
          with open(path, "rb") as file:
              return self.check_against_file(file)

      def __nonzero__(self):
          # type: () -> bool
          """Return whether I know any known-good hashes."""
          return bool(self._allowed)

      def __bool__(self):
          # type: () -> bool
          """Truthiness hook; delegates to ``__nonzero__``."""
          return self.__nonzero__()

      def __eq__(self, other):
          # type: (object) -> bool
          """Compare the allowed-digest mappings of two ``Hashes`` objects."""
          if not isinstance(other, Hashes):
              return NotImplemented
          return self._allowed == other._allowed

      def __hash__(self):
          # type: () -> int
          """Hash the sorted ``alg:digest`` pairs, consistent with ``__eq__``."""
          return hash(
              ",".join(
                  sorted(
                      ":".join((alg, digest))
                      for alg, digest_list in self._allowed.items()
                      for digest in digest_list
                  )
              )
          )
  class MissingHashes(Hashes):
      """Stand-in for Hashes used when a requirement has no hash specified.

      It computes the actual hash of the requirement and raises a HashMissing
      exception showing it to the user.
      """

      def __init__(self):
          # type: () -> None
          """Take no arguments; the ``hashes`` kwarg is deliberately not offered."""
          # Register only the favorite algorithm, with no allowed digests: a
          # digest still gets computed for it, but nothing can ever match, so
          # every check ends in ``_raise``.
          never_matching = {FAVORITE_HASH: []}
          super().__init__(hashes=never_matching)

      def _raise(self, gots):
          # type: (Dict[str, _Hash]) -> NoReturn
          """Raise HashMissing carrying the computed favorite-algorithm digest."""
          favorite = gots[FAVORITE_HASH]
          raise HashMissing(favorite.hexdigest())