prepare.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655
  1. """Prepares a distribution for installation
  2. """
  3. # The following comment should be removed at some point in the future.
  4. # mypy: strict-optional=False
  5. import logging
  6. import mimetypes
  7. import os
  8. import shutil
  9. from typing import Dict, Iterable, List, Optional, Tuple
  10. from pip._vendor.packaging.utils import canonicalize_name
  11. from pip._vendor.pkg_resources import Distribution
  12. from pip._internal.distributions import make_distribution_for_install_requirement
  13. from pip._internal.distributions.installed import InstalledDistribution
  14. from pip._internal.exceptions import (
  15. DirectoryUrlHashUnsupported,
  16. HashMismatch,
  17. HashUnpinned,
  18. InstallationError,
  19. NetworkConnectionError,
  20. PreviousBuildDirError,
  21. VcsHashUnsupported,
  22. )
  23. from pip._internal.index.package_finder import PackageFinder
  24. from pip._internal.models.link import Link
  25. from pip._internal.models.wheel import Wheel
  26. from pip._internal.network.download import BatchDownloader, Downloader
  27. from pip._internal.network.lazy_wheel import (
  28. HTTPRangeRequestUnsupported,
  29. dist_from_wheel_url,
  30. )
  31. from pip._internal.network.session import PipSession
  32. from pip._internal.req.req_install import InstallRequirement
  33. from pip._internal.req.req_tracker import RequirementTracker
  34. from pip._internal.utils.deprecation import deprecated
  35. from pip._internal.utils.filesystem import copy2_fixed
  36. from pip._internal.utils.hashes import Hashes, MissingHashes
  37. from pip._internal.utils.logging import indent_log
  38. from pip._internal.utils.misc import display_path, hide_url, is_installable_dir, rmtree
  39. from pip._internal.utils.temp_dir import TempDirectory
  40. from pip._internal.utils.unpacking import unpack_file
  41. from pip._internal.vcs import vcs
  42. logger = logging.getLogger(__name__)
  43. def _get_prepared_distribution(
  44. req, # type: InstallRequirement
  45. req_tracker, # type: RequirementTracker
  46. finder, # type: PackageFinder
  47. build_isolation, # type: bool
  48. ):
  49. # type: (...) -> Distribution
  50. """Prepare a distribution for installation."""
  51. abstract_dist = make_distribution_for_install_requirement(req)
  52. with req_tracker.track(req):
  53. abstract_dist.prepare_distribution_metadata(finder, build_isolation)
  54. return abstract_dist.get_pkg_resources_distribution()
  55. def unpack_vcs_link(link, location):
  56. # type: (Link, str) -> None
  57. vcs_backend = vcs.get_backend_for_scheme(link.scheme)
  58. assert vcs_backend is not None
  59. vcs_backend.unpack(location, url=hide_url(link.url))
  60. class File:
  61. def __init__(self, path, content_type):
  62. # type: (str, Optional[str]) -> None
  63. self.path = path
  64. if content_type is None:
  65. self.content_type = mimetypes.guess_type(path)[0]
  66. else:
  67. self.content_type = content_type
  68. def get_http_url(
  69. link, # type: Link
  70. download, # type: Downloader
  71. download_dir=None, # type: Optional[str]
  72. hashes=None, # type: Optional[Hashes]
  73. ):
  74. # type: (...) -> File
  75. temp_dir = TempDirectory(kind="unpack", globally_managed=True)
  76. # If a download dir is specified, is the file already downloaded there?
  77. already_downloaded_path = None
  78. if download_dir:
  79. already_downloaded_path = _check_download_dir(
  80. link, download_dir, hashes
  81. )
  82. if already_downloaded_path:
  83. from_path = already_downloaded_path
  84. content_type = None
  85. else:
  86. # let's download to a tmp dir
  87. from_path, content_type = download(link, temp_dir.path)
  88. if hashes:
  89. hashes.check_against_path(from_path)
  90. return File(from_path, content_type)
  91. def _copy2_ignoring_special_files(src, dest):
  92. # type: (str, str) -> None
  93. """Copying special files is not supported, but as a convenience to users
  94. we skip errors copying them. This supports tools that may create e.g.
  95. socket files in the project source directory.
  96. """
  97. try:
  98. copy2_fixed(src, dest)
  99. except shutil.SpecialFileError as e:
  100. # SpecialFileError may be raised due to either the source or
  101. # destination. If the destination was the cause then we would actually
  102. # care, but since the destination directory is deleted prior to
  103. # copy we ignore all of them assuming it is caused by the source.
  104. logger.warning(
  105. "Ignoring special file error '%s' encountered copying %s to %s.",
  106. str(e),
  107. src,
  108. dest,
  109. )
  110. def _copy_source_tree(source, target):
  111. # type: (str, str) -> None
  112. target_abspath = os.path.abspath(target)
  113. target_basename = os.path.basename(target_abspath)
  114. target_dirname = os.path.dirname(target_abspath)
  115. def ignore(d, names):
  116. # type: (str, List[str]) -> List[str]
  117. skipped = [] # type: List[str]
  118. if d == source:
  119. # Pulling in those directories can potentially be very slow,
  120. # exclude the following directories if they appear in the top
  121. # level dir (and only it).
  122. # See discussion at https://github.com/pypa/pip/pull/6770
  123. skipped += ['.tox', '.nox']
  124. if os.path.abspath(d) == target_dirname:
  125. # Prevent an infinite recursion if the target is in source.
  126. # This can happen when TMPDIR is set to ${PWD}/...
  127. # and we copy PWD to TMPDIR.
  128. skipped += [target_basename]
  129. return skipped
  130. shutil.copytree(
  131. source,
  132. target,
  133. ignore=ignore,
  134. symlinks=True,
  135. copy_function=_copy2_ignoring_special_files,
  136. )
  137. def get_file_url(
  138. link, # type: Link
  139. download_dir=None, # type: Optional[str]
  140. hashes=None # type: Optional[Hashes]
  141. ):
  142. # type: (...) -> File
  143. """Get file and optionally check its hash.
  144. """
  145. # If a download dir is specified, is the file already there and valid?
  146. already_downloaded_path = None
  147. if download_dir:
  148. already_downloaded_path = _check_download_dir(
  149. link, download_dir, hashes
  150. )
  151. if already_downloaded_path:
  152. from_path = already_downloaded_path
  153. else:
  154. from_path = link.file_path
  155. # If --require-hashes is off, `hashes` is either empty, the
  156. # link's embedded hash, or MissingHashes; it is required to
  157. # match. If --require-hashes is on, we are satisfied by any
  158. # hash in `hashes` matching: a URL-based or an option-based
  159. # one; no internet-sourced hash will be in `hashes`.
  160. if hashes:
  161. hashes.check_against_path(from_path)
  162. return File(from_path, None)
  163. def unpack_url(
  164. link, # type: Link
  165. location, # type: str
  166. download, # type: Downloader
  167. download_dir=None, # type: Optional[str]
  168. hashes=None, # type: Optional[Hashes]
  169. ):
  170. # type: (...) -> Optional[File]
  171. """Unpack link into location, downloading if required.
  172. :param hashes: A Hashes object, one of whose embedded hashes must match,
  173. or HashMismatch will be raised. If the Hashes is empty, no matches are
  174. required, and unhashable types of requirements (like VCS ones, which
  175. would ordinarily raise HashUnsupported) are allowed.
  176. """
  177. # non-editable vcs urls
  178. if link.is_vcs:
  179. unpack_vcs_link(link, location)
  180. return None
  181. # Once out-of-tree-builds are no longer supported, could potentially
  182. # replace the below condition with `assert not link.is_existing_dir`
  183. # - unpack_url does not need to be called for in-tree-builds.
  184. #
  185. # As further cleanup, _copy_source_tree and accompanying tests can
  186. # be removed.
  187. if link.is_existing_dir():
  188. deprecated(
  189. "A future pip version will change local packages to be built "
  190. "in-place without first copying to a temporary directory. "
  191. "We recommend you use --use-feature=in-tree-build to test "
  192. "your packages with this new behavior before it becomes the "
  193. "default.\n",
  194. replacement=None,
  195. gone_in="21.3",
  196. issue=7555
  197. )
  198. if os.path.isdir(location):
  199. rmtree(location)
  200. _copy_source_tree(link.file_path, location)
  201. return None
  202. # file urls
  203. if link.is_file:
  204. file = get_file_url(link, download_dir, hashes=hashes)
  205. # http urls
  206. else:
  207. file = get_http_url(
  208. link,
  209. download,
  210. download_dir,
  211. hashes=hashes,
  212. )
  213. # unpack the archive to the build dir location. even when only downloading
  214. # archives, they have to be unpacked to parse dependencies, except wheels
  215. if not link.is_wheel:
  216. unpack_file(file.path, location, file.content_type)
  217. return file
  218. def _check_download_dir(link, download_dir, hashes):
  219. # type: (Link, str, Optional[Hashes]) -> Optional[str]
  220. """ Check download_dir for previously downloaded file with correct hash
  221. If a correct file is found return its path else None
  222. """
  223. download_path = os.path.join(download_dir, link.filename)
  224. if not os.path.exists(download_path):
  225. return None
  226. # If already downloaded, does its hash match?
  227. logger.info('File was already downloaded %s', download_path)
  228. if hashes:
  229. try:
  230. hashes.check_against_path(download_path)
  231. except HashMismatch:
  232. logger.warning(
  233. 'Previously-downloaded file %s has bad hash. '
  234. 'Re-downloading.',
  235. download_path
  236. )
  237. os.unlink(download_path)
  238. return None
  239. return download_path
  240. class RequirementPreparer:
  241. """Prepares a Requirement
  242. """
  243. def __init__(
  244. self,
  245. build_dir, # type: str
  246. download_dir, # type: Optional[str]
  247. src_dir, # type: str
  248. build_isolation, # type: bool
  249. req_tracker, # type: RequirementTracker
  250. session, # type: PipSession
  251. progress_bar, # type: str
  252. finder, # type: PackageFinder
  253. require_hashes, # type: bool
  254. use_user_site, # type: bool
  255. lazy_wheel, # type: bool
  256. in_tree_build, # type: bool
  257. ):
  258. # type: (...) -> None
  259. super().__init__()
  260. self.src_dir = src_dir
  261. self.build_dir = build_dir
  262. self.req_tracker = req_tracker
  263. self._session = session
  264. self._download = Downloader(session, progress_bar)
  265. self._batch_download = BatchDownloader(session, progress_bar)
  266. self.finder = finder
  267. # Where still-packed archives should be written to. If None, they are
  268. # not saved, and are deleted immediately after unpacking.
  269. self.download_dir = download_dir
  270. # Is build isolation allowed?
  271. self.build_isolation = build_isolation
  272. # Should hash-checking be required?
  273. self.require_hashes = require_hashes
  274. # Should install in user site-packages?
  275. self.use_user_site = use_user_site
  276. # Should wheels be downloaded lazily?
  277. self.use_lazy_wheel = lazy_wheel
  278. # Should in-tree builds be used for local paths?
  279. self.in_tree_build = in_tree_build
  280. # Memoized downloaded files, as mapping of url: (path, mime type)
  281. self._downloaded = {} # type: Dict[str, Tuple[str, str]]
  282. # Previous "header" printed for a link-based InstallRequirement
  283. self._previous_requirement_header = ("", "")
  284. def _log_preparing_link(self, req):
  285. # type: (InstallRequirement) -> None
  286. """Provide context for the requirement being prepared."""
  287. if req.link.is_file and not req.original_link_is_in_wheel_cache:
  288. message = "Processing %s"
  289. information = str(display_path(req.link.file_path))
  290. else:
  291. message = "Collecting %s"
  292. information = str(req.req or req)
  293. if (message, information) != self._previous_requirement_header:
  294. self._previous_requirement_header = (message, information)
  295. logger.info(message, information)
  296. if req.original_link_is_in_wheel_cache:
  297. with indent_log():
  298. logger.info("Using cached %s", req.link.filename)
  299. def _ensure_link_req_src_dir(self, req, parallel_builds):
  300. # type: (InstallRequirement, bool) -> None
  301. """Ensure source_dir of a linked InstallRequirement."""
  302. # Since source_dir is only set for editable requirements.
  303. if req.link.is_wheel:
  304. # We don't need to unpack wheels, so no need for a source
  305. # directory.
  306. return
  307. assert req.source_dir is None
  308. if req.link.is_existing_dir() and self.in_tree_build:
  309. # build local directories in-tree
  310. req.source_dir = req.link.file_path
  311. return
  312. # We always delete unpacked sdists after pip runs.
  313. req.ensure_has_source_dir(
  314. self.build_dir,
  315. autodelete=True,
  316. parallel_builds=parallel_builds,
  317. )
  318. # If a checkout exists, it's unwise to keep going. version
  319. # inconsistencies are logged later, but do not fail the
  320. # installation.
  321. # FIXME: this won't upgrade when there's an existing
  322. # package unpacked in `req.source_dir`
  323. if is_installable_dir(req.source_dir):
  324. raise PreviousBuildDirError(
  325. "pip can't proceed with requirements '{}' due to a"
  326. "pre-existing build directory ({}). This is likely "
  327. "due to a previous installation that failed . pip is "
  328. "being responsible and not assuming it can delete this. "
  329. "Please delete it and try again.".format(req, req.source_dir)
  330. )
  331. def _get_linked_req_hashes(self, req):
  332. # type: (InstallRequirement) -> Hashes
  333. # By the time this is called, the requirement's link should have
  334. # been checked so we can tell what kind of requirements req is
  335. # and raise some more informative errors than otherwise.
  336. # (For example, we can raise VcsHashUnsupported for a VCS URL
  337. # rather than HashMissing.)
  338. if not self.require_hashes:
  339. return req.hashes(trust_internet=True)
  340. # We could check these first 2 conditions inside unpack_url
  341. # and save repetition of conditions, but then we would
  342. # report less-useful error messages for unhashable
  343. # requirements, complaining that there's no hash provided.
  344. if req.link.is_vcs:
  345. raise VcsHashUnsupported()
  346. if req.link.is_existing_dir():
  347. raise DirectoryUrlHashUnsupported()
  348. # Unpinned packages are asking for trouble when a new version
  349. # is uploaded. This isn't a security check, but it saves users
  350. # a surprising hash mismatch in the future.
  351. # file:/// URLs aren't pinnable, so don't complain about them
  352. # not being pinned.
  353. if req.original_link is None and not req.is_pinned:
  354. raise HashUnpinned()
  355. # If known-good hashes are missing for this requirement,
  356. # shim it with a facade object that will provoke hash
  357. # computation and then raise a HashMissing exception
  358. # showing the user what the hash should be.
  359. return req.hashes(trust_internet=False) or MissingHashes()
  360. def _fetch_metadata_using_lazy_wheel(self, link):
  361. # type: (Link) -> Optional[Distribution]
  362. """Fetch metadata using lazy wheel, if possible."""
  363. if not self.use_lazy_wheel:
  364. return None
  365. if self.require_hashes:
  366. logger.debug('Lazy wheel is not used as hash checking is required')
  367. return None
  368. if link.is_file or not link.is_wheel:
  369. logger.debug(
  370. 'Lazy wheel is not used as '
  371. '%r does not points to a remote wheel',
  372. link,
  373. )
  374. return None
  375. wheel = Wheel(link.filename)
  376. name = canonicalize_name(wheel.name)
  377. logger.info(
  378. 'Obtaining dependency information from %s %s',
  379. name, wheel.version,
  380. )
  381. url = link.url.split('#', 1)[0]
  382. try:
  383. return dist_from_wheel_url(name, url, self._session)
  384. except HTTPRangeRequestUnsupported:
  385. logger.debug('%s does not support range requests', url)
  386. return None
  387. def _complete_partial_requirements(
  388. self,
  389. partially_downloaded_reqs, # type: Iterable[InstallRequirement]
  390. parallel_builds=False, # type: bool
  391. ):
  392. # type: (...) -> None
  393. """Download any requirements which were only fetched by metadata."""
  394. # Download to a temporary directory. These will be copied over as
  395. # needed for downstream 'download', 'wheel', and 'install' commands.
  396. temp_dir = TempDirectory(kind="unpack", globally_managed=True).path
  397. # Map each link to the requirement that owns it. This allows us to set
  398. # `req.local_file_path` on the appropriate requirement after passing
  399. # all the links at once into BatchDownloader.
  400. links_to_fully_download = {} # type: Dict[Link, InstallRequirement]
  401. for req in partially_downloaded_reqs:
  402. assert req.link
  403. links_to_fully_download[req.link] = req
  404. batch_download = self._batch_download(
  405. links_to_fully_download.keys(),
  406. temp_dir,
  407. )
  408. for link, (filepath, _) in batch_download:
  409. logger.debug("Downloading link %s to %s", link, filepath)
  410. req = links_to_fully_download[link]
  411. req.local_file_path = filepath
  412. # This step is necessary to ensure all lazy wheels are processed
  413. # successfully by the 'download', 'wheel', and 'install' commands.
  414. for req in partially_downloaded_reqs:
  415. self._prepare_linked_requirement(req, parallel_builds)
  416. def prepare_linked_requirement(self, req, parallel_builds=False):
  417. # type: (InstallRequirement, bool) -> Distribution
  418. """Prepare a requirement to be obtained from req.link."""
  419. assert req.link
  420. link = req.link
  421. self._log_preparing_link(req)
  422. with indent_log():
  423. # Check if the relevant file is already available
  424. # in the download directory
  425. file_path = None
  426. if self.download_dir is not None and link.is_wheel:
  427. hashes = self._get_linked_req_hashes(req)
  428. file_path = _check_download_dir(req.link, self.download_dir, hashes)
  429. if file_path is not None:
  430. # The file is already available, so mark it as downloaded
  431. self._downloaded[req.link.url] = file_path, None
  432. else:
  433. # The file is not available, attempt to fetch only metadata
  434. wheel_dist = self._fetch_metadata_using_lazy_wheel(link)
  435. if wheel_dist is not None:
  436. req.needs_more_preparation = True
  437. return wheel_dist
  438. # None of the optimizations worked, fully prepare the requirement
  439. return self._prepare_linked_requirement(req, parallel_builds)
  440. def prepare_linked_requirements_more(self, reqs, parallel_builds=False):
  441. # type: (Iterable[InstallRequirement], bool) -> None
  442. """Prepare linked requirements more, if needed."""
  443. reqs = [req for req in reqs if req.needs_more_preparation]
  444. for req in reqs:
  445. # Determine if any of these requirements were already downloaded.
  446. if self.download_dir is not None and req.link.is_wheel:
  447. hashes = self._get_linked_req_hashes(req)
  448. file_path = _check_download_dir(req.link, self.download_dir, hashes)
  449. if file_path is not None:
  450. self._downloaded[req.link.url] = file_path, None
  451. req.needs_more_preparation = False
  452. # Prepare requirements we found were already downloaded for some
  453. # reason. The other downloads will be completed separately.
  454. partially_downloaded_reqs = [] # type: List[InstallRequirement]
  455. for req in reqs:
  456. if req.needs_more_preparation:
  457. partially_downloaded_reqs.append(req)
  458. else:
  459. self._prepare_linked_requirement(req, parallel_builds)
  460. # TODO: separate this part out from RequirementPreparer when the v1
  461. # resolver can be removed!
  462. self._complete_partial_requirements(
  463. partially_downloaded_reqs, parallel_builds=parallel_builds,
  464. )
  465. def _prepare_linked_requirement(self, req, parallel_builds):
  466. # type: (InstallRequirement, bool) -> Distribution
  467. assert req.link
  468. link = req.link
  469. self._ensure_link_req_src_dir(req, parallel_builds)
  470. hashes = self._get_linked_req_hashes(req)
  471. if link.is_existing_dir() and self.in_tree_build:
  472. local_file = None
  473. elif link.url not in self._downloaded:
  474. try:
  475. local_file = unpack_url(
  476. link, req.source_dir, self._download,
  477. self.download_dir, hashes
  478. )
  479. except NetworkConnectionError as exc:
  480. raise InstallationError(
  481. 'Could not install requirement {} because of HTTP '
  482. 'error {} for URL {}'.format(req, exc, link)
  483. )
  484. else:
  485. file_path, content_type = self._downloaded[link.url]
  486. if hashes:
  487. hashes.check_against_path(file_path)
  488. local_file = File(file_path, content_type)
  489. # For use in later processing,
  490. # preserve the file path on the requirement.
  491. if local_file:
  492. req.local_file_path = local_file.path
  493. dist = _get_prepared_distribution(
  494. req, self.req_tracker, self.finder, self.build_isolation,
  495. )
  496. return dist
  497. def save_linked_requirement(self, req):
  498. # type: (InstallRequirement) -> None
  499. assert self.download_dir is not None
  500. assert req.link is not None
  501. link = req.link
  502. if link.is_vcs or (link.is_existing_dir() and req.editable):
  503. # Make a .zip of the source_dir we already created.
  504. req.archive(self.download_dir)
  505. return
  506. if link.is_existing_dir():
  507. logger.debug(
  508. 'Not copying link to destination directory '
  509. 'since it is a directory: %s', link,
  510. )
  511. return
  512. if req.local_file_path is None:
  513. # No distribution was downloaded for this requirement.
  514. return
  515. download_location = os.path.join(self.download_dir, link.filename)
  516. if not os.path.exists(download_location):
  517. shutil.copy(req.local_file_path, download_location)
  518. download_path = display_path(download_location)
  519. logger.info('Saved %s', download_path)
  520. def prepare_editable_requirement(
  521. self,
  522. req, # type: InstallRequirement
  523. ):
  524. # type: (...) -> Distribution
  525. """Prepare an editable requirement
  526. """
  527. assert req.editable, "cannot prepare a non-editable req as editable"
  528. logger.info('Obtaining %s', req)
  529. with indent_log():
  530. if self.require_hashes:
  531. raise InstallationError(
  532. 'The editable requirement {} cannot be installed when '
  533. 'requiring hashes, because there is no single file to '
  534. 'hash.'.format(req)
  535. )
  536. req.ensure_has_source_dir(self.src_dir)
  537. req.update_editable()
  538. dist = _get_prepared_distribution(
  539. req, self.req_tracker, self.finder, self.build_isolation,
  540. )
  541. req.check_if_exists(self.use_user_site)
  542. return dist
  543. def prepare_installed_requirement(
  544. self,
  545. req, # type: InstallRequirement
  546. skip_reason # type: str
  547. ):
  548. # type: (...) -> Distribution
  549. """Prepare an already-installed requirement
  550. """
  551. assert req.satisfied_by, "req should have been satisfied but isn't"
  552. assert skip_reason is not None, (
  553. "did not get skip reason skipped but req.satisfied_by "
  554. "is set to {}".format(req.satisfied_by)
  555. )
  556. logger.info(
  557. 'Requirement %s: %s (%s)',
  558. skip_reason, req, req.satisfied_by.version
  559. )
  560. with indent_log():
  561. if self.require_hashes:
  562. logger.debug(
  563. 'Since it is already installed, we are trusting this '
  564. 'package without checking its hash. To ensure a '
  565. 'completely repeatable environment, install into an '
  566. 'empty virtualenv.'
  567. )
  568. return InstalledDistribution(req).get_pkg_resources_distribution()