author    Shubham Saini <shubham6405@gmail.com>    2019-08-05 08:32:33 +0000
committer Shubham Saini <shubham6405@gmail.com>    2019-08-05 08:32:33 +0000
commit    227b2d30a8675b44918f9d9ca89b24144a938215 (patch)
tree      9f8e6a28724514b6fdf463a9ab2067a7ef309b72 /venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_internal/index.py
parent    842a8cfbbbdb1f92889d892e4859dbd5d40c5be8 (diff)
removing venv files
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_internal/index.py')
-rw-r--r--  venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_internal/index.py | 1117
1 file changed, 0 insertions(+), 1117 deletions(-)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_internal/index.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_internal/index.py
deleted file mode 100644
index 15e0bf3..0000000
--- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_internal/index.py
+++ /dev/null
@@ -1,1117 +0,0 @@
1"""Routines related to PyPI, indexes"""
2from __future__ import absolute_import
3
4import cgi
5import itertools
6import logging
7import mimetypes
8import os
9import posixpath
10import re
11import sys
12import warnings
13from collections import namedtuple
14
15from pip._vendor import html5lib, requests, six
16from pip._vendor.distlib.compat import unescape
17from pip._vendor.packaging import specifiers
18from pip._vendor.packaging.utils import canonicalize_name
19from pip._vendor.packaging.version import parse as parse_version
20from pip._vendor.requests.exceptions import SSLError
21from pip._vendor.six.moves.urllib import parse as urllib_parse
22from pip._vendor.six.moves.urllib import request as urllib_request
23
24from pip._internal.compat import ipaddress
25from pip._internal.download import HAS_TLS, is_url, path_to_url, url_to_path
26from pip._internal.exceptions import (
27 BestVersionAlreadyInstalled, DistributionNotFound, InvalidWheelFilename,
28 UnsupportedWheel,
29)
30from pip._internal.models import PyPI
31from pip._internal.pep425tags import get_supported
32from pip._internal.utils.deprecation import RemovedInPip11Warning
33from pip._internal.utils.logging import indent_log
34from pip._internal.utils.misc import (
35 ARCHIVE_EXTENSIONS, SUPPORTED_EXTENSIONS, cached_property, normalize_path,
36 splitext,
37)
38from pip._internal.utils.packaging import check_requires_python
39from pip._internal.wheel import Wheel, wheel_ext
40
41__all__ = ['FormatControl', 'fmt_ctl_handle_mutual_exclude', 'PackageFinder']
42
43
44SECURE_ORIGINS = [
45 # protocol, hostname, port
46 # Taken from Chrome's list of secure origins (See: http://bit.ly/1qrySKC)
47 ("https", "*", "*"),
48 ("*", "localhost", "*"),
49 ("*", "127.0.0.0/8", "*"),
50 ("*", "::1/128", "*"),
51 ("file", "*", None),
52 # ssh is always secure.
53 ("ssh", "*", "*"),
54]
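
# An illustrative note (not part of the original module): these tuples are
# matched field by field by _validate_secure_origin below, with "*" acting
# as a wildcard. For example, a hypothetical origin
# ("https", "example.com", 443) matches ("https", "*", "*") and is secure,
# while ("http", "example.com", 80) matches nothing above and is rejected
# unless example.com was supplied via --trusted-host.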
55
56
57logger = logging.getLogger(__name__)
58
59
60class InstallationCandidate(object):
61
62 def __init__(self, project, version, location):
63 self.project = project
64 self.version = parse_version(version)
65 self.location = location
66 self._key = (self.project, self.version, self.location)
67
68 def __repr__(self):
69 return "<InstallationCandidate({!r}, {!r}, {!r})>".format(
70 self.project, self.version, self.location,
71 )
72
73 def __hash__(self):
74 return hash(self._key)
75
76 def __lt__(self, other):
77 return self._compare(other, lambda s, o: s < o)
78
79 def __le__(self, other):
80 return self._compare(other, lambda s, o: s <= o)
81
82 def __eq__(self, other):
83 return self._compare(other, lambda s, o: s == o)
84
85 def __ge__(self, other):
86 return self._compare(other, lambda s, o: s >= o)
87
88 def __gt__(self, other):
89 return self._compare(other, lambda s, o: s > o)
90
91 def __ne__(self, other):
92 return self._compare(other, lambda s, o: s != o)
93
94 def _compare(self, other, method):
95 if not isinstance(other, InstallationCandidate):
96 return NotImplemented
97
98 return method(self._key, other._key)
99
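# A minimal sketch of the resulting ordering (illustrative only; link_a and
# link_b stand in for Link objects):
#
#   older = InstallationCandidate("pip", "9.0.3", link_a)
#   newer = InstallationCandidate("pip", "10.0.0", link_b)
#   assert older < newer  # parse_version gives PEP 440 ordering, not lexical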
100
101class PackageFinder(object):
102 """This finds packages.
103
104 This is meant to match easy_install's technique for looking for
105 packages, by reading pages and looking for appropriate links.
106 """
107
108 def __init__(self, find_links, index_urls, allow_all_prereleases=False,
109 trusted_hosts=None, process_dependency_links=False,
110 session=None, format_control=None, platform=None,
111 versions=None, abi=None, implementation=None):
112 """Create a PackageFinder.
113
114 :param format_control: A FormatControl object or None. Used to control
115 the selection of source packages / binary packages when consulting
116 the index and links.
117 :param platform: A string or None. If None, searches for packages
118 that are supported by the current system. Otherwise, will find
119 packages that can be built on the platform passed in. These
120 packages will only be downloaded for distribution: they will
121 not be built locally.
122 :param versions: A list of strings or None. This is passed directly
123 to pep425tags.py in the get_supported() method.
124 :param abi: A string or None. This is passed directly
125 to pep425tags.py in the get_supported() method.
126 :param implementation: A string or None. This is passed directly
127 to pep425tags.py in the get_supported() method.
128 """
129 if session is None:
130 raise TypeError(
131 "PackageFinder() missing 1 required keyword argument: "
132 "'session'"
133 )
134
135 # Build find_links. If an argument starts with ~, it may be
136 # a local file relative to a home directory. So try normalizing
137 # it and if it exists, use the normalized version.
138 # This is deliberately conservative - it might be fine just to
139 # blindly normalize anything starting with a ~...
140 self.find_links = []
141 for link in find_links:
142 if link.startswith('~'):
143 new_link = normalize_path(link)
144 if os.path.exists(new_link):
145 link = new_link
146 self.find_links.append(link)
147
148 self.index_urls = index_urls
149 self.dependency_links = []
150
151 # These are boring links that have already been logged somehow:
152 self.logged_links = set()
153
154 self.format_control = format_control or FormatControl(set(), set())
155
156 # Domains that we won't emit warnings for when not using HTTPS
157 self.secure_origins = [
158 ("*", host, "*")
159 for host in (trusted_hosts if trusted_hosts else [])
160 ]
161
162 # Do we want to allow _all_ pre-releases?
163 self.allow_all_prereleases = allow_all_prereleases
164
165 # Do we process dependency links?
166 self.process_dependency_links = process_dependency_links
167
168 # The Session we'll use to make requests
169 self.session = session
170
171 # The valid tags to check potential found wheel candidates against
172 self.valid_tags = get_supported(
173 versions=versions,
174 platform=platform,
175 abi=abi,
176 impl=implementation,
177 )
178
179 # If we don't have TLS enabled, then WARN if anyplace we're looking
180 # relies on TLS.
181 if not HAS_TLS:
182 for link in itertools.chain(self.index_urls, self.find_links):
183 parsed = urllib_parse.urlparse(link)
184 if parsed.scheme == "https":
185 logger.warning(
186 "pip is configured with locations that require "
187 "TLS/SSL, however the ssl module in Python is not "
188 "available."
189 )
190 break
191
192 def get_formatted_locations(self):
193 lines = []
194 if self.index_urls and self.index_urls != [PyPI.simple_url]:
195 lines.append(
196 "Looking in indexes: {}".format(", ".join(self.index_urls))
197 )
198 if self.find_links:
199 lines.append(
200 "Looking in links: {}".format(", ".join(self.find_links))
201 )
202 return "\n".join(lines)
203
204 def add_dependency_links(self, links):
205 # # FIXME: this shouldn't be global list this, it should only
206 # # apply to requirements of the package that specifies the
207 # # dependency_links value
208 # # FIXME: also, we should track comes_from (i.e., use Link)
209 if self.process_dependency_links:
210 warnings.warn(
211 "Dependency Links processing has been deprecated and will be "
212 "removed in a future release.",
213 RemovedInPip11Warning,
214 )
215 self.dependency_links.extend(links)
216
217 @staticmethod
218 def _sort_locations(locations, expand_dir=False):
219 """
220 Sort locations into "files" (archives) and "urls", and return
221 a pair of lists (files, urls).
222 """
223 files = []
224 urls = []
225
226 # puts the url for the given file path into the appropriate list
227 def sort_path(path):
228 url = path_to_url(path)
229 if mimetypes.guess_type(url, strict=False)[0] == 'text/html':
230 urls.append(url)
231 else:
232 files.append(url)
233
234 for url in locations:
235
236 is_local_path = os.path.exists(url)
237 is_file_url = url.startswith('file:')
238
239 if is_local_path or is_file_url:
240 if is_local_path:
241 path = url
242 else:
243 path = url_to_path(url)
244 if os.path.isdir(path):
245 if expand_dir:
246 path = os.path.realpath(path)
247 for item in os.listdir(path):
248 sort_path(os.path.join(path, item))
249 elif is_file_url:
250 urls.append(url)
251 elif os.path.isfile(path):
252 sort_path(path)
253 else:
254 logger.warning(
255 "Url '%s' is ignored: it is neither a file "
256 "nor a directory.", url,
257 )
258 elif is_url(url):
259 # Only add url with clear scheme
260 urls.append(url)
261 else:
262 logger.warning(
263 "Url '%s' is ignored. It is either a non-existing "
264 "path or lacks a specific scheme.", url,
265 )
266
267 return files, urls
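
# Sketch of the split (hypothetical inputs, not from the original source):
# given ['/tmp/wheels/pkg-1.0.tar.gz', 'https://pypi.org/simple/pkg/'],
# the local archive is converted to a file:// URL and lands in files,
# while the HTML index URL lands in urls.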
268
269 def _candidate_sort_key(self, candidate):
270 """
271 Function used to generate link sort key for link tuples.
272 The greater the return value, the more preferred it is.
273 If not finding wheels, then sorted by version only.
274 If finding wheels, then the sort order is by version, then:
275 1. existing installs
276 2. wheels ordered via Wheel.support_index_min(self.valid_tags)
277 3. source archives
278 Note: it was considered to embed this logic into the Link
279 comparison operators, but then different sdist links
280 with the same version would have to be considered equal.
281 """
282 support_num = len(self.valid_tags)
283 build_tag = tuple()
284 if candidate.location.is_wheel:
285 # can raise InvalidWheelFilename
286 wheel = Wheel(candidate.location.filename)
287 if not wheel.supported(self.valid_tags):
288 raise UnsupportedWheel(
289 "%s is not a supported wheel for this platform. It "
290 "can't be sorted." % wheel.filename
291 )
292 pri = -(wheel.support_index_min(self.valid_tags))
293 if wheel.build_tag is not None:
294 match = re.match(r'^(\d+)(.*)$', wheel.build_tag)
295 build_tag_groups = match.groups()
296 build_tag = (int(build_tag_groups[0]), build_tag_groups[1])
297 else: # sdist
298 pri = -(support_num)
299 return (candidate.version, build_tag, pri)
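
# Illustrative keys (assumed values): with 30 valid tags, a wheel whose
# best-supported tag sits at support index 2 gets the key
# (Version('1.0'), (), -2), while an sdist of the same release gets
# (Version('1.0'), (), -30), so max() with this key prefers the wheel.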
300
301 def _validate_secure_origin(self, logger, location):
302 # Determine if this url used a secure transport mechanism
303 parsed = urllib_parse.urlparse(str(location))
304 origin = (parsed.scheme, parsed.hostname, parsed.port)
305
306 # Determine the protocol to match against the secure origins.
307 # Don't count the repository type as part of the protocol: in
308 # cases such as "git+ssh", only use "ssh" (i.e., only verify
309 # against the last scheme).
310 protocol = origin[0].rsplit('+', 1)[-1]
311
312 # Determine if our origin is a secure origin by looking through our
313 # hardcoded list of secure origins, as well as any additional ones
314 # configured on this PackageFinder instance.
315 for secure_origin in (SECURE_ORIGINS + self.secure_origins):
316 if protocol != secure_origin[0] and secure_origin[0] != "*":
317 continue
318
319 try:
320 # We need to do this decode dance to ensure that we have a
321 # unicode object, even on Python 2.x.
322 addr = ipaddress.ip_address(
323 origin[1]
324 if (
325 isinstance(origin[1], six.text_type) or
326 origin[1] is None
327 )
328 else origin[1].decode("utf8")
329 )
330 network = ipaddress.ip_network(
331 secure_origin[1]
332 if isinstance(secure_origin[1], six.text_type)
333 else secure_origin[1].decode("utf8")
334 )
335 except ValueError:
336 # We don't have both a valid address and a valid network, so
337 # we'll check this origin against hostnames.
338 if (origin[1] and
339 origin[1].lower() != secure_origin[1].lower() and
340 secure_origin[1] != "*"):
341 continue
342 else:
343 # We have a valid address and network, so see if the address
344 # is contained within the network.
345 if addr not in network:
346 continue
347
348 # Check to see if the port matches
349 if (origin[2] != secure_origin[2] and
350 secure_origin[2] != "*" and
351 secure_origin[2] is not None):
352 continue
353
354 # If we've gotten here, then this origin matches the current
355 # secure origin and we should return True
356 return True
357
358 # If we've gotten to this point, then the origin isn't secure and we
359 # will not accept it as a valid location to search. We will however
360 # log a warning that we are ignoring it.
361 logger.warning(
362 "The repository located at %s is not a trusted or secure host and "
363 "is being ignored. If this repository is available via HTTPS we "
364 "recommend you use HTTPS instead, otherwise you may silence "
365 "this warning and allow it anyway with '--trusted-host %s'.",
366 parsed.hostname,
367 parsed.hostname,
368 )
369
370 return False
371
372 def _get_index_urls_locations(self, project_name):
373 """Returns the locations found via self.index_urls
374
375 Checks the url_name on the main (first in the list) index and
376 uses this url_name to produce all locations.
377 """
378
379 def mkurl_pypi_url(url):
380 loc = posixpath.join(
381 url,
382 urllib_parse.quote(canonicalize_name(project_name)))
383 # For maximum compatibility with easy_install, ensure the path
384 # ends in a trailing slash. Although this isn't in the spec
385 # (and PyPI can handle it without the slash) some other index
386 # implementations might break if they relied on easy_install's
387 # behavior.
388 if not loc.endswith('/'):
389 loc = loc + '/'
390 return loc
391
392 return [mkurl_pypi_url(url) for url in self.index_urls]
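
# For example (hypothetical configuration): with index_urls set to
# ['https://pypi.org/simple'] and project_name 'Foo.Bar', this returns
# ['https://pypi.org/simple/foo-bar/'], i.e. the canonicalized name with
# a trailing slash ensured.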
393
394 def find_all_candidates(self, project_name):
395 Find all available InstallationCandidate objects for project_name
396
397 This checks index_urls, find_links and dependency_links.
398 All versions found are returned as an InstallationCandidate list.
399
400 See _link_package_versions for details on which files are accepted
401 """
402 index_locations = self._get_index_urls_locations(project_name)
403 index_file_loc, index_url_loc = self._sort_locations(index_locations)
404 fl_file_loc, fl_url_loc = self._sort_locations(
405 self.find_links, expand_dir=True,
406 )
407 dep_file_loc, dep_url_loc = self._sort_locations(self.dependency_links)
408
409 file_locations = (Link(url) for url in itertools.chain(
410 index_file_loc, fl_file_loc, dep_file_loc,
411 ))
412
413 # We trust every url that the user has given us whether it was given
414 # via --index-url or --find-links
415 # We explicitly do not trust links that came from dependency_links
416 We want to filter out anything which does not have a secure origin.
417 url_locations = [
418 link for link in itertools.chain(
419 (Link(url) for url in index_url_loc),
420 (Link(url) for url in fl_url_loc),
421 (Link(url) for url in dep_url_loc),
422 )
423 if self._validate_secure_origin(logger, link)
424 ]
425
426 logger.debug('%d location(s) to search for versions of %s:',
427 len(url_locations), project_name)
428
429 for location in url_locations:
430 logger.debug('* %s', location)
431
432 canonical_name = canonicalize_name(project_name)
433 formats = fmt_ctl_formats(self.format_control, canonical_name)
434 search = Search(project_name, canonical_name, formats)
435 find_links_versions = self._package_versions(
436 # We trust every directly linked archive in find_links
437 (Link(url, '-f') for url in self.find_links),
438 search
439 )
440
441 page_versions = []
442 for page in self._get_pages(url_locations, project_name):
443 logger.debug('Analyzing links from page %s', page.url)
444 with indent_log():
445 page_versions.extend(
446 self._package_versions(page.links, search)
447 )
448
449 dependency_versions = self._package_versions(
450 (Link(url) for url in self.dependency_links), search
451 )
452 if dependency_versions:
453 logger.debug(
454 'dependency_links found: %s',
455 ', '.join([
456 version.location.url for version in dependency_versions
457 ])
458 )
459
460 file_versions = self._package_versions(file_locations, search)
461 if file_versions:
462 file_versions.sort(reverse=True)
463 logger.debug(
464 'Local files found: %s',
465 ', '.join([
466 url_to_path(candidate.location.url)
467 for candidate in file_versions
468 ])
469 )
470
471 # This is an intentional priority ordering
472 return (
473 file_versions + find_links_versions + page_versions +
474 dependency_versions
475 )
476
477 def find_requirement(self, req, upgrade):
478 """Try to find a Link matching req
479
480 Expects req, an InstallRequirement and upgrade, a boolean
481 Returns a Link if found,
482 Raises DistributionNotFound or BestVersionAlreadyInstalled otherwise
483 """
484 all_candidates = self.find_all_candidates(req.name)
485
486 # Filter out anything which doesn't match our specifier
487 compatible_versions = set(
488 req.specifier.filter(
489 # We turn the version object into a str here because otherwise
490 # when we're debundled but setuptools isn't, Python will see
491 # packaging.version.Version and
492 # pkg_resources._vendor.packaging.version.Version as different
493 # types. This way we'll use a str as a common data interchange
494 # format. If we stop using the pkg_resources provided specifier
495 # and start using our own, we can drop the cast to str().
496 [str(c.version) for c in all_candidates],
497 prereleases=(
498 self.allow_all_prereleases
499 if self.allow_all_prereleases else None
500 ),
501 )
502 )
503 applicable_candidates = [
504 # Again, converting to str to deal with debundling.
505 c for c in all_candidates if str(c.version) in compatible_versions
506 ]
507
508 if applicable_candidates:
509 best_candidate = max(applicable_candidates,
510 key=self._candidate_sort_key)
511 else:
512 best_candidate = None
513
514 if req.satisfied_by is not None:
515 installed_version = parse_version(req.satisfied_by.version)
516 else:
517 installed_version = None
518
519 if installed_version is None and best_candidate is None:
520 logger.critical(
521 'Could not find a version that satisfies the requirement %s '
522 '(from versions: %s)',
523 req,
524 ', '.join(
525 sorted(
526 {str(c.version) for c in all_candidates},
527 key=parse_version,
528 )
529 )
530 )
531
532 raise DistributionNotFound(
533 'No matching distribution found for %s' % req
534 )
535
536 best_installed = False
537 if installed_version and (
538 best_candidate is None or
539 best_candidate.version <= installed_version):
540 best_installed = True
541
542 if not upgrade and installed_version is not None:
543 if best_installed:
544 logger.debug(
545 'Existing installed version (%s) is most up-to-date and '
546 'satisfies requirement',
547 installed_version,
548 )
549 else:
550 logger.debug(
551 'Existing installed version (%s) satisfies requirement '
552 '(most up-to-date version is %s)',
553 installed_version,
554 best_candidate.version,
555 )
556 return None
557
558 if best_installed:
559 # We have an existing version, and it's the best version
560 logger.debug(
561 'Installed version (%s) is most up-to-date (past versions: '
562 '%s)',
563 installed_version,
564 ', '.join(sorted(compatible_versions, key=parse_version)) or
565 "none",
566 )
567 raise BestVersionAlreadyInstalled
568
569 logger.debug(
570 'Using version %s (newest of versions: %s)',
571 best_candidate.version,
572 ', '.join(sorted(compatible_versions, key=parse_version))
573 )
574 return best_candidate.location
575
576 def _get_pages(self, locations, project_name):
577 """
578 Yields HTMLPage objects from the given locations, skipping
579 locations that have errors.
580 """
581 seen = set()
582 for location in locations:
583 if location in seen:
584 continue
585 seen.add(location)
586
587 page = self._get_page(location)
588 if page is None:
589 continue
590
591 yield page
592
593 _py_version_re = re.compile(r'-py([123]\.?[0-9]?)$')
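
# Illustration: for a parsed version string such as '1.0-py2.7', this
# pattern captures '2.7'; _link_package_versions strips the suffix and
# skips the link when the captured value differs from sys.version[:3].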
594
595 def _sort_links(self, links):
596 """
597 Returns elements of links in order, non-egg links first, egg links
598 second, while eliminating duplicates
599 """
600 eggs, no_eggs = [], []
601 seen = set()
602 for link in links:
603 if link not in seen:
604 seen.add(link)
605 if link.egg_fragment:
606 eggs.append(link)
607 else:
608 no_eggs.append(link)
609 return no_eggs + eggs
610
611 def _package_versions(self, links, search):
612 result = []
613 for link in self._sort_links(links):
614 v = self._link_package_versions(link, search)
615 if v is not None:
616 result.append(v)
617 return result
618
619 def _log_skipped_link(self, link, reason):
620 if link not in self.logged_links:
621 logger.debug('Skipping link %s; %s', link, reason)
622 self.logged_links.add(link)
623
624 def _link_package_versions(self, link, search):
625 """Return an InstallationCandidate or None"""
626 version = None
627 if link.egg_fragment:
628 egg_info = link.egg_fragment
629 ext = link.ext
630 else:
631 egg_info, ext = link.splitext()
632 if not ext:
633 self._log_skipped_link(link, 'not a file')
634 return
635 if ext not in SUPPORTED_EXTENSIONS:
636 self._log_skipped_link(
637 link, 'unsupported archive format: %s' % ext,
638 )
639 return
640 if "binary" not in search.formats and ext == wheel_ext:
641 self._log_skipped_link(
642 link, 'No binaries permitted for %s' % search.supplied,
643 )
644 return
645 if "macosx10" in link.path and ext == '.zip':
646 self._log_skipped_link(link, 'macosx10 one')
647 return
648 if ext == wheel_ext:
649 try:
650 wheel = Wheel(link.filename)
651 except InvalidWheelFilename:
652 self._log_skipped_link(link, 'invalid wheel filename')
653 return
654 if canonicalize_name(wheel.name) != search.canonical:
655 self._log_skipped_link(
656 link, 'wrong project name (not %s)' % search.supplied)
657 return
658
659 if not wheel.supported(self.valid_tags):
660 self._log_skipped_link(
661 link, 'it is not compatible with this Python')
662 return
663
664 version = wheel.version
665
666 # This should be up by the search.ok_binary check, but see issue 2700.
667 if "source" not in search.formats and ext != wheel_ext:
668 self._log_skipped_link(
669 link, 'No sources permitted for %s' % search.supplied,
670 )
671 return
672
673 if not version:
674 version = egg_info_matches(egg_info, search.supplied, link)
675 if version is None:
676 self._log_skipped_link(
677 link, 'wrong project name (not %s)' % search.supplied)
678 return
679
680 match = self._py_version_re.search(version)
681 if match:
682 version = version[:match.start()]
683 py_version = match.group(1)
684 if py_version != sys.version[:3]:
685 self._log_skipped_link(
686 link, 'Python version is incorrect')
687 return
688 try:
689 support_this_python = check_requires_python(link.requires_python)
690 except specifiers.InvalidSpecifier:
691 logger.debug("Package %s has an invalid Requires-Python entry: %s",
692 link.filename, link.requires_python)
693 support_this_python = True
694
695 if not support_this_python:
696 logger.debug("The package %s is incompatible with the python"
697 "version in use. Acceptable python versions are:%s",
698 link, link.requires_python)
699 return
700 logger.debug('Found link %s, version: %s', link, version)
701
702 return InstallationCandidate(search.supplied, version, link)
703
704 def _get_page(self, link):
705 return HTMLPage.get_page(link, session=self.session)
706
707
708def egg_info_matches(
709 egg_info, search_name, link,
710 _egg_info_re=re.compile(r'([a-z0-9_.]+)-([a-z0-9_.!+-]+)', re.I)):
711 """Pull the version part out of a string.
712
713 :param egg_info: The string to parse. E.g. foo-2.1
714 :param search_name: The name of the package this belongs to. None to
715 infer the name. Note that this cannot unambiguously parse strings
716 like foo-2-2 which might be foo, 2-2 or foo-2, 2.
717 :param link: The link the string came from, for logging on failure.
718 """
719 match = _egg_info_re.search(egg_info)
720 if not match:
721 logger.debug('Could not parse version from link: %s', link)
722 return None
723 if search_name is None:
724 full_match = match.group(0)
725 return full_match[full_match.index('-'):]
726 name = match.group(0).lower()
727 # To match the "safe" name that pkg_resources creates:
728 name = name.replace('_', '-')
729 # project name and version must be separated by a dash
730 look_for = search_name.lower() + "-"
731 if name.startswith(look_for):
732 return match.group(0)[len(look_for):]
733 else:
734 return None
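
# Illustrative behaviour (worked examples, not doctests from the original):
#   egg_info_matches('foo-2.1', 'foo', link)  ->  '2.1'
#   egg_info_matches('foo-2.1', None, link)   ->  '-2.1' (name inferred)
#   egg_info_matches('bar-2.1', 'foo', link)  ->  None (project name differs)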
735
736
737class HTMLPage(object):
738 """Represents one page, along with its URL"""
739
740 def __init__(self, content, url, headers=None):
741 # Determine if we have any encoding information in our headers
742 encoding = None
743 if headers and "Content-Type" in headers:
744 content_type, params = cgi.parse_header(headers["Content-Type"])
745
746 if "charset" in params:
747 encoding = params['charset']
748
749 self.content = content
750 self.parsed = html5lib.parse(
751 self.content,
752 transport_encoding=encoding,
753 namespaceHTMLElements=False,
754 )
755 self.url = url
756 self.headers = headers
757
758 def __str__(self):
759 return self.url
760
761 @classmethod
762 def get_page(cls, link, skip_archives=True, session=None):
763 if session is None:
764 raise TypeError(
765 "get_page() missing 1 required keyword argument: 'session'"
766 )
767
768 url = link.url
769 url = url.split('#', 1)[0]
770
771 # Check for VCS schemes that do not support lookup as web pages.
772 from pip._internal.vcs import VcsSupport
773 for scheme in VcsSupport.schemes:
774 if url.lower().startswith(scheme) and url[len(scheme)] in '+:':
775 logger.debug('Cannot look at %s URL %s', scheme, link)
776 return None
777
778 try:
779 if skip_archives:
780 filename = link.filename
781 for bad_ext in ARCHIVE_EXTENSIONS:
782 if filename.endswith(bad_ext):
783 content_type = cls._get_content_type(
784 url, session=session,
785 )
786 if content_type.lower().startswith('text/html'):
787 break
788 else:
789 logger.debug(
790 'Skipping page %s because of Content-Type: %s',
791 link,
792 content_type,
793 )
794 return
795
796 logger.debug('Getting page %s', url)
797
798 # Tack index.html onto file:// URLs that point to directories
799 (scheme, netloc, path, params, query, fragment) = \
800 urllib_parse.urlparse(url)
801 if (scheme == 'file' and
802 os.path.isdir(urllib_request.url2pathname(path))):
803 # add trailing slash if not present so urljoin doesn't trim
804 # final segment
805 if not url.endswith('/'):
806 url += '/'
807 url = urllib_parse.urljoin(url, 'index.html')
808 logger.debug(' file: URL is directory, getting %s', url)
809
810 resp = session.get(
811 url,
812 headers={
813 "Accept": "text/html",
814 "Cache-Control": "max-age=600",
815 },
816 )
817 resp.raise_for_status()
818
819 # The check for archives above only works if the url ends with
820 # something that looks like an archive. However that is not a
821 # requirement of a URL. Unless we issue a HEAD request on every
822 # url we cannot know ahead of time for sure if something is HTML
823 # or not. However we can check after we've downloaded it.
824 content_type = resp.headers.get('Content-Type', 'unknown')
825 if not content_type.lower().startswith("text/html"):
826 logger.debug(
827 'Skipping page %s because of Content-Type: %s',
828 link,
829 content_type,
830 )
831 return
832
833 inst = cls(resp.content, resp.url, resp.headers)
834 except requests.HTTPError as exc:
835 cls._handle_fail(link, exc, url)
836 except SSLError as exc:
837 reason = "There was a problem confirming the ssl certificate: "
838 reason += str(exc)
839 cls._handle_fail(link, reason, url, meth=logger.info)
840 except requests.ConnectionError as exc:
841 cls._handle_fail(link, "connection error: %s" % exc, url)
842 except requests.Timeout:
843 cls._handle_fail(link, "timed out", url)
844 else:
845 return inst
846
847 @staticmethod
848 def _handle_fail(link, reason, url, meth=None):
849 if meth is None:
850 meth = logger.debug
851
852 meth("Could not fetch URL %s: %s - skipping", link, reason)
853
854 @staticmethod
855 def _get_content_type(url, session):
856 """Get the Content-Type of the given url, using a HEAD request"""
857 scheme, netloc, path, query, fragment = urllib_parse.urlsplit(url)
858 if scheme not in {'http', 'https'}:
859 # FIXME: some warning or something?
860 # assertion error?
861 return ''
862
863 resp = session.head(url, allow_redirects=True)
864 resp.raise_for_status()
865
866 return resp.headers.get("Content-Type", "")
867
868 @cached_property
869 def base_url(self):
870 bases = [
871 x for x in self.parsed.findall(".//base")
872 if x.get("href") is not None
873 ]
874 if bases and bases[0].get("href"):
875 return bases[0].get("href")
876 else:
877 return self.url
878
879 @property
880 def links(self):
881 """Yields all links in the page"""
882 for anchor in self.parsed.findall(".//a"):
883 if anchor.get("href"):
884 href = anchor.get("href")
885 url = self.clean_link(
886 urllib_parse.urljoin(self.base_url, href)
887 )
888 pyrequire = anchor.get('data-requires-python')
889 pyrequire = unescape(pyrequire) if pyrequire else None
890 yield Link(url, self, requires_python=pyrequire)
891
892 _clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I)
893
894 def clean_link(self, url):
895 """Makes sure a link is fully encoded. That is, if a ' ' shows up in
896 the link, it will be rewritten to %20 (while not over-quoting
897 % or other characters)."""
898 return self._clean_re.sub(
899 lambda match: '%%%02x' % ord(match.group(0)), url)
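
# A sketch of the intent (hypothetical URL): clean_link(
# 'https://host/a file.tgz') returns 'https://host/a%20file.tgz'; '%'
# itself is in the allowed set, so already-encoded sequences pass through.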
900
901
902class Link(object):
903
904 def __init__(self, url, comes_from=None, requires_python=None):
905 """
906 Object representing a parsed link from https://pypi.org/simple/*
907
908 url:
909 url of the resource pointed to (href of the link)
910 comes_from:
911 instance of HTMLPage where the link was found, or string.
912 requires_python:
913 String containing the `Requires-Python` metadata field, specified
914 in PEP 345. This may be specified by a data-requires-python
915 attribute in the HTML link tag, as described in PEP 503.
916 """
917
918 # url can be a UNC windows share
919 if url.startswith('\\\\'):
920 url = path_to_url(url)
921
922 self.url = url
923 self.comes_from = comes_from
924 self.requires_python = requires_python if requires_python else None
925
926 def __str__(self):
927 if self.requires_python:
928 rp = ' (requires-python:%s)' % self.requires_python
929 else:
930 rp = ''
931 if self.comes_from:
932 return '%s (from %s)%s' % (self.url, self.comes_from, rp)
933 else:
934 return str(self.url)
935
936 def __repr__(self):
937 return '<Link %s>' % self
938
939 def __eq__(self, other):
940 if not isinstance(other, Link):
941 return NotImplemented
942 return self.url == other.url
943
944 def __ne__(self, other):
945 if not isinstance(other, Link):
946 return NotImplemented
947 return self.url != other.url
948
949 def __lt__(self, other):
950 if not isinstance(other, Link):
951 return NotImplemented
952 return self.url < other.url
953
954 def __le__(self, other):
955 if not isinstance(other, Link):
956 return NotImplemented
957 return self.url <= other.url
958
959 def __gt__(self, other):
960 if not isinstance(other, Link):
961 return NotImplemented
962 return self.url > other.url
963
964 def __ge__(self, other):
965 if not isinstance(other, Link):
966 return NotImplemented
967 return self.url >= other.url
968
969 def __hash__(self):
970 return hash(self.url)
971
972 @property
973 def filename(self):
974 _, netloc, path, _, _ = urllib_parse.urlsplit(self.url)
975 name = posixpath.basename(path.rstrip('/')) or netloc
976 name = urllib_parse.unquote(name)
977 assert name, ('URL %r produced no filename' % self.url)
978 return name
979
980 @property
981 def scheme(self):
982 return urllib_parse.urlsplit(self.url)[0]
983
984 @property
985 def netloc(self):
986 return urllib_parse.urlsplit(self.url)[1]
987
988 @property
989 def path(self):
990 return urllib_parse.unquote(urllib_parse.urlsplit(self.url)[2])
991
992 def splitext(self):
993 return splitext(posixpath.basename(self.path.rstrip('/')))
994
995 @property
996 def ext(self):
997 return self.splitext()[1]
998
999 @property
1000 def url_without_fragment(self):
1001 scheme, netloc, path, query, fragment = urllib_parse.urlsplit(self.url)
1002 return urllib_parse.urlunsplit((scheme, netloc, path, query, None))
1003
1004 _egg_fragment_re = re.compile(r'[#&]egg=([^&]*)')
1005
1006 @property
1007 def egg_fragment(self):
1008 match = self._egg_fragment_re.search(self.url)
1009 if not match:
1010 return None
1011 return match.group(1)
1012
1013 _subdirectory_fragment_re = re.compile(r'[#&]subdirectory=([^&]*)')
1014
1015 @property
1016 def subdirectory_fragment(self):
1017 match = self._subdirectory_fragment_re.search(self.url)
1018 if not match:
1019 return None
1020 return match.group(1)
1021
1022 _hash_re = re.compile(
1023 r'(sha1|sha224|sha384|sha256|sha512|md5)=([a-f0-9]+)'
1024 )
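
# Illustration (hypothetical URL): for 'pkg.tar.gz#sha256=abc123',
# hash_name below returns 'sha256' and hash returns 'abc123'.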
1025
1026 @property
1027 def hash(self):
1028 match = self._hash_re.search(self.url)
1029 if match:
1030 return match.group(2)
1031 return None
1032
1033 @property
1034 def hash_name(self):
1035 match = self._hash_re.search(self.url)
1036 if match:
1037 return match.group(1)
1038 return None
1039
1040 @property
1041 def show_url(self):
1042 return posixpath.basename(self.url.split('#', 1)[0].split('?', 1)[0])
1043
1044 @property
1045 def is_wheel(self):
1046 return self.ext == wheel_ext
1047
1048 @property
1049 def is_artifact(self):
1050 """
1051 Determines if this points to an actual artifact (e.g. a tarball) or if
1052 it points to an "abstract" thing like a path or a VCS location.
1053 """
1054 from pip._internal.vcs import vcs
1055
1056 if self.scheme in vcs.all_schemes:
1057 return False
1058
1059 return True
1060
1061
1062FormatControl = namedtuple('FormatControl', 'no_binary only_binary')
1063"""This object has two fields, no_binary and only_binary.
1064
1065If a field is falsy, it isn't set. If it is {':all:'}, it should match all
1066packages except those listed in the other field. Only one field can be set
1067 to {':all:'} at a time. Otherwise, exact package names are listed,
1068 with any given package appearing in only one field at a time.
1069"""
1070
1071
1072def fmt_ctl_handle_mutual_exclude(value, target, other):
1073 new = value.split(',')
1074 while ':all:' in new:
1075 other.clear()
1076 target.clear()
1077 target.add(':all:')
1078 del new[:new.index(':all:') + 1]
1079 if ':none:' not in new:
1080 # Without a :none:, we want to discard everything, as :all: covers it
1081 return
1082 for name in new:
1083 if name == ':none:':
1084 target.clear()
1085 continue
1086 name = canonicalize_name(name)
1087 other.discard(name)
1088 target.add(name)
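
# A worked example (assumed inputs): starting from two empty sets,
#   fmt_ctl_handle_mutual_exclude(':all:,:none:,foo', no_binary, only_binary)
# leaves no_binary == {'foo'} and only_binary == set(): ':all:' selects
# everything, ':none:' then clears the selection, and 'foo' is re-added
# by name.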
1089
1090
1091def fmt_ctl_formats(fmt_ctl, canonical_name):
1092 result = {"binary", "source"}
1093 if canonical_name in fmt_ctl.only_binary:
1094 result.discard('source')
1095 elif canonical_name in fmt_ctl.no_binary:
1096 result.discard('binary')
1097 elif ':all:' in fmt_ctl.only_binary:
1098 result.discard('source')
1099 elif ':all:' in fmt_ctl.no_binary:
1100 result.discard('binary')
1101 return frozenset(result)
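
# e.g. (illustrative): fmt_ctl_formats(FormatControl({'foo'}, set()), 'foo')
# returns frozenset({'source'}), since binaries are disallowed for foo.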
1102
1103
1104def fmt_ctl_no_binary(fmt_ctl):
1105 fmt_ctl_handle_mutual_exclude(
1106 ':all:', fmt_ctl.no_binary, fmt_ctl.only_binary,
1107 )
1108
1109
1110Search = namedtuple('Search', 'supplied canonical formats')
1111"""Capture key aspects of a search.
1112
1113 :attribute supplied: The user-supplied package name.
1114:attribute canonical: The canonical package name.
1115:attribute formats: The formats allowed for this package. Should be a set
1116 with 'binary' or 'source' or both in it.
1117"""