author     Shubham Saini <shubham6405@gmail.com>   2018-12-11 10:01:23 +0000
committer  Shubham Saini <shubham6405@gmail.com>   2018-12-11 10:01:23 +0000
commit     68df54d6629ec019142eb149dd037774f2d11e7c (patch)
tree       345bc22d46b4e01a4ba8303b94278952a4ed2b9e /venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/distlib/locators.py
First commit
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/distlib/locators.py')
-rw-r--r--   venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/distlib/locators.py   1292
1 file changed, 1292 insertions, 0 deletions

diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/distlib/locators.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/distlib/locators.py
new file mode 100644
index 0000000..9131b77
--- /dev/null
+++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/distlib/locators.py
@@ -0,0 +1,1292 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2012-2015 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#

import gzip
from io import BytesIO
import json
import logging
import os
import posixpath
import re
try:
    import threading
except ImportError:  # pragma: no cover
    import dummy_threading as threading
import zlib

from . import DistlibException
from .compat import (urljoin, urlparse, urlunparse, url2pathname, pathname2url,
                     queue, quote, unescape, string_types, build_opener,
                     HTTPRedirectHandler as BaseRedirectHandler, text_type,
                     Request, HTTPError, URLError)
from .database import Distribution, DistributionPath, make_dist
from .metadata import Metadata, MetadataInvalidError
from .util import (cached_property, parse_credentials, ensure_slash,
                   split_filename, get_project_data, parse_requirement,
                   parse_name_and_version, ServerProxy, normalize_name)
from .version import get_scheme, UnsupportedVersionError
from .wheel import Wheel, is_compatible

logger = logging.getLogger(__name__)

HASHER_HASH = re.compile(r'^(\w+)=([a-f0-9]+)')
CHARSET = re.compile(r';\s*charset\s*=\s*(.*)\s*$', re.I)
HTML_CONTENT_TYPE = re.compile('text/html|application/x(ht)?ml')
DEFAULT_INDEX = 'https://pypi.python.org/pypi'

def get_all_distribution_names(url=None):
    """
    Return all distribution names known by an index.
    :param url: The URL of the index.
    :return: A list of all known distribution names.
    """
    if url is None:
        url = DEFAULT_INDEX
    client = ServerProxy(url, timeout=3.0)
    try:
        return client.list_packages()
    finally:
        client('close')()

class RedirectHandler(BaseRedirectHandler):
    """
    A class to work around a bug in some Python 3.2.x releases.
    """
    # There's a bug in the base version for some 3.2.x
    # (e.g. 3.2.2 on Ubuntu Oneiric). If a Location header
    # returns e.g. /abc, it bails because it says the scheme ''
    # is bogus, when actually it should use the request's
    # URL for the scheme. See Python issue #13696.
    def http_error_302(self, req, fp, code, msg, headers):
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.
        newurl = None
        for key in ('location', 'uri'):
            if key in headers:
                newurl = headers[key]
                break
        if newurl is None:  # pragma: no cover
            return
        urlparts = urlparse(newurl)
        if urlparts.scheme == '':
            newurl = urljoin(req.get_full_url(), newurl)
            if hasattr(headers, 'replace_header'):
                headers.replace_header(key, newurl)
            else:
                headers[key] = newurl
        return BaseRedirectHandler.http_error_302(self, req, fp, code, msg,
                                                  headers)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

class Locator(object):
    """
    A base class for locators - things that locate distributions.
    """
    source_extensions = ('.tar.gz', '.tar.bz2', '.tar', '.zip', '.tgz', '.tbz')
    binary_extensions = ('.egg', '.exe', '.whl')
    excluded_extensions = ('.pdf',)

    # A list of tags indicating which wheels you want to match. The default
    # value of None matches against the tags compatible with the running
    # Python. If you want to match other values, set wheel_tags on a locator
    # instance to a list of tuples (pyver, abi, arch) which you want to match.
    wheel_tags = None

    downloadable_extensions = source_extensions + ('.whl',)

    def __init__(self, scheme='default'):
        """
        Initialise an instance.
        :param scheme: Because locators look for most recent versions, they
                       need to know the version scheme to use. This specifies
                       the current PEP-recommended scheme - use ``'legacy'``
                       if you need to support existing distributions on PyPI.
        """
        self._cache = {}
        self.scheme = scheme
        # Because of bugs in some of the handlers on some of the platforms,
        # we use our own opener rather than just using urlopen.
        self.opener = build_opener(RedirectHandler())
        # If get_project() is called from locate(), the matcher instance
        # is set from the requirement passed to locate(). See issue #18 for
        # why this can be useful to know.
        self.matcher = None
        self.errors = queue.Queue()

    def get_errors(self):
        """
        Return any errors which have occurred.
        """
        result = []
        while not self.errors.empty():  # pragma: no cover
            try:
                e = self.errors.get(False)
                result.append(e)
            except queue.Empty:
                continue
            self.errors.task_done()
        return result

    def clear_errors(self):
        """
        Clear any errors which may have been logged.
        """
        # Just get the errors and throw them away
        self.get_errors()

    def clear_cache(self):
        self._cache.clear()

    def _get_scheme(self):
        return self._scheme

    def _set_scheme(self, value):
        self._scheme = value

    scheme = property(_get_scheme, _set_scheme)

    def _get_project(self, name):
        """
        For a given project, get a dictionary mapping available versions to Distribution
        instances.

        This should be implemented in subclasses.

        If called from a locate() request, self.matcher will be set to a
        matcher for the requirement to satisfy, otherwise it will be None.
        """
        raise NotImplementedError('Please implement in the subclass')

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        raise NotImplementedError('Please implement in the subclass')

    def get_project(self, name):
        """
        For a given project, get a dictionary mapping available versions to Distribution
        instances.

        This calls _get_project to do all the work, and just implements a caching layer on top.
        """
        if self._cache is None:  # pragma: no cover
            result = self._get_project(name)
        elif name in self._cache:
            result = self._cache[name]
        else:
            self.clear_errors()
            result = self._get_project(name)
            self._cache[name] = result
        return result

    def score_url(self, url):
        """
        Give an url a score which can be used to choose preferred URLs
        for a given project release.
        """
        t = urlparse(url)
        basename = posixpath.basename(t.path)
        compatible = True
        is_wheel = basename.endswith('.whl')
        is_downloadable = basename.endswith(self.downloadable_extensions)
        if is_wheel:
            compatible = is_compatible(Wheel(basename), self.wheel_tags)
        return (t.scheme == 'https', 'pypi.python.org' in t.netloc,
                is_downloadable, is_wheel, compatible, basename)

    def prefer_url(self, url1, url2):
        """
        Choose one of two URLs where both are candidates for distribution
        archives for the same version of a distribution (for example,
        .tar.gz vs. zip).

        The current implementation favours https:// URLs over http://, archives
        from PyPI over those from other locations, wheel compatibility (if a
        wheel) and then the archive name.
        """
        result = url2
        if url1:
            s1 = self.score_url(url1)
            s2 = self.score_url(url2)
            if s1 > s2:
                result = url1
            if result != url2:
                logger.debug('Not replacing %r with %r', url1, url2)
            else:
                logger.debug('Replacing %r with %r', url1, url2)
        return result

    def split_filename(self, filename, project_name):
        """
        Attempt to split a filename into project name, version and Python version.
        """
        return split_filename(filename, project_name)

    def convert_url_to_download_info(self, url, project_name):
        """
        See if a URL is a candidate for a download URL for a project (the URL
        has typically been scraped from an HTML page).

        If it is, a dictionary is returned with keys "name", "version",
        "filename" and "url"; otherwise, None is returned.
        """
        def same_project(name1, name2):
            return normalize_name(name1) == normalize_name(name2)

        result = None
        scheme, netloc, path, params, query, frag = urlparse(url)
        if frag.lower().startswith('egg='):  # pragma: no cover
            logger.debug('%s: version hint in fragment: %r',
                         project_name, frag)
        m = HASHER_HASH.match(frag)
        if m:
            algo, digest = m.groups()
        else:
            algo, digest = None, None
        origpath = path
        if path and path[-1] == '/':  # pragma: no cover
            path = path[:-1]
        if path.endswith('.whl'):
            try:
                wheel = Wheel(path)
                if is_compatible(wheel, self.wheel_tags):
                    if project_name is None:
                        include = True
                    else:
                        include = same_project(wheel.name, project_name)
                    if include:
                        result = {
                            'name': wheel.name,
                            'version': wheel.version,
                            'filename': wheel.filename,
                            'url': urlunparse((scheme, netloc, origpath,
                                               params, query, '')),
                            'python-version': ', '.join(
                                ['.'.join(list(v[2:])) for v in wheel.pyver]),
                        }
            except Exception as e:  # pragma: no cover
                logger.warning('invalid path for wheel: %s', path)
        elif not path.endswith(self.downloadable_extensions):  # pragma: no cover
            logger.debug('Not downloadable: %s', path)
        else:  # downloadable extension
            path = filename = posixpath.basename(path)
            for ext in self.downloadable_extensions:
                if path.endswith(ext):
                    path = path[:-len(ext)]
                    t = self.split_filename(path, project_name)
                    if not t:  # pragma: no cover
                        logger.debug('No match for project/version: %s', path)
                    else:
                        name, version, pyver = t
                        if not project_name or same_project(project_name, name):
                            result = {
                                'name': name,
                                'version': version,
                                'filename': filename,
                                'url': urlunparse((scheme, netloc, origpath,
                                                   params, query, '')),
                                #'packagetype': 'sdist',
                            }
                            if pyver:  # pragma: no cover
                                result['python-version'] = pyver
                    break
        if result and algo:
            result['%s_digest' % algo] = digest
        return result

    def _get_digest(self, info):
        """
        Get a digest from a dictionary by looking at keys of the form
        'algo_digest'.

        Returns a 2-tuple (algo, digest) if found, else None. Currently
        looks only for SHA256, then MD5.
        """
        result = None
        for algo in ('sha256', 'md5'):
            key = '%s_digest' % algo
            if key in info:
                result = (algo, info[key])
                break
        return result

    def _update_version_data(self, result, info):
        """
        Update a result dictionary (the final result from _get_project) with a
        dictionary for a specific version, which typically holds information
        gleaned from a filename or URL for an archive for the distribution.
        """
        name = info.pop('name')
        version = info.pop('version')
        if version in result:
            dist = result[version]
            md = dist.metadata
        else:
            dist = make_dist(name, version, scheme=self.scheme)
            md = dist.metadata
        dist.digest = digest = self._get_digest(info)
        url = info['url']
        result['digests'][url] = digest
        if md.source_url != info['url']:
            md.source_url = self.prefer_url(md.source_url, url)
        result['urls'].setdefault(version, set()).add(url)
        dist.locator = self
        result[version] = dist

    def locate(self, requirement, prereleases=False):
        """
        Find the most recent distribution which matches the given
        requirement.

        :param requirement: A requirement of the form 'foo (1.0)' or perhaps
                            'foo (>= 1.0, < 2.0, != 1.3)'
        :param prereleases: If ``True``, allow pre-release versions
                            to be located. Otherwise, pre-release versions
                            are not returned.
        :return: A :class:`Distribution` instance, or ``None`` if no such
                 distribution could be located.
        """
        result = None
        r = parse_requirement(requirement)
        if r is None:  # pragma: no cover
            raise DistlibException('Not a valid requirement: %r' % requirement)
        scheme = get_scheme(self.scheme)
        self.matcher = matcher = scheme.matcher(r.requirement)
        logger.debug('matcher: %s (%s)', matcher, type(matcher).__name__)
        versions = self.get_project(r.name)
        if len(versions) > 2:   # urls and digests keys are present
            # sometimes, versions are invalid
            slist = []
            vcls = matcher.version_class
            for k in versions:
                if k in ('urls', 'digests'):
                    continue
                try:
                    if not matcher.match(k):
                        logger.debug('%s did not match %r', matcher, k)
                    else:
                        if prereleases or not vcls(k).is_prerelease:
                            slist.append(k)
                        else:
                            logger.debug('skipping pre-release '
                                         'version %s of %s', k, matcher.name)
                except Exception:  # pragma: no cover
                    logger.warning('error matching %s with %r', matcher, k)
                    pass  # slist.append(k)
            if len(slist) > 1:
                slist = sorted(slist, key=scheme.key)
            if slist:
                logger.debug('sorted list: %s', slist)
                version = slist[-1]
                result = versions[version]
        if result:
            if r.extras:
                result.extras = r.extras
            result.download_urls = versions.get('urls', {}).get(version, set())
            d = {}
            sd = versions.get('digests', {})
            for url in result.download_urls:
                if url in sd:  # pragma: no cover
                    d[url] = sd[url]
            result.digests = d
        self.matcher = None
        return result

401 | |||
402 | class PyPIRPCLocator(Locator): | ||
403 | """ | ||
404 | This locator uses XML-RPC to locate distributions. It therefore | ||
405 | cannot be used with simple mirrors (that only mirror file content). | ||
406 | """ | ||
407 | def __init__(self, url, **kwargs): | ||
408 | """ | ||
409 | Initialise an instance. | ||
410 | |||
411 | :param url: The URL to use for XML-RPC. | ||
412 | :param kwargs: Passed to the superclass constructor. | ||
413 | """ | ||
414 | super(PyPIRPCLocator, self).__init__(**kwargs) | ||
415 | self.base_url = url | ||
416 | self.client = ServerProxy(url, timeout=3.0) | ||
417 | |||
418 | def get_distribution_names(self): | ||
419 | """ | ||
420 | Return all the distribution names known to this locator. | ||
421 | """ | ||
422 | return set(self.client.list_packages()) | ||
423 | |||
424 | def _get_project(self, name): | ||
425 | result = {'urls': {}, 'digests': {}} | ||
426 | versions = self.client.package_releases(name, True) | ||
427 | for v in versions: | ||
428 | urls = self.client.release_urls(name, v) | ||
429 | data = self.client.release_data(name, v) | ||
430 | metadata = Metadata(scheme=self.scheme) | ||
431 | metadata.name = data['name'] | ||
432 | metadata.version = data['version'] | ||
433 | metadata.license = data.get('license') | ||
434 | metadata.keywords = data.get('keywords', []) | ||
435 | metadata.summary = data.get('summary') | ||
436 | dist = Distribution(metadata) | ||
437 | if urls: | ||
438 | info = urls[0] | ||
439 | metadata.source_url = info['url'] | ||
440 | dist.digest = self._get_digest(info) | ||
441 | dist.locator = self | ||
442 | result[v] = dist | ||
443 | for info in urls: | ||
444 | url = info['url'] | ||
445 | digest = self._get_digest(info) | ||
446 | result['urls'].setdefault(v, set()).add(url) | ||
447 | result['digests'][url] = digest | ||
448 | return result | ||
449 | |||
class PyPIJSONLocator(Locator):
    """
    This locator uses PyPI's JSON interface. It's very limited in functionality
    and probably not worth using.
    """
    def __init__(self, url, **kwargs):
        super(PyPIJSONLocator, self).__init__(**kwargs)
        self.base_url = ensure_slash(url)

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        raise NotImplementedError('Not available from this locator')

    def _get_project(self, name):
        result = {'urls': {}, 'digests': {}}
        url = urljoin(self.base_url, '%s/json' % quote(name))
        try:
            resp = self.opener.open(url)
            data = resp.read().decode()  # for now
            d = json.loads(data)
            md = Metadata(scheme=self.scheme)
            data = d['info']
            md.name = data['name']
            md.version = data['version']
            md.license = data.get('license')
            md.keywords = data.get('keywords', [])
            md.summary = data.get('summary')
            dist = Distribution(md)
            dist.locator = self
            urls = d['urls']
            result[md.version] = dist
            for info in d['urls']:
                url = info['url']
                dist.download_urls.add(url)
                dist.digests[url] = self._get_digest(info)
                result['urls'].setdefault(md.version, set()).add(url)
                result['digests'][url] = self._get_digest(info)
            # Now get other releases
            for version, infos in d['releases'].items():
                if version == md.version:
                    continue  # already done
                omd = Metadata(scheme=self.scheme)
                omd.name = md.name
                omd.version = version
                odist = Distribution(omd)
                odist.locator = self
                result[version] = odist
                for info in infos:
                    url = info['url']
                    odist.download_urls.add(url)
                    odist.digests[url] = self._get_digest(info)
                    result['urls'].setdefault(version, set()).add(url)
                    result['digests'][url] = self._get_digest(info)
            # for info in urls:
            #     md.source_url = info['url']
            #     dist.digest = self._get_digest(info)
            #     dist.locator = self
            # for info in urls:
            #     url = info['url']
            #     result['urls'].setdefault(md.version, set()).add(url)
            #     result['digests'][url] = self._get_digest(info)
        except Exception as e:
            self.errors.put(text_type(e))
            logger.exception('JSON fetch failed: %s', e)
        return result


class Page(object):
    """
    This class represents a scraped HTML page.
    """
    # The following slightly hairy-looking regex just looks for the contents of
    # an anchor link, which has an attribute "href" either immediately preceded
    # or immediately followed by a "rel" attribute. The attribute values can be
    # declared with double quotes, single quotes or no quotes - which leads to
    # the length of the expression.
    _href = re.compile("""
(rel\\s*=\\s*(?:"(?P<rel1>[^"]*)"|'(?P<rel2>[^']*)'|(?P<rel3>[^>\\s\n]*))\\s+)?
href\\s*=\\s*(?:"(?P<url1>[^"]*)"|'(?P<url2>[^']*)'|(?P<url3>[^>\\s\n]*))
(\\s+rel\\s*=\\s*(?:"(?P<rel4>[^"]*)"|'(?P<rel5>[^']*)'|(?P<rel6>[^>\\s\n]*)))?
""", re.I | re.S | re.X)
    _base = re.compile(r"""<base\s+href\s*=\s*['"]?([^'">]+)""", re.I | re.S)

    def __init__(self, data, url):
        """
        Initialise an instance with the Unicode page contents and the URL they
        came from.
        """
        self.data = data
        self.base_url = self.url = url
        m = self._base.search(self.data)
        if m:
            self.base_url = m.group(1)

    _clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I)

    @cached_property
    def links(self):
        """
        Return the URLs of all the links on a page together with information
        about their "rel" attribute, for determining which ones to treat as
        downloads and which ones to queue for further scraping.
        """
        def clean(url):
            "Tidy up an URL."
            scheme, netloc, path, params, query, frag = urlparse(url)
            return urlunparse((scheme, netloc, quote(path),
                               params, query, frag))

        result = set()
        for match in self._href.finditer(self.data):
            d = match.groupdict('')
            rel = (d['rel1'] or d['rel2'] or d['rel3'] or
                   d['rel4'] or d['rel5'] or d['rel6'])
            url = d['url1'] or d['url2'] or d['url3']
            url = urljoin(self.base_url, url)
            url = unescape(url)
            url = self._clean_re.sub(lambda m: '%%%2x' % ord(m.group(0)), url)
            result.add((url, rel))
        # We sort the result, hoping to bring the most recent versions
        # to the front
        result = sorted(result, key=lambda t: t[0], reverse=True)
        return result


class SimpleScrapingLocator(Locator):
    """
    A locator which scrapes HTML pages to locate downloads for a distribution.
    This runs multiple threads to do the I/O; performance is at least as good
    as pip's PackageFinder, which works in an analogous fashion.
    """

    # These are used to deal with various Content-Encoding schemes.
    decoders = {
        'deflate': zlib.decompress,
        'gzip': lambda b: gzip.GzipFile(fileobj=BytesIO(b)).read(),
        'none': lambda b: b,
    }

    def __init__(self, url, timeout=None, num_workers=10, **kwargs):
        """
        Initialise an instance.
        :param url: The root URL to use for scraping.
        :param timeout: The timeout, in seconds, to be applied to requests.
                        This defaults to ``None`` (no timeout specified).
        :param num_workers: The number of worker threads you want to do I/O.
                            This defaults to 10.
        :param kwargs: Passed to the superclass.
        """
        super(SimpleScrapingLocator, self).__init__(**kwargs)
        self.base_url = ensure_slash(url)
        self.timeout = timeout
        self._page_cache = {}
        self._seen = set()
        self._to_fetch = queue.Queue()
        self._bad_hosts = set()
        self.skip_externals = False
        self.num_workers = num_workers
        self._lock = threading.RLock()
        # See issue #45: we need to be resilient when the locator is used
        # in a thread, e.g. with concurrent.futures. We can't use self._lock
        # as it is for coordinating our internal threads - the ones created
        # in _prepare_threads.
        self._gplock = threading.RLock()

    def _prepare_threads(self):
        """
        Threads are created only when get_project is called, and terminate
        before it returns. They are there primarily to parallelise I/O (i.e.
        fetching web pages).
        """
        self._threads = []
        for i in range(self.num_workers):
            t = threading.Thread(target=self._fetch)
            t.setDaemon(True)
            t.start()
            self._threads.append(t)

    def _wait_threads(self):
        """
        Tell all the threads to terminate (by sending a sentinel value) and
        wait for them to do so.
        """
        # Note that you need two loops, since you can't say which
        # thread will get each sentinel
        for t in self._threads:
            self._to_fetch.put(None)  # sentinel
        for t in self._threads:
            t.join()
        self._threads = []

    def _get_project(self, name):
        result = {'urls': {}, 'digests': {}}
        with self._gplock:
            self.result = result
            self.project_name = name
            url = urljoin(self.base_url, '%s/' % quote(name))
            self._seen.clear()
            self._page_cache.clear()
            self._prepare_threads()
            try:
                logger.debug('Queueing %s', url)
                self._to_fetch.put(url)
                self._to_fetch.join()
            finally:
                self._wait_threads()
            del self.result
        return result

    platform_dependent = re.compile(r'\b(linux-(i\d86|x86_64|arm\w+)|'
                                    r'win(32|-amd64)|macosx-?\d+)\b', re.I)

    def _is_platform_dependent(self, url):
        """
        Does an URL refer to a platform-specific download?
        """
        return self.platform_dependent.search(url)

    def _process_download(self, url):
        """
        See if an URL is a suitable download for a project.

        If it is, register information in the result dictionary (for
        _get_project) about the specific version it's for.

        Note that the return value isn't actually used other than as a boolean
        value.
        """
        if self._is_platform_dependent(url):
            info = None
        else:
            info = self.convert_url_to_download_info(url, self.project_name)
        logger.debug('process_download: %s -> %s', url, info)
        if info:
            with self._lock:  # needed because self.result is shared
                self._update_version_data(self.result, info)
        return info

    def _should_queue(self, link, referrer, rel):
        """
        Determine whether a link URL from a referring page and with a
        particular "rel" attribute should be queued for scraping.
        """
        scheme, netloc, path, _, _, _ = urlparse(link)
        if path.endswith(self.source_extensions + self.binary_extensions +
                         self.excluded_extensions):
            result = False
        elif self.skip_externals and not link.startswith(self.base_url):
            result = False
        elif not referrer.startswith(self.base_url):
            result = False
        elif rel not in ('homepage', 'download'):
            result = False
        elif scheme not in ('http', 'https', 'ftp'):
            result = False
        elif self._is_platform_dependent(link):
            result = False
        else:
            host = netloc.split(':', 1)[0]
            if host.lower() == 'localhost':
                result = False
            else:
                result = True
        logger.debug('should_queue: %s (%s) from %s -> %s', link, rel,
                     referrer, result)
        return result

    def _fetch(self):
        """
        Get a URL to fetch from the work queue, get the HTML page, examine its
        links for download candidates and candidates for further scraping.

        This is a handy method to run in a thread.
        """
        while True:
            url = self._to_fetch.get()
            try:
                if url:
                    page = self.get_page(url)
                    if page is None:  # e.g. after an error
                        continue
                    for link, rel in page.links:
                        if link not in self._seen:
                            try:
                                self._seen.add(link)
                                if (not self._process_download(link) and
                                        self._should_queue(link, url, rel)):
                                    logger.debug('Queueing %s from %s', link, url)
                                    self._to_fetch.put(link)
                            except MetadataInvalidError:  # e.g. invalid versions
                                pass
            except Exception as e:  # pragma: no cover
                self.errors.put(text_type(e))
            finally:
                # always do this, to avoid hangs :-)
                self._to_fetch.task_done()
            if not url:
                #logger.debug('Sentinel seen, quitting.')
                break

    def get_page(self, url):
        """
        Get the HTML for an URL, possibly from an in-memory cache.

        XXX TODO Note: this cache is never actually cleared. It's assumed that
        the data won't get stale over the lifetime of a locator instance (not
        necessarily true for the default_locator).
        """
        # http://peak.telecommunity.com/DevCenter/EasyInstall#package-index-api
        scheme, netloc, path, _, _, _ = urlparse(url)
        if scheme == 'file' and os.path.isdir(url2pathname(path)):
            url = urljoin(ensure_slash(url), 'index.html')

        if url in self._page_cache:
            result = self._page_cache[url]
            logger.debug('Returning %s from cache: %s', url, result)
        else:
            host = netloc.split(':', 1)[0]
            result = None
            if host in self._bad_hosts:
                logger.debug('Skipping %s due to bad host %s', url, host)
            else:
                req = Request(url, headers={'Accept-encoding': 'identity'})
                try:
                    logger.debug('Fetching %s', url)
                    resp = self.opener.open(req, timeout=self.timeout)
                    logger.debug('Fetched %s', url)
                    headers = resp.info()
                    content_type = headers.get('Content-Type', '')
                    if HTML_CONTENT_TYPE.match(content_type):
                        final_url = resp.geturl()
                        data = resp.read()
                        encoding = headers.get('Content-Encoding')
                        if encoding:
                            decoder = self.decoders[encoding]  # fail if not found
                            data = decoder(data)
                        encoding = 'utf-8'
                        m = CHARSET.search(content_type)
                        if m:
                            encoding = m.group(1)
                        try:
                            data = data.decode(encoding)
                        except UnicodeError:  # pragma: no cover
                            data = data.decode('latin-1')  # fallback
                        result = Page(data, final_url)
                        self._page_cache[final_url] = result
                except HTTPError as e:
                    if e.code != 404:
                        logger.exception('Fetch failed: %s: %s', url, e)
                except URLError as e:  # pragma: no cover
                    logger.exception('Fetch failed: %s: %s', url, e)
                    with self._lock:
                        self._bad_hosts.add(host)
                except Exception as e:  # pragma: no cover
                    logger.exception('Fetch failed: %s: %s', url, e)
                finally:
                    self._page_cache[url] = result  # even if None (failure)
        return result

    _distname_re = re.compile('<a href=[^>]*>([^<]+)<')

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        result = set()
        page = self.get_page(self.base_url)
        if not page:
            raise DistlibException('Unable to get %s' % self.base_url)
        for match in self._distname_re.finditer(page.data):
            result.add(match.group(1))
        return result

class DirectoryLocator(Locator):
    """
    This class locates distributions in a directory tree.
    """

    def __init__(self, path, **kwargs):
        """
        Initialise an instance.
        :param path: The root of the directory tree to search.
        :param kwargs: Passed to the superclass constructor,
                       except for:
                       * recursive - if True (the default), subdirectories are
                         recursed into. If False, only the top-level directory
                         is searched.
        """
        self.recursive = kwargs.pop('recursive', True)
        super(DirectoryLocator, self).__init__(**kwargs)
        path = os.path.abspath(path)
        if not os.path.isdir(path):  # pragma: no cover
            raise DistlibException('Not a directory: %r' % path)
        self.base_dir = path

    def should_include(self, filename, parent):
        """
        Should a filename be considered as a candidate for a distribution
        archive? As well as the filename, the directory which contains it
        is provided, though not used by the current implementation.
        """
        return filename.endswith(self.downloadable_extensions)

    def _get_project(self, name):
        result = {'urls': {}, 'digests': {}}
        for root, dirs, files in os.walk(self.base_dir):
            for fn in files:
                if self.should_include(fn, root):
                    fn = os.path.join(root, fn)
                    url = urlunparse(('file', '',
                                      pathname2url(os.path.abspath(fn)),
                                      '', '', ''))
                    info = self.convert_url_to_download_info(url, name)
                    if info:
                        self._update_version_data(result, info)
            if not self.recursive:
                break
        return result

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        result = set()
        for root, dirs, files in os.walk(self.base_dir):
            for fn in files:
                if self.should_include(fn, root):
                    fn = os.path.join(root, fn)
                    url = urlunparse(('file', '',
                                      pathname2url(os.path.abspath(fn)),
                                      '', '', ''))
                    info = self.convert_url_to_download_info(url, None)
                    if info:
                        result.add(info['name'])
            if not self.recursive:
                break
        return result

class JSONLocator(Locator):
    """
    This locator uses special extended metadata (not available on PyPI) and is
    the basis of performant dependency resolution in distlib. Other locators
    require archive downloads before dependencies can be determined! As you
    might imagine, that can be slow.
    """
    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        raise NotImplementedError('Not available from this locator')

    def _get_project(self, name):
        result = {'urls': {}, 'digests': {}}
        data = get_project_data(name)
        if data:
            for info in data.get('files', []):
                if info['ptype'] != 'sdist' or info['pyversion'] != 'source':
                    continue
                # We don't store summary in project metadata as it makes
                # the data bigger for no benefit during dependency
                # resolution
                dist = make_dist(data['name'], info['version'],
                                 summary=data.get('summary',
                                                  'Placeholder for summary'),
                                 scheme=self.scheme)
                md = dist.metadata
                md.source_url = info['url']
                # TODO SHA256 digest
                if 'digest' in info and info['digest']:
                    dist.digest = ('md5', info['digest'])
                md.dependencies = info.get('requirements', {})
                dist.exports = info.get('exports', {})
                result[dist.version] = dist
                result['urls'].setdefault(dist.version, set()).add(info['url'])
        return result

class DistPathLocator(Locator):
    """
    This locator finds installed distributions in a path. It can be useful for
    adding to an :class:`AggregatingLocator`.
    """
    def __init__(self, distpath, **kwargs):
        """
        Initialise an instance.

        :param distpath: A :class:`DistributionPath` instance to search.
        """
        super(DistPathLocator, self).__init__(**kwargs)
        assert isinstance(distpath, DistributionPath)
        self.distpath = distpath

    def _get_project(self, name):
        dist = self.distpath.get_distribution(name)
        if dist is None:
            result = {'urls': {}, 'digests': {}}
        else:
            result = {
                dist.version: dist,
                'urls': {dist.version: set([dist.source_url])},
                'digests': {dist.version: set([None])}
            }
        return result


class AggregatingLocator(Locator):
    """
    This class allows you to chain and/or merge a list of locators.
    """
    def __init__(self, *locators, **kwargs):
        """
        Initialise an instance.

        :param locators: The list of locators to search.
        :param kwargs: Passed to the superclass constructor,
                       except for:
                       * merge - if False (the default), the first successful
                         search from any of the locators is returned. If True,
                         the results from all locators are merged (this can be
                         slow).
        """
        self.merge = kwargs.pop('merge', False)
        self.locators = locators
        super(AggregatingLocator, self).__init__(**kwargs)

    def clear_cache(self):
        super(AggregatingLocator, self).clear_cache()
        for locator in self.locators:
            locator.clear_cache()

    def _set_scheme(self, value):
        self._scheme = value
        for locator in self.locators:
            locator.scheme = value

    scheme = property(Locator.scheme.fget, _set_scheme)

    def _get_project(self, name):
        result = {}
        for locator in self.locators:
            d = locator.get_project(name)
            if d:
                if self.merge:
                    files = result.get('urls', {})
                    digests = result.get('digests', {})
                    # next line could overwrite result['urls'], result['digests']
                    result.update(d)
                    df = result.get('urls')
                    if files and df:
                        for k, v in files.items():
                            if k in df:
                                df[k] |= v
                            else:
                                df[k] = v
                    dd = result.get('digests')
                    if digests and dd:
                        dd.update(digests)
                else:
                    # See issue #18. If any dists are found and we're looking
                    # for specific constraints, we only return something if
                    # a match is found. For example, if a DirectoryLocator
                    # returns just foo (1.0) while we're looking for
                    # foo (>= 2.0), we'll pretend there was nothing there so
                    # that subsequent locators can be queried. Otherwise we
                    # would just return foo (1.0) which would then lead to a
                    # failure to find foo (>= 2.0), because other locators
                    # weren't searched. Note that this only matters when
                    # merge=False.
                    if self.matcher is None:
                        found = True
                    else:
                        found = False
                        for k in d:
                            if self.matcher.match(k):
                                found = True
                                break
                    if found:
                        result = d
                        break
        return result

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        result = set()
        for locator in self.locators:
            try:
                result |= locator.get_distribution_names()
            except NotImplementedError:
                pass
        return result


# We use a legacy scheme simply because most of the dists on PyPI use legacy
# versions which don't conform to PEP 426 / PEP 440.
default_locator = AggregatingLocator(
                    JSONLocator(),
                    SimpleScrapingLocator('https://pypi.python.org/simple/',
                                          timeout=3.0),
                    scheme='legacy')

locate = default_locator.locate

NAME_VERSION_RE = re.compile(r'(?P<name>[\w-]+)\s*'
                             r'\(\s*(==\s*)?(?P<ver>[^)]+)\)$')

class DependencyFinder(object):
    """
    Locate dependencies for distributions.
    """

    def __init__(self, locator=None):
        """
        Initialise an instance, using the specified locator
        to locate distributions.
        """
        self.locator = locator or default_locator
        self.scheme = get_scheme(self.locator.scheme)

    def add_distribution(self, dist):
        """
        Add a distribution to the finder. This will update internal information
        about who provides what.
        :param dist: The distribution to add.
        """
        logger.debug('adding distribution %s', dist)
        name = dist.key
        self.dists_by_name[name] = dist
        self.dists[(name, dist.version)] = dist
        for p in dist.provides:
            name, version = parse_name_and_version(p)
            logger.debug('Add to provided: %s, %s, %s', name, version, dist)
            self.provided.setdefault(name, set()).add((version, dist))

    def remove_distribution(self, dist):
        """
        Remove a distribution from the finder. This will update internal
        information about who provides what.
        :param dist: The distribution to remove.
        """
        logger.debug('removing distribution %s', dist)
        name = dist.key
        del self.dists_by_name[name]
        del self.dists[(name, dist.version)]
        for p in dist.provides:
            name, version = parse_name_and_version(p)
            logger.debug('Remove from provided: %s, %s, %s', name, version, dist)
            s = self.provided[name]
            s.remove((version, dist))
            if not s:
                del self.provided[name]

    def get_matcher(self, reqt):
        """
        Get a version matcher for a requirement.
        :param reqt: The requirement
        :type reqt: str
        :return: A version matcher (an instance of
                 :class:`distlib.version.Matcher`).
        """
        try:
            matcher = self.scheme.matcher(reqt)
        except UnsupportedVersionError:  # pragma: no cover
            # XXX compat-mode if cannot read the version
            name = reqt.split()[0]
            matcher = self.scheme.matcher(name)
        return matcher

    def find_providers(self, reqt):
        """
        Find the distributions which can fulfill a requirement.

        :param reqt: The requirement.
        :type reqt: str
        :return: A set of distributions which can fulfill the requirement.
        """
        matcher = self.get_matcher(reqt)
        name = matcher.key  # case-insensitive
        result = set()
        provided = self.provided
        if name in provided:
            for version, provider in provided[name]:
                try:
                    match = matcher.match(version)
                except UnsupportedVersionError:
                    match = False

                if match:
                    result.add(provider)
                    break
        return result

    def try_to_replace(self, provider, other, problems):
        """
        Attempt to replace one provider with another. This is typically used
        when resolving dependencies from multiple sources, e.g. A requires
        (B >= 1.0) while C requires (B >= 1.1).

        For successful replacement, ``provider`` must meet all the requirements
        which ``other`` fulfills.

        :param provider: The provider we are trying to replace with.
        :param other: The provider we're trying to replace.
        :param problems: If False is returned, this will contain what
                         problems prevented replacement. This is currently
                         a tuple of the literal string 'cantreplace',
                         ``provider``, ``other`` and the set of requirements
                         that ``provider`` couldn't fulfill.
        :return: True if we can replace ``other`` with ``provider``, else
                 False.
        """
        rlist = self.reqts[other]
        unmatched = set()
        for s in rlist:
            matcher = self.get_matcher(s)
            if not matcher.match(provider.version):
                unmatched.add(s)
        if unmatched:
            # can't replace other with provider
            problems.add(('cantreplace', provider, other,
                          frozenset(unmatched)))
            result = False
        else:
            # can replace other with provider
            self.remove_distribution(other)
            del self.reqts[other]
            for s in rlist:
                self.reqts.setdefault(provider, set()).add(s)
            self.add_distribution(provider)
            result = True
        return result

    def find(self, requirement, meta_extras=None, prereleases=False):
        """
        Find a distribution and all distributions it depends on.

        :param requirement: The requirement specifying the distribution to
                            find, or a Distribution instance.
        :param meta_extras: A list of meta extras such as :test:, :build: and
                            so on.
        :param prereleases: If ``True``, allow pre-release versions to be
                            returned - otherwise, don't return prereleases
                            unless they're all that's available.

        Return a set of :class:`Distribution` instances and a set of
        problems.

        The distributions returned should be such that they have the
        :attr:`required` attribute set to ``True`` if they were
        from the ``requirement`` passed to ``find()``, and they have the
        :attr:`build_time_dependency` attribute set to ``True`` unless they
        are post-installation dependencies of the ``requirement``.

        The problems should be a tuple consisting of the string
        ``'unsatisfied'`` and the requirement which couldn't be satisfied
        by any distribution known to the locator.
        """

        self.provided = {}
        self.dists = {}
        self.dists_by_name = {}
        self.reqts = {}

        meta_extras = set(meta_extras or [])
        if ':*:' in meta_extras:
            meta_extras.remove(':*:')
            # :meta: and :run: are implicitly included
            meta_extras |= set([':test:', ':build:', ':dev:'])

        if isinstance(requirement, Distribution):
            dist = odist = requirement
            logger.debug('passed %s as requirement', odist)
        else:
            dist = odist = self.locator.locate(requirement,
                                               prereleases=prereleases)
            if dist is None:
                raise DistlibException('Unable to locate %r' % requirement)
            logger.debug('located %s', odist)
        dist.requested = True
        problems = set()
        todo = set([dist])
        install_dists = set([odist])
        while todo:
            dist = todo.pop()
            name = dist.key  # case-insensitive
            if name not in self.dists_by_name:
                self.add_distribution(dist)
            else:
                #import pdb; pdb.set_trace()
                other = self.dists_by_name[name]
                if other != dist:
                    self.try_to_replace(dist, other, problems)

            ireqts = dist.run_requires | dist.meta_requires
            sreqts = dist.build_requires
            ereqts = set()
            if meta_extras and dist in install_dists:
                for key in ('test', 'build', 'dev'):
                    e = ':%s:' % key
                    if e in meta_extras:
                        ereqts |= getattr(dist, '%s_requires' % key)
            all_reqts = ireqts | sreqts | ereqts
            for r in all_reqts:
                providers = self.find_providers(r)
                if not providers:
                    logger.debug('No providers found for %r', r)
                    provider = self.locator.locate(r, prereleases=prereleases)
                    # If no provider is found and we didn't consider
                    # prereleases, consider them now.
                    if provider is None and not prereleases:
                        provider = self.locator.locate(r, prereleases=True)
                    if provider is None:
                        logger.debug('Cannot satisfy %r', r)
                        problems.add(('unsatisfied', r))
                    else:
                        n, v = provider.key, provider.version
                        if (n, v) not in self.dists:
                            todo.add(provider)
                        providers.add(provider)
                        if r in ireqts and dist in install_dists:
                            install_dists.add(provider)
                            logger.debug('Adding %s to install_dists',
                                         provider.name_and_version)
                for p in providers:
                    name = p.key
                    if name not in self.dists_by_name:
                        self.reqts.setdefault(p, set()).add(r)
                    else:
                        other = self.dists_by_name[name]
                        if other != p:
                            # see if other can be replaced by p
                            self.try_to_replace(p, other, problems)

        dists = set(self.dists.values())
        for dist in dists:
            dist.build_time_dependency = dist not in install_dists
            if dist.build_time_dependency:
                logger.debug('%s is a build-time dependency only.',
                             dist.name_and_version)
        logger.debug('find done for %s', odist)
        return dists, problems
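
# End-to-end sketch: resolve a requirement and its dependency closure via
# the default locator (network access assumed, package name illustrative):
#
#     from distlib.locators import DependencyFinder
#     finder = DependencyFinder()
#     dists, problems = finder.find('flask (== 1.0)')
#     for d in dists:
#         print(d.name_and_version, d.build_time_dependency)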