Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/requests/utils.py')
 venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/requests/utils.py | 904 ++++++++++++++++
 1 file changed, 904 insertions(+), 0 deletions(-)
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/requests/utils.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/requests/utils.py
new file mode 100644
index 0000000..fc4f894
--- /dev/null
+++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/requests/utils.py
@@ -0,0 +1,904 @@
# -*- coding: utf-8 -*-

"""
requests.utils
~~~~~~~~~~~~~~

This module provides utility functions that are used within Requests
that are also useful for external consumption.
"""

import cgi
import codecs
import collections
import contextlib
import io
import os
import platform
import re
import socket
import struct
import warnings

from .__version__ import __version__
from . import certs
# to_native_string is unused here, but imported for backwards compatibility
from ._internal_utils import to_native_string
from .compat import parse_http_list as _parse_list_header
from .compat import (
    quote, urlparse, bytes, str, OrderedDict, unquote, getproxies,
    proxy_bypass, urlunparse, basestring, integer_types, is_py3,
    proxy_bypass_environment, getproxies_environment)
from .cookies import cookiejar_from_dict
from .structures import CaseInsensitiveDict
from .exceptions import (
    InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError)

NETRC_FILES = ('.netrc', '_netrc')

DEFAULT_CA_BUNDLE_PATH = certs.where()


if platform.system() == 'Windows':
    # provide a proxy_bypass version on Windows without DNS lookups

    def proxy_bypass_registry(host):
        if is_py3:
            import winreg
        else:
            import _winreg as winreg
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            proxyOverride = winreg.QueryValueEx(internetSettings,
                                                'ProxyOverride')[0]
        except OSError:
            return False
        if not proxyEnable or not proxyOverride:
            return False

        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in host:
                    return True
            test = test.replace(".", r"\.")  # mask dots
            test = test.replace("*", r".*")  # change glob sequence
            test = test.replace("?", r".")   # change glob char
            if re.match(test, host, re.I):
                return True
        return False

    def proxy_bypass(host):  # noqa
        """Return True if the host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or from the registry.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)


def dict_to_sequence(d):
    """Returns a sequence of key/value pairs from a dict-like object."""

    if hasattr(d, 'items'):
        d = d.items()

    return d


def super_len(o):
    total_length = None
    current_position = 0

    if hasattr(o, '__len__'):
        total_length = len(o)

    elif hasattr(o, 'len'):
        total_length = o.len

    elif hasattr(o, 'fileno'):
        try:
            fileno = o.fileno()
        except io.UnsupportedOperation:
            pass
        else:
            total_length = os.fstat(fileno).st_size

            # Having used fstat to determine the file length, we need to
            # confirm that this file was opened up in binary mode.
            if 'b' not in o.mode:
                warnings.warn((
                    "Requests has determined the content-length for this "
                    "request using the binary size of the file: however, the "
                    "file has been opened in text mode (i.e. without the 'b' "
                    "flag in the mode). This may lead to an incorrect "
                    "content-length. In Requests 3.0, support will be removed "
                    "for files in text mode."),
                    FileModeWarning
                )

    if hasattr(o, 'tell'):
        try:
            current_position = o.tell()
        except (OSError, IOError):
            # This can happen in some weird situations, such as when the file
            # is actually a special file descriptor like stdin. In this
            # instance, we don't know what the length is, so set it to zero and
            # let requests chunk it instead.
            if total_length is not None:
                current_position = total_length
        else:
            if hasattr(o, 'seek') and total_length is None:
                # StringIO and BytesIO have seek but no usable fileno
                try:
                    # seek to end of file
                    o.seek(0, 2)
                    total_length = o.tell()

                    # seek back to current position to support
                    # partially read file-like objects
                    o.seek(current_position or 0)
                except (OSError, IOError):
                    total_length = 0

    if total_length is None:
        total_length = 0

    return max(0, total_length - current_position)
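
# A minimal doctest-style sketch of how super_len behaves (illustrative
# addition, not part of the vendored source): it reports the number of
# bytes left between the current position and the end of the object.
#
#     >>> import io
#     >>> buf = io.BytesIO(b'hello')
#     >>> super_len(buf)
#     5
#     >>> _ = buf.read(2)
#     >>> super_len(buf)  # two bytes consumed, three remain
#     3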


def get_netrc_auth(url, raise_errors=False):
    """Returns the Requests tuple auth for a given url from netrc."""

    try:
        from netrc import netrc, NetrcParseError

        netrc_path = None

        for f in NETRC_FILES:
            try:
                loc = os.path.expanduser('~/{0}'.format(f))
            except KeyError:
                # os.path.expanduser can fail when $HOME is undefined and
                # getpwuid fails. See http://bugs.python.org/issue20164 &
                # https://github.com/requests/requests/issues/1846
                return

            if os.path.exists(loc):
                netrc_path = loc
                break

        # Abort early if there isn't one.
        if netrc_path is None:
            return

        ri = urlparse(url)

        # Strip port numbers from netloc. This weird `if...encode` dance is
        # used for Python 3.2, which doesn't support unicode literals.
        splitstr = b':'
        if isinstance(url, str):
            splitstr = splitstr.decode('ascii')
        host = ri.netloc.split(splitstr)[0]

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Return with login / password
                login_i = (0 if _netrc[0] else 1)
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, IOError):
            # If there was a parsing error or a permissions issue reading
            # the file, we'll just skip netrc auth unless explicitly asked
            # to raise errors.
            if raise_errors:
                raise

    # AppEngine hackiness.
    except (ImportError, AttributeError):
        pass
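
# A hypothetical sketch of what get_netrc_auth returns (illustrative
# addition, not part of the vendored source; depends on the environment).
# Given a ~/.netrc containing:
#
#     machine example.com login alice password s3cret
#
# the lookup would resolve as:
#
#     >>> get_netrc_auth('https://example.com/endpoint')
#     ('alice', 's3cret')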


def guess_filename(obj):
    """Tries to guess the filename of the given object."""
    name = getattr(obj, 'name', None)
    if (name and isinstance(name, basestring) and name[0] != '<' and
            name[-1] != '>'):
        return os.path.basename(name)


def from_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return an OrderedDict, e.g.,

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        ValueError: cannot encode objects that are not 2-tuples
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])

    :rtype: OrderedDict
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    return OrderedDict(value)


def to_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return a list of tuples, e.g.,

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        ValueError: cannot encode objects that are not 2-tuples

    :rtype: list
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    if isinstance(value, collections.Mapping):
        value = value.items()

    return list(value)


# From mitsuhiko/werkzeug (used with permission).
def parse_list_header(value):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings. A quoted-string could
    contain a comma. A non-quoted string could have quotes in the
    middle. Quotes are removed automatically after parsing.

    It basically works like :func:`parse_set_header` just that items
    may appear multiple times and case sensitivity is preserved.

    The return value is a standard :class:`list`:

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    To create a header from the :class:`list` again, use the
    :func:`dump_header` function.

    :param value: a string with a list header.
    :return: :class:`list`
    :rtype: list
    """
    result = []
    for item in _parse_list_header(value):
        if item[:1] == item[-1:] == '"':
            item = unquote_header_value(item[1:-1])
        result.append(item)
    return result


# From mitsuhiko/werkzeug (used with permission).
def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    :rtype: dict
    """
    result = {}
    for item in _parse_list_header(value):
        if '=' not in item:
            result[item] = None
            continue
        name, value = item.split('=', 1)
        if value[:1] == value[-1:] == '"':
            value = unquote_header_value(value[1:-1])
        result[name] = value
    return result


# From mitsuhiko/werkzeug (used with permission).
def unquote_header_value(value, is_filename=False):
    r"""Unquotes a header value. (Reversal of :func:`quote_header_value`.)
    This does not use the real unquoting but the rules browsers actually
    use when quoting.

    :param value: the header value to unquote.
    :rtype: str
    """
    if value and value[0] == value[-1] == '"':
        # this is not the real unquoting, but fixing this so that the
        # RFC is met will result in bugs with internet explorer and
        # probably some other browsers as well. IE for example is
        # uploading files with "C:\foo\bar.txt" as filename
        value = value[1:-1]

        # if this is a filename and the starting characters look like
        # a UNC path, then just return the value without quotes. Using the
        # replace sequence below on a UNC path has the effect of turning
        # the leading double slash into a single slash and then
        # _fix_ie_filename() doesn't work correctly. See #458.
        if not is_filename or value[:2] != '\\\\':
            return value.replace('\\\\', '\\').replace('\\"', '"')
    return value
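
# A doctest-style sketch of the unquoting behaviour (illustrative addition,
# not part of the vendored source): surrounding quotes are stripped and
# backslash-escapes undone, while unquoted tokens pass through untouched.
#
#     >>> unquote_header_value('"a \\"quoted\\" part"')
#     'a "quoted" part'
#     >>> unquote_header_value('plain-token')
#     'plain-token'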


def dict_from_cookiejar(cj):
    """Returns a key/value dictionary from a CookieJar.

    :param cj: CookieJar object to extract cookies from.
    :rtype: dict
    """

    cookie_dict = {}

    for cookie in cj:
        cookie_dict[cookie.name] = cookie.value

    return cookie_dict


def add_dict_to_cookiejar(cj, cookie_dict):
    """Returns a CookieJar from a key/value dictionary.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    :rtype: CookieJar
    """

    return cookiejar_from_dict(cookie_dict, cj)


def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content))


def get_encoding_from_headers(headers):
    """Returns the encoding from the given HTTP Header Dict.

    :param headers: dictionary to extract encoding from.
    :rtype: str
    """

    content_type = headers.get('content-type')

    if not content_type:
        return None

    content_type, params = cgi.parse_header(content_type)

    if 'charset' in params:
        return params['charset'].strip("'\"")

    if 'text' in content_type:
        return 'ISO-8859-1'
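
# A doctest-style sketch of the header-driven encoding lookup (illustrative
# addition, not part of the vendored source). Note the RFC 2616 default of
# ISO-8859-1 for text content types without an explicit charset:
#
#     >>> get_encoding_from_headers({'content-type': 'application/json; charset=utf-8'})
#     'utf-8'
#     >>> get_encoding_from_headers({'content-type': 'text/plain'})
#     'ISO-8859-1'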


def stream_decode_response_unicode(iterator, r):
    """Stream decodes an iterator."""

    if r.encoding is None:
        for item in iterator:
            yield item
        return

    decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
    for chunk in iterator:
        rv = decoder.decode(chunk)
        if rv:
            yield rv
    rv = decoder.decode(b'', final=True)
    if rv:
        yield rv


def iter_slices(string, slice_length):
    """Iterate over slices of a string."""
    pos = 0
    if slice_length is None or slice_length <= 0:
        slice_length = len(string)
    while pos < len(string):
        yield string[pos:pos + slice_length]
        pos += slice_length
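
# A doctest-style sketch of the slicing (illustrative addition, not part of
# the vendored source); a None or non-positive slice_length yields one slice:
#
#     >>> list(iter_slices('abcdef', 2))
#     ['ab', 'cd', 'ef']
#     >>> list(iter_slices('abcdef', None))
#     ['abcdef']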


def get_unicode_from_response(r):
    """Returns the requested content back in unicode.

    :param r: Response object to get unicode content from.

    Tries:

    1. charset from content-type
    2. fall back and replace all unicode characters

    :rtype: str
    """
    warnings.warn((
        'In requests 3.0, get_unicode_from_response will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    tried_encodings = []

    # Try charset from content-type
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            tried_encodings.append(encoding)

    # Fall back:
    try:
        return str(r.content, encoding, errors='replace')
    except TypeError:
        return r.content


# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~")


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.

    :rtype: str
    """
    parts = uri.split('%')
    for i in range(1, len(parts)):
        h = parts[i][0:2]
        if len(h) == 2 and h.isalnum():
            try:
                c = chr(int(h, 16))
            except ValueError:
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)

            if c in UNRESERVED_SET:
                parts[i] = c + parts[i][2:]
            else:
                parts[i] = '%' + parts[i]
        else:
            parts[i] = '%' + parts[i]
    return ''.join(parts)


def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.

    :rtype: str
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
    try:
        # Unquote only the unreserved characters
        # Then quote only illegal characters (do not quote reserved,
        # unreserved, or '%')
        return quote(unquote_unreserved(uri), safe=safe_with_percent)
    except InvalidURL:
        # We couldn't unquote the given URI, so let's try quoting it, but
        # there may be unquoted '%'s in the URI. We need to make sure they're
        # properly quoted so they do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)
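
# A doctest-style sketch of the unquote/quote cycle (illustrative addition,
# not part of the vendored source): unreserved escapes are decoded, reserved
# ones are preserved, and illegal characters get quoted.
#
#     >>> unquote_unreserved('%41%2Fb')
#     'A%2Fb'
#     >>> requote_uri('http://example.com/a b')
#     'http://example.com/a%20b'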


def address_in_network(ip, net):
    """Checks whether an IP address belongs to a network subnet.

    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24

    :rtype: bool
    """
    ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0]
    netaddr, bits = net.split('/')
    netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0]
    network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask
    return (ipaddr & netmask) == (network & netmask)


def dotted_netmask(mask):
    """Converts mask from /xx format to xxx.xxx.xxx.xxx

    Example: if mask is 24 function returns 255.255.255.0

    :rtype: str
    """
    bits = 0xffffffff ^ (1 << 32 - mask) - 1
    return socket.inet_ntoa(struct.pack('>I', bits))
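
# A doctest-style sketch of the subnet helpers (illustrative addition, not
# part of the vendored source):
#
#     >>> dotted_netmask(24)
#     '255.255.255.0'
#     >>> address_in_network('192.168.1.1', '192.168.1.0/24')
#     True
#     >>> address_in_network('192.168.1.1', '192.168.100.0/24')
#     False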


def is_ipv4_address(string_ip):
    """
    :rtype: bool
    """
    try:
        socket.inet_aton(string_ip)
    except socket.error:
        return False
    return True


def is_valid_cidr(string_network):
    """
    Very simple check of the CIDR format in the no_proxy variable.

    :rtype: bool
    """
    if string_network.count('/') == 1:
        try:
            mask = int(string_network.split('/')[1])
        except ValueError:
            return False

        if mask < 1 or mask > 32:
            return False

        try:
            socket.inet_aton(string_network.split('/')[0])
        except socket.error:
            return False
    else:
        return False
    return True


@contextlib.contextmanager
def set_environ(env_name, value):
    """Set the environment variable 'env_name' to 'value'.

    Save the previous value, yield, and then restore the previous value
    stored in the environment variable 'env_name'.

    If 'value' is None, do nothing.
    """
    value_changed = value is not None
    if value_changed:
        old_value = os.environ.get(env_name)
        os.environ[env_name] = value
    try:
        yield
    finally:
        if value_changed:
            if old_value is None:
                del os.environ[env_name]
            else:
                os.environ[env_name] = old_value
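
# A minimal usage sketch for the set_environ context manager (illustrative
# addition, not part of the vendored source; the variable name below is
# arbitrary): the variable is set inside the block and restored, or removed
# again, on exit.
#
#     >>> with set_environ('HTTP_PROXY_SENTINEL', 'on'):
#     ...     os.environ['HTTP_PROXY_SENTINEL']
#     'on'
#     >>> 'HTTP_PROXY_SENTINEL' in os.environ
#     False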


def should_bypass_proxies(url, no_proxy):
    """
    Returns whether we should bypass proxies or not.

    :rtype: bool
    """
    get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    no_proxy_arg = no_proxy
    if no_proxy is None:
        no_proxy = get_proxy('no_proxy')
    netloc = urlparse(url).netloc

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the netloc, both with and without the port.
        no_proxy = (
            host for host in no_proxy.replace(' ', '').split(',') if host
        )

        ip = netloc.split(':')[0]
        if is_ipv4_address(ip):
            for proxy_ip in no_proxy:
                if is_valid_cidr(proxy_ip):
                    if address_in_network(ip, proxy_ip):
                        return True
                elif ip == proxy_ip:
                    # The no_proxy entry was given in plain IP notation
                    # rather than CIDR notation and matches the request's IP
                    return True
        else:
            for host in no_proxy:
                if netloc.endswith(host) or netloc.split(':')[0].endswith(host):
                    # The URL does match something in no_proxy, so we don't want
                    # to apply the proxies on this URL.
                    return True

    # If the system proxy settings indicate that this URL should be bypassed,
    # don't proxy.
    # The proxy_bypass function is incredibly buggy on OS X in early versions
    # of Python 2.6, so allow this call to fail. Only catch the specific
    # exceptions we've seen, though: this call failing in other ways can reveal
    # legitimate problems.
    with set_environ('no_proxy', no_proxy_arg):
        try:
            bypass = proxy_bypass(netloc)
        except (TypeError, socket.gaierror):
            bypass = False

    if bypass:
        return True

    return False


def get_environ_proxies(url, no_proxy=None):
    """
    Return a dict of environment proxies.

    :rtype: dict
    """
    if should_bypass_proxies(url, no_proxy=no_proxy):
        return {}
    else:
        return getproxies()


def select_proxy(url, proxies):
    """Select a proxy for the url, if applicable.

    :param url: The url being requested
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    """
    proxies = proxies or {}
    urlparts = urlparse(url)
    if urlparts.hostname is None:
        return proxies.get(urlparts.scheme, proxies.get('all'))

    proxy_keys = [
        urlparts.scheme + '://' + urlparts.hostname,
        urlparts.scheme,
        'all://' + urlparts.hostname,
        'all',
    ]
    proxy = None
    for proxy_key in proxy_keys:
        if proxy_key in proxies:
            proxy = proxies[proxy_key]
            break

    return proxy
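
# A doctest-style sketch of the proxy-key precedence (illustrative addition,
# not part of the vendored source): scheme+host beats scheme, which beats
# the 'all' fallbacks.
#
#     >>> proxies = {'http': 'http://proxy:3128', 'all': 'socks5://fallback:1080'}
#     >>> select_proxy('http://example.com/', proxies)
#     'http://proxy:3128'
#     >>> select_proxy('ftp://example.com/', proxies)
#     'socks5://fallback:1080'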


def default_user_agent(name="python-requests"):
    """
    Return a string representing the default user agent.

    :rtype: str
    """
    return '%s/%s' % (name, __version__)


def default_headers():
    """
    :rtype: requests.structures.CaseInsensitiveDict
    """
    return CaseInsensitiveDict({
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(('gzip', 'deflate')),
        'Accept': '*/*',
        'Connection': 'keep-alive',
    })


def parse_header_links(value):
    """Return a list of parsed link headers.

    e.g. Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    :rtype: list
    """

    links = []

    replace_chars = ' \'"'

    for val in re.split(', *<', value):
        try:
            url, params = val.split(';', 1)
        except ValueError:
            url, params = val, ''

        link = {'url': url.strip('<> \'"')}

        for param in params.split(';'):
            try:
                key, value = param.split('=')
            except ValueError:
                break

            link[key.strip(replace_chars)] = value.strip(replace_chars)

        links.append(link)

    return links
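
# A doctest-style sketch of link-header parsing (illustrative addition, not
# part of the vendored source):
#
#     >>> parse_header_links('<http://example.com/page2>; rel="next"')
#     [{'url': 'http://example.com/page2', 'rel': 'next'}]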


# Null bytes; no need to recreate these on each call to guess_json_utf
_null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data):
    """
    :rtype: str
    """
    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls; their location and count determine the
    # encoding. Also detect a BOM, if present.
    sample = data[:4]
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included
    nullcount = sample.count(_null)
    if nullcount == 0:
        return 'utf-8'
    if nullcount == 2:
        if sample[::2] == _null2:   # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None
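
# A doctest-style sketch of the BOM/null-based detection (illustrative
# addition, not part of the vendored source):
#
#     >>> guess_json_utf(b'{"k": "v"}')
#     'utf-8'
#     >>> guess_json_utf('{"k": "v"}'.encode('utf-16'))
#     'utf-16'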


def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)

    # urlparse is a finicky beast, and sometimes decides that there isn't a
    # netloc present. Assume that it's being over-cautious, and switch netloc
    # and path if urlparse decided there was no netloc.
    if not netloc:
        netloc, path = path, netloc

    return urlunparse((scheme, netloc, path, params, query, fragment))


def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple
    of username, password.

    :rtype: (str, str)
    """
    parsed = urlparse(url)

    try:
        auth = (unquote(parsed.username), unquote(parsed.password))
    except (AttributeError, TypeError):
        auth = ('', '')

    return auth
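
# A doctest-style sketch of credential extraction (illustrative addition,
# not part of the vendored source); percent-encoded characters are decoded:
#
#     >>> get_auth_from_url('http://user:p%40ss@example.com/')
#     ('user', 'p@ss')
#     >>> get_auth_from_url('http://example.com/')
#     ('', '')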


# Moved outside of function to avoid recompiling on every call
_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$')
_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$')


def check_header_validity(header):
    """Verifies that a header value is a string which doesn't contain
    leading whitespace or return characters. This prevents unintended
    header injection.

    :param header: tuple, in the format (name, value).
    """
    name, value = header

    if isinstance(value, bytes):
        pat = _CLEAN_HEADER_REGEX_BYTE
    else:
        pat = _CLEAN_HEADER_REGEX_STR
    try:
        if not pat.match(value):
            raise InvalidHeader("Invalid return character or leading space in header: %s" % name)
    except TypeError:
        raise InvalidHeader("Value for header {%s: %s} must be of type str or "
                            "bytes, not %s" % (name, value, type(value)))


def urldefragauth(url):
    """
    Given a url remove the fragment and the authentication part.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url)

    # see :func:`prepend_scheme_if_needed`
    if not netloc:
        netloc, path = path, netloc

    netloc = netloc.rsplit('@', 1)[-1]

    return urlunparse((scheme, netloc, path, params, query, ''))
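
# A doctest-style sketch (illustrative addition, not part of the vendored
# source): both the user:password part and the fragment are dropped.
#
#     >>> urldefragauth('http://user:pass@example.com/path#section')
#     'http://example.com/path'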


def rewind_body(prepared_request):
    """Move file pointer back to its recorded starting position
    so it can be read again on redirect.
    """
    body_seek = getattr(prepared_request.body, 'seek', None)
    if body_seek is not None and isinstance(prepared_request._body_position, integer_types):
        try:
            body_seek(prepared_request._body_position)
        except (IOError, OSError):
            raise UnrewindableBodyError("An error occurred when rewinding request "
                                        "body for redirect.")
    else:
        raise UnrewindableBodyError("Unable to rewind request body for redirect.")