diff options
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/webencodings/__init__.py')
-rw-r--r-- | venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/webencodings/__init__.py | 342 |
1 files changed, 342 insertions, 0 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/webencodings/__init__.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/webencodings/__init__.py new file mode 100644 index 0000000..16671ef --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/webencodings/__init__.py | |||
@@ -0,0 +1,342 @@ | |||
1 | # coding: utf-8 | ||
2 | """ | ||
3 | |||
4 | webencodings | ||
5 | ~~~~~~~~~~~~ | ||
6 | |||
7 | This is a Python implementation of the `WHATWG Encoding standard | ||
8 | <http://encoding.spec.whatwg.org/>`_. See README for details. | ||
9 | |||
10 | :copyright: Copyright 2012 by Simon Sapin | ||
11 | :license: BSD, see LICENSE for details. | ||
12 | |||
13 | """ | ||
14 | |||
15 | from __future__ import unicode_literals | ||
16 | |||
17 | import codecs | ||
18 | |||
19 | from .labels import LABELS | ||
20 | |||
21 | |||
VERSION = '0.5.1'


# A few names used by the Encoding standard are not accepted by Python as
# codec aliases; map each of those to the name Python knows the codec by.
PYTHON_NAMES = {
    'iso-8859-8-i': 'iso-8859-8',
    'x-mac-cyrillic': 'mac-cyrillic',
    'macintosh': 'mac-roman',
    'windows-874': 'cp874',
}

# Encoding objects that have already been built, keyed by canonical name.
CACHE = {}
33 | |||
34 | |||
def ascii_lower(string):
    r"""Lower-case the ASCII letters A-Z of a string and nothing else.

    :param string: A Unicode string.
    :returns: A new Unicode string.

    This implements the `ASCII case-insensitive
    <http://encoding.spec.whatwg.org/#ascii-case-insensitive>`_
    matching used for encoding labels (and, among other things,
    for `CSS keywords <http://dev.w3.org/csswg/css-values/#keywords>`_).

    Unlike the :meth:`~py:str.lower` method of Unicode strings,
    non-ASCII characters are left untouched; ``str.lower`` may even map
    some of them into the ASCII range:

        >>> keyword = u'Bac\N{KELVIN SIGN}ground'
        >>> assert keyword.lower() == u'background'
        >>> assert ascii_lower(keyword) != keyword.lower()
        >>> assert ascii_lower(keyword) == u'bac\N{KELVIN SIGN}ground'

    """
    # Lower-casing the UTF-8 bytes only affects ASCII letters, and was
    # found to be faster than unicode.translate().
    utf8_bytes = string.encode('utf8')
    lowered = utf8_bytes.lower()
    return lowered.decode('utf8')
59 | |||
60 | |||
def lookup(label):
    """
    Find the encoding that a label refers to.

    This is the spec's `get an encoding
    <http://encoding.spec.whatwg.org/#concept-encoding-get>`_ algorithm.
    Supported labels are listed there.

    :param label: A string.
    :returns:
        An :class:`Encoding` object, or :obj:`None` for an unknown label.

    """
    # Strip ASCII whitespace only: U+0009, U+000A, U+000C, U+000D, and U+0020.
    stripped = label.strip('\t\n\f\r ')
    name = LABELS.get(ascii_lower(stripped))
    if name is None:
        return None

    cached = CACHE.get(name)
    if cached is not None:
        return cached

    if name == 'x-user-defined':
        # This codec is provided by a sibling module, not by the stdlib.
        from .x_user_defined import codec_info
    else:
        # Any name that gets to here is expected to resolve to a valid
        # Python codec, possibly after remapping through PYTHON_NAMES.
        codec_info = codecs.lookup(PYTHON_NAMES.get(name, name))

    encoding = Encoding(name, codec_info)
    CACHE[name] = encoding
    return encoding
89 | |||
90 | |||
91 | def _get_encoding(encoding_or_label): | ||
92 | """ | ||
93 | Accept either an encoding object or label. | ||
94 | |||
95 | :param encoding: An :class:`Encoding` object or a label string. | ||
96 | :returns: An :class:`Encoding` object. | ||
97 | :raises: :exc:`~exceptions.LookupError` for an unknown label. | ||
98 | |||
99 | """ | ||
100 | if hasattr(encoding_or_label, 'codec_info'): | ||
101 | return encoding_or_label | ||
102 | |||
103 | encoding = lookup(encoding_or_label) | ||
104 | if encoding is None: | ||
105 | raise LookupError('Unknown encoding label: %r' % encoding_or_label) | ||
106 | return encoding | ||
107 | |||
108 | |||
class Encoding(object):
    """Represents a character encoding such as UTF-8,
    that can be used for decoding or encoding.

    .. attribute:: name

        Canonical name of the encoding

    .. attribute:: codec_info

        The actual implementation of the encoding,
        a stdlib :class:`~codecs.CodecInfo` object.
        See :func:`codecs.register`.

    """

    def __init__(self, name, codec_info):
        # Both values are stored as given; no validation is performed here.
        self.codec_info = codec_info
        self.name = name

    def __repr__(self):
        """Return ``<Encoding NAME>`` for debugging output."""
        return '<Encoding %s>' % self.name
130 | |||
131 | |||
#: The UTF-8 encoding. Should be used for new content and formats.
UTF8 = lookup('utf-8')

# The two UTF-16 encodings, looked up once at import time.
_UTF16LE, _UTF16BE = lookup('utf-16le'), lookup('utf-16be')
137 | |||
138 | |||
def decode(input, fallback_encoding, errors='replace'):
    """
    Decode a single byte string, honoring a leading BOM if there is one.

    :param input: A byte string
    :param fallback_encoding:
        An :class:`Encoding` object or a label string.
        The encoding to use if :obj:`input` does not have a BOM.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
    :return:
        A ``(output, encoding)`` tuple of a Unicode string
        and an :obj:`Encoding`.

    """
    # Resolve the fallback first so that an invalid label fails early,
    # even when a BOM would have made the fallback unnecessary.
    fallback = _get_encoding(fallback_encoding)
    bom_encoding, payload = _detect_bom(input)
    chosen = bom_encoding or fallback
    decoded = chosen.codec_info.decode(payload, errors)
    return decoded[0], chosen
159 | |||
160 | |||
161 | def _detect_bom(input): | ||
162 | """Return (bom_encoding, input), with any BOM removed from the input.""" | ||
163 | if input.startswith(b'\xFF\xFE'): | ||
164 | return _UTF16LE, input[2:] | ||
165 | if input.startswith(b'\xFE\xFF'): | ||
166 | return _UTF16BE, input[2:] | ||
167 | if input.startswith(b'\xEF\xBB\xBF'): | ||
168 | return UTF8, input[3:] | ||
169 | return None, input | ||
170 | |||
171 | |||
def encode(input, encoding=UTF8, errors='strict'):
    """
    Encode a single Unicode string into bytes.

    :param input: A Unicode string.
    :param encoding: An :class:`Encoding` object or a label string.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
    :return: A byte string.

    """
    codec_info = _get_encoding(encoding).codec_info
    encoded = codec_info.encode(input, errors)
    # The codec returns a sequence whose first item is the encoded bytes.
    return encoded[0]
184 | |||
185 | |||
def iter_decode(input, fallback_encoding, errors='replace'):
    """
    "Pull"-based decoder.

    :param input:
        An iterable of byte strings.

        The input is first consumed just enough to determine the encoding
        based on the presence of a BOM,
        then consumed on demand when the return value is.
    :param fallback_encoding:
        An :class:`Encoding` object or a label string.
        The encoding to use if :obj:`input` does not have a BOM.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
    :returns:
        An ``(output, encoding)`` tuple.
        :obj:`output` is an iterable of Unicode strings,
        :obj:`encoding` is the :obj:`Encoding` that is being used.

    """
    incremental = IncrementalDecoder(fallback_encoding, errors)
    chunks = _iter_decode_generator(input, incremental)
    # The first item produced by the generator is the Encoding itself;
    # everything after it is decoded text.
    encoding = next(chunks)
    return chunks, encoding
212 | |||
213 | |||
214 | def _iter_decode_generator(input, decoder): | ||
215 | """Return a generator that first yields the :obj:`Encoding`, | ||
216 | then yields output chukns as Unicode strings. | ||
217 | |||
218 | """ | ||
219 | decode = decoder.decode | ||
220 | input = iter(input) | ||
221 | for chunck in input: | ||
222 | output = decode(chunck) | ||
223 | if output: | ||
224 | assert decoder.encoding is not None | ||
225 | yield decoder.encoding | ||
226 | yield output | ||
227 | break | ||
228 | else: | ||
229 | # Input exhausted without determining the encoding | ||
230 | output = decode(b'', final=True) | ||
231 | assert decoder.encoding is not None | ||
232 | yield decoder.encoding | ||
233 | if output: | ||
234 | yield output | ||
235 | return | ||
236 | |||
237 | for chunck in input: | ||
238 | output = decode(chunck) | ||
239 | if output: | ||
240 | yield output | ||
241 | output = decode(b'', final=True) | ||
242 | if output: | ||
243 | yield output | ||
244 | |||
245 | |||
def iter_encode(input, encoding=UTF8, errors='strict'):
    """
    "Pull"-based encoder.

    :param input: An iterable of Unicode strings.
    :param encoding: An :class:`Encoding` object or a label string.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.
    :returns: An iterable of byte strings.

    """
    # The encoder is built here rather than inside the generator so that
    # an invalid label fails early, before iteration starts.
    encoder = IncrementalEncoder(encoding, errors)
    return _iter_encode_generator(input, encoder.encode)
260 | |||
261 | |||
262 | def _iter_encode_generator(input, encode): | ||
263 | for chunck in input: | ||
264 | output = encode(chunck) | ||
265 | if output: | ||
266 | yield output | ||
267 | output = encode('', final=True) | ||
268 | if output: | ||
269 | yield output | ||
270 | |||
271 | |||
class IncrementalDecoder(object):
    """
    "Push"-based decoder.

    :param fallback_encoding:
        An :class:`Encoding` object or a label string.
        The encoding to use if :obj:`input` does not have a BOM.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.

    """

    def __init__(self, fallback_encoding, errors='replace'):
        # Resolving the label here makes an invalid label fail early.
        self._fallback_encoding = _get_encoding(fallback_encoding)
        self._errors = errors
        # Bytes held back while it is still undecided whether a BOM is present.
        self._buffer = b''
        # Bound ``decode`` of the underlying codec, set once the encoding
        # has been determined.
        self._decoder = None
        #: The actual :class:`Encoding` that is being used,
        #: or :obj:`None` if that is not determined yet.
        #: (Ie. if there is not enough input yet to determine
        #: if there is a BOM.)
        self.encoding = None  # Not known yet.

    def decode(self, input, final=False):
        """Decode one chunk of the input.

        :param input: A byte string.
        :param final:
            Indicate that no more input is available.
            Must be :obj:`True` if this is the last call.
        :returns: A Unicode string.

        """
        if self._decoder is not None:
            # Encoding already determined: delegate directly.
            return self._decoder(input, final)

        pending = self._buffer + input
        encoding, pending = _detect_bom(pending)
        if encoding is None:
            if len(pending) < 3 and not final:
                # Not enough data yet to rule out a BOM; keep it for later.
                self._buffer = pending
                return ''
            # No BOM
            encoding = self._fallback_encoding

        chunk_decoder = encoding.codec_info.incrementaldecoder(self._errors)
        self._decoder = chunk_decoder.decode
        self.encoding = encoding
        return self._decoder(pending, final)
321 | |||
322 | |||
class IncrementalEncoder(object):
    """
    "Push"-based encoder.

    :param encoding: An :class:`Encoding` object or a label string.
    :param errors: Type of error handling. See :func:`codecs.register`.
    :raises: :exc:`~exceptions.LookupError` for an unknown encoding label.

    .. method:: encode(input, final=False)

        :param input: A Unicode string.
        :param final:
            Indicate that no more input is available.
            Must be :obj:`True` if this is the last call.
        :returns: A byte string.

    """

    def __init__(self, encoding=UTF8, errors='strict'):
        codec_info = _get_encoding(encoding).codec_info
        # ``encode`` is exposed as an instance attribute bound directly to
        # the underlying incremental encoder, not as a method on this class.
        incremental = codec_info.incrementalencoder(errors)
        self.encode = incremental.encode