diff options
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/idna/core.py')
-rw-r--r-- | venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/idna/core.py | 387 |
1 files changed, 0 insertions, 387 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/idna/core.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/idna/core.py deleted file mode 100644 index 944ff98..0000000 --- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/idna/core.py +++ /dev/null | |||
@@ -1,387 +0,0 @@ | |||
1 | from . import idnadata | ||
2 | import bisect | ||
3 | import unicodedata | ||
4 | import re | ||
5 | import sys | ||
6 | from .intranges import intranges_contain | ||
7 | |||
# Combining class value that valid_contextj() compares against to detect a
# virama immediately before a joiner.
_virama_combining_class = 9
# Prefix that ulabel() strips from, and alabel() prepends to, Punycode labels.
_alabel_prefix = b'xn--'
# Label separators honoured in non-strict mode: U+002E, U+3002, U+FF0E, U+FF61.
_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')

# Python 3 removed the ``unicode`` and ``unichr`` builtins; alias them so the
# rest of the module can use a single spelling.  The comparison is ``>=`` so
# the aliases are also defined on any later major version instead of the
# module failing with NameError there.
if sys.version_info[0] >= 3:
    unicode = str
    unichr = chr
15 | |||
class IDNAError(UnicodeError):
    """Base class for the IDNA-specific exceptions raised by this module."""
19 | |||
20 | |||
class IDNABidiError(IDNAError):
    """Raised when a label does not satisfy the bidirectional requirements."""
24 | |||
25 | |||
class InvalidCodepoint(IDNAError):
    """Raised when a disallowed or unallocated codepoint is encountered."""
29 | |||
30 | |||
class InvalidCodepointContext(IDNAError):
    """Raised when a codepoint is not valid in the context where it appears."""
34 | |||
35 | |||
def _combining_class(cp):
    """Return the canonical combining class of the integer code point ``cp``."""
    character = unichr(cp)
    return unicodedata.combining(character)
38 | |||
def _is_script(cp, script):
    """Tell whether the character ``cp`` lies in ``idnadata.scripts[script]``.

    ``cp`` is a one-character string (it is passed through ``ord``) and
    ``script`` is a key of the ``idnadata.scripts`` table.
    """
    code_point = ord(cp)
    script_ranges = idnadata.scripts[script]
    return intranges_contain(code_point, script_ranges)
41 | |||
def _punycode(s):
    """Encode the text ``s`` with the ``punycode`` codec and return the result."""
    encoded = s.encode('punycode')
    return encoded
44 | |||
def _unot(s):
    """Format the integer ``s`` as ``U+`` followed by at least four hex digits."""
    hex_digits = '{0:04X}'.format(s)
    return 'U+' + hex_digits
47 | |||
48 | |||
def valid_label_length(label):
    """Return True when ``label`` is at most 63 items long, False otherwise.

    The length is whatever ``len(label)`` reports, so the caller decides
    whether characters or bytes are being counted.
    """
    return len(label) <= 63
54 | |||
55 | |||
def valid_string_length(label, trailing_dot):
    """Return True when ``label`` fits within the overall length limit.

    The limit is 253 items, or 254 when ``trailing_dot`` is true so that the
    extra final separator does not count against the name.
    """
    limit = 254 if trailing_dot else 253
    return len(label) <= limit
61 | |||
62 | |||
def check_bidi(label, check_ltr=False):
    """Check ``label`` against the numbered Bidi rules and return True.

    The rules are enforced only when the label contains a character of
    bidirectional class R, AL or AN, or when ``check_ltr`` is true; otherwise
    the function returns True without further checks.

    NOTE(review): the rule numbers presumably refer to RFC 5893 section 2 --
    confirm against the specification.

    Raises:
        IDNABidiError: a character scanned before the first R/AL/AN character
            has no directionality data; the first character is not of class
            L, R or AL; a character's class is not permitted for the label's
            direction; AN and EN numerals are mixed in a right-to-left label;
            or the label ends with a character of a disallowed class.
    """
    # Bidi rules should only be applied if string contains RTL characters.
    # The scan stops at the first such character.
    rtl_seen = False
    for position, char in enumerate(label, 1):
        bidi_class = unicodedata.bidirectional(char)
        if bidi_class == '':
            # String likely comes from a newer version of Unicode
            raise IDNABidiError('Unknown directionality in label {0} at position {1}'.format(repr(label), position))
        if bidi_class in ('R', 'AL', 'AN'):
            rtl_seen = True
            break
    if not (rtl_seen or check_ltr):
        return True

    # Bidi rule 1: the first character fixes the direction of the label.
    leading_class = unicodedata.bidirectional(label[0])
    if leading_class == 'L':
        rtl = False
    elif leading_class in ('R', 'AL'):
        rtl = True
    else:
        raise IDNABidiError('First codepoint in label {0} must be directionality L, R or AL'.format(repr(label)))

    if rtl:
        # Bidi rule 2 (permitted classes) and rule 3 (classes allowed at the end)
        permitted = ('R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM')
        closing = ('R', 'AL', 'EN', 'AN')
        bad_direction = 'Invalid direction for codepoint at position {0} in a right-to-left label'
    else:
        # Bidi rule 5 (permitted classes) and rule 6 (classes allowed at the end)
        permitted = ('L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM')
        closing = ('L', 'EN')
        bad_direction = 'Invalid direction for codepoint at position {0} in a left-to-right label'

    ends_ok = False
    numeral_class = False
    for position, char in enumerate(label, 1):
        bidi_class = unicodedata.bidirectional(char)

        if bidi_class not in permitted:
            raise IDNABidiError(bad_direction.format(position))

        # Trailing NSM characters leave the verdict of the preceding
        # character untouched; anything else overrides it.
        if bidi_class in closing:
            ends_ok = True
        elif bidi_class != 'NSM':
            ends_ok = False

        # Bidi rule 4: only one kind of numeral in a right-to-left label.
        if rtl and bidi_class in ('AN', 'EN'):
            if not numeral_class:
                numeral_class = bidi_class
            elif numeral_class != bidi_class:
                raise IDNABidiError('Can not mix numeral types in a right-to-left label')

    if not ends_ok:
        raise IDNABidiError('Label ends with illegal codepoint directionality')

    return True
122 | |||
123 | |||
def check_initial_combiner(label):
    """Return True unless the first character of ``label`` is a mark.

    Raises:
        IDNAError: the general category of ``label[0]`` starts with ``M``.
    """
    leading_category = unicodedata.category(label[0])
    if leading_category.startswith('M'):
        raise IDNAError('Label begins with an illegal combining character')
    return True
129 | |||
130 | |||
def check_hyphen_ok(label):
    """Return True when the hyphens in ``label`` are acceptably placed.

    Raises:
        IDNAError: the 3rd and 4th characters are both hyphens, or the label
            starts or ends with a hyphen.
    """
    third_and_fourth = label[2:4]
    if third_and_fourth == '--':
        raise IDNAError('Label has disallowed hyphens in 3rd and 4th position')
    if '-' in (label[0], label[-1]):
        raise IDNAError('Label must not start or end with a hyphen')
    return True
138 | |||
139 | |||
def check_nfc(label):
    """Raise IDNAError unless ``label`` is unchanged by NFC normalization.

    Nothing is returned when the check passes.
    """
    normalized = unicodedata.normalize('NFC', label)
    if normalized != label:
        raise IDNAError('Label must be in Normalization Form C')
144 | |||
145 | |||
def valid_contextj(label, pos):
    """Check the contextual rule for the joiner at ``label[pos]``.

    Only U+200C (ZERO WIDTH NON-JOINER) and U+200D (ZERO WIDTH JOINER) can
    pass; any other code point yields False.

    * Either joiner is accepted directly after a character whose combining
      class equals ``_virama_combining_class``.
    * U+200C is additionally accepted when the nearest non-transparent
      character before it has joining type L or D and the nearest
      non-transparent character after it has joining type R or D.

    Each scan stops at the first character whose joining type is not T.
    This follows the RFC 5892 A.1 pattern, which allows only transparent
    characters between the joiner and the joining neighbour, and bounds the
    work done per joiner.

    Args:
        label: the label text being validated.
        pos: index into ``label`` of the joiner to check.

    Returns:
        True when the joiner is allowed at that position, False otherwise.
    """
    cp_value = ord(label[pos])

    if cp_value not in (0x200c, 0x200d):
        return False

    # A virama immediately before the joiner is sufficient for both joiners.
    if pos > 0 and _combining_class(ord(label[pos - 1])) == _virama_combining_class:
        return True

    if cp_value == 0x200d:
        # ZERO WIDTH JOINER has no other permitted context.
        return False

    joining_types = idnadata.joining_types
    transparent = ord('T')

    # Walk backwards to the nearest non-transparent character.
    before_ok = False
    for i in range(pos - 1, -1, -1):
        joining_type = joining_types.get(ord(label[i]))
        if joining_type == transparent:
            continue
        before_ok = joining_type in (ord('L'), ord('D'))
        break
    if not before_ok:
        return False

    # Walk forwards to the nearest non-transparent character.
    for i in range(pos + 1, len(label)):
        joining_type = joining_types.get(ord(label[i]))
        if joining_type == transparent:
            continue
        return joining_type in (ord('R'), ord('D'))
    return False
188 | |||
189 | |||
def valid_contexto(label, pos, exception=False):
    """Check the contextual rule for the CONTEXTO character at ``label[pos]``.

    Rules implemented, keyed on the code point at ``pos``:

    * U+00B7: must sit between two ``l`` (U+006C) characters.
    * U+0375: the following character must be in the Greek script.
    * U+05F3, U+05F4: the preceding character must be in the Hebrew script.
    * U+30FB: some other character of the label must be in the Hiragana,
      Katakana or Han script.
    * U+0660..U+0669: the label must contain no U+06F0..U+06F9 digit.
    * U+06F0..U+06F9: the label must contain no U+0660..U+0669 digit.

    Args:
        label: the label text being validated.
        pos: index into ``label`` of the character to check.
        exception: unused; kept so existing callers keep working.

    Returns:
        True when the character is allowed at that position, False otherwise.
        A code point with no rule here is reported as False (previously the
        function fell off the end and returned None).
    """
    cp_value = ord(label[pos])

    if cp_value == 0x00b7:
        if 0 < pos < len(label) - 1:
            if ord(label[pos - 1]) == 0x006c and ord(label[pos + 1]) == 0x006c:
                return True
        return False

    elif cp_value == 0x0375:
        if pos < len(label) - 1 and len(label) > 1:
            return _is_script(label[pos + 1], 'Greek')
        return False

    elif cp_value == 0x05f3 or cp_value == 0x05f4:
        if pos > 0:
            return _is_script(label[pos - 1], 'Hebrew')
        return False

    elif cp_value == 0x30fb:
        for cp in label:
            if cp == u'\u30fb':
                continue
            if _is_script(cp, 'Hiragana') or _is_script(cp, 'Katakana') or _is_script(cp, 'Han'):
                return True
        return False

    elif 0x660 <= cp_value <= 0x669:
        return not any(0x6f0 <= ord(cp) <= 0x06f9 for cp in label)

    elif 0x6f0 <= cp_value <= 0x6f9:
        return not any(0x660 <= ord(cp) <= 0x0669 for cp in label)

    # No contextual rule is defined for this code point.
    return False
229 | |||
230 | |||
def check_label(label):
    """Validate a single label, raising on the first problem found.

    Byte input is decoded as UTF-8 first.  The label must be non-empty, pass
    ``check_nfc``, ``check_hyphen_ok`` and ``check_initial_combiner``, consist
    only of PVALID code points or CONTEXTJ/CONTEXTO code points whose
    contextual rule holds, and finally pass ``check_bidi``.

    Raises:
        IDNAError: the label is empty (or one of the helper checks fails).
        InvalidCodepointContext: a CONTEXTJ or CONTEXTO code point appears
            where its contextual rule does not allow it.
        InvalidCodepoint: a code point is in none of the three classes.
    """
    if isinstance(label, (bytes, bytearray)):
        label = label.decode('utf-8')
    if not label:
        raise IDNAError('Empty Label')

    check_nfc(label)
    check_hyphen_ok(label)
    check_initial_combiner(label)

    classes = idnadata.codepoint_classes
    for index, char in enumerate(label):
        code_point = ord(char)

        if intranges_contain(code_point, classes['PVALID']):
            continue

        if intranges_contain(code_point, classes['CONTEXTJ']):
            if valid_contextj(label, index):
                continue
            raise InvalidCodepointContext('Joiner {0} not allowed at position {1} in {2}'.format(_unot(code_point), index+1, repr(label)))

        if intranges_contain(code_point, classes['CONTEXTO']):
            if valid_contexto(label, index):
                continue
            raise InvalidCodepointContext('Codepoint {0} not allowed at position {1} in {2}'.format(_unot(code_point), index+1, repr(label)))

        raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(code_point), index+1, repr(label)))

    check_bidi(label)
256 | |||
257 | |||
def alabel(label):
    """Convert one label to its ASCII (A-label) byte form.

    A label that is already pure ASCII is validated through ``ulabel`` and
    returned as bytes.  Any other label is checked with ``check_label``,
    Punycode-encoded and prefixed with ``_alabel_prefix``.

    Raises:
        IDNAError: the ASCII label is not a valid A-label, the input is empty,
            or the resulting label is longer than ``valid_label_length``
            allows.  Errors from ``check_label`` propagate unchanged.
    """
    try:
        label = label.encode('ascii')
        try:
            ulabel(label)
        except IDNAError:
            raise IDNAError('The label {0} is not a valid A-label'.format(label))
        if valid_label_length(label):
            return label
        raise IDNAError('Label too long')
    except UnicodeEncodeError:
        # Not representable in ASCII: continue with the Punycode path below.
        pass

    if not label:
        raise IDNAError('No Input')

    label = unicode(label)
    check_label(label)
    label = _alabel_prefix + _punycode(label)

    if valid_label_length(label):
        return label
    raise IDNAError('Label too long')
284 | |||
285 | |||
def ulabel(label):
    """Convert one label to its Unicode (U-label) form.

    * Text that cannot be encoded as ASCII is validated with ``check_label``
      and returned unchanged.
    * ASCII input is lower-cased.  Without the ``_alabel_prefix`` it is
      validated and returned decoded from ASCII; with the prefix, the
      remainder is Punycode-decoded, validated and returned.

    Raises:
        IDNAError: the Punycode payload cannot be decoded.  The codec's own
            ``UnicodeError`` is wrapped so that callers catching ``IDNAError``
            (such as ``alabel``) see malformed A-labels too.  Errors from
            ``check_label`` propagate unchanged.
    """
    if not isinstance(label, (bytes, bytearray)):
        try:
            label = label.encode('ascii')
        except UnicodeEncodeError:
            check_label(label)
            return label

    label = label.lower()
    if not label.startswith(_alabel_prefix):
        check_label(label)
        return label.decode('ascii')

    label = label[len(_alabel_prefix):]
    try:
        label = label.decode('punycode')
    except UnicodeError:
        # IDNAError is itself a UnicodeError, so existing handlers still match.
        raise IDNAError('Invalid A-label')
    check_label(label)
    return label
305 | |||
306 | |||
def uts46_remap(domain, std3_rules=True, transitional=False):
    """Re-map the characters in the string according to UTS46 processing.

    Each character is looked up in ``uts46data`` and, depending on the row's
    status, kept, replaced, dropped or rejected:

    * ``V``: kept.
    * ``M``: replaced by the row's replacement text.
    * ``D``: kept, or replaced when ``transitional`` is true.
    * ``3``: rejected when ``std3_rules`` is true; otherwise kept, or
      replaced when the row carries a replacement.
    * ``I``: dropped.
    * anything else: rejected.

    The ``3`` rows are only usable when STD3 ASCII rules are switched off;
    the test is therefore ``not std3_rules`` (it used to be inverted, which
    let those code points through exactly when the rules were requested).

    Args:
        domain: the text to re-map.
        std3_rules: enforce the STD3 ASCII rules.
        transitional: use transitional handling for ``D`` rows.

    Returns:
        The re-mapped text, normalized to NFC.

    Raises:
        InvalidCodepoint: a character is not allowed, or has no table row.
    """
    from .uts46data import uts46data

    def _not_allowed(code_point, pos):
        # Build the error for the character at 0-based index ``pos``.
        return InvalidCodepoint(
            "Codepoint {0} not allowed at position {1} in {2}".format(
                _unot(code_point), pos + 1, repr(domain)))

    pieces = []
    for pos, char in enumerate(domain):
        code_point = ord(char)
        try:
            # Code points below 256 index the table directly; for the rest,
            # bisect locates the last row that sorts before (code_point, "Z").
            uts46row = uts46data[code_point if code_point < 256 else
                                 bisect.bisect_left(uts46data, (code_point, "Z")) - 1]
            status = uts46row[1]
        except IndexError:
            raise _not_allowed(code_point, pos)
        replacement = uts46row[2] if len(uts46row) == 3 else None

        if (status == "V" or
                (status == "D" and not transitional) or
                (status == "3" and not std3_rules and replacement is None)):
            pieces.append(char)
        elif replacement is not None and (status == "M" or
                                          (status == "3" and not std3_rules) or
                                          (status == "D" and transitional)):
            pieces.append(replacement)
        elif status != "I":
            raise _not_allowed(code_point, pos)

    return unicodedata.normalize("NFC", u"".join(pieces))
333 | |||
334 | |||
def encode(s, strict=False, uts46=False, std3_rules=False, transitional=False):
    """Encode a domain name to its ASCII byte form, label by label.

    Args:
        s: the domain; byte input is decoded as ASCII first.
        strict: split labels on ``.`` only instead of on every separator
            matched by ``_unicode_dots_re``.
        uts46: pre-process the text with ``uts46_remap``.
        std3_rules: forwarded to ``uts46_remap``.
        transitional: forwarded to ``uts46_remap``.

    Returns:
        The ``alabel`` form of every label joined with ``b'.'``; one trailing
        separator in the input is kept, leading empty labels are discarded.

    Raises:
        IDNAError: no label is left ('Empty domain'), the result fails
            ``valid_string_length`` ('Domain too long'), or a label is
            rejected by ``alabel``.
    """
    if isinstance(s, (bytes, bytearray)):
        s = s.decode("ascii")
    if uts46:
        s = uts46_remap(s, std3_rules, transitional)

    labels = s.split('.') if strict else _unicode_dots_re.split(s)
    while labels and not labels[0]:
        labels.pop(0)
    if not labels:
        raise IDNAError('Empty domain')

    # A final empty label means the input ended with a separator.
    trailing_dot = labels[-1] == ''
    if trailing_dot:
        labels.pop()

    result = [alabel(label) for label in labels]
    if trailing_dot:
        result.append(b'')

    s = b'.'.join(result)
    if valid_string_length(s, trailing_dot):
        return s
    raise IDNAError('Domain too long')
362 | |||
363 | |||
def decode(s, strict=False, uts46=False, std3_rules=False):
    """Decode a domain name to its Unicode form, label by label.

    Args:
        s: the domain; byte input is decoded as ASCII first.
        strict: split labels on ``.`` only instead of on every separator
            matched by ``_unicode_dots_re``.
        uts46: pre-process the text with ``uts46_remap`` (non-transitional).
        std3_rules: forwarded to ``uts46_remap``.

    Returns:
        The ``ulabel`` form of every label joined with ``.``; one trailing
        separator in the input is kept, leading empty labels are discarded.

    Raises:
        IDNAError: no label is left ('Empty domain') or a label is rejected
            by ``ulabel``.
    """
    if isinstance(s, (bytes, bytearray)):
        s = s.decode("ascii")
    if uts46:
        s = uts46_remap(s, std3_rules, False)

    labels = s.split(u'.') if strict else _unicode_dots_re.split(s)
    while labels and not labels[0]:
        labels.pop(0)
    if not labels:
        raise IDNAError('Empty domain')

    # A final empty label means the input ended with a separator.
    trailing_dot = not labels[-1]
    if trailing_dot:
        labels.pop()

    result = [ulabel(label) for label in labels]
    if trailing_dot:
        result.append(u'')
    return u'.'.join(result)