summaryrefslogtreecommitdiff
path: root/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/idna/core.py
diff options
context:
space:
mode:
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/idna/core.py')
-rw-r--r--venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/idna/core.py387
1 files changed, 0 insertions, 387 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/idna/core.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/idna/core.py
deleted file mode 100644
index 944ff98..0000000
--- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/idna/core.py
+++ /dev/null
@@ -1,387 +0,0 @@
1from . import idnadata
2import bisect
3import unicodedata
4import re
5import sys
6from .intranges import intranges_contain
7
# Canonical combining class value that Unicode assigns to virama characters;
# compared against unicodedata.combining() in the CONTEXTJ checks.
_virama_combining_class = 9

# ACE prefix that is put in front of the punycode form to make an A-label.
_alabel_prefix = b'xn--'

# Characters accepted as label separators in non-strict mode: full stop,
# ideographic full stop, fullwidth full stop, halfwidth ideographic full stop.
_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')

# On Python 3 the Python 2 builtins used by this module do not exist, so
# bind them to their Python 3 equivalents.
if sys.version_info[0] == 3:
    unicode = str
    unichr = chr
15
class IDNAError(UnicodeError):
    """Root of the exception hierarchy for every IDNA-related failure."""
19
20
class IDNABidiError(IDNAError):
    """Raised when a label violates the bidirectional-text requirements."""
24
25
class InvalidCodepoint(IDNAError):
    """Raised when a label contains a disallowed or unallocated codepoint."""
29
30
class InvalidCodepointContext(IDNAError):
    """Raised when a codepoint is not permitted in the position it occupies."""
34
35
def _combining_class(cp):
    """Return the canonical combining class of the integer code point ``cp``."""
    character = unichr(cp)
    return unicodedata.combining(character)
38
def _is_script(cp, script):
    """Tell whether the single character ``cp`` falls in the ranges that
    ``idnadata.scripts`` lists under the key ``script``."""
    code_point = ord(cp)
    script_ranges = idnadata.scripts[script]
    return intranges_contain(code_point, script_ranges)
41
def _punycode(s):
    """Encode the text ``s`` with the ``punycode`` codec and return the result."""
    encoded = s.encode('punycode')
    return encoded
44
def _unot(s):
    """Format the integer code point ``s`` in ``U+XXXX`` notation
    (upper-case hexadecimal, zero-padded to at least four digits)."""
    notation = 'U+{0:04X}'
    return notation.format(s)
47
48
def valid_label_length(label):
    """Return True when ``label`` is no longer than 63 items, else False."""
    return len(label) <= 63
54
55
def valid_string_length(label, trailing_dot):
    """Return True when the full name ``label`` fits the length limit.

    The limit is 253 items, or 254 when ``trailing_dot`` is true (the
    extra item accounts for the final separator).
    """
    limit = 254 if trailing_dot else 253
    return len(label) <= limit
61
62
def check_bidi(label, check_ltr=False):
    """Validate ``label`` against Bidi rules 1-6.

    The rules are only applied when the label contains a right-to-left
    character (bidirectional class R, AL or AN) or when ``check_ltr`` is
    true; otherwise the label is accepted without further checks.

    Returns True when the label is acceptable.
    Raises IDNABidiError when a character has no known directionality or
    when one of the rules is violated.
    """
    # Look for the first RTL character.  Characters seen before it must have
    # a known bidirectional class; an empty class most likely means the
    # string uses a newer Unicode version than this interpreter knows.
    has_rtl = False
    for position, char in enumerate(label, 1):
        bidi_class = unicodedata.bidirectional(char)
        if bidi_class == '':
            raise IDNABidiError('Unknown directionality in label {0} at position {1}'.format(repr(label), position))
        if bidi_class in ('R', 'AL', 'AN'):
            has_rtl = True
            break
    if not (has_rtl or check_ltr):
        return True

    # Bidi rule 1: the first character fixes the direction of the label.
    first_class = unicodedata.bidirectional(label[0])
    if first_class in ('R', 'AL'):
        rtl = True
    elif first_class == 'L':
        rtl = False
    else:
        raise IDNABidiError('First codepoint in label {0} must be directionality L, R or AL'.format(repr(label)))

    ends_validly = False
    numeral_class = False  # becomes 'AN' or 'EN' once a numeral is seen
    for position, char in enumerate(label, 1):
        bidi_class = unicodedata.bidirectional(char)

        if rtl:
            # Bidi rule 2: classes permitted anywhere in an RTL label.
            if bidi_class not in ('R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM'):
                raise IDNABidiError('Invalid direction for codepoint at position {0} in a right-to-left label'.format(position))
            # Bidi rule 3: track whether the label ends on an allowed class;
            # NSM characters leave the current verdict untouched.
            if bidi_class in ('R', 'AL', 'EN', 'AN'):
                ends_validly = True
            elif bidi_class != 'NSM':
                ends_validly = False
            # Bidi rule 4: AN and EN numerals must not both appear.
            if bidi_class in ('AN', 'EN'):
                if not numeral_class:
                    numeral_class = bidi_class
                elif numeral_class != bidi_class:
                    raise IDNABidiError('Can not mix numeral types in a right-to-left label')
        else:
            # Bidi rule 5: classes permitted anywhere in an LTR label.
            if bidi_class not in ('L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM'):
                raise IDNABidiError('Invalid direction for codepoint at position {0} in a left-to-right label'.format(position))
            # Bidi rule 6: same ending bookkeeping as rule 3, for LTR labels.
            if bidi_class in ('L', 'EN'):
                ends_validly = True
            elif bidi_class != 'NSM':
                ends_validly = False

    if not ends_validly:
        raise IDNABidiError('Label ends with illegal codepoint directionality')

    return True
122
123
def check_initial_combiner(label):
    """Reject labels whose first character is a mark.

    Returns True when the first character's general category does not
    start with ``M``; raises IDNAError otherwise.
    """
    first_category = unicodedata.category(label[0])
    if first_category[0] == 'M':
        raise IDNAError('Label begins with an illegal combining character')
    return True
129
130
def check_hyphen_ok(label):
    """Enforce the hyphen placement restrictions on ``label``.

    Raises IDNAError when the 3rd and 4th characters are both hyphens, or
    when the label starts or ends with a hyphen.  Returns True otherwise.
    """
    third_and_fourth = label[2:4]
    if third_and_fourth == '--':
        raise IDNAError('Label has disallowed hyphens in 3rd and 4th position')
    if '-' in (label[0], label[-1]):
        raise IDNAError('Label must not start or end with a hyphen')
    return True
138
139
def check_nfc(label):
    """Raise IDNAError unless ``label`` is already in Normalization Form C.

    Returns None when the label is unchanged by NFC normalization.
    """
    normalized = unicodedata.normalize('NFC', label)
    if normalized != label:
        raise IDNAError('Label must be in Normalization Form C')
144
145
def valid_contextj(label, pos):
    """Apply the CONTEXTJ rule to the joiner at ``label[pos]``.

    Only U+200C (ZERO WIDTH NON-JOINER) and U+200D (ZERO WIDTH JOINER) can
    ever be valid here; any other code point yields False.

    * Either joiner is valid when the character immediately before it has
      the virama combining class.
    * U+200C is additionally valid when, ignoring characters whose joining
      type is ``T`` (transparent), the nearest character before it has
      joining type ``L`` or ``D`` and the nearest character after it has
      joining type ``R`` or ``D``.

    Each neighbour scan stops at the first non-transparent character: that
    character alone decides the outcome.  Scanning past it would accept
    labels the rule is meant to reject and would make the check linear in
    the label length for every joiner.

    :param label: the label being validated (text).
    :param pos: zero-based index of the joiner within ``label``.
    :returns: True when the joiner is allowed at that position, else False.
    """
    cp_value = ord(label[pos])

    if cp_value not in (0x200c, 0x200d):
        return False

    # Shared rule: a joiner directly after a virama is always acceptable.
    if pos > 0 and _combining_class(ord(label[pos - 1])) == _virama_combining_class:
        return True

    if cp_value == 0x200d:
        # ZERO WIDTH JOINER has no other permitted context.
        return False

    # ZERO WIDTH NON-JOINER: inspect the nearest non-transparent neighbour on
    # each side.  Joining types are stored as the ordinal of their letter.
    transparent = ord('T')

    before_ok = False
    for i in range(pos - 1, -1, -1):
        joining_type = idnadata.joining_types.get(ord(label[i]))
        if joining_type == transparent:
            continue
        before_ok = joining_type in (ord('L'), ord('D'))
        break
    if not before_ok:
        return False

    for i in range(pos + 1, len(label)):
        joining_type = idnadata.joining_types.get(ord(label[i]))
        if joining_type == transparent:
            continue
        return joining_type in (ord('R'), ord('D'))

    # Nothing but transparent characters (or nothing at all) follows.
    return False
188
189
def valid_contexto(label, pos, exception=False):
    """Apply the CONTEXTO rule to the character at ``label[pos]``.

    Rules implemented, keyed by the code point at ``pos``:

    * U+00B7 (middle dot): must sit between two ``l`` (U+006C) characters.
    * U+0375 (Greek keraia): the following character must be Greek.
    * U+05F3 / U+05F4 (Hebrew geresh / gershayim): the preceding character
      must be Hebrew.
    * U+30FB (katakana middle dot): the label must contain at least one
      Hiragana, Katakana or Han character.
    * U+0660..U+0669 (Arabic-Indic digits): the label must not contain any
      of U+06F0..U+06F9.
    * U+06F0..U+06F9 (extended Arabic-Indic digits): the label must not
      contain any of U+0660..U+0669.

    :param label: the label being validated (text).
    :param pos: zero-based index of the character to check.
    :param exception: unused; retained so existing callers keep working.
    :returns: True when the character is allowed in its context, otherwise
        False.  Code points without a rule here also give False (the
        result is always a bool, never None).
    """
    cp_value = ord(label[pos])

    if cp_value == 0x00b7:
        if 0 < pos < len(label) - 1:
            return (ord(label[pos - 1]) == 0x006c and
                    ord(label[pos + 1]) == 0x006c)
        return False

    if cp_value == 0x0375:
        if pos < len(label) - 1 and len(label) > 1:
            return _is_script(label[pos + 1], 'Greek')
        return False

    if cp_value in (0x05f3, 0x05f4):
        if pos > 0:
            return _is_script(label[pos - 1], 'Hebrew')
        return False

    if cp_value == 0x30fb:
        # The middle dot itself does not count towards the requirement.
        return any(
            _is_script(cp, 'Hiragana') or
            _is_script(cp, 'Katakana') or
            _is_script(cp, 'Han')
            for cp in label if cp != u'\u30fb'
        )

    if 0x660 <= cp_value <= 0x669:
        return not any(0x6f0 <= ord(cp) <= 0x06f9 for cp in label)

    if 0x6f0 <= cp_value <= 0x6f9:
        return not any(0x660 <= ord(cp) <= 0x0669 for cp in label)

    # No contextual rule covers this code point.
    return False
229
230
def check_label(label):
    """Run every validity check on a single label.

    Byte input is decoded as UTF-8 first.  The label must be non-empty,
    pass the NFC, hyphen and initial-combiner checks, consist only of
    PVALID code points or CONTEXTJ/CONTEXTO code points that satisfy their
    contextual rule, and finally pass the Bidi check.

    Returns None.  Raises IDNAError (or one of its subclasses
    InvalidCodepoint / InvalidCodepointContext / IDNABidiError) on the
    first failed check.
    """
    if isinstance(label, (bytes, bytearray)):
        label = label.decode('utf-8')
    if len(label) == 0:
        raise IDNAError('Empty Label')

    check_nfc(label)
    check_hyphen_ok(label)
    check_initial_combiner(label)

    classes = idnadata.codepoint_classes
    for index, char in enumerate(label):
        code = ord(char)

        if intranges_contain(code, classes['PVALID']):
            continue

        if intranges_contain(code, classes['CONTEXTJ']):
            if valid_contextj(label, index):
                continue
            raise InvalidCodepointContext('Joiner {0} not allowed at position {1} in {2}'.format(_unot(code), index+1, repr(label)))

        if intranges_contain(code, classes['CONTEXTO']):
            if valid_contexto(label, index):
                continue
            raise InvalidCodepointContext('Codepoint {0} not allowed at position {1} in {2}'.format(_unot(code), index+1, repr(label)))

        # Not in any permitted class.
        raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(code), index+1, repr(label)))

    check_bidi(label)
256
257
def alabel(label):
    """Convert a single label to its A-label (ASCII, bytes) form.

    A label that encodes to ASCII is validated through ``ulabel`` and
    returned as those ASCII bytes.  Any other label is validated with
    ``check_label``, punycode-encoded and given the ``xn--`` prefix.

    Raises IDNAError when validation fails, when the input is empty, or
    when the resulting label is longer than 63 bytes.
    """
    try:
        label = label.encode('ascii')
        # Pure-ASCII input: it is either a plain label or already an
        # A-label; ulabel() validates both cases.
        try:
            ulabel(label)
        except IDNAError:
            raise IDNAError('The label {0} is not a valid A-label'.format(label))
        if valid_label_length(label):
            return label
        raise IDNAError('Label too long')
    except UnicodeEncodeError:
        # Non-ASCII input: fall through to the punycode conversion below.
        pass

    if not label:
        raise IDNAError('No Input')

    label = unicode(label)
    check_label(label)
    label = _alabel_prefix + _punycode(label)

    if valid_label_length(label):
        return label
    raise IDNAError('Label too long')
284
285
def ulabel(label):
    """Convert a single label to its U-label (text) form.

    * Text that cannot be encoded as ASCII is validated and returned as is.
    * ASCII input (text or bytes) is lower-cased; without the ``xn--``
      prefix it is validated and returned decoded as ASCII text.
    * With the prefix, the remainder is punycode-decoded, validated and
      returned.

    Raises IDNAError (via ``check_label``) when the label is invalid.
    """
    if not isinstance(label, (bytes, bytearray)):
        try:
            label = label.encode('ascii')
        except UnicodeEncodeError:
            # Already a non-ASCII text label: just validate it.
            check_label(label)
            return label

    label = label.lower()
    if not label.startswith(_alabel_prefix):
        check_label(label)
        return label.decode('ascii')

    label = label[len(_alabel_prefix):]
    label = label.decode('punycode')
    check_label(label)
    return label
305
306
def uts46_remap(domain, std3_rules=True, transitional=False):
    """Re-map the characters of ``domain`` according to the UTS46 table.

    Each character is looked up in ``uts46data`` and, depending on the row's
    status, kept, replaced by the row's mapping, dropped (status ``I``) or
    rejected.  The result is returned in Normalization Form C.

    :param domain: the text to remap.
    :param std3_rules: governs rows with status ``3``: when true they are
        kept (no mapping) or replaced (mapping present); when false they
        are rejected.
    :param transitional: when true, rows with status ``D`` are replaced by
        their mapping instead of being kept.
    :raises InvalidCodepoint: when a character is not allowed.
    """
    from .uts46data import uts46data
    remapped = u""
    try:
        for pos, char in enumerate(domain):
            code_point = ord(char)
            # The first 256 rows are indexed directly by code point; above
            # that, bisect for the last row starting at or below code_point.
            if code_point < 256:
                row_index = code_point
            else:
                row_index = bisect.bisect_left(uts46data, (code_point, "Z")) - 1
            row = uts46data[row_index]
            status = row[1]
            replacement = row[2] if len(row) == 3 else None

            keep_char = (
                status == "V" or
                (status == "D" and not transitional) or
                (status == "3" and std3_rules and replacement is None)
            )
            use_replacement = replacement is not None and (
                status == "M" or
                (status == "3" and std3_rules) or
                (status == "D" and transitional)
            )

            if keep_char:
                remapped += char
            elif use_replacement:
                remapped += replacement
            elif status != "I":
                # Routed through the handler below, which builds the message.
                raise IndexError()
        return unicodedata.normalize("NFC", remapped)
    except IndexError:
        raise InvalidCodepoint(
            "Codepoint {0} not allowed at position {1} in {2}".format(
                _unot(code_point), pos + 1, repr(domain)))
333
334
def encode(s, strict=False, uts46=False, std3_rules=False, transitional=False):
    """Encode the domain name ``s`` to its ASCII (A-label) form, as bytes.

    :param s: the domain; bytes input is decoded as ASCII first.
    :param strict: when true only ``.`` separates labels; otherwise the
        other Unicode dot characters are accepted as separators too.
    :param uts46: when true, run ``uts46_remap`` on the input first.
    :param std3_rules: forwarded to ``uts46_remap``.
    :param transitional: forwarded to ``uts46_remap``.
    :returns: the labels converted with ``alabel`` and joined with ``.``;
        a trailing separator in the input is preserved.
    :raises IDNAError: when the domain is empty, a label is invalid, or the
        result is too long.
    """
    if isinstance(s, (bytes, bytearray)):
        s = s.decode("ascii")
    if uts46:
        s = uts46_remap(s, std3_rules, transitional)

    labels = s.split('.') if strict else _unicode_dots_re.split(s)

    # Leading empty labels are discarded.
    while labels and not labels[0]:
        del labels[0]
    if not labels:
        raise IDNAError('Empty domain')

    # A final empty label means the input ended with a separator.
    trailing_dot = labels[-1] == ''
    if trailing_dot:
        del labels[-1]

    result = [alabel(label) for label in labels]
    if trailing_dot:
        result.append(b'')

    s = b'.'.join(result)
    if valid_string_length(s, trailing_dot):
        return s
    raise IDNAError('Domain too long')
362
363
def decode(s, strict=False, uts46=False, std3_rules=False):
    """Decode the domain name ``s`` to its Unicode (U-label) form, as text.

    :param s: the domain; bytes input is decoded as ASCII first.
    :param strict: when true only ``.`` separates labels; otherwise the
        other Unicode dot characters are accepted as separators too.
    :param uts46: when true, run ``uts46_remap`` (non-transitional) on the
        input first.
    :param std3_rules: forwarded to ``uts46_remap``.
    :returns: the labels converted with ``ulabel`` and joined with ``.``;
        a trailing separator in the input is preserved.
    :raises IDNAError: when the domain is empty or a label is invalid.
    """
    if isinstance(s, (bytes, bytearray)):
        s = s.decode("ascii")
    if uts46:
        s = uts46_remap(s, std3_rules, False)

    labels = s.split(u'.') if strict else _unicode_dots_re.split(s)

    # Leading empty labels are discarded.
    while labels and not labels[0]:
        del labels[0]
    if not labels:
        raise IDNAError('Empty domain')

    # A final empty label means the input ended with a separator.
    trailing_dot = not labels[-1]
    if trailing_dot:
        del labels[-1]

    result = [ulabel(label) for label in labels]
    if trailing_dot:
        result.append(u'')
    return u'.'.join(result)