summary | refs | log | tree | commit | diff
path: root/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/utf8prober.py
diff options
context:
space:
mode:
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/utf8prober.py')
-rw-r--r-- venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/utf8prober.py 82
1 files changed, 0 insertions, 82 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/utf8prober.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/utf8prober.py
deleted file mode 100644
index 4573267..0000000
--- a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/utf8prober.py
+++ /dev/null
@@ -1,82 +0,0 @@
1######################## BEGIN LICENSE BLOCK ########################
2# The Original Code is mozilla.org code.
3#
4# The Initial Developer of the Original Code is
5# Netscape Communications Corporation.
6# Portions created by the Initial Developer are Copyright (C) 1998
7# the Initial Developer. All Rights Reserved.
8#
9# Contributor(s):
10# Mark Pilgrim - port to Python
11#
12# This library is free software; you can redistribute it and/or
13# modify it under the terms of the GNU Lesser General Public
14# License as published by the Free Software Foundation; either
15# version 2.1 of the License, or (at your option) any later version.
16#
17# This library is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20# Lesser General Public License for more details.
21#
22# You should have received a copy of the GNU Lesser General Public
23# License along with this library; if not, write to the Free Software
24# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25# 02110-1301 USA
26######################### END LICENSE BLOCK #########################
27
28from .charsetprober import CharSetProber
29from .enums import ProbingState, MachineState
30from .codingstatemachine import CodingStateMachine
31from .mbcssm import UTF8_SM_MODEL
32
33
34
class UTF8Prober(CharSetProber):
    """Charset prober that reports the "utf-8" charset.

    Every fed byte is pushed through a ``CodingStateMachine`` built from
    ``UTF8_SM_MODEL``.  The prober counts how many characters of two or
    more bytes the machine has completed and derives its confidence from
    that count.
    """

    # Factor applied to the remaining doubt once per counted multi-byte
    # character (see get_confidence).
    ONE_CHAR_PROB = 0.5

    def __init__(self):
        """Create the state machine and put the prober in its initial state."""
        super(UTF8Prober, self).__init__()
        self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
        # Placeholder only; reset() below assigns the real starting value.
        self._num_mb_chars = None
        self.reset()

    def reset(self):
        """Reset the base prober, the state machine and the character count."""
        super(UTF8Prober, self).reset()
        self.coding_sm.reset()
        self._num_mb_chars = 0

    @property
    def charset_name(self):
        """Name of the charset this prober detects."""
        return "utf-8"

    @property
    def language(self):
        """Language associated with this prober; always the empty string."""
        return ""

    def feed(self, byte_str):
        """Run ``byte_str`` through the state machine and return the state.

        Feeding stops at the first byte for which the machine reports
        ``MachineState.ERROR`` (prober becomes ``NOT_ME``) or
        ``MachineState.ITS_ME`` (prober becomes ``FOUND_IT``).  Each time
        the machine comes back to ``MachineState.START`` after a character
        of at least two bytes, the multi-byte counter is incremented.
        """
        for byte in byte_str:
            verdict = self.coding_sm.next_state(byte)
            if verdict == MachineState.ERROR:
                self._state = ProbingState.NOT_ME
                break
            if verdict == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            if (verdict == MachineState.START
                    and self.coding_sm.get_current_charlen() >= 2):
                self._num_mb_chars += 1

        # While still undecided, promote to FOUND_IT as soon as the
        # confidence exceeds the shortcut threshold.
        if (self.state == ProbingState.DETECTING
                and self.get_confidence() > self.SHORTCUT_THRESHOLD):
            self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        """Return the confidence derived from the multi-byte character count.

        With six or more counted characters the result is the constant
        0.99.  Below that it is ``1.0 - 0.99 * ONE_CHAR_PROB ** count``.
        """
        base_doubt = 0.99
        if self._num_mb_chars >= 6:
            return base_doubt
        remaining_doubt = base_doubt * self.ONE_CHAR_PROB ** self._num_mb_chars
        return 1.0 - remaining_doubt