diff options
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/codingstatemachine.py')
-rw-r--r-- | venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/codingstatemachine.py | 88 |
1 files changed, 88 insertions, 0 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/codingstatemachine.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/codingstatemachine.py new file mode 100644 index 0000000..c562e1d --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/codingstatemachine.py | |||
@@ -0,0 +1,88 @@ | |||
1 | ######################## BEGIN LICENSE BLOCK ######################## | ||
2 | # The Original Code is mozilla.org code. | ||
3 | # | ||
4 | # The Initial Developer of the Original Code is | ||
5 | # Netscape Communications Corporation. | ||
6 | # Portions created by the Initial Developer are Copyright (C) 1998 | ||
7 | # the Initial Developer. All Rights Reserved. | ||
8 | # | ||
9 | # Contributor(s): | ||
10 | # Mark Pilgrim - port to Python | ||
11 | # | ||
12 | # This library is free software; you can redistribute it and/or | ||
13 | # modify it under the terms of the GNU Lesser General Public | ||
14 | # License as published by the Free Software Foundation; either | ||
15 | # version 2.1 of the License, or (at your option) any later version. | ||
16 | # | ||
17 | # This library is distributed in the hope that it will be useful, | ||
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
20 | # Lesser General Public License for more details. | ||
21 | # | ||
22 | # You should have received a copy of the GNU Lesser General Public | ||
23 | # License along with this library; if not, write to the Free Software | ||
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA | ||
25 | # 02110-1301 USA | ||
26 | ######################### END LICENSE BLOCK ######################### | ||
27 | |||
28 | import logging | ||
29 | |||
30 | from .enums import MachineState | ||
31 | |||
32 | |||
class CodingStateMachine(object):
    """
    Table-driven state machine that validates a byte sequence against one
    particular encoding.

    The detector hands every incoming byte to each state machine that is
    still active, one byte at a time. Each machine derives its new state
    from its previous state and the byte just received. Three states matter
    to an auto-detector:

    START state: The initial state, and also the state reached once a legal
                 byte sequence (i.e. a valid code point) for a character
                 has been identified.

    ME state:  The machine has seen a byte sequence that is specific to the
               charset it was built for, and no other possible encoding can
               contain that sequence. This will lead to an immediate
               positive answer for the detector.

    ERROR state: The machine has seen a byte sequence that is illegal for
                 its encoding. This will lead to an immediate negative
                 answer for this encoding; the detector will exclude this
                 encoding from consideration from here on.

    The model passed to the constructor is indexed by the keys
    'class_table', 'char_len_table', 'class_factor', 'state_table', 'name'
    and 'language'.
    """

    def __init__(self, sm):
        """Store the model ``sm`` and put the machine into its START state."""
        self.logger = logging.getLogger(__name__)
        self._model = sm
        self._curr_state = None
        self._curr_char_len = 0
        self._curr_byte_pos = 0
        self.reset()

    def reset(self):
        """Return to the START state.

        Only the current state is changed here; the byte position and the
        character length are re-initialised by ``next_state`` the next time
        it is called while in the START state.
        """
        self._curr_state = MachineState.START

    def next_state(self, c):
        """Feed one byte ``c`` to the machine and return the resulting state.

        ``c`` is used as an index into the model's 'class_table'.
        """
        model = self._model
        # Every byte is first mapped to its byte class.
        byte_class = model['class_table'][c]
        if self._curr_state == MachineState.START:
            # First byte of a character: restart the position counter and
            # look up the character length for this byte class.
            self._curr_byte_pos = 0
            self._curr_char_len = model['char_len_table'][byte_class]
        # 'state_table' is a flat table: each state owns 'class_factor'
        # consecutive entries, one per byte class.
        transition_index = (self._curr_state * model['class_factor']
                            + byte_class)
        self._curr_state = model['state_table'][transition_index]
        self._curr_byte_pos += 1
        return self._curr_state

    def get_current_charlen(self):
        """Return the character length recorded at the last START byte."""
        return self._curr_char_len

    def get_coding_state_machine(self):
        """Return the 'name' entry of the underlying model."""
        return self._model['name']

    @property
    def language(self):
        """The 'language' entry of the underlying model."""
        return self._model['language']