diff options
author | Shubham Saini <shubham6405@gmail.com> | 2018-12-11 10:01:23 +0000 |
---|---|---|
committer | Shubham Saini <shubham6405@gmail.com> | 2018-12-11 10:01:23 +0000 |
commit | 68df54d6629ec019142eb149dd037774f2d11e7c (patch) | |
tree | 345bc22d46b4e01a4ba8303b94278952a4ed2b9e /venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/mbcssm.py |
First commit
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/mbcssm.py')
-rw-r--r-- | venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/mbcssm.py | 572 |
1 files changed, 572 insertions, 0 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/mbcssm.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/mbcssm.py new file mode 100644 index 0000000..d68f6f6 --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/mbcssm.py | |||
@@ -0,0 +1,572 @@ | |||
1 | ######################## BEGIN LICENSE BLOCK ######################## | ||
2 | # The Original Code is mozilla.org code. | ||
3 | # | ||
4 | # The Initial Developer of the Original Code is | ||
5 | # Netscape Communications Corporation. | ||
6 | # Portions created by the Initial Developer are Copyright (C) 1998 | ||
7 | # the Initial Developer. All Rights Reserved. | ||
8 | # | ||
9 | # Contributor(s): | ||
10 | # Mark Pilgrim - port to Python | ||
11 | # | ||
12 | # This library is free software; you can redistribute it and/or | ||
13 | # modify it under the terms of the GNU Lesser General Public | ||
14 | # License as published by the Free Software Foundation; either | ||
15 | # version 2.1 of the License, or (at your option) any later version. | ||
16 | # | ||
17 | # This library is distributed in the hope that it will be useful, | ||
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
20 | # Lesser General Public License for more details. | ||
21 | # | ||
22 | # You should have received a copy of the GNU Lesser General Public | ||
23 | # License along with this library; if not, write to the Free Software | ||
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA | ||
25 | # 02110-1301 USA | ||
26 | ######################### END LICENSE BLOCK ######################### | ||
27 | |||
28 | from .enums import MachineState | ||
29 | |||
30 | # BIG5 | ||
31 | |||
32 | BIG5_CLS = ( | ||
33 | 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value | ||
34 | 1,1,1,1,1,1,0,0, # 08 - 0f | ||
35 | 1,1,1,1,1,1,1,1, # 10 - 17 | ||
36 | 1,1,1,0,1,1,1,1, # 18 - 1f | ||
37 | 1,1,1,1,1,1,1,1, # 20 - 27 | ||
38 | 1,1,1,1,1,1,1,1, # 28 - 2f | ||
39 | 1,1,1,1,1,1,1,1, # 30 - 37 | ||
40 | 1,1,1,1,1,1,1,1, # 38 - 3f | ||
41 | 2,2,2,2,2,2,2,2, # 40 - 47 | ||
42 | 2,2,2,2,2,2,2,2, # 48 - 4f | ||
43 | 2,2,2,2,2,2,2,2, # 50 - 57 | ||
44 | 2,2,2,2,2,2,2,2, # 58 - 5f | ||
45 | 2,2,2,2,2,2,2,2, # 60 - 67 | ||
46 | 2,2,2,2,2,2,2,2, # 68 - 6f | ||
47 | 2,2,2,2,2,2,2,2, # 70 - 77 | ||
48 | 2,2,2,2,2,2,2,1, # 78 - 7f | ||
49 | 4,4,4,4,4,4,4,4, # 80 - 87 | ||
50 | 4,4,4,4,4,4,4,4, # 88 - 8f | ||
51 | 4,4,4,4,4,4,4,4, # 90 - 97 | ||
52 | 4,4,4,4,4,4,4,4, # 98 - 9f | ||
53 | 4,3,3,3,3,3,3,3, # a0 - a7 | ||
54 | 3,3,3,3,3,3,3,3, # a8 - af | ||
55 | 3,3,3,3,3,3,3,3, # b0 - b7 | ||
56 | 3,3,3,3,3,3,3,3, # b8 - bf | ||
57 | 3,3,3,3,3,3,3,3, # c0 - c7 | ||
58 | 3,3,3,3,3,3,3,3, # c8 - cf | ||
59 | 3,3,3,3,3,3,3,3, # d0 - d7 | ||
60 | 3,3,3,3,3,3,3,3, # d8 - df | ||
61 | 3,3,3,3,3,3,3,3, # e0 - e7 | ||
62 | 3,3,3,3,3,3,3,3, # e8 - ef | ||
63 | 3,3,3,3,3,3,3,3, # f0 - f7 | ||
64 | 3,3,3,3,3,3,3,0 # f8 - ff | ||
65 | ) | ||
66 | |||
67 | BIG5_ST = ( | ||
68 | MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 | ||
69 | MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f | ||
70 | MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17 | ||
71 | ) | ||
72 | |||
73 | BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0) | ||
74 | |||
75 | BIG5_SM_MODEL = {'class_table': BIG5_CLS, | ||
76 | 'class_factor': 5, | ||
77 | 'state_table': BIG5_ST, | ||
78 | 'char_len_table': BIG5_CHAR_LEN_TABLE, | ||
79 | 'name': 'Big5'} | ||
80 | |||
81 | # CP949 | ||
82 | |||
83 | CP949_CLS = ( | ||
84 | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f | ||
85 | 1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f | ||
86 | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f | ||
87 | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f | ||
88 | 1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f | ||
89 | 4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f | ||
90 | 1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f | ||
91 | 5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f | ||
92 | 0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f | ||
93 | 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f | ||
94 | 6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af | ||
95 | 7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf | ||
96 | 7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf | ||
97 | 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df | ||
98 | 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef | ||
99 | 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff | ||
100 | ) | ||
101 | |||
102 | CP949_ST = ( | ||
103 | #cls= 0 1 2 3 4 5 6 7 8 9 # previous state = | ||
104 | MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START, 4, 5,MachineState.ERROR, 6, # MachineState.START | ||
105 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, # MachineState.ERROR | ||
106 | MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME | ||
107 | MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 3 | ||
108 | MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 4 | ||
109 | MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5 | ||
110 | MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6 | ||
111 | ) | ||
112 | |||
113 | CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2) | ||
114 | |||
115 | CP949_SM_MODEL = {'class_table': CP949_CLS, | ||
116 | 'class_factor': 10, | ||
117 | 'state_table': CP949_ST, | ||
118 | 'char_len_table': CP949_CHAR_LEN_TABLE, | ||
119 | 'name': 'CP949'} | ||
120 | |||
121 | # EUC-JP | ||
122 | |||
123 | EUCJP_CLS = ( | ||
124 | 4,4,4,4,4,4,4,4, # 00 - 07 | ||
125 | 4,4,4,4,4,4,5,5, # 08 - 0f | ||
126 | 4,4,4,4,4,4,4,4, # 10 - 17 | ||
127 | 4,4,4,5,4,4,4,4, # 18 - 1f | ||
128 | 4,4,4,4,4,4,4,4, # 20 - 27 | ||
129 | 4,4,4,4,4,4,4,4, # 28 - 2f | ||
130 | 4,4,4,4,4,4,4,4, # 30 - 37 | ||
131 | 4,4,4,4,4,4,4,4, # 38 - 3f | ||
132 | 4,4,4,4,4,4,4,4, # 40 - 47 | ||
133 | 4,4,4,4,4,4,4,4, # 48 - 4f | ||
134 | 4,4,4,4,4,4,4,4, # 50 - 57 | ||
135 | 4,4,4,4,4,4,4,4, # 58 - 5f | ||
136 | 4,4,4,4,4,4,4,4, # 60 - 67 | ||
137 | 4,4,4,4,4,4,4,4, # 68 - 6f | ||
138 | 4,4,4,4,4,4,4,4, # 70 - 77 | ||
139 | 4,4,4,4,4,4,4,4, # 78 - 7f | ||
140 | 5,5,5,5,5,5,5,5, # 80 - 87 | ||
141 | 5,5,5,5,5,5,1,3, # 88 - 8f | ||
142 | 5,5,5,5,5,5,5,5, # 90 - 97 | ||
143 | 5,5,5,5,5,5,5,5, # 98 - 9f | ||
144 | 5,2,2,2,2,2,2,2, # a0 - a7 | ||
145 | 2,2,2,2,2,2,2,2, # a8 - af | ||
146 | 2,2,2,2,2,2,2,2, # b0 - b7 | ||
147 | 2,2,2,2,2,2,2,2, # b8 - bf | ||
148 | 2,2,2,2,2,2,2,2, # c0 - c7 | ||
149 | 2,2,2,2,2,2,2,2, # c8 - cf | ||
150 | 2,2,2,2,2,2,2,2, # d0 - d7 | ||
151 | 2,2,2,2,2,2,2,2, # d8 - df | ||
152 | 0,0,0,0,0,0,0,0, # e0 - e7 | ||
153 | 0,0,0,0,0,0,0,0, # e8 - ef | ||
154 | 0,0,0,0,0,0,0,0, # f0 - f7 | ||
155 | 0,0,0,0,0,0,0,5 # f8 - ff | ||
156 | ) | ||
157 | |||
158 | EUCJP_ST = ( | ||
159 | 3, 4, 3, 5,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 | ||
160 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f | ||
161 | MachineState.ITS_ME,MachineState.ITS_ME,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17 | ||
162 | MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f | ||
163 | 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27 | ||
164 | ) | ||
165 | |||
166 | EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0) | ||
167 | |||
168 | EUCJP_SM_MODEL = {'class_table': EUCJP_CLS, | ||
169 | 'class_factor': 6, | ||
170 | 'state_table': EUCJP_ST, | ||
171 | 'char_len_table': EUCJP_CHAR_LEN_TABLE, | ||
172 | 'name': 'EUC-JP'} | ||
173 | |||
174 | # EUC-KR | ||
175 | |||
176 | EUCKR_CLS = ( | ||
177 | 1,1,1,1,1,1,1,1, # 00 - 07 | ||
178 | 1,1,1,1,1,1,0,0, # 08 - 0f | ||
179 | 1,1,1,1,1,1,1,1, # 10 - 17 | ||
180 | 1,1,1,0,1,1,1,1, # 18 - 1f | ||
181 | 1,1,1,1,1,1,1,1, # 20 - 27 | ||
182 | 1,1,1,1,1,1,1,1, # 28 - 2f | ||
183 | 1,1,1,1,1,1,1,1, # 30 - 37 | ||
184 | 1,1,1,1,1,1,1,1, # 38 - 3f | ||
185 | 1,1,1,1,1,1,1,1, # 40 - 47 | ||
186 | 1,1,1,1,1,1,1,1, # 48 - 4f | ||
187 | 1,1,1,1,1,1,1,1, # 50 - 57 | ||
188 | 1,1,1,1,1,1,1,1, # 58 - 5f | ||
189 | 1,1,1,1,1,1,1,1, # 60 - 67 | ||
190 | 1,1,1,1,1,1,1,1, # 68 - 6f | ||
191 | 1,1,1,1,1,1,1,1, # 70 - 77 | ||
192 | 1,1,1,1,1,1,1,1, # 78 - 7f | ||
193 | 0,0,0,0,0,0,0,0, # 80 - 87 | ||
194 | 0,0,0,0,0,0,0,0, # 88 - 8f | ||
195 | 0,0,0,0,0,0,0,0, # 90 - 97 | ||
196 | 0,0,0,0,0,0,0,0, # 98 - 9f | ||
197 | 0,2,2,2,2,2,2,2, # a0 - a7 | ||
198 | 2,2,2,2,2,3,3,3, # a8 - af | ||
199 | 2,2,2,2,2,2,2,2, # b0 - b7 | ||
200 | 2,2,2,2,2,2,2,2, # b8 - bf | ||
201 | 2,2,2,2,2,2,2,2, # c0 - c7 | ||
202 | 2,3,2,2,2,2,2,2, # c8 - cf | ||
203 | 2,2,2,2,2,2,2,2, # d0 - d7 | ||
204 | 2,2,2,2,2,2,2,2, # d8 - df | ||
205 | 2,2,2,2,2,2,2,2, # e0 - e7 | ||
206 | 2,2,2,2,2,2,2,2, # e8 - ef | ||
207 | 2,2,2,2,2,2,2,2, # f0 - f7 | ||
208 | 2,2,2,2,2,2,2,0 # f8 - ff | ||
209 | ) | ||
210 | |||
211 | EUCKR_ST = ( | ||
212 | MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 | ||
213 | MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f | ||
214 | ) | ||
215 | |||
216 | EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0) | ||
217 | |||
218 | EUCKR_SM_MODEL = {'class_table': EUCKR_CLS, | ||
219 | 'class_factor': 4, | ||
220 | 'state_table': EUCKR_ST, | ||
221 | 'char_len_table': EUCKR_CHAR_LEN_TABLE, | ||
222 | 'name': 'EUC-KR'} | ||
223 | |||
224 | # EUC-TW | ||
225 | |||
226 | EUCTW_CLS = ( | ||
227 | 2,2,2,2,2,2,2,2, # 00 - 07 | ||
228 | 2,2,2,2,2,2,0,0, # 08 - 0f | ||
229 | 2,2,2,2,2,2,2,2, # 10 - 17 | ||
230 | 2,2,2,0,2,2,2,2, # 18 - 1f | ||
231 | 2,2,2,2,2,2,2,2, # 20 - 27 | ||
232 | 2,2,2,2,2,2,2,2, # 28 - 2f | ||
233 | 2,2,2,2,2,2,2,2, # 30 - 37 | ||
234 | 2,2,2,2,2,2,2,2, # 38 - 3f | ||
235 | 2,2,2,2,2,2,2,2, # 40 - 47 | ||
236 | 2,2,2,2,2,2,2,2, # 48 - 4f | ||
237 | 2,2,2,2,2,2,2,2, # 50 - 57 | ||
238 | 2,2,2,2,2,2,2,2, # 58 - 5f | ||
239 | 2,2,2,2,2,2,2,2, # 60 - 67 | ||
240 | 2,2,2,2,2,2,2,2, # 68 - 6f | ||
241 | 2,2,2,2,2,2,2,2, # 70 - 77 | ||
242 | 2,2,2,2,2,2,2,2, # 78 - 7f | ||
243 | 0,0,0,0,0,0,0,0, # 80 - 87 | ||
244 | 0,0,0,0,0,0,6,0, # 88 - 8f | ||
245 | 0,0,0,0,0,0,0,0, # 90 - 97 | ||
246 | 0,0,0,0,0,0,0,0, # 98 - 9f | ||
247 | 0,3,4,4,4,4,4,4, # a0 - a7 | ||
248 | 5,5,1,1,1,1,1,1, # a8 - af | ||
249 | 1,1,1,1,1,1,1,1, # b0 - b7 | ||
250 | 1,1,1,1,1,1,1,1, # b8 - bf | ||
251 | 1,1,3,1,3,3,3,3, # c0 - c7 | ||
252 | 3,3,3,3,3,3,3,3, # c8 - cf | ||
253 | 3,3,3,3,3,3,3,3, # d0 - d7 | ||
254 | 3,3,3,3,3,3,3,3, # d8 - df | ||
255 | 3,3,3,3,3,3,3,3, # e0 - e7 | ||
256 | 3,3,3,3,3,3,3,3, # e8 - ef | ||
257 | 3,3,3,3,3,3,3,3, # f0 - f7 | ||
258 | 3,3,3,3,3,3,3,0 # f8 - ff | ||
259 | ) | ||
260 | |||
261 | EUCTW_ST = ( | ||
262 | MachineState.ERROR,MachineState.ERROR,MachineState.START, 3, 3, 3, 4,MachineState.ERROR,#00-07 | ||
263 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f | ||
264 | MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.ERROR,#10-17 | ||
265 | MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f | ||
266 | 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27 | ||
267 | MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f | ||
268 | ) | ||
269 | |||
270 | EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3) | ||
271 | |||
272 | EUCTW_SM_MODEL = {'class_table': EUCTW_CLS, | ||
273 | 'class_factor': 7, | ||
274 | 'state_table': EUCTW_ST, | ||
275 | 'char_len_table': EUCTW_CHAR_LEN_TABLE, | ||
276 | 'name': 'x-euc-tw'} | ||
277 | |||
278 | # GB2312 | ||
279 | |||
280 | GB2312_CLS = ( | ||
281 | 1,1,1,1,1,1,1,1, # 00 - 07 | ||
282 | 1,1,1,1,1,1,0,0, # 08 - 0f | ||
283 | 1,1,1,1,1,1,1,1, # 10 - 17 | ||
284 | 1,1,1,0,1,1,1,1, # 18 - 1f | ||
285 | 1,1,1,1,1,1,1,1, # 20 - 27 | ||
286 | 1,1,1,1,1,1,1,1, # 28 - 2f | ||
287 | 3,3,3,3,3,3,3,3, # 30 - 37 | ||
288 | 3,3,1,1,1,1,1,1, # 38 - 3f | ||
289 | 2,2,2,2,2,2,2,2, # 40 - 47 | ||
290 | 2,2,2,2,2,2,2,2, # 48 - 4f | ||
291 | 2,2,2,2,2,2,2,2, # 50 - 57 | ||
292 | 2,2,2,2,2,2,2,2, # 58 - 5f | ||
293 | 2,2,2,2,2,2,2,2, # 60 - 67 | ||
294 | 2,2,2,2,2,2,2,2, # 68 - 6f | ||
295 | 2,2,2,2,2,2,2,2, # 70 - 77 | ||
296 | 2,2,2,2,2,2,2,4, # 78 - 7f | ||
297 | 5,6,6,6,6,6,6,6, # 80 - 87 | ||
298 | 6,6,6,6,6,6,6,6, # 88 - 8f | ||
299 | 6,6,6,6,6,6,6,6, # 90 - 97 | ||
300 | 6,6,6,6,6,6,6,6, # 98 - 9f | ||
301 | 6,6,6,6,6,6,6,6, # a0 - a7 | ||
302 | 6,6,6,6,6,6,6,6, # a8 - af | ||
303 | 6,6,6,6,6,6,6,6, # b0 - b7 | ||
304 | 6,6,6,6,6,6,6,6, # b8 - bf | ||
305 | 6,6,6,6,6,6,6,6, # c0 - c7 | ||
306 | 6,6,6,6,6,6,6,6, # c8 - cf | ||
307 | 6,6,6,6,6,6,6,6, # d0 - d7 | ||
308 | 6,6,6,6,6,6,6,6, # d8 - df | ||
309 | 6,6,6,6,6,6,6,6, # e0 - e7 | ||
310 | 6,6,6,6,6,6,6,6, # e8 - ef | ||
311 | 6,6,6,6,6,6,6,6, # f0 - f7 | ||
312 | 6,6,6,6,6,6,6,0 # f8 - ff | ||
313 | ) | ||
314 | |||
315 | GB2312_ST = ( | ||
316 | MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, 3,MachineState.ERROR,#00-07 | ||
317 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f | ||
318 | MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,#10-17 | ||
319 | 4,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f | ||
320 | MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27 | ||
321 | MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f | ||
322 | ) | ||
323 | |||
324 | # To be accurate, the length of class 6 can be either 2 or 4. | ||
325 | # But it is not necessary to discriminate between the two since | ||
326 | # it is used for frequency analysis only, and we are validating | ||
327 | # each code range there as well. So it is safe to set it to be | ||
328 | # 2 here. | ||
329 | GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2) | ||
330 | |||
331 | GB2312_SM_MODEL = {'class_table': GB2312_CLS, | ||
332 | 'class_factor': 7, | ||
333 | 'state_table': GB2312_ST, | ||
334 | 'char_len_table': GB2312_CHAR_LEN_TABLE, | ||
335 | 'name': 'GB2312'} | ||
336 | |||
337 | # Shift_JIS | ||
338 | |||
339 | SJIS_CLS = ( | ||
340 | 1,1,1,1,1,1,1,1, # 00 - 07 | ||
341 | 1,1,1,1,1,1,0,0, # 08 - 0f | ||
342 | 1,1,1,1,1,1,1,1, # 10 - 17 | ||
343 | 1,1,1,0,1,1,1,1, # 18 - 1f | ||
344 | 1,1,1,1,1,1,1,1, # 20 - 27 | ||
345 | 1,1,1,1,1,1,1,1, # 28 - 2f | ||
346 | 1,1,1,1,1,1,1,1, # 30 - 37 | ||
347 | 1,1,1,1,1,1,1,1, # 38 - 3f | ||
348 | 2,2,2,2,2,2,2,2, # 40 - 47 | ||
349 | 2,2,2,2,2,2,2,2, # 48 - 4f | ||
350 | 2,2,2,2,2,2,2,2, # 50 - 57 | ||
351 | 2,2,2,2,2,2,2,2, # 58 - 5f | ||
352 | 2,2,2,2,2,2,2,2, # 60 - 67 | ||
353 | 2,2,2,2,2,2,2,2, # 68 - 6f | ||
354 | 2,2,2,2,2,2,2,2, # 70 - 77 | ||
355 | 2,2,2,2,2,2,2,1, # 78 - 7f | ||
356 | 3,3,3,3,3,2,2,3, # 80 - 87 | ||
357 | 3,3,3,3,3,3,3,3, # 88 - 8f | ||
358 | 3,3,3,3,3,3,3,3, # 90 - 97 | ||
359 | 3,3,3,3,3,3,3,3, # 98 - 9f | ||
360 | #0xa0 is illegal in sjis encoding, but some pages do | ||
361 | #contain such a byte, so we need to be more forgiving of errors. | ||
362 | 2,2,2,2,2,2,2,2, # a0 - a7 | ||
363 | 2,2,2,2,2,2,2,2, # a8 - af | ||
364 | 2,2,2,2,2,2,2,2, # b0 - b7 | ||
365 | 2,2,2,2,2,2,2,2, # b8 - bf | ||
366 | 2,2,2,2,2,2,2,2, # c0 - c7 | ||
367 | 2,2,2,2,2,2,2,2, # c8 - cf | ||
368 | 2,2,2,2,2,2,2,2, # d0 - d7 | ||
369 | 2,2,2,2,2,2,2,2, # d8 - df | ||
370 | 3,3,3,3,3,3,3,3, # e0 - e7 | ||
371 | 3,3,3,3,3,4,4,4, # e8 - ef | ||
372 | 3,3,3,3,3,3,3,3, # f0 - f7 | ||
373 | 3,3,3,3,3,0,0,0) # f8 - ff | ||
374 | |||
375 | |||
376 | SJIS_ST = ( | ||
377 | MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 | ||
378 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f | ||
379 | MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17 | ||
380 | ) | ||
381 | |||
382 | SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0) | ||
383 | |||
384 | SJIS_SM_MODEL = {'class_table': SJIS_CLS, | ||
385 | 'class_factor': 6, | ||
386 | 'state_table': SJIS_ST, | ||
387 | 'char_len_table': SJIS_CHAR_LEN_TABLE, | ||
388 | 'name': 'Shift_JIS'} | ||
389 | |||
390 | # UCS2-BE | ||
391 | |||
392 | UCS2BE_CLS = ( | ||
393 | 0,0,0,0,0,0,0,0, # 00 - 07 | ||
394 | 0,0,1,0,0,2,0,0, # 08 - 0f | ||
395 | 0,0,0,0,0,0,0,0, # 10 - 17 | ||
396 | 0,0,0,3,0,0,0,0, # 18 - 1f | ||
397 | 0,0,0,0,0,0,0,0, # 20 - 27 | ||
398 | 0,3,3,3,3,3,0,0, # 28 - 2f | ||
399 | 0,0,0,0,0,0,0,0, # 30 - 37 | ||
400 | 0,0,0,0,0,0,0,0, # 38 - 3f | ||
401 | 0,0,0,0,0,0,0,0, # 40 - 47 | ||
402 | 0,0,0,0,0,0,0,0, # 48 - 4f | ||
403 | 0,0,0,0,0,0,0,0, # 50 - 57 | ||
404 | 0,0,0,0,0,0,0,0, # 58 - 5f | ||
405 | 0,0,0,0,0,0,0,0, # 60 - 67 | ||
406 | 0,0,0,0,0,0,0,0, # 68 - 6f | ||
407 | 0,0,0,0,0,0,0,0, # 70 - 77 | ||
408 | 0,0,0,0,0,0,0,0, # 78 - 7f | ||
409 | 0,0,0,0,0,0,0,0, # 80 - 87 | ||
410 | 0,0,0,0,0,0,0,0, # 88 - 8f | ||
411 | 0,0,0,0,0,0,0,0, # 90 - 97 | ||
412 | 0,0,0,0,0,0,0,0, # 98 - 9f | ||
413 | 0,0,0,0,0,0,0,0, # a0 - a7 | ||
414 | 0,0,0,0,0,0,0,0, # a8 - af | ||
415 | 0,0,0,0,0,0,0,0, # b0 - b7 | ||
416 | 0,0,0,0,0,0,0,0, # b8 - bf | ||
417 | 0,0,0,0,0,0,0,0, # c0 - c7 | ||
418 | 0,0,0,0,0,0,0,0, # c8 - cf | ||
419 | 0,0,0,0,0,0,0,0, # d0 - d7 | ||
420 | 0,0,0,0,0,0,0,0, # d8 - df | ||
421 | 0,0,0,0,0,0,0,0, # e0 - e7 | ||
422 | 0,0,0,0,0,0,0,0, # e8 - ef | ||
423 | 0,0,0,0,0,0,0,0, # f0 - f7 | ||
424 | 0,0,0,0,0,0,4,5 # f8 - ff | ||
425 | ) | ||
426 | |||
427 | UCS2BE_ST = ( | ||
428 | 5, 7, 7,MachineState.ERROR, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07 | ||
429 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f | ||
430 | MachineState.ITS_ME,MachineState.ITS_ME, 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,#10-17 | ||
431 | 6, 6, 6, 6, 6,MachineState.ITS_ME, 6, 6,#18-1f | ||
432 | 6, 6, 6, 6, 5, 7, 7,MachineState.ERROR,#20-27 | ||
433 | 5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f | ||
434 | 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37 | ||
435 | ) | ||
436 | |||
437 | UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2) | ||
438 | |||
439 | UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS, | ||
440 | 'class_factor': 6, | ||
441 | 'state_table': UCS2BE_ST, | ||
442 | 'char_len_table': UCS2BE_CHAR_LEN_TABLE, | ||
443 | 'name': 'UTF-16BE'} | ||
444 | |||
445 | # UCS2-LE | ||
446 | |||
447 | UCS2LE_CLS = ( | ||
448 | 0,0,0,0,0,0,0,0, # 00 - 07 | ||
449 | 0,0,1,0,0,2,0,0, # 08 - 0f | ||
450 | 0,0,0,0,0,0,0,0, # 10 - 17 | ||
451 | 0,0,0,3,0,0,0,0, # 18 - 1f | ||
452 | 0,0,0,0,0,0,0,0, # 20 - 27 | ||
453 | 0,3,3,3,3,3,0,0, # 28 - 2f | ||
454 | 0,0,0,0,0,0,0,0, # 30 - 37 | ||
455 | 0,0,0,0,0,0,0,0, # 38 - 3f | ||
456 | 0,0,0,0,0,0,0,0, # 40 - 47 | ||
457 | 0,0,0,0,0,0,0,0, # 48 - 4f | ||
458 | 0,0,0,0,0,0,0,0, # 50 - 57 | ||
459 | 0,0,0,0,0,0,0,0, # 58 - 5f | ||
460 | 0,0,0,0,0,0,0,0, # 60 - 67 | ||
461 | 0,0,0,0,0,0,0,0, # 68 - 6f | ||
462 | 0,0,0,0,0,0,0,0, # 70 - 77 | ||
463 | 0,0,0,0,0,0,0,0, # 78 - 7f | ||
464 | 0,0,0,0,0,0,0,0, # 80 - 87 | ||
465 | 0,0,0,0,0,0,0,0, # 88 - 8f | ||
466 | 0,0,0,0,0,0,0,0, # 90 - 97 | ||
467 | 0,0,0,0,0,0,0,0, # 98 - 9f | ||
468 | 0,0,0,0,0,0,0,0, # a0 - a7 | ||
469 | 0,0,0,0,0,0,0,0, # a8 - af | ||
470 | 0,0,0,0,0,0,0,0, # b0 - b7 | ||
471 | 0,0,0,0,0,0,0,0, # b8 - bf | ||
472 | 0,0,0,0,0,0,0,0, # c0 - c7 | ||
473 | 0,0,0,0,0,0,0,0, # c8 - cf | ||
474 | 0,0,0,0,0,0,0,0, # d0 - d7 | ||
475 | 0,0,0,0,0,0,0,0, # d8 - df | ||
476 | 0,0,0,0,0,0,0,0, # e0 - e7 | ||
477 | 0,0,0,0,0,0,0,0, # e8 - ef | ||
478 | 0,0,0,0,0,0,0,0, # f0 - f7 | ||
479 | 0,0,0,0,0,0,4,5 # f8 - ff | ||
480 | ) | ||
481 | |||
482 | UCS2LE_ST = ( | ||
483 | 6, 6, 7, 6, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07 | ||
484 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f | ||
485 | MachineState.ITS_ME,MachineState.ITS_ME, 5, 5, 5,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#10-17 | ||
486 | 5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR, 6, 6,#18-1f | ||
487 | 7, 6, 8, 8, 5, 5, 5,MachineState.ERROR,#20-27 | ||
488 | 5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f | ||
489 | 5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37 | ||
490 | ) | ||
491 | |||
492 | UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2) | ||
493 | |||
494 | UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS, | ||
495 | 'class_factor': 6, | ||
496 | 'state_table': UCS2LE_ST, | ||
497 | 'char_len_table': UCS2LE_CHAR_LEN_TABLE, | ||
498 | 'name': 'UTF-16LE'} | ||
499 | |||
500 | # UTF-8 | ||
501 | |||
502 | UTF8_CLS = ( | ||
503 | 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value | ||
504 | 1,1,1,1,1,1,0,0, # 08 - 0f | ||
505 | 1,1,1,1,1,1,1,1, # 10 - 17 | ||
506 | 1,1,1,0,1,1,1,1, # 18 - 1f | ||
507 | 1,1,1,1,1,1,1,1, # 20 - 27 | ||
508 | 1,1,1,1,1,1,1,1, # 28 - 2f | ||
509 | 1,1,1,1,1,1,1,1, # 30 - 37 | ||
510 | 1,1,1,1,1,1,1,1, # 38 - 3f | ||
511 | 1,1,1,1,1,1,1,1, # 40 - 47 | ||
512 | 1,1,1,1,1,1,1,1, # 48 - 4f | ||
513 | 1,1,1,1,1,1,1,1, # 50 - 57 | ||
514 | 1,1,1,1,1,1,1,1, # 58 - 5f | ||
515 | 1,1,1,1,1,1,1,1, # 60 - 67 | ||
516 | 1,1,1,1,1,1,1,1, # 68 - 6f | ||
517 | 1,1,1,1,1,1,1,1, # 70 - 77 | ||
518 | 1,1,1,1,1,1,1,1, # 78 - 7f | ||
519 | 2,2,2,2,3,3,3,3, # 80 - 87 | ||
520 | 4,4,4,4,4,4,4,4, # 88 - 8f | ||
521 | 4,4,4,4,4,4,4,4, # 90 - 97 | ||
522 | 4,4,4,4,4,4,4,4, # 98 - 9f | ||
523 | 5,5,5,5,5,5,5,5, # a0 - a7 | ||
524 | 5,5,5,5,5,5,5,5, # a8 - af | ||
525 | 5,5,5,5,5,5,5,5, # b0 - b7 | ||
526 | 5,5,5,5,5,5,5,5, # b8 - bf | ||
527 | 0,0,6,6,6,6,6,6, # c0 - c7 | ||
528 | 6,6,6,6,6,6,6,6, # c8 - cf | ||
529 | 6,6,6,6,6,6,6,6, # d0 - d7 | ||
530 | 6,6,6,6,6,6,6,6, # d8 - df | ||
531 | 7,8,8,8,8,8,8,8, # e0 - e7 | ||
532 | 8,8,8,8,8,9,8,8, # e8 - ef | ||
533 | 10,11,11,11,11,11,11,11, # f0 - f7 | ||
534 | 12,13,13,13,14,15,0,0 # f8 - ff | ||
535 | ) | ||
536 | |||
537 | UTF8_ST = ( | ||
538 | MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12, 10,#00-07 | ||
539 | 9, 11, 8, 7, 6, 5, 4, 3,#08-0f | ||
540 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17 | ||
541 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f | ||
542 | MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#20-27 | ||
543 | MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#28-2f | ||
544 | MachineState.ERROR,MachineState.ERROR, 5, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#30-37 | ||
545 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#38-3f | ||
546 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#40-47 | ||
547 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#48-4f | ||
548 | MachineState.ERROR,MachineState.ERROR, 7, 7, 7, 7,MachineState.ERROR,MachineState.ERROR,#50-57 | ||
549 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#58-5f | ||
550 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 7, 7,MachineState.ERROR,MachineState.ERROR,#60-67 | ||
551 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#68-6f | ||
552 | MachineState.ERROR,MachineState.ERROR, 9, 9, 9, 9,MachineState.ERROR,MachineState.ERROR,#70-77 | ||
553 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#78-7f | ||
554 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 9,MachineState.ERROR,MachineState.ERROR,#80-87 | ||
555 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#88-8f | ||
556 | MachineState.ERROR,MachineState.ERROR, 12, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,#90-97 | ||
557 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#98-9f | ||
558 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12,MachineState.ERROR,MachineState.ERROR,#a0-a7 | ||
559 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#a8-af | ||
560 | MachineState.ERROR,MachineState.ERROR, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b0-b7 | ||
561 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b8-bf | ||
562 | MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7 | ||
563 | MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf | ||
564 | ) | ||
565 | |||
566 | UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) | ||
567 | |||
568 | UTF8_SM_MODEL = {'class_table': UTF8_CLS, | ||
569 | 'class_factor': 16, | ||
570 | 'state_table': UTF8_ST, | ||
571 | 'char_len_table': UTF8_CHAR_LEN_TABLE, | ||
572 | 'name': 'UTF-8'} | ||