diff options
author | Shubham Saini <shubham6405@gmail.com> | 2018-12-11 10:01:23 +0000 |
---|---|---|
committer | Shubham Saini <shubham6405@gmail.com> | 2018-12-11 10:01:23 +0000 |
commit | 68df54d6629ec019142eb149dd037774f2d11e7c (patch) | |
tree | 345bc22d46b4e01a4ba8303b94278952a4ed2b9e /venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/charsetgroupprober.py |
First commit
Diffstat (limited to 'venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/charsetgroupprober.py')
-rw-r--r-- | venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/charsetgroupprober.py | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/charsetgroupprober.py b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/charsetgroupprober.py new file mode 100644 index 0000000..1720ddc --- /dev/null +++ b/venv/lib/python3.7/site-packages/pip-10.0.1-py3.7.egg/pip/_vendor/chardet/charsetgroupprober.py | |||
@@ -0,0 +1,106 @@ | |||
1 | ######################## BEGIN LICENSE BLOCK ######################## | ||
2 | # The Original Code is Mozilla Communicator client code. | ||
3 | # | ||
4 | # The Initial Developer of the Original Code is | ||
5 | # Netscape Communications Corporation. | ||
6 | # Portions created by the Initial Developer are Copyright (C) 1998 | ||
7 | # the Initial Developer. All Rights Reserved. | ||
8 | # | ||
9 | # Contributor(s): | ||
10 | # Mark Pilgrim - port to Python | ||
11 | # | ||
12 | # This library is free software; you can redistribute it and/or | ||
13 | # modify it under the terms of the GNU Lesser General Public | ||
14 | # License as published by the Free Software Foundation; either | ||
15 | # version 2.1 of the License, or (at your option) any later version. | ||
16 | # | ||
17 | # This library is distributed in the hope that it will be useful, | ||
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
20 | # Lesser General Public License for more details. | ||
21 | # | ||
22 | # You should have received a copy of the GNU Lesser General Public | ||
23 | # License along with this library; if not, write to the Free Software | ||
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA | ||
25 | # 02110-1301 USA | ||
26 | ######################### END LICENSE BLOCK ######################### | ||
27 | |||
28 | from .enums import ProbingState | ||
29 | from .charsetprober import CharSetProber | ||
30 | |||
31 | |||
class CharSetGroupProber(CharSetProber):
    """Prober that delegates detection to a list of child probers.

    Input is fed to every child that is still active; the group reports
    the charset and language of whichever child is currently the best
    guess.
    """

    def __init__(self, lang_filter=None):
        """Create an empty group.

        :param lang_filter: forwarded unchanged to ``CharSetProber.__init__``.
        """
        super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
        # Count of child probers currently flagged as active.
        self._active_num = 0
        # Child probers. Starts empty; presumably populated by subclasses
        # (not visible in this file).
        self.probers = []
        # Child currently considered the most likely match, if any.
        self._best_guess_prober = None

    def reset(self):
        """Reset the group and re-activate every child prober."""
        super(CharSetGroupProber, self).reset()
        self._active_num = 0
        for prober in self.probers:
            # Falsy entries in the list are tolerated and skipped.
            if prober:
                prober.reset()
                prober.active = True
                self._active_num += 1
        self._best_guess_prober = None

    @property
    def charset_name(self):
        """Charset name of the best-guess child, or ``None`` if there is none."""
        if not self._best_guess_prober:
            # No winner recorded yet: get_confidence() picks one as a
            # side effect when it scans the active children.
            self.get_confidence()
            if not self._best_guess_prober:
                return None
        return self._best_guess_prober.charset_name

    @property
    def language(self):
        """Language of the best-guess child, or ``None`` if there is none."""
        if not self._best_guess_prober:
            # Same lazy selection as in charset_name.
            self.get_confidence()
            if not self._best_guess_prober:
                return None
        return self._best_guess_prober.language

    def feed(self, byte_str):
        """Feed ``byte_str`` to every active child prober.

        Stops early as soon as one child reports ``ProbingState.FOUND_IT``
        or once the last active child has reported ``ProbingState.NOT_ME``.

        :param byte_str: the data to analyse; passed unchanged to each child.
        :returns: the group's state after processing (``self.state``).
        """
        for prober in self.probers:
            if not prober or not prober.active:
                continue
            state = prober.feed(byte_str)
            if not state:
                # Falsy state: nothing conclusive from this child yet
                # (presumably ProbingState.DETECTING -- confirm in enums).
                continue
            if state == ProbingState.FOUND_IT:
                self._best_guess_prober = prober
                # Propagate the positive result to the group itself.
                # Without this the group never reports FOUND_IT and the
                # FOUND_IT branch of get_confidence() is unreachable.
                self._state = ProbingState.FOUND_IT
                return self.state
            if state == ProbingState.NOT_ME:
                prober.active = False
                self._active_num -= 1
                if self._active_num <= 0:
                    # Every child has ruled itself out, so the group has too.
                    self._state = ProbingState.NOT_ME
                    return self.state
        return self.state

    def get_confidence(self):
        """Return the group's confidence and refresh the best-guess child.

        :returns: ``0.99`` when the group state is ``FOUND_IT``, ``0.01``
            when it is ``NOT_ME``; otherwise the highest confidence among
            the active children, or ``0.0`` if no child scores above zero.
        """
        state = self.state
        if state == ProbingState.FOUND_IT:
            return 0.99
        if state == ProbingState.NOT_ME:
            return 0.01

        best_conf = 0.0
        self._best_guess_prober = None
        for prober in self.probers:
            if not prober:
                continue
            if not prober.active:
                self.logger.debug('%s not active', prober.charset_name)
                continue
            conf = prober.get_confidence()
            self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
            # Strict comparison: the first child wins ties, and a child
            # with confidence 0.0 is never selected.
            if best_conf < conf:
                best_conf = conf
                self._best_guess_prober = prober
        if not self._best_guess_prober:
            return 0.0
        return best_conf