
    Nf j3                         d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ  G d d          ZdS )a  
Module containing the UniversalDetector detector class, which is the primary
class a user of ``chardet`` should use.

:author: Mark Pilgrim (initial port to Python)
:author: Shy Shalom (original C code)
:author: Dan Blanchard (major refactoring for 3.0)
:author: Ian Cordasco
    N   )CharSetGroupProber)
InputStateLanguageFilterProbingState)EscCharSetProber)Latin1Prober)MBCSGroupProber)SBCSGroupProber)UTF1632Proberc            	           e Zd ZdZdZ ej        d          Z ej        d          Z ej        d          Z	dddd	d
ddddZ
ej        fdZed             Zed             Zed             Zd Zd Zd ZdS )UniversalDetectoraq  
    The ``UniversalDetector`` class underlies the ``chardet.detect`` function
    and coordinates all of the different charset probers.

    To get a ``dict`` containing an encoding and its confidence, you can simply
    run:

    .. code::

            u = UniversalDetector()
            u.feed(some_bytes)
            u.close()
            detected = u.result

    g?s   [-]s   (|~{)s   [-]zWindows-1252zWindows-1250zWindows-1251zWindows-1256zWindows-1253zWindows-1255zWindows-1254zWindows-1257)z
iso-8859-1z
iso-8859-2z
iso-8859-5z
iso-8859-6z
iso-8859-7z
iso-8859-8z
iso-8859-9ziso-8859-13c                     d | _         d | _        g | _        d | _        d | _        d | _        d | _        d | _        || _        t          j
        t                    | _        d | _        |                                  d S N)_esc_charset_prober_utf1632_prober_charset_probersresultdone	_got_data_input_state
_last_charlang_filterlogging	getLogger__name__logger_has_win_bytesreset)selfr   s     yC:\Users\Terasoftware\OneDrive\Desktop\faahhh\fyndo\fyndo\venv\Lib\site-packages\pip/_vendor/chardet/universaldetector.py__init__zUniversalDetector.__init__T   sq    #' # "	 &'11"

    c                     | j         S r   )r   r    s    r!   input_statezUniversalDetector.input_stateb   s      r#   c                     | j         S r   )r   r%   s    r!   has_win_byteszUniversalDetector.has_win_bytesf   s    ""r#   c                     | j         S r   )r   r%   s    r!   charset_probersz!UniversalDetector.charset_probersj   s    $$r#   c                 2   dddd| _         d| _        d| _        d| _        t          j        | _        d| _        | j        r| j        	                                 | j
        r| j
        	                                 | j        D ]}|	                                 dS )z
        Reset the UniversalDetector and all of its probers back to their
        initial states.  This is called by ``__init__``, so you only need to
        call this directly in between analyses of different documents.
        N        encoding
confidencelanguageFr#   )r   r   r   r   r   
PURE_ASCIIr   r   r   r   r   r   )r    probers     r!   r   zUniversalDetector.resetn   s     $(sMM	#&1# 	-$**,,, 	) &&(((+ 	 	FLLNNNN	 	r#   c                 $   | j         rdS |sdS t          |t                    st          |          }| j        s|                    t
          j                  rdddd| _        n|                    t
          j        t
          j	        f          rdddd| _        nx|                    d          rdddd| _        nW|                    d	          rd
ddd| _        n6|                    t
          j
        t
          j        f          rdddd| _        d| _        | j        d         	d| _         dS | j        t          j        k    rt| j                            |          rt          j        | _        nH| j        t          j        k    r3| j                            | j        |z             rt          j        | _        |dd         | _        | j        st-                      | _        | j        j        t0          j        k    r]| j                            |          t0          j        k    r5| j        j        | j                                        dd| _        d| _         dS | j        t          j        k    r| j        st?          | j                   | _        | j                            |          t0          j        k    r?| j        j        | j                                        | j        j!        d| _        d| _         dS dS | j        t          j        k    r| j"        sztG          | j                   g| _"        | j         tH          j%        z  r&| j"        &                    tO                                 | j"        &                    tQ                                 | j"        D ]U}|                    |          t0          j        k    r0|j        |                                |j!        d| _        d| _          nV| j)                            |          rd| _*        dS dS dS )a  
        Takes a chunk of a document and feeds it through all of the relevant
        charset probers.

        After calling ``feed``, you can check the value of the ``done``
        attribute to see if you need to continue feeding the
        ``UniversalDetector`` more data, or if it has made a prediction
        (in the ``result`` attribute).

        .. note::
           You should always call ``close`` when you're done feeding in your
           document if ``done`` is not already ``True``.
        Nz	UTF-8-SIG      ? r-   zUTF-32s     zX-ISO-10646-UCS-4-3412s     zX-ISO-10646-UCS-4-2143zUTF-16Tr.   )+r   
isinstance	bytearrayr   
startswithcodecsBOM_UTF8r   BOM_UTF32_LEBOM_UTF32_BEBOM_LEBOM_BEr   r   r1   HIGH_BYTE_DETECTORsearch	HIGH_BYTEESC_DETECTORr   	ESC_ASCIIr   r   stater   	DETECTINGfeedFOUND_ITcharset_nameget_confidencer   r   r   r0   r   r
   r   NON_CJKappendr   r	   WIN_BYTE_DETECTORr   )r    byte_strr2   s      r!   rG   zUniversalDetector.feed   sT    9 	F 	F(I.. 	+ **H ~ #	""6?33 X !,"% " 
 $$f&96;N%OPP X ,43TVWW$$%899 X !9"% " 
 $$%899 
X !9"% " 
 $$fmV]%CDD X ,43TVWW!DN{:&  	 
 55 	9&--h77 9$.$8!!!Z%::9%,,T_x-GHH9 %/$8!"233- # 	3#0??D %)?? 	#((22l6KK  $ 4 A"&"6"E"E"G"G " 
 !	 
 44 !	++ N+;D<L+M+M(',,X66,:OO ! $ 8 E"&":"I"I"K"K $ 8 A 
 !			! ! *"66 	+( =)89I)J)J(K%#n&<< D)001B1BCCC%,,\^^<<</  ;;x((L,AA $*$7&,&;&;&=&=$*O# #DK
 !%DIE %,,X66 +&*####	+ 	+ + +r#   c           	      6   | j         r| j        S d| _         | j        s| j                            d           n| j        t          j        k    rdddd| _        n| j        t          j        k    rd}d}d}| j	        D ]#}|s|
                                }||k    r|}|}$|r|| j        k    r{|j        }|j                                        }|
                                }|                    d	          r"| j        r| j                            ||          }|||j        d| _        | j                                        t(          j        k    r| j        d
         | j                            d           | j	        D ]}|st-          |t.                    rD|j        D ];}| j                            d|j        |j        |
                                           <^| j                            d|j        |j        |
                                           | j        S )z
        Stop analyzing the current document and come up with a final
        prediction.

        :returns:  The ``result`` attribute, a ``dict`` with the keys
                   `encoding`, `confidence`, and `language`.
        Tzno data received!asciir4   r5   r-   Nr,   ziso-8859r.   z no probers hit minimum thresholdz%s %s confidence = %s)r   r   r   r   debugr   r   r1   rB   r   rJ   MINIMUM_THRESHOLDrI   lowerr9   r   ISO_WIN_MAPgetr0   getEffectiveLevelr   DEBUGr7   r   probers)	r    prober_confidencemax_prober_confidence
max_proberr2   rI   lower_charset_namer/   group_probers	            r!   closezUniversalDetector.close  s    9 	;	~ "	K12222 *"77 	'.crRRDKK *"66 	 $$'!J/ ( ( $*$9$9$;$;!$'<< (,=)!'J 4t7MM )6%/%<%B%B%D%D"'6688
 &00<< * '+'7';';.( ( !-", * 3  ;((**gm; 	{:& !!"DEEE$($9  L' ! !,0BCC &2&:  F K-- 7 & 3 & & 5 5 7 7	    ))3(5(1(7799	    {r#   N)r   
__module____qualname____doc__rR   recompiler@   rC   rM   rT   r   ALLr"   propertyr&   r(   r*   r   rG   r^    r#   r!   r   r   4   s"          #N332:l++L"
>22$$$$$$$%	 	K $2#5     ! ! X! # # X# % % X%  &~+ ~+ ~+@G G G G Gr#   r   )ra   r:   r   rb   charsetgroupproberr   enumsr   r   r   	escproberr   latin1proberr	   mbcsgroupproberr
   sbcsgroupproberr   utf1632proberr   r   rf   r#   r!   <module>rn      s   8    				 2 2 2 2 2 2 ; ; ; ; ; ; ; ; ; ; ' ' ' ' ' ' & & & & & & , , , , , , , , , , , , ( ( ( ( ( (T T T T T T T T T Tr#   