
    f js+                         U d Z ddlmZmZ dedefdZdZeed<    e	e          Z
e	e         ed<   d	  ee          D             Zeeef         ed
<   dedefdZdededefdZdedefdZdededz  fdZdS )a  Early detection of escape-sequence-based encodings (ISO-2022, HZ-GB-2312, UTF-7).

These encodings use ESC (0x1B), tilde (~), or plus (+) sequences to switch
character sets.  They must be detected before binary detection (ESC is a control
byte) and before ASCII detection (HZ-GB-2312 and UTF-7 use only printable ASCII
bytes plus their respective shift markers).

Note: ``from __future__ import annotations`` is intentionally omitted because
this module is compiled with mypyc, which does not support PEP 563 string
annotations.
    )DETERMINISTIC_CONFIDENCEDetectionResultdatareturnc                 4   d}	 |                      d|          }|dk    rdS |                      d|dz             }|dk    rdS | |dz   |         }t          |          dk    r1t          |          dz  dk    rt          d |D                       rdS |dz   })	a  Check that at least one ~{...~} region contains valid GB2312 byte pairs.

    In HZ-GB-2312 GB mode, characters are encoded as pairs of bytes in the
    0x21-0x7E range.  We require at least one region with a non-empty, even-
    length run of such bytes.
    r   T   ~{F   ~}   c              3   6   K   | ]}d |cxk    odk    nc V  dS )!   ~   N ).0bs     kC:\Users\Terasoftware\OneDrive\Desktop\faahhh\fyndo\fyndo\venv\Lib\site-packages\chardet/pipeline/escape.py	<genexpr>z(_has_valid_hz_regions.<locals>.<genexpr>$   s>      66!DA%%%%%%%%666666    )findlenall)r   startbeginendregions        r   _has_valid_hz_regionsr      s     E		%''B; 	5iiuqy))"9 	5eai#o& KK1	Fa1$	 66v66666	
 4ar   s@   ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/
_B64_CHARS_UTF7_BASE64c                     i | ]\  }}||	S r   r   )r   ics      r   
<dictcomp>r"   /   s    FFF1q!FFFr   _B64_DECODE	b64_bytesc                    t          |           }|dz  }|dz  }|dk    r"t          | d                  }d|z  dz
  }||z  rdS |dz  }t          |          }d}d}	d}
| D ]5}|dz  t          |         z  }|	dz  }	|	dk    r|	dz  }	||	z	  dz  ||
<   |
dz  }
6d}t          d|dz
  d	          D ]P}||         dz  ||dz            z  }d
|cxk    rdk    rn n|r dS d}1d|cxk    rdk    rn n|s dS d}I|r dS d}Q| S )u  Check if base64 bytes decode to valid UTF-16BE with correct padding.

    A valid UTF-7 shifted sequence must:
    1. Contain at least 3 Base64 characters (18 bits, enough for one 16-bit
       UTF-16 code unit).
    2. Have zero-valued trailing padding bits (the unused low bits of the last
       Base64 sextet after the last complete 16-bit code unit).
    3. Decode to valid UTF-16BE — no lone surrogates.

    This rejects accidental ``+<alphanum>-`` patterns found in URLs, MIME
    boundaries, hex-encoded hashes (e.g. SHA-1 git refs), and other ASCII data.

    The caller (``_has_valid_utf7_sequences``) already checks ``b64_len >= 3``
    before calling this function, so *b64_bytes* is always at least 3 bytes.
          r   r	      F      r   i   i  Ti   i  )r   r#   	bytearrayrange)r$   n
total_bitspadding_bitslast_valmask	num_bytesrawbit_buf	bit_countout_idxr!   	prev_highr    	code_units                  r   _is_valid_utf7_b64r9   2   s     	IAQJ ?La y}-\!Q&d? 	5
 aI
I

CGIG  a<;q>1Q	> 	NI#y0D8CLqLGI1i!mQ''  Vq[CAJ.	Y 	 	 	 	& 	 	 	 	 	 uuIIy 	 	 	 	F 	 	 	 	 	 uuII uuII=r   posc                     t           t          d          z  }d}|dz
  }|dk    r(| |         }|dv r|dz  }||v r|dz  }|dz  }nn|dk    (|dk    S )ae  Return True if the ``+`` at *pos* is embedded in a base64 stream.

    Walks backward from *pos*, skipping CR/LF, and counts consecutive base64
    characters (including ``=`` for padding).  If 4 or more are found, the
    ``+`` is likely part of a PEM certificate, email attachment, or similar
    base64 blob rather than a real UTF-7 shift character.
       =r   r(   >   
         )r   	frozenset)r   r:   b64_with_padcountr    r   s         r   _is_embedded_in_base64rC   o   s     $0)D//#ALEaA
q& 	G 	FA 	QJEFAA q& 	 A:r   c                 N   d}	 |                      t          d          |          }|dk    rdS |dz   }|t          |           k     r| |         t          d          k    r|dz   }c|t          |           k     ry| |         t          d          k    r`|t          |           k     rJ| |         t          d          k    r1|dz  }|t          |           k     r| |         t          d          k    1|}t          | |          r|}|}|t          |           k     r6| |         t          v r'|dz  }|t          |           k     r| |         t          v '||z
  }| ||         }|dk    r|                                r|}{|dk    rt          |          rdS t          ||          })	as  Check that *data* contains at least one valid UTF-7 shifted sequence.

    A valid shifted sequence is ``+<base64 chars>`` terminated by either an
    explicit ``-`` or any non-Base64 character (per RFC 2152).  The base64
    portion must decode to valid UTF-16BE with correct zero-padding bits.
    The sequence ``+-`` is a literal plus sign and is **not** counted.
    r   T+r	   Fr(   -   )r   ordr   rC   r   islowerr9   max)r   r   	shift_posr:   r    b64_lenb64_datas          r   _has_valid_utf7_sequencesrN      s    E0IIc#hh..	? 	5!mT? 	tCyCHH4 	!GE T? 	tCyCHH4 	D		/ d3i3s88&; q D		/ d3i3s88&; E "$	22 	E#d))m 	Q< 7 	FA #d))m 	Q< 7 	c'A; a< 	H,,.. 	E a< 	.x88 	4Ca0r   Nc                 F   d| v }d| v }d| v }|s|s|sdS |rd| v sd| v sd| v rt          dt          d	
          S d| v rt          dt          d	
          S d| v sd| v sd| v sd| v r6d| v rd| v rt          dt          d	
          S t          dt          d	
          S d| v rt          dt          d
          S |r.d| v r*d| v r&t          |           rt          dt          d
          S |r9t          |           dk     r&t	          |           rt          dt          d
          S dS )zDetect ISO-2022, HZ-GB-2312, and UTF-7 from escape/tilde/plus sequences.

    :param data: The raw byte data to examine.
    :returns: A :class:`DetectionResult` if an escape encoding is found, or ``None``.
          ~   +Ns   $(Os   $(Ps   $(Qiso2022_jp_2004ja)encoding
confidencelanguages   (Iiso2022_jp_exts   $Bs   $@s   (Js   $(D      iso2022_jp_2s   $)C
iso2022_krkor   r
   hzzh   zutf-7)r   r   r   rJ   rN   )r   has_esc	has_tildehas_pluss       r   detect_escape_encodingrd      s    oGIt|H 9 X t - 	t!3 	zT7I 	"*3     	")3    	D 	 D 	 T!		 $ 7d? &-7!    #'3     	"%3     
Ud] 
u} 
9Nt9T9T 
/
 
 
 	
  
CII$ 
)B4)H)H 
/
 
 
 	
 4r   )__doc__chardet.pipeliner   r   bytesboolr   r   __annotations__r@   r   int	enumerater#   dictr9   rC   rN   rd   r   r   r   <module>rm      s]  
 
 
 G F F F F F F F $    6 X
E W W W(y44in 4 4 4 GF		*0E0EFFFT#s(^ F F F:% :D : : : :z S T    09E 9d 9 9 9 9xO O?T+A O O O O O Or   