o
    ja*                     @   s   U d Z ddlmZmZ dedefddZdZeed< e	eZ
e	e ed	< d
d eeD Zeeef ed< dedefddZdededefddZdedefddZdededB fddZdS )a  Early detection of escape-sequence-based encodings (ISO-2022, HZ-GB-2312, UTF-7).

These encodings use ESC (0x1B), tilde (~), or plus (+) sequences to switch
character sets.  They must be detected before binary detection (ESC is a control
byte) and before ASCII detection (HZ-GB-2312 and UTF-7 use only printable ASCII
bytes plus their respective shift markers).

Note: ``from __future__ import annotations`` is intentionally omitted because
this module is compiled with mypyc, which does not support PEP 563 string
annotations.
    )DETERMINISTIC_CONFIDENCEDetectionResultdatareturnc                 C   s   d}	 |  d|}|dkrdS |  d|d }|dkrdS | |d | }t|dkr>t|d dkr>tdd	 |D r>dS |d }q)
a  Check that at least one ~{...~} region contains valid GB2312 byte pairs.

    In HZ-GB-2312 GB mode, characters are encoded as pairs of bytes in the
    0x21-0x7E range.  We require at least one region with a non-empty, even-
    length run of such bytes.
    r   T   ~{F   ~}   c                 s   s(    | ]}d |  kodkn  V  qdS )!   ~   N ).0br   r   ]/var/www/html/fyndo/pharma/fyndo/venv/lib/python3.10/site-packages/chardet/pipeline/escape.py	<genexpr>$   s   & z(_has_valid_hz_regions.<locals>.<genexpr>)findlenall)r   startbeginendregionr   r   r   _has_valid_hz_regions   s    r   s@   ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/
_B64_CHARS_UTF7_BASE64c                 C   s   i | ]\}}||qS r   r   )r   icr   r   r   
<dictcomp>/   s    r   _B64_DECODE	b64_bytesc                 C   s@  t | }|d }|d }|dkr"t| d  }d|> d }||@ r"dS |d }t|}d}d}	d}
| D ]"}|d> t| B }|	d7 }	|	dkrT|	d8 }	||	? d@ ||
< |
d7 }
q2d}td|d d	D ]=}|| d> ||d  B }d
|  krwdkrn n|r~ dS d}q_d|  krdkrn n|s dS d}q_|r dS d}q_| S )u  Check if base64 bytes decode to valid UTF-16BE with correct padding.

    A valid UTF-7 shifted sequence must:
    1. Contain at least 3 Base64 characters (18 bits, enough for one 16-bit
       UTF-16 code unit).
    2. Have zero-valued trailing padding bits (the unused low bits of the last
       Base64 sextet after the last complete 16-bit code unit).
    3. Decode to valid UTF-16BE — no lone surrogates.

    This rejects accidental ``+<alphanum>-`` patterns found in URLs, MIME
    boundaries, hex-encoded hashes (e.g. SHA-1 git refs), and other ASCII data.

    The caller (``_has_valid_utf7_sequences``) already checks ``b64_len >= 3``
    before calling this function, so *b64_bytes* is always at least 3 bytes.
          r   r      F      r	   i   i  Ti   i  )r   r   	bytearrayrange)r   n
total_bitspadding_bitslast_valmask	num_bytesrawbit_buf	bit_countout_idxr   	prev_highr   	code_unitr   r   r   _is_valid_utf7_b642   sH   r3   posc                 C   sn   t tdB }d}|d }|dkr3| | }|dv r|d8 }q||v r*|d7 }|d8 }n	 |dkS |dks|dkS )ae  Return True if the ``+`` at *pos* is embedded in a base64 stream.

    Walks backward from *pos*, skipping CR/LF, and counts consecutive base64
    characters (including ``=`` for padding).  If 4 or more are found, the
    ``+`` is likely part of a PEM certificate, email attachment, or similar
    base64 blob rather than a real UTF-7 shift character.
       =r   r"   >   
         )r   	frozenset)r   r4   b64_with_padcountr   r   r   r   r   _is_embedded_in_base64o   s   

r<   c                 C   sH  d}	 |  td|}|dkrdS |d }|t| k r(| | tdkr(|d }q|t| k rY| | tdkrY|t| k rV| | tdkrV|d7 }|t| k rV| | tdksD|}qt| |ra|}q|}|t| k r| | tv r|d7 }|t| k r| | tv so|| }| || }|dkr| r|}q|dkrt|rdS t||}q)	as  Check that *data* contains at least one valid UTF-7 shifted sequence.

    A valid shifted sequence is ``+<base64 chars>`` terminated by either an
    explicit ``-`` or any non-Base64 character (per RFC 2152).  The base64
    portion must decode to valid UTF-16BE with correct zero-padding bits.
    The sequence ``+-`` is a literal plus sign and is **not** counted.
    r   T+r   Fr"   -   )r   ordr   r<   r   islowerr3   max)r   r   	shift_posr4   r   b64_lenb64_datar   r   r   _has_valid_utf7_sequences   s>   

rF   Nc                 C   s  d| v }d| v }d| v }|s|s|sdS |red| v s"d| v s"d| v r)t dtd	d
S d| v r4t dtd	d
S d| v sDd| v sDd| v sDd| v rZd| v rSd| v rSt dtd	d
S t dtd	d
S d| v ret dtdd
S |rzd| v rzd| v rzt| rzt dtdd
S |rt| dk rt| rt dtdd
S dS )zDetect ISO-2022, HZ-GB-2312, and UTF-7 from escape/tilde/plus sequences.

    :param data: The raw byte data to examine.
    :returns: A :class:`DetectionResult` if an escape encoding is found, or ``None``.
          ~   +Ns   $(Os   $(Ps   $(Qiso2022_jp_2004ja)encoding
confidencelanguages   (Iiso2022_jp_exts   $Bs   $@s   (Js   $(D      iso2022_jp_2s   $)C
iso2022_krkor   r   hzzh   zutf-7)r   r   r   rB   rF   )r   has_esc	has_tildehas_plusr   r   r   detect_escape_encoding   sh   	r[   )__doc__chardet.pipeliner   r   bytesboolr   r   __annotations__r9   r   int	enumerater   dictr3   r<   rF   r[   r   r   r   r   <module>   s    "=<