o
    j8                     @  s  U d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 i de
dd	hd
e
ddhde
dhde
dhde
dhde
ddhde
ddhde
dhde
dhde
dhde
dhde
dhde
h d d!e
d"d#hd$e
d%hd&e
d	hd'e
d(he
d)he
d*he
d+he
d,he
d-he
d.he
d/d0he
d1he
d1he
d2d3he
d4he
d4hd5Zd6ed7< d	dd	d(d)d*d+d,d-dd.dd8Zd9ed:< dd?d@ZddAdBZeZi dddCdDdEdFddGdddHdddIdJdKd"ddLdMdNd&dOdPdQdRdSdTdUdVdWdXdYdZi d[d\d]d^dd_d`d
d1dad4dbdcddd)ded	dfd+dgd-dhd,didjdkdldmdndodpdqdrdsZd9edt< ddudvZdwZdxedy< dzZdxed{< d|d} fdddZeeZd6ed< dddZi Zd6ed< e D ] \ZZe	epeZe
dd eD Zeee
 eB ee< qeedd} dZd6ed< dddZ dddZ!e
ddhZ"ded< dddZ#dddZ$dS )a$  Encoding equivalences and name remapping.

This module defines:

1. **Directional supersets** for accuracy evaluation: detecting a superset
   encoding when the expected encoding is a subset is correct (e.g., detecting
   UTF-8 when expected is ASCII), but not the reverse.

2. **Bidirectional equivalents**: groups of encodings where detecting any
   member when another member was expected is considered correct.  This
   includes UTF-16/UTF-32 endian variants (which encode the same text with
   different byte order) and ISO-2022-JP branch variants (which are
   compatible extensions of the same base encoding).

3. **Preferred superset mapping** for the ``prefer_superset`` API option:
   replaces detected ISO/subset encoding names with their Windows/CP superset
   equivalents that modern software actually uses.

4. **Compatibility names** for the default ``compat_names=True`` mode: maps
   internal Python codec names to the names chardet 5.x/6.x returned,
   preserving backward compatibility for callers that compare encoding
   strings directly.
    )annotationsN)Callable)DetectionDictlookup_encodingASCIIzutf-8cp1252zTIS-620
iso8859-11cp874zISO-8859-11GB2312gb18030GBKBig5	big5hkscscp950	Shift_JIScp932shift_jis_2004zShift-JISX0213zEUC-JPeuc_jis_2004zEUC-JISX0213zEUC-KRcp949CP037cp1140zISO-2022-JP>   iso2022_jp_2iso2022_jp_extiso2022_jp_2004zISO2022-JP-1r   r   zISO2022-JP-3r   z
ISO-8859-1z
ISO-8859-2cp1250cp1251cp1256cp1253cp1255cp1254cp1257z	utf-16-lez	utf-16-bezutf-16z	utf-32-lez	utf-32-bezutf-32)
ISO-8859-5z
ISO-8859-6
ISO-8859-7
ISO-8859-8
ISO-8859-9zISO-8859-13UTF-16z	UTF-16-LEz	UTF-16-BEUTF-32z	UTF-32-LEz	UTF-32-BEdict[str, frozenset[str]]	SUPERSETS)asciieuc_kr	iso8859-1z	iso8859-2	iso8859-5z	iso8859-6	iso8859-7	iso8859-8	iso8859-9r	   z
iso8859-13tis-620dict[str, str]PREFERRED_SUPERSETresultr   mappingreturnc                 C  s(   |  d}t|tr| ||| d< | S )zGReplace the encoding name using *mapping*, modifying *result* in-place.encoding)get
isinstancestr)r4   r5   enc r<   Z/var/www/html/fyndo/pharma/fyndo/venv/lib/python3.10/site-packages/chardet/equivalences.py_remap_encodingl   s   

r>   c                 C  
   t | tS )a7  Replace the encoding name with its preferred Windows/CP superset.

    Modifies the ``"encoding"`` value in *result* in-place and returns *result*
    for fluent chaining.

    :param result: A detection result dict containing an ``"encoding"`` key.
    :returns: The same *result* dict, modified in-place.
    )r>   r3   r4   r<   r<   r=   apply_preferred_supersett      
rA   cp855IBM855cp866IBM866CP949r+   GB18030hzz
HZ-GB-2312
iso2022_krzISO-2022-KRr,   r-   r"   r.   r#   r/   r$   r0   r%   johabJohabzkoi8-rzKOI8-Rzmac-cyrillicMacCyrillicz	mac-romanMacRoman	SHIFT_JISr1   r&   r'   z	utf-8-sigz	UTF-8-SIGzWindows-1251zWindows-1252zWindows-1253zWindows-1254zWindows-1255kz1048KZ1048z	mac-greekMacGreekzmac-iceland
MacIcelandz
mac-latin2	MacLatin2zmac-turkish
MacTurkish_COMPAT_NAMESc                 C  r?   )a7  Convert internal codec names to chardet 5.x/6.x compatible names.

    Modifies the ``"encoding"`` value in *result* in-place and returns *result*
    for fluent chaining.

    :param result: A detection result dict containing an ``"encoding"`` key.
    :returns: The same *result* dict, modified in-place.
    )r>   rV   r@   r<   r<   r=   apply_compat_names   rB   rW   ))r   r   r   tuple[tuple[str, ...], ...]BIDIRECTIONAL_GROUPS))skcs)ukrubgbe)msid)nodasvLANGUAGE_EQUIVALENCESc                 C  s   | S Nr<   )xr<   r<   r=   <lambda>   s    rh   groups	normalizeCallable[[str], str]c                   s>   i }| D ]}t  fdd|D }|D ]}|| |< qq|S )zJBuild a lookup: key -> frozenset of all equivalent keys in the same group.c                 3  s    | ]} |V  qd S rf   r<   ).0nrj   r<   r=   	<genexpr>   s    z%_build_group_index.<locals>.<genexpr>)	frozenset)ri   rj   r4   groupnormednamer<   rn   r=   _build_group_index   s   rt   _LANGUAGE_EQUIVexpectedr:   detectedboolc                 C  s&   | |krdS t | }|duo||v S )a  Check whether *detected* is an acceptable language for *expected*.

    Returns ``True`` when *expected* and *detected* are the same ISO 639-1
    code, or belong to the same equivalence group in
    :data:`LANGUAGE_EQUIVALENCES`.

    :param expected: Expected ISO 639-1 language code.
    :param detected: Detected ISO 639-1 language code.
    :returns: ``True`` if the languages are equivalent.
    TN)ru   r8   )rv   rw   rq   r<   r<   r=   is_language_equivalent   s   
ry   _NORMALIZED_SUPERSETSc                 c  s    | ]	}t |p	|V  qd S rf   r   )rl   sr<   r<   r=   ro   	  s    ro   c                 C  s   t | p| S rf   r   )rm   r<   r<   r=   rh     s    rn   _NORMALIZED_BIDIR
str | Nonec                 C  st   | du r|du S |du rdS t | p|  }t |p| }||kr$dS |tv r0|t| v r0dS |tv o9|t| v S )a  Check whether *detected* is an acceptable answer for *expected*.

    Acceptable means:

    1. Exact match (after normalization), OR
    2. Both belong to the same bidirectional byte-order group, OR
    3. *detected* is a known superset of *expected*.

    :param expected: The expected encoding name, or ``None`` for binary files.
    :param detected: The detected encoding name, or ``None``.
    :returns: ``True`` if the detection is acceptable.
    NFT)r   lowerr|   rz   )rv   rw   norm_expnorm_detr<   r<   r=   
is_correct  s   
r   textc                 C  s    t d| }ddd |D S )z4NFKD-normalize *text* and strip all combining marks.NFKD c                 s  s    | ]
}t |s|V  qd S rf   )unicodedata	combining)rl   cr<   r<   r=   ro   8      z#_strip_combining.<locals>.<genexpr>)r   rj   join)r   nfkdr<   r<   r=   _strip_combining5  s   r   )   ¤   €)r   r   zfrozenset[tuple[str, str]]_EQUIVALENT_SYMBOL_PAIRSabc                 C  s,   | |krdS | |ft v rdS t| t|kS )u   Return True if characters *a* and *b* are functionally equivalent.

    Equivalent means:
    - Same character, OR
    - Same base letter after stripping combining marks, OR
    - An explicitly listed symbol equivalence (e.g. ¤ ↔ €)
    T)r   r   )r   r   r<   r<   r=   _chars_equivalentE  s
   r   databytesc              	   C  s   |du r|du S |du rdS t |p| }t |p| }||kr$dS z| |}| |}W n ttfy<   Y dS w ||krCdS t|t|krMdS tdd t||ddD S )u  Check whether *detected* produces functionally identical text to *expected*.

    Returns ``True`` when:

    1. *detected* is not ``None`` and both encoding names normalize to the same
       codec, OR
    2. Decoding *data* with both encodings yields identical strings, OR
    3. Every differing character pair is functionally equivalent: same base
       letter after stripping combining marks, or an explicitly listed symbol
       equivalence (e.g. ¤ ↔ €).

    Returns ``False`` if *detected* is ``None``, either encoding is unknown,
    or either encoding cannot decode *data*.

    :param data: The raw byte data that was detected.
    :param expected: The expected encoding name, or ``None`` for binary files.
    :param detected: The detected encoding name, or ``None``.
    :returns: ``True`` if decoding with *detected* yields functionally identical
        text to decoding with *expected*.
    NFTc                 s  s    | ]
\}}t ||V  qd S rf   )r   )rl   r   r   r<   r<   r=   ro     r   z*is_equivalent_detection.<locals>.<genexpr>)strict)r   r~   decodeUnicodeDecodeErrorLookupErrorlenallzip)r   rv   rw   r   r   text_exptext_detr<   r<   r=   is_equivalent_detectionU  s&   
r   )r4   r   r5   r2   r6   r   )r4   r   r6   r   )ri   rX   rj   rk   r6   r(   )rv   r:   rw   r:   r6   rx   )rv   r}   rw   r}   r6   rx   )r   r:   r6   r:   )r   r:   r   r:   r6   rx   )r   r   rv   r}   rw   r}   r6   rx   )%__doc__
__future__r   r   collections.abcr   chardet.pipeliner   chardet.registryr   rp   r)   __annotations__r3   r>   rA   apply_legacy_renamerV   rW   rY   re   rt   ru   ry   rz   items_subset
_supersets_key_normedr8   r|   r   r   r   r   r   r<   r<   r<   r=   <module>   sF   




	








3

	
 !"#$
(


#
