
    f jA                     @   U d Z ddlmZ ddlmZmZ ddlmZ dede	e
eef         fdZdede	e
eef         fdZdede	e
eef         fd	Zdede	e
eef         fd
Zdede	e
eef         fdZdede	e
eef         fdZdede	e
eef         fdZdede	e
eef         fdZdede	e
eef         fdZeeeeeeeedZeeeege	e
eef         f         f         ed<   dededede	e
eef         dz  fdZdededede
fdZ	 ddededededz  de
f
dZdedededefdZdS )a  Stage 2b: Multi-byte structural probing.

Computes how well byte patterns in the data match the expected multi-byte
structure for a given encoding.  Used after byte-validity filtering (Stage 2a)
to further rank multi-byte encoding candidates.

Note: ``from __future__ import annotations`` is intentionally omitted because
this module is compiled with mypyc, which does not support PEP 563 string
annotations.
    )Callable)
HIGH_BYTESPipelineContext)EncodingInfodatareturnc                    d}d}d}t                      }d}t          |           }||k     r| |         }d|cxk    rdk    sn d|cxk    rdk    rqn nn|dz  }|dz   |k     rZ| |dz            }d|cxk    rdk    sn d	|cxk    rd
k    r3n n0|dz  }|                    |           |dz  }|dk    r|dz  }|dz  }|dz  }n|dz  }||k     |dk    r||z  nd}	|	|t          |          fS )zSingle-pass Shift_JIS structural analysis.

    Lead bytes: 0x81-0x9F, 0xE0-0xEF
    Trail bytes: 0x40-0x7E, 0x80-0xFC

    Returns (pair_ratio, mb_bytes, lead_diversity).
    r                  @   ~                       setlenadd
r   
lead_countvalid_countmbleadsilengthbtrailratios
             oC:\Users\Terasoftware\OneDrive\Desktop\faahhh\fyndo\fyndo\venv\Lib\site-packages\chardet/pipeline/structural.py_analyze_shift_jisr%      s    JK	
BeeE	AYYF
f* GA 	 	 	 	 	 	 	 	41 	 	 	 	 	 	 	 	 	!OJ1uv~ 
QUE    T    tu         1$KIIaLLL!GBt|  aFAFAAFA# f* $ )3Q?K*$$CE"c%jj      c                    d}d}d}t                      }d}t          |           }||k     r| |         }d|cxk    rdk    sn d|cxk    rdk    rqn nn|dz  }|dz   |k     rZ| |dz            }d|cxk    rdk    sn d	|cxk    rdk    r3n n0|dz  }|                    |           |dz  }|d
k    r|dz  }|dz  }|dz  }n|dz  }||k     |dk    r||z  nd}	|	|t          |          fS )aB  Single-pass CP932 structural analysis.

    Lead bytes: 0x81-0x9F, 0xE0-0xFC
    Trail bytes: 0x40-0x7E, 0x80-0xFC

    Extends Shift_JIS by raising the lead byte ceiling from 0xEF to 0xFC,
    covering IBM vendor-defined characters (NEC-selected, IBM extensions).

    Returns (pair_ratio, mb_bytes, lead_diversity).
    r   r
   r   r   r   r   r   r   r   r   r   r   r   r   s
             r$   _analyze_cp932r(   E   s    JK	
BeeE	AYYF
f* GA 	 	 	 	 	 	 	 	41 	 	 	 	 	 	 	 	 	!OJ1uv~ 
QUE    T    tu         1$KIIaLLL!GBt|  aFAFAAFA# f* $ )3Q?K*$$CE"c%jj  r&   c                    d}d}d}t                      }d}t          |           }||k     r@| |         }|dk    rR|dz  }|dz   |k     r>d| |dz            cxk    rdk    r(n n%|dz  }|                    |           |dz  }|dz  }a|dz  }n|dk    rk|dz  }|dz   |k     rWd| |dz            cxk    rdk    rAn n>d| |dz            cxk    rdk    r(n n%|dz  }|                    |           |d	z  }|d	z  }|dz  }nhd|cxk    rdk    rVn nS|dz  }|dz   |k     r?d| |dz            cxk    rdk    r)n n&|dz  }|                    |           |dz  }|dz  }5|dz  }n|dz  }||k     @|dk    r||z  nd
}||t          |          fS )zSingle-pass EUC-JP structural analysis.

    Two-byte: Lead 0xA1-0xFE, Trail 0xA1-0xFE
    SS2 (half-width katakana): 0x8E + 0xA1-0xDF
    SS3 (JIS X 0212): 0x8F + 0xA1-0xFE + 0xA1-0xFE

    Returns (pair_ratio, mb_bytes, lead_diversity).
    r      r         r            r   r   	r   r   r   r   r   r   r    r!   r#   s	            r$   _analyze_euc_jpr1   n   s    JK	
BeeE	AYYF
f* $G9 "	!OJ1uv~ $$q1u+         q 		!aQFAA$Y 	!OJA	DQK	 	 	 	+/	 	 	 	 	 DQK	 	 	 	 ,0	 	 	 	 	
 q 		!aQFAAQ 
	 
	 
	 
	$ 
	 
	 
	 
	 
	!OJ1uv~ $$q1u+         q 		!aQFAAFAI f* $J )3Q?K*$$CE"c%jj  r&   c                    d}d}d}t                      }d}t          |           }||k     ru| |         }d|cxk    rdk    rUn nR|dz  }|dz   |k     r>d| |dz            cxk    rdk    r(n n%|dz  }|                    |           |dz  }|dz  }j|dz  }n|dz  }||k     u|dk    r||z  nd}||t          |          fS )zSingle-pass EUC-KR structural analysis.

    Lead 0xA1-0xFE; Trail 0xA1-0xFE

    Returns (pair_ratio, mb_bytes, lead_diversity).
    r   r+   r.   r   r   r   r   r0   s	            r$   _analyze_euc_krr3      sF    JK	
BeeE	AYYF
f* G1 
	 
	 
	 
	 
	 
	 
	 
	 
	!OJ1uv~ $$q1u+         q 		!aQFAAFA f*  )3Q?K*$$CE"c%jj  r&   c                    d}d}d}t                      }d}t          |           }||k     r| |         }d|cxk    rdk    sn d|cxk    rdk    rn n}|dz  }|dz   |k     ri| |dz            }d|cxk    rdk    s!n d	|cxk    rd
k    sn d|cxk    rdk    r3n n0|dz  }|                    |           |dz  }|dk    r|dz  }|dz  }|dz  }n|dz  }||k     |dk    r||z  nd}	|	|t          |          fS )at  Single-pass CP949 (Unified Hangul Code) structural analysis.

    Lead bytes: 0x81-0xC8, 0xCA-0xFD
    Trail bytes: 0x41-0x5A, 0x61-0x7A, 0x81-0xFE

    Extends EUC-KR by lowering the lead byte floor from 0xA1 to 0x81 and
    adding ASCII letter trail ranges plus 0x81-0xA0.  0xC9 is not a valid
    UHC lead byte.

    Returns (pair_ratio, mb_bytes, lead_diversity).
    r   r
            r   A   Z   a   z   r.   r   r   r   r   r   s
             r$   _analyze_cp949r<      s    JK	
BeeE	AYYF
f* GA 	 	 	 	 	 	 	 	41 	 	 	 	 	 	 	 	 	!OJ1uv~ QUU   &*      )-        *.    
  1$KIIaLLL!GBt|  aFAFAAFA+ f* , )3Q?K*$$CE"c%jj  r&   c                    d}d}d}t                      }d}t          |           }||k     r| |         }d|cxk    rdk    rn n|dz  }|dz   |k     rpd| |dz            cxk    rdk    rZn nWd| |dz            cxk    rdk    rAn n>d| |dz            cxk    rdk    r(n n%|dz  }|                    |           |dz  }|d	z  }d
|cxk    rdk    rJn nG|dz   |k     r>d
| |dz            cxk    rdk    r(n n%|dz  }|                    |           |dz  }|dz  }|dz  }n|dz  }||k     |dk    r||z  nd}||t          |          fS )a  Single-pass GB18030 / GB2312 structural analysis.

    Only counts strict GB2312 2-byte pairs (lead 0xA1-0xF7, trail 0xA1-0xFE)
    and GB18030 4-byte sequences.  The broader GBK extension range
    (lead 0x81-0xFE, trail 0x40-0x7E / 0x80-0xFE) is intentionally excluded
    because it is so permissive that unrelated single-byte data (EBCDIC, DOS
    codepages, etc.) can score 1.0, leading to false positives.

    Returns (pair_ratio, mb_bytes, lead_diversity).
    r   r
   r.   r   r/   0   9   r      r+      r   r   r0   s	            r$   _analyze_gb18030rB      sz    JK	
BeeE	AYYF
f* G1 	 	 	 	 	 	 	 	 	!OJ A
DQK
 
 
 
+/
 
 
 
 
 DQK
 
 
 
 ,0
 
 
 
 
 DQK	
 
 
 
 ,0	
 
 
 
 
 q 		!aQq    D     QUV^ QU    t     q 		!aQFAAFA3 f* 4 )3Q?K*$$CE"c%jj  r&   c                    d}d}d}t                      }d}t          |           }||k     r| |         }d|cxk    rdk    rqn nn|dz  }|dz   |k     rZ| |dz            }d|cxk    rdk    sn d|cxk    rdk    r3n n0|dz  }|                    |           |dz  }|dk    r|dz  }|d	z  }|dz  }n|dz  }||k     |dk    r||z  nd
}	|	|t          |          fS )zSingle-pass Big5 structural analysis.

    Lead 0xA1-0xF9; Trail 0x40-0x7E, 0xA1-0xFE

    Returns (pair_ratio, mb_bytes, lead_diversity).
    r   r+      r   r   r   r.   r   r   r   r   r   s
             r$   _analyze_big5rE   '  s    JK	
BeeE	AYYF
f* G1 	 	 	 	 	 	 	 	 	!OJ1uv~ 
QUE    T    tu         1$KIIaLLL!GBt|  aFAFAAFA# f* $ )3Q?K*$$CE"c%jj  r&   c                    d}d}d}t                      }d}t          |           }||k     r| |         }d|cxk    rdk    rqn nn|dz  }|dz   |k     rZ| |dz            }d|cxk    rdk    sn d|cxk    rdk    r3n n0|dz  }|                    |           |dz  }|dk    r|dz  }|d	z  }|dz  }n|dz  }||k     |dk    r||z  nd
}	|	|t          |          fS )aW  Single-pass Big5-HKSCS structural analysis.

    Lead bytes: 0x87-0xFE
    Trail bytes: 0x40-0x7E, 0xA1-0xFE

    Extends Big5 by lowering the lead byte floor from 0xA1 to 0x87 and
    raising the ceiling from 0xF9 to 0xFE.  0x7F and 0x80-0xA0 are not
    valid Big5/HKSCS trail bytes.

    Returns (pair_ratio, mb_bytes, lead_diversity).
    r      r.   r   r   r   r+   r   r   r   r   r   s
             r$   _analyze_big5hkscsrH   L  s    JK	
BeeE	AYYF
f* G1 	 	 	 	 	 	 	 	 	!OJ1uv~ 
QUE    T    tu         1$KIIaLLL!GBt|  aFAFAAFA# f* $ )3Q?K*$$CE"c%jj  r&   c                     d}d}d}t                      }d}t          |           }||k     r| |         }d|cxk    rdk    s!n d|cxk    rdk    sn d|cxk    rdk    rwn nt|dz  }|dz   |k     r`| |dz            }d	|cxk    rd
k    sn d|cxk    rdk    r9n n6|dz  }|                    |           |dk    r|dz  }|dk    r|dz  }|dz  }|dz  }n|dz  }||k     |dk    r||z  nd}	|	|t          |          fS )zSingle-pass Johab structural analysis.

    Lead: 0x84-0xD3, 0xD8-0xDE, 0xE0-0xF9
    Trail: 0x31-0x7E, 0x91-0xFE

    Returns (pair_ratio, mb_bytes, lead_diversity).
    r               r   rD   r   1   r      r.   r   r   r   r   r   s
             r$   _analyze_johabrP   v  s    JK	
BeeE	AYYF
f* GA 	 	 	 	 	 	 	 	41 	 	 	 	 	 	 	 	$! 	 	 	 	t 	 	 	 	 	!OJ1uv~ 
QUE    T    tu         1$KIIaLLL4x  at|  aFAFAAFA# f* $ )3Q?K*$$CE"c%jj  r&   )shift_jis_2004cp932euc_jis_2004euc_krcp949gb18030	big5hkscsjohab
_ANALYZERSnamectxNc                     |j                             |          }||S t                              |          }|dS  ||           }||j         |<   |S )z/Return cached analysis or compute and cache it.N)analysis_cachegetrY   )r   rZ   r[   cachedanalyzerresults         r$   _get_analysisrb     sd     ##D))F ~~d##H tXd^^F%CtMr&   encoding_infoc                 \    | r|j         sdS t          | |j        |          }|dS |d         S )a  Return 0.0--1.0 indicating how well *data* matches the encoding's structure.

    For single-byte encodings, always returns 0.0.  For empty data, always
    returns 0.0.

    :param data: The raw byte data to analyze.
    :param encoding_info: Metadata for the encoding to probe.
    :param ctx: Pipeline context for caching analysis results.
    :returns: A structural fit score between 0.0 and 1.0.
    r   Nr   is_multibyterb   rZ   r   rc   r[   ra   s       r$   compute_structural_scorerh     sF      }1 s4!3S99F s!9r&   non_ascii_countc                     | r|j         sdS t          | |j        |          }|dS |d         }||n7t          |           t          |                     dt
                              z
  }|dk    rdS ||z  S )av  Ratio of non-ASCII bytes that participate in valid multi-byte sequences.

    Genuine CJK text has nearly all non-ASCII bytes paired into valid
    multi-byte sequences (coverage close to 1.0), while Latin text with
    scattered high bytes has many orphan bytes (coverage well below 1.0).

    :param data: The raw byte data to analyze.
    :param encoding_info: Metadata for the encoding to probe.
    :param ctx: Pipeline context for caching analysis results.
    :param non_ascii_count: Pre-computed count of non-ASCII bytes, or ``None``
        to compute from *data*.
    :returns: A coverage ratio between 0.0 and 1.0.
    r   Nr   r   )rf   rb   rZ   r   	translater   )r   rc   r[   ri   ra   mb_bytes	non_asciis          r$   compute_multibyte_byte_coveragern     s    &  }1 s4!3S99F sayH 	?YYT^^D*==>>> 
 A~ sir&   c                 \    | r|j         sdS t          | |j        |          }|dS |d         S )a  Count distinct lead byte values in valid multi-byte pairs.

    Genuine CJK text uses lead bytes from across the encoding's full
    repertoire.  European text falsely matching a CJK structural scorer
    clusters lead bytes in a narrow band.

    :param data: The raw byte data to analyze.
    :param encoding_info: Metadata for the encoding to probe.
    :param ctx: Pipeline context for caching analysis results.
    :returns: The number of distinct lead byte values found.
    r   N   r   re   rg   s       r$   compute_lead_byte_diversityrq     sF      }1 q4!3S99F s!9r&   )N)__doc__collections.abcr   chardet.pipeliner   r   chardet.registryr   bytestuplefloatintr%   r(   r1   r3   r<   rB   rE   rH   rP   rY   dictstr__annotations__rb   rh   rn   rq    r&   r$   <module>r~      sc  	 	 	 % $ $ $ $ $ 8 8 8 8 8 8 8 8 ) ) ) ) ) ) #!
#!
5#s?#! #! #! #!L&!
&!
5#s?&! &! &! &!R7!
7!
5#s?7! 7! 7! 7!t!
!
5#s?! ! ! !@+!
+!
5#s?+! +! +! +!\.!
.!
5#s?.! .! .! .!b"!
"!
5#s?"! "! "! "!J'!
'!
5#s?'! '! '! '!T#!
#!
5#s?#! #! #! #!V )##	D 	D
DhweS#o(>>??@ 	 	 	
!0
5#s?d"   (
 ,3B
   6 #'	$  $ 
$ $  
$  4Z	$ 
 $  $  $  $ N
 ,3B     r&   