o
    jm                     @  s   d Z ddlmZ ddlZddlmZmZ ddlmZ dZ	e
dejZe
dejZe
d	ejZe
d
ejZdddZdddZdddZdS )z<Stage 1b: charset declaration extraction (HTML/XML/PEP 263).    )annotationsN)DETERMINISTIC_CONFIDENCEDetectionResult)lookup_encodingi   s*   <\?xml[^>]+encoding\s*=\s*['"]([^'"]+)['"]s,   <meta[^>]+charset\s*=\s*['"]?\s*([^\s'">;]+)s6   <meta[^>]+content\s*=\s*['"][^'"]*charset=([^\s'">;]+)s&   ^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)databytesreturnDetectionResult | Nonec              	   C  s   d| dd vr
dS d | dddd }t|}|rLz|dd }W n ttfy6   Y dS w t	|}|durLt
| |rLt|tddd	S dS )
aB  Check the first two lines of *data* for a PEP 263 encoding declaration.

    PEP 263 declarations (e.g. ``# -*- coding: utf-8 -*-``) are only valid
    on line 1 or line 2 of a Python source file.

    :param data: The raw byte data to scan.
    :returns: A :class:`DetectionResult` with confidence 0.95, or ``None``.
       #N      
      asciiztext/x-pythonencoding
confidencelanguage	mime_type)joinsplit
_PEP263_REsearchgroupdecodestripUnicodeDecodeError
ValueErrorr   _validate_bytesr   r   )r   first_two_linesmatchraw_namer    r"   ]/var/www/html/fyndo/pharma/fyndo/venv/lib/python3.10/site-packages/chardet/pipeline/markup.py_detect_pep263   s&   

r$   c              
   C  s   | sdS | dt  }tttfD ]@}||}|rOz|dd }W n tt	fy/   Y qw t
|}|durOt| |rO|tu rCdnd}t|td|d  S qt| S )a  Scan the first bytes of *data* for a charset declaration.

    Checks for:

    1. ``<?xml ... encoding="..."?>``
    2. ``<meta charset="...">``
    3. ``<meta http-equiv="Content-Type" content="...; charset=...">``
    4. PEP 263 ``# -*- coding: ... -*-`` (first two lines only)

    :param data: The raw byte data to scan.
    :returns: A :class:`DetectionResult` with confidence 0.95, or ``None``.
    Nr   r   ztext/xmlz	text/htmlr   )_SCAN_LIMIT_XML_ENCODING_RE_HTML5_CHARSET_RE_HTML4_CONTENT_TYPE_REr   r   r   r   r   r   r   r   r   r   r$   )r   headpatternr    r!   r   r   r"   r"   r#   detect_markup_charset:   s,   

r+   r   strboolc              
   C  s4   z| dt  | W dS  tttfy   Y dS w )zCheck that *data* can be decoded under *encoding* without errors.

    Only validates the first ``_SCAN_LIMIT`` bytes to avoid decoding a
    full 200 kB input just to verify a charset declaration found in the
    header.
    NFT)r%   r   r   LookupErrorr   )r   r   r"   r"   r#   r   `   s   r   )r   r   r   r	   )r   r   r   r,   r   r-   )__doc__
__future__r   rechardet.pipeliner   r   chardet.registryr   r%   compile
IGNORECASEr&   r'   r(   	MULTILINEr   r$   r+   r   r"   r"   r"   r#   <module>   s&    

&