o
    jC!                     @   s`  U d Z ddlZddlmZmZmZmZ ddlmZ ddl	m
Z
mZmZ daeed< daeed	< g aee ed
< dZeed< G dd deZ			d4deeedf deeedf deeee df deeeee f fddZdee dee dee fddZdee defddZdedeee ee f deee ee f deee ee f dee
 d eed!f d"ed#ed$eeeeeeegdf  d%ed&ed'edeeeee ee f fd(d)Zd*eeeef  d+ee d,ee d-ed eed!f deeef fd.d/Zded+ee d,ee dee
 d-ed0ed#ed1ed$eeeeeeegdf  deeeef fd2d3ZdS )5ze
Code related to text extraction.

Some parts are still in _page.py. In doubt, they will stay there.
    N)AnyCallableOptionalUnion   )Font)DictionaryObjectTextStringObjectencode_pdfdocencodingCUSTOM_RTL_MINCUSTOM_RTL_MAXCUSTOM_RTL_SPECIAL_CHARS    LAYOUT_NEW_BT_GROUP_SPACE_WIDTHSc                   @   s   e Zd ZdS )OrientationNotFoundErrorN)__name__
__module____qualname__ r   r   e/var/www/html/fyndo/pharma/fyndo/venv/lib/python3.10/site-packages/pypdf/_text_extraction/__init__.pyr      s    r   _min_maxspecialsreturnc                 C   sv   t | tr| an	t | trt| at |tr|an	t |tr"t|at |tr/dd |D ant |tr6|atttfS )a  
    Change the Right-To-Left and special characters custom parameters.

    Args:
        _min: The new minimum value for the range of custom characters that
            will be written right to left.
            If set to ``None``, the value will not be changed.
            If set to an integer or string, it will be converted to its ASCII code.
            The default value is -1, which sets no additional range to be converted.
        _max: The new maximum value for the range of custom characters that will
            be written right to left.
            If set to ``None``, the value will not be changed.
            If set to an integer or string, it will be converted to its ASCII code.
            The default value is -1, which sets no additional range to be converted.
        specials: The new list of special characters to be inserted in the
            current insertion order.
            If set to ``None``, the current value will not be changed.
            If set to a string, it will be converted to a list of ASCII codes.
            The default value is an empty list.

    Returns:
        A tuple containing the new values for ``CUSTOM_RTL_MIN``,
        ``CUSTOM_RTL_MAX``, and ``CUSTOM_RTL_SPECIAL_CHARS``.

    c                 S   s   g | ]}t |qS r   )ord.0xr   r   r   
<listcomp>?   s    z"set_custom_rtl.<locals>.<listcomp>)
isinstanceintr   strr   r   r   list)r   r   r   r   r   r   set_custom_rtl   s   






r$   mnc              	   C   s   | d |d  | d |d   | d |d  | d |d   | d |d  | d |d   | d |d  | d |d   | d |d  | d |d   |d  | d |d  | d |d   |d  gS )Nr      r         r   r   )r%   r&   r   r   r   multE   s   &&r*   c                 C   s4   | d dkrdS | d dk rdS | d dkrdS dS )	Nr(   gư>r   gư   r'   Z     r   )r%   r   r   r   orientP   s   r.   text	cmtm_prevcmtm_matrix	memo_cmtmfont_resourceorientations.output	font_sizevisitor_text
str_widths
spacewidth
str_heightc                 C   s  |d }|d }|d }|d }|d }|d }t ||}t ||}t|}|d |d  }|d |d  }t|d d |d d  }t|d d |d d  }t|d d |d d  }|}||vrkt|dv rt|}|}n|dv r||}|}zDt|d	t|| ||  kr||  d
 dkr|| d 7 }|d ur|| d |||| d} n||
|	 | kr||  d
 dkr| d7 } W n	 ty   Y nw | }| }| |||fS )Nr   r'   r)   r   r   r(   )r   r+   )r,   r-   g?r   
  )	r*   r.   mathsqrtr   absmin	Exceptioncopy)r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   cm_prevtm_prev	cm_matrix	tm_matrixmemo_cmmemo_tmm_prevr%   orientationdelta_xdelta_yscale_prev_xscale_prev_yscale_ymoved_heightmoved_widthr   r   r   crlf_space_checkZ   s^   

rS   operandsrF   rG   fontc           
         s   d}d}t ||}t|}||v rst| dkrst| d tr(| d }d}||fS d}t| d tr7t| d n| d }	t jtrgz|	 jd}W ||fS  tyf   |	 jdkr]dndd}Y ||fS w d	 fdd	|	D }||fS )
Nr<   Fr   Tsurrogatepasscharmapz	utf-16-bec                    s.   g | ]}| j v r j | nt|f qS r   )encodingbytesdecoder   rU   r   r   r      s   . z%get_text_operands.<locals>.<listcomp>)
r*   r.   lenr    r"   r
   rX   rZ   rB   join)
rT   rF   rG   rU   r4   tis_str_operandsr%   rK   ttr   r[   r   get_text_operands   s:   
	ra   text_operandsrtl_dirc	                    sx  d}	 fdd|D D ]}
t |
dkrt|
}nd}|dksCd|  kr(dksCn d|  kr3d	ksCn d
|  kr>dksCn |tv rN|rI|
|  n| |
 } nZd|  krXdks{n d|  krcdks{n d|  krndks{n t|  krytkrn n|sd}|d ur|| |||| d} |
|  } n|rd}|d ur|| |||| d} | |
 } |	|
dkr jn |
7 }	q| ||	fS )Ng        c                    s   g | ]	} j ||qS r   )character_mapgetr   r[   r   r   r      s    z#get_display_str.<locals>.<listcomp>r'   /   :   @   i    io   i   i!  i  i  i  i  ip  i  Tr<   Fr=   )r\   r   r   r   r   space_width
text_width)r/   rF   rG   r3   rU   rb   r6   rc   r7   widthsr   xxr   r[   r   get_display_str   s:   


rm   )NNN) __doc__r>   typingr   r   r   r   _fontr   genericr   r	   r
   r   r!   __annotations__r   r   r#   r   rB   r   r"   tupler$   floatr*   r.   rS   boolra   rm   r   r   r   r   <module>   s    
".

	

A


(	
