
    є j48                     |    d dl Z d dlmZmZmZmZ ddlmZmZ ddl	m
Z
mZ ddlmZmZmZmZmZ  G d d	          ZdS )
    N)AnyCallableOptionalUnion   )FontFontDescriptor)DictionaryObjectTextStringObject   )OrientationNotFoundErrorcrlf_space_checkget_display_strget_text_operandsmultc                   B   e Zd ZdZd)dZ	 	 	 	 d*deedf         deee	e	e	e	e	gdf                  d	ee
eef                  d
ee
eef                  ddf
dZdedefdZdedee	         ddfdZdeddfdZdedeeeef                  dee         dee         dee         dedeedf         dededeee	e	e	e	e	gdf                  de
eef         deeee
eef         f         fdZd)dZdee	         ddfdZdee	         ddfdZdee	         ddfdZdee	         ddfdZdee	         ddfd Zdee	         ddfd!Zdee	         ddfd"Z dee	         ddfd#Z!dee	         ddfd$Z"dee	         defd%Z#dee	         defd&Z$dee	         defd'Z%dee	         defd(Z&dS )+TextExtractiona  
    A class to handle PDF text extraction operations.

    This class encapsulates all the state and operations needed for extracting
    text from PDF content streams, replacing the nested functions and nonlocal
    variables in the original implementation.
    returnNc                 L   i | _         g d| _        g d| _        g | _        g d| _        g d| _        g d| _        g d| _        d| _        d| _	        d| _
        ddd| _        d| _        d| _        d| _        d| _        d| _        d | _        t%          d	d
dt'                                | _        d| _        d | _        i | _        i | _        | j        | j        | j        | j        | j        | j        | j        | j         | j!        | j"        | j#        | j$        | j%        d| _&        d S )N      ?        r   r   r   r   r   g     @@r   )
str_widths
str_heightg      (@ FNotInitializedUnknowncharmap)namesub_typeencodingfont_descriptorr   Z      i  )s   BTs   ET   q   Qs   cms   Tzs   Tws   TLs   Tf   Td   Tm   T*   Tj)'_font_width_maps	cm_matrix	tm_matrixcm_stackcm_prevtm_prevmemo_cmmemo_tm
char_scalespace_scale_space_width_actual_str_sizeTL	font_sizetextoutputrtl_dirfont_resourcer   r	   fontorientationsvisitor_textfont_resourcesfonts
_handle_bt
_handle_et_handle_save_graphics_state_handle_restore_graphics_state
_handle_cm
_handle_tz
_handle_tw
_handle_tl
_handle_tf
_handle_td
_handle_tm_handle_t_star_handle_tj_operationoperation_handlersselfs    zC:\Users\Terasoftware\OneDrive\Desktop\faahhh\fyndo\fyndo\venv\Lib\site-packages\pypdf/_text_extraction/_text_extractor.py__init__zTextExtraction.__init__/   sm   PR 'E&D&D&D&D&D  	 %C$B$B$B$B$B %C$B$B$B$B$B#(3
 3
  	"9=#*,,	  	 .?QU;=&(
 ??25???????&,#
 #
    r#   r?   .r@   rA   rB   c                 p    || _         || _        |pi | _        |pi | _        d| _        d| _        d| _        dS )z4Initialize the extractor with extraction parameters.r   FN)r?   r@   rA   rB   r:   r;   r<   )rR   r?   r@   rA   rB   s        rS   initialize_extractionz$TextExtraction.initialize_extractions   sG     )(,2[b
 	rU   r   c                     |dz  S )Ni   rR   r   s     rS   compute_str_widthsz!TextExtraction.compute_str_widths   s    D  rU   operatoroperandsc                     || j         v r5| j         |         } ||          }|dv r|                     |pd           d S d S d S )N>   r*   r(   r+   r)   r   )rP   _post_process_text_operation)rR   r\   r]   handlerr   s        rS   process_operationz TextExtraction.process_operation   sw    t.. 	E-h7G **J 77 E11*2CDDDDD	E 	E
E ErU   c                    	 t          | j        | j        | j        f| j        | j        f| j        | j        f| j        | j	        | j
        | j        | j        ||                     | j        | j        z            | j        d                   \  | _        | _
        | _        | _        | j        dk    r>| j                                        | _        | j                                        | _        dS dS # t"          $ r Y dS w xY w)z>Handle common post-processing for text positioning operations.r   r   N)r   r:   r0   r1   r-   r.   r2   r3   r=   r?   r;   r9   r@   r[   r6   r7   copyr   rZ   s     rS   r_   z+TextExtraction._post_process_text_operation   s   	AQ	t|,0t|,"!!''9J(JKK%l3B B>DIt{DL$, yB 5#~2244#~22445 5 ( 	 	 	DD	s   C'C- -
C;:C;r:   r-   r.   r=   r>   r9   r<   actual_str_sizec                     t          ||||          \  }}|r!||z  }t          fd|D                       }nt          |||||||	|
	  	        \  }}	}|dxx         ||z  z  cc<   ||d<   ||	|fS )Nc                 T    g | ]$}|d k    rj         n                    |          %S ) )space_width
text_width).0xr>   s     rS   
<listcomp>z-TextExtraction._handle_tj.<locals>.<listcomp>   s8    kkkXY18St//QRASASkkkrU   r   r   )r   sumr   )rR   r:   r]   r-   r.   r=   r>   r?   r9   r<   r@   rd   text_operandsis_str_operandsfont_widthss         `        rS   
_handle_tjzTextExtraction._handle_tj   s     *;iD,*
 *
&  	M!Dkkkk]jkkkllKK)8
* 
*&D'; 	%%%y)@@%%%(1%Wo--rU   c                 (   | xj         | j        z  c_         | j        2|                     | j        | j        | j        | j        | j                   d| _        | j                                        | _        | j	                                        | _        dS )z=Flush accumulated text to output and call visitor if present.Nr   )
r;   r:   r@   r2   r3   r=   r9   r-   rc   r.   rQ   s    rS   _flush_textzTextExtraction._flush_text   s~    ty  	idit|TEWY]Yghhh	~**,,~**,,rU   c                 @    g d| _         |                                  dS )z6Handle BT (Begin Text) operation - Table 5.4 page 405.r   N)r.   rs   rR   r]   s     rS   rC   zTextExtraction._handle_bt   s'    777rU   c                 .    |                                   dS )z4Handle ET (End Text) operation - Table 5.4 page 405.N)rs   ru   s     rS   rD   zTextExtraction._handle_et   s    rU   c           	          | j                             | j        | j        | j        | j        | j        | j        | j        f           dS )z>Handle q (Save graphics state) operation - Table 4.7 page 219.N)	r/   appendr-   r=   r>   r9   r4   r5   r8   ru   s     rS   rE   z*TextExtraction._handle_save_graphics_state   sP    "	 
	
 
	
 
	
 
	
 
	
rU   c                     	 | j                                         \  | _        | _        | _        | _        | _        | _        | _        dS # t          $ r g d| _        Y dS w xY w)zAHandle Q (Restore graphics state) operation - Table 4.7 page 219.r   N)
r/   popr-   r=   r>   r9   r4   r5   r8   	Exceptionru   s     rS   rF   z-TextExtraction._handle_restore_graphics_state   sv    	< !!##"	  	< 	< 	<;;;DNNNN	<s   AA AAc                    | xj         | j        z  c_         | j        2|                     | j        | j        | j        | j        | j                   d| _        	 t          d |dd         D             | j                  | _        n# t          $ r g d| _        Y nw xY w| j        
                                | _        | j        
                                | _        dS )zAHandle cm (Modify current matrix) operation - Table 4.7 page 219.Nr   c                 ,    g | ]}t          |          S rY   floatrj   operands     rS   rl   z-TextExtraction._handle_cm.<locals>.<listcomp>  s    "N"N"Ng5>>"N"N"NrU      r   )r;   r:   r@   r2   r3   r=   r9   r   r-   r{   rc   r.   ru   s     rS   rG   zTextExtraction._handle_cm   s    ty  	idit|TEWY]Yghhh		<!"N"N"1""N"N"NPTP^__DNN 	< 	< 	<;;;DNNN	<~**,,~**,,s   ,B BBc                 H    |rt          |d                   dz  nd| _        dS )zGHandle Tz (Set horizontal text scaling) operation - Table 5.2 page 398.r   d   r   N)r   r4   ru   s     rS   rH   zTextExtraction._handle_tz
  s(    6>G%,,s22CrU   c                 H    dt          |r|d         nd          z   | _        dS )z<Handle Tw (Set word spacing) operation - Table 5.2 page 398.r   r   r   N)r   r5   ru   s     rS   rI   zTextExtraction._handle_tw  s*    h'Gx{{C!H!HHrU   c                     t          j        | j        d         dz  | j        d         dz  z             }t          |r|d         nd          | j        z  |z  | _        dS )z<Handle TL (Set Text Leading) operation - Table 5.2 page 398.r   r   r   N)mathsqrtr.   r   r9   r8   )rR   r]   scale_xs      rS   rJ   zTextExtraction._handle_tl  sZ    )DN1-2T^A5F!5KKLLx8S99DNJWTrU   c           
         | j         dk    rN| xj        | j         z  c_        | j        2|                     | j         | j        | j        | j        | j                   d| _         | j                                        | _        | j	                                        | _        	 | j
        |d                  | _        | j        |d                  | _        nd# t          $ rW d| _        t                      }t          ddt                               t%          d          d          |i           | _        Y nw xY w| j        j        d	z  | _        	 t+          |d
                   | _        dS # t,          $ r Y dS w xY w)z9Handle Tf (Set font size) operation - Table 5.2 page 398.r   Nr   r         u   �)rh   r!   r"   character_mapr   r   )r:   r;   r@   r2   r3   r=   r9   r-   rc   r.   rA   rB   r>   KeyErrorr	   r   dictfromkeysrangerh   r6   r   r{   )rR   r]   r"   s      rS   rK   zTextExtraction._handle_tf  ss   9? 	mKK49$KK  m!!$)T\4<I[]a]klll	~**,,~**,,	!%!4Xa[!AD
8A;/DII 		 		 		!%D,..OuSzz599 /   DIII		 !I1A5	"8A;//DNNN 	 	 	DD	s%   0C AD0/D0E$ $
E21E2c                 ~   t          |d                   t          |d                   }}| j        dxx         || j        d         z  || j        d         z  z   z  cc<   | j        dxx         || j        d         z  || j        d         z  z   z  cc<   |                     | j        d                   }d| j        d<   |S )	z>Handle Td (Move text position) operation - Table 5.5 page 406.r   r      r         r   r   )r   r.   r[   r7   )rR   r]   txtyr   s        rS   rL   zTextExtraction._handle_td4  s    
 x{##U8A;%7%7BqR$."33b4>!;L6LLLqR$."33b4>!;L6LLL,,T-B<-PQQ
.1l+rU   c                     d |dd         D             | _         |                     | j        d                   }d| j        d<   |S )z;Handle Tm (Set text matrix) operation - Table 5.5 page 406.c                 ,    g | ]}t          |          S rY   r~   r   s     rS   rl   z-TextExtraction._handle_tm.<locals>.<listcomp>B  s    EEEW%..EEErU   Nr   r   r   )r.   r[   r7   rR   r]   r   s      rS   rM   zTextExtraction._handle_tm@  sM    EE!EEE,,T-B<-PQQ
.1l+rU   c                     | j         dxx         | j        | j         d         z  z  cc<   | j         dxx         | j        | j         d         z  z  cc<   |                     | j        d                   }d| j        d<   |S )z=Handle T* (Move to next line) operation - Table 5.5 page 406.r   r   r   r   r   r   )r.   r8   r[   r7   r   s      rS   rN   zTextExtraction._handle_t_starG  s    qTWt~a'888qTWt~a'888,,T-B<-PQQ
.1l+rU   c                     |                      | j        || j        | j        | j        | j        | j        | j        | j        | j	        | j
                  \  | _        | _        | _
        dS )z5Handle Tj (Show text) operation - Table 5.5 page 406.r   )rq   r:   r-   r.   r=   r>   r?   r9   r<   r@   r7   ru   s     rS   rO   z#TextExtraction._handle_tj_operationO  sc    9=INNINL!:
 :
6	4<!6 srU   )r   N)r#   NNN)'__name__
__module____qualname____doc__rT   tupleintr   r   r   r   strr
   r   rW   r   r[   byteslistra   r_   r   r   boolrq   rs   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rY   rU   rS   r   r   &   sB        B
 B
 B
 B
L ):LP@D+/ CHo xc3S(A4(GHI !c+;&;!<=	
 S$Y( 
   $!U !u ! ! ! !E% E49 E E E E Eu     .".". uS"2234". ;	".
 ;".   01". ". CHo". ". ". xc3S(A4(GHI". c5j)". 
sD$sEz**	+". ". ". ".H- - - -49     
49     
DI 
$ 
 
 
 
<tCy <T < < < <-49 - - - - -H49 H H H H HI49 I I I I IU49 U U U U U
49     :
49 
 
 
 
 
49     tCy U    T#Y 5      rU   r   )r   typingr   r   r   r   _fontr   r	   genericr
   r   r   r   r   r   r   r   r   rY   rU   rS   <module>r      s   <  1 1 1 1 1 1 1 1 1 1 1 1 ( ( ( ( ( ( ( ( 8 8 8 8 8 8 8 8 b b b b b b b b b b b b b bx x x x x x x x x xrU   