
    @ ji9                     :   d dl Z d dlmZ d dlmZmZmZ d dlmZm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d
dlmZ  e j        e          Zerd
dlm Z  d
dl!m"Z" e G d d                      Z# G d de$          Z% G d d          Z&dS )    N)deque)asdict	dataclassfield)TYPE_CHECKINGAnyDictIteratorListOptionalTuple)
NumberTree)PDFPage)	PDFParser)	PDFObjRefresolve1)	PSLiteral   )decode_text)Page)PDFc                   v   e Zd ZU eed<   ee         ed<   ee         ed<   ee         ed<   ee         ed<   ee         ed<   ee         ed<   ee         ed<    ee	          Z	e
eef         ed
<    ee	          Zee         ed<    ee	          Zed          ed<   ded          fdZde
eef         fdZdS )PDFStructElementtyperevisionidlangalt_textactual_texttitlepage_number)default_factory
attributesmcidschildrenreturnc                 *    t          | j                  S Niterr%   selfs    hC:\Users\Terasoftware\OneDrive\Desktop\faahhh\fyndo\fyndo\venv\Lib\site-packages\pdfplumber/structure.py__iter__zPDFStructElement.__iter__$       DM"""    c                 @   t          |           }t          |g          }|r||                                }t          |                                          D ]%}||         ||         g k    s||         i k    r||= &d|v r|                    |d                    |||S )z'Return a compacted dict representation.Nr%   )r   r   popleftlistkeysextend)r,   rdelks        r-   to_dictzPDFStructElement.to_dict'   s    4LL1#JJ 	)B"''))__  a5 BqERK 2a5B; 1R )J(((  	) r0   N)__name__
__module____qualname__str__annotations__r   intr   dictr#   r	   r   r3   r$   r   r%   r
   r.   r:    r0   r-   r   r      s<        
IIIsm
3-sm#C=#!&t!<!<!<JS#X<<<uT222E49222).t)D)D)DHd%&DDD#(#56 # # # #c3h      r0   r   c                       e Zd ZdS )StructTreeMissingN)r;   r<   r=   rB   r0   r-   rD   rD   5   s        Dr0   rD   c                   <   e Zd ZU dZee         ed<   dddded         fdZdee	e
f         d	ee         d
ee	e
f         fdZde
d
eee         ee
         f         fdZdee
         d
dfdZdee	e
f         d
efdZddZdee	e
f         d
dfdZd
ee         fdZdS )PDFStructTreeaz  Parse the structure tree of a PDF.

    The constructor takes a `pdfplumber.PDF` and optionally a
    `pdfplumber.Page`.  To avoid creating the entire tree for a large
    document it is recommended to provide a page.

    This class creates a representation of the portion of the
    structure tree that reaches marked content sections, either for a
    single page, or for the whole document.  Note that this is slightly
    different from the behaviour of other PDF libraries which will
    also include structure elements with no content.

    If the PDF has no structure, the constructor will raise
    `StructTreeMissing`.

    pageNdocr   r   c                 >   |j         | _         d| j         j        vrt          d          t          | j         j        d                   | _        t          | j                            di                     | _        t          | j                            di                     | _        g | _        ||j	        | _
        d | _        | j                            d          }||                                  d S t          |          }d| j
        j        vrd S | j
        j        d         t          t          fd|j        D                                 }|                     |           d S d | _
        d |j        D             | _        |                                  d S )	NStructTreeRootzPDF has no structureRoleMapClassMap
ParentTreeStructParentsc              3   .   K   | ]\  }}|k    |V  d S r(   rB   ).0numarray	parent_ids      r-   	<genexpr>z)PDFStructTree.__init__.<locals>.<genexpr>n   s1      XX:3siGWXXXXXXXr0   c                 2    i | ]}|j         j        |j        S rB   )page_objpageidr!   )rP   rG   s     r-   
<dictcomp>z*PDFStructTree.__init__.<locals>.<dictcomp>t   s/       ;?$d&6  r0   )rH   catalogrD   r   rootgetrole_map	class_mapr%   rV   rG   	page_dict_parse_struct_treer   attrsnextvalues_parse_parent_treepages)r,   rH   rG   parent_tree_objparent_treeparent_arrayrS   s         @r-   __init__zPDFStructTree.__init__M   s   748#33 	<#$:;;;TX-.>?@@	 y"!=!=>>!$)--
B"?"?@@02  	&DI!DN #immL99O 6'')))))(99 #$)/9 F IOO<	'XXXX1CXXXXX    ''55555DI CF9  DN ##%%%%%r0   objr   r&   c                 &   g }dD ]\}||vrt          ||                   }t          |t                    r|                    |           G|                    |           ]g }d }|D ]]}t          |t
                    r ||k    r||                    |           d }7||                    |           t          |          }^||                    |           i }	|D ]}t          |t                    rFt          |j                  }|| j	        vrt                              d|           P| j	        |         }|                                D ]=\  }
}t          |t                    rt          |j                  |	|
<   2||
         |	|
<   >|	S )N)CAzUnknown attribute class %s)r   
isinstancer3   r5   appendr@   r   r   namer]   loggerwarningitems)r,   ri   r   attr_obj_listkeyattr_obj	attr_objsprev_objarefattrr9   vs               r-   _make_attributeszPDFStructTree._make_attributesy   s     	/ 	/C#~ C))H(D)) /$$X....$$X....	! 	* 	*D $$$ *8# / /$$X... /$$X...#D>> 	'X&&&  	% 	%C#y)) *!#(++dn, NN#?EEEnS)		 % %1a++ %)!&11DGG!!fDGG	%
 r0   c                    d|vsJ d|z              d|vsJ d|z              d }| j         4d|v r0|d         j        }|| j         v sJ d|z              | j         |         }d}d|v rBt          |d         j                  }|| j        v rt          | j        |         j                  }d	|v rt          |d	                   ng }t          |t                    r|g}nt          |t                    r	|d	         g}|	                    d
          }| 
                    ||          }d|v rt          |d                   nd }d|v rt          |d                   nd }	d|v rt          |d                   nd }
d|v rt          |d                   nd }d|v rt          |d                   nd }t          |||||
|	|||	  	        }||fS )NMCIDzUncaught MCR: %sObjzUncaught OBJR: %sPgzObject on unparsed page: %s SKRIDTLangAlt
ActualText)	r   r   r!   r   r   r    r   r   r#   )r^   objidr   ro   r\   r   rm   r@   rA   r[   r{   r   )r,   ri   r!   
page_objidobj_tagr%   r   r#   
element_idr    r   r   r   elements                 r-   _make_elementzPDFStructTree._make_element   s4   S ::"4s"::::C::!4s!::::> 	5$#+ 	5TJ/TT1NQT1TTTT.4K#: 	C!#c(-00G$-' C%dmG&<&ABB),;8CH%%%h$$ 	" zHH$'' 	"CzH773<<**399
/3s{D[T+++
),=CH%%%+1S=B{3v;'''d.3slD;s5z***8D8KUk#l"3444QU"##!

 

 

   r0   rg   c                    t          |          }i }d}|r|                                }|t          j        k    r't	          |          |v r9t          |          }d|v r!t          |d         j                  dk    rd}nK|                     |          \  }}|J ||f|t	          |          <   |	                    |d                    ||sJ | 
                    |           dS )zYPopulate the structure tree using the leaves of the parent tree for
        a given page.FTyperJ   TNP)r   r2   r   KEYWORD_NULLreprr   r   ro   r   rn   _resolve_children)	r,   rg   r7   s
found_rootrefri   r   r%   s	            r-   rc   z PDFStructTree._parse_parent_tree   s    ,
 	#))++C i,, CyyA~ 3--C} 	#S[-=!>!>BR!R 	#!

 %)$6$6s$;$;!***&0$s))S"""'  	#* q!!!!!r0   c                 ~    d|vrdS |d         j         }| j        	|| j        v S | j        || j        j        k    rdS dS )Nr   TF)r   r^   rG   rW   )r,   ri   r   s      r-   on_parsed_pagezPDFStructTree.on_parsed_page   s[    s? 	4Y_
> 	0//9 	TY-- utr0   c                    	 t           j        d                   }t          |t                    r j        d         g}t	          |          }i 	|r%|                                }t          |          	v r)t          |          }t          |t                    r1d|v r-                     |          sg|d         }t          |          }                     |          \  }}||f	t          |          <   |D ]x}t          |          }t          |t                    r(                     |          s<d|v r	|d         }nd|v rNt          |t                    r|
                    |           y|%dt          t                   dt          t                   f	 fd |                                	           dS )zgPopulate the structure tree starting from the root, skipping
        unparsed pages and empty elements.r   r~   r}   elementsr&   c                    g }| D ]}t          |          }t          |t                    r|                    |           <t          |t                    rB                    |          sgd|v r|                    |d                    d|v r|d         }t          |                   \  }} |          }||st          |          = ||ft          |          <   |                    |           |S )Nr}   r~   )r   rm   r@   rn   rA   r   r   )	r   next_elementsr   ri   r   r%   pruner   r,   s	         r-   r   z/PDFStructTree._parse_struct_tree.<locals>.prune%  s"   M . .smmc3'' 
)!((---T** )..s33 ! } )%,,S[999 # )!%j$%d3iiL! 5?? .( .$s))#*H#4Ad3iiL!((----  r0   N)r   rZ   rm   rA   r   r2   r   r   r   r   rn   r   r   r   )
r,   rZ   r7   r   ri   r   r%   childr   r   s
   `       @@r-   r_   z PDFStructTree._parse_struct_tree   s    	#'' dD!! 	$IcN#D$KK 	$))++CCyyA~ 3--C#t$$ $# $**3// %jsmm $ 2 23 7 7GX #H,Ad3iiL! 
$ 
$uooc4(( !..s33 ! | ! #E
3 ! eY// $HHUOOO5  	$<	!DI 	!$s) 	! 	! 	! 	! 	! 	! 	! 	!2 	dq!!!!!r0   seenc                 R   t          | j        d                   }t          |t                    r| j        d         g}g | _        g }|D ]n}t          |          }t          |t                    r"d|v r|                     |          s@|d         }t          |          v r|                    |           ot          |          }|rI|	                                }t          |                   \  }}|
J d            |D ]
}	t          |	          }t          |t                    r|j                            |           n\t          |t                    rG|                     |          smd|v r!|j                            |d                    nd|v r|d         }	t          |	t                    rW                    t          |	          d          \  }
}|
/|j                            |
           |                    |	           |Ifd|D             | _        dS )z|Resolve children starting from the tree root based on references we
        saw when traversing the structure tree.
        r   r~   NzUnparsed elementr}   )NNc                 F    g | ]}t          |                   d          S )r   )r   )rP   r   r   s     r-   
<listcomp>z3PDFStructTree._resolve_children.<locals>.<listcomp>k  s(    CCCd3ii+CCCr0   )r   rZ   rm   rA   r%   r   r   rn   r   r2   r@   r$   r   r[   )r,   r   rZ   parsed_rootr   ri   r7   r   r%   r   child_element_s    `          r-   r   zPDFStructTree._resolve_childrenA  sI    	#''dD!! 	$IcN#D 	( 	(C3--C#t$$ !# !**3// %jCyyD  (""3'''+ 	())++C $T#YYGX::(::::! ( (uooc3'' 	+M((----T** +..s33 ! } +,,S[9999# + #E
eY// ('+xxU\'J'J$M1$ ((//>>>+  	(, DCCC{CCCr0   c                 *    t          | j                  S r(   r)   r+   s    r-   r.   zPDFStructTree.__iter__m  r/   r0   r(   )r&   N)r;   r<   r=   __doc__r   r   r?   rh   r	   r>   r   r@   r{   r   r   r   r   rc   boolr   r_   r   r
   r.   rB   r0   r-   rF   rF   9   s         " 7
*& *&E *&&)9 *& *& *& *&X/S>/-5c]/	c3h/ / / /b&! &!x8H/I4PS9/T)U &! &! &! &!P"tCy "T " " " ">
$sCx. 
T 
 
 
 
B" B" B" B"H*Dd38n *D *D *D *D *DX#(#34 # # # # # #r0   rF   )'loggingcollectionsr   dataclassesr   r   r   typingr   r   r	   r
   r   r   r   pdfminer.data_structuresr   pdfminer.pdfpager   pdfminer.pdfparserr   pdfminer.pdftypesr   r   pdfminer.psparserr   utilsr   	getLoggerr;   rp   rG   r   pdfr   r   
ValueErrorrD   rF   rB   r0   r-   <module>r      s          0 0 0 0 0 0 0 0 0 0 L L L L L L L L L L L L L L L L L L / / / / / / $ $ $ $ $ $ ( ( ( ( ( ( 1 1 1 1 1 1 1 1 ' ' ' ' ' '      		8	$	$          <	 	 	 	 	
 	 	 	u# u# u# u# u# u# u# u# u# u#r0   