
    A j?                         d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZ	ddl
mZ ddlmZ ddlmZmZmZmZmZmZmZmZ ddlmZmZmZmZ ddlmZ  ej        d	          Z G d
 de          Z dS )    N   )
BaseParser   )Table)scale_image	scale_pdfsegments_in_bboxtext_in_bboxmerge_close_linesget_table_indexcompute_accuracycompute_whitespace)adaptive_threshold
find_linesfind_contoursfind_joints)BACKENDScamelotc                       e Zd ZdZdddddddgddddddd	d
ddfdZed             Zed             Zedd            Zd Z	d Z
d Zdi fdZdS )Latticea-  Lattice method of parsing looks for lines between text
    to parse the table.

    Parameters
    ----------
    table_regions : list, optional (default: None)
        List of page regions that may contain tables of the form x1,y1,x2,y2
        where (x1, y1) -> left-top and (x2, y2) -> right-bottom
        in PDF coordinate space.
    table_areas : list, optional (default: None)
        List of table area strings of the form x1,y1,x2,y2
        where (x1, y1) -> left-top and (x2, y2) -> right-bottom
        in PDF coordinate space.
    process_background : bool, optional (default: False)
        Process background lines.
    line_scale : int, optional (default: 15)
        Line size scaling factor. The larger the value the smaller
        the detected lines. Making it very large will lead to text
        being detected as lines.
    copy_text : list, optional (default: None)
        {'h', 'v'}
        Direction in which text in a spanning cell will be copied
        over.
    shift_text : list, optional (default: ['l', 't'])
        {'l', 'r', 't', 'b'}
        Direction in which text in a spanning cell will flow.
    split_text : bool, optional (default: False)
        Split text that spans across multiple cells.
    flag_size : bool, optional (default: False)
        Flag text based on font size. Useful to detect
        super/subscripts. Adds <s></s> around flagged text.
    strip_text : str, optional (default: '')
        Characters that should be stripped from a string before
        assigning it to a cell.
    line_tol : int, optional (default: 2)
        Tolerance parameter used to merge close vertical and horizontal
        lines.
    joint_tol : int, optional (default: 2)
        Tolerance parameter used to decide whether the detected lines
        and points lie close to each other.
    threshold_blocksize : int, optional (default: 15)
        Size of a pixel neighborhood that is used to calculate a
        threshold value for the pixel: 3, 5, 7, and so on.

        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
    threshold_constant : int, optional (default: -2)
        Constant subtracted from the mean or weighted mean.
        Normally, it is positive but may be zero or negative as well.

        For more information, refer `OpenCV's adaptiveThreshold <https://docs.opencv.org/2.4/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold>`_.
    iterations : int, optional (default: 0)
        Number of times for erosion/dilation is applied.

        For more information, refer `OpenCV's dilate <https://docs.opencv.org/2.4/modules/imgproc/doc/filtering.html#dilate>`_.
    resolution : int, optional (default: 300)
        Resolution used for PDF to PNG conversion.

    NF   lt r   r   i,  ghostscriptc                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        |
| _	        || _
        || _        || _        || _        || _        t                              |          | _        d S N)table_regionstable_areasprocess_background
line_scale	copy_text
shift_text
split_text	flag_size
strip_textline_tol	joint_tolthreshold_blocksizethreshold_constant
iterations
resolutionr   _get_backendbackend)selfr   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r/   kwargss                     kC:\Users\Terasoftware\OneDrive\Desktop\faahhh\fyndo\fyndo\venv\Lib\site-packages\camelot/parsers/lattice.py__init__zLattice.__init__a   s    ( +&"4$"$$"$ "#6 "4$$++G44    c                 .     fd}t           t                    r\ t          j                    vrt	          d  d           dk    rt          j        dt                     t                                S  |            st	          d  d           S )Nc                  >    d t                    D             } d| v S )Nc                 @    g | ]}|                     d           du |S )__F)
startswith).0methods     r2   
<listcomp>zDLattice._get_backend.<locals>.implements_convert.<locals>.<listcomp>   sA       !V5F5Ft5L5LPU5U  r4   convert)dir)methodsr/   s    r2   implements_convertz0Lattice._get_backend.<locals>.implements_convert   s4     %(\\  G ''r4   zUnknown backend 'z:' specified. Please use either 'poppler' or 'ghostscript'.r   z'ghostscript' will be replaced by 'poppler' as the default image conversion backend in v0.12.0. You can try out 'poppler' with backend='poppler'.'z#' must implement a 'convert' method)
isinstancestrr   keysNotImplementedErrorwarningswarnDeprecationWarning)r/   r@   s   ` r2   r.   zLattice._get_backend   s    	( 	( 	( 	( 	( gs## 	hmoo- )kkkk   -' ]&   G$&&&%%'' )DDDD   Nr4   c                    g }|D ]r\  }}}|D ]O}|dk    rM| j         |         |         j        r5| j         |         |         j        s|dz  }| j         |         |         j        |dk    rM| j         |         |         j        r5| j         |         |         j        s|dz  }| j         |         |         j        |dk    rM| j         |         |         j        r5| j         |         |         j        s|dz  }| j         |         |         j        |dk    rM| j         |         |         j        r5| j         |         |         j        s|dz  }| j         |         |         j        Q|                    |||f           t|S )a  Reduces index of a text object if it lies within a spanning
        cell.

        Parameters
        ----------
        table : camelot.core.Table
        idx : list
            List of tuples of the form (r_idx, c_idx, text).
        shift_text : list
            {'l', 'r', 't', 'b'}
            Select one or more strings from above and pass them as a
            list to specify where the text in a spanning cell should
            flow.

        Returns
        -------
        indices : list
            List of tuples of the form (r_idx, c_idx, text) where
            r_idx and c_idx are new row and column indices for text.

        r   r   rr   b)cellshspanleftrightvspantopbottomappend)r   idxr$   indicesr_idxc_idxtextds           r2   _reduce_indexzLattice._reduce_index   s   . "% 	1 	1E5$ ' '8 'wu~e,2 '"#'%."7"< '!QJE #$'%."7"< '8 'wu~e,2 '"#'%."7"= '!QJE #$'%."7"= '8 'wu~e,2 '"#'%."7"; '!QJE #$'%."7"; '8 'wu~e,2 '"#'%."7"> '!QJE #$'%."7"> 'NNE5$/0000r4   c                    |D ]}|dk    rt          t          | j                            D ]}t          t          | j        |                             D ]}| j        |         |         j                                        dk    ra| j        |         |         j        rI| j        |         |         j        s1| j        |         |dz
           j        | j        |         |         _        |dk    rt          t          | j                            D ]}t          t          | j        |                             D ]}| j        |         |         j                                        dk    ra| j        |         |         j        rI| j        |         |         j        s1| j        |dz
           |         j        | j        |         |         _        | S )a  Copies over text in empty spanning cells.

        Parameters
        ----------
        t : camelot.core.Table
        copy_text : list, optional (default: None)
            {'h', 'v'}
            Select one or more strings from above and pass them as a list
            to specify the direction in which text should be copied over
            when a cell spans multiple rows or columns.

        Returns
        -------
        t : camelot.core.Table

        hr   r   v)	rangelenrL   rX   striprM   rN   rP   rQ   )r   r#   fijs        r2   _copy_spanning_textzLattice._copy_spanning_text   s   $  	L 	LACx Ls17||,, L LA"3qwqz??33 L L71:a=-3355; L wqz!}2 L171:a=;M L56WQZA5F5K
1 2LL
 c Ls17||,, L LA"3qwqz??33 L L71:a=-3355; L wqz!}2 L171:a=;L L56WQU^A5F5K
1 2L r4   c                    fd}t          | j        | j        | j        | j                  \  | _        | _        | j        j        d         }| j        j        d         }|t          | j	                  z  }|t          | j
                  z  }| j	        t          |          z  }| j
        t          |          z  }||| j
        f|||f}| j        d }	| j         || j                  }	t          | j        |	d| j        | j                  \  }
}t          | j        |	d| j        | j                  \  }}t!          |
|          }t#          ||
|          }nkt          | j        d| j        | j                  \  }
}t          | j        d| j        | j                  \  }} || j                  }t#          ||
|          }t%          j        |          | _        t+          ||||          \  | _        | _        | _        d S )	Nc           
      f   g }| D ]}|                     d          \  }}}}t          |          }t          |          }t          |          }t          |          }t          ||||f          \  }}}}|                    ||t	          ||z
            t	          ||z
            f           |S )N,)splitfloatr   rS   abs)areasscaled_areasareax1y1x2y2image_scalerss          r2   scale_areasz1Lattice._generate_table_bbox.<locals>.scale_areas   s    L J J!%CBB2YY2YY2YY2YY!*BB+;]!K!KBB##RSb\\3rBw<<$HIIIIr4   )r!   	blocksizecr   r   vertical)regions	directionr"   r,   
horizontal)rx   r"   r,   )r   	imagenamer!   r*   r+   image	thresholdshaperi   	pdf_width
pdf_heightr    r   r   r"   r,   r   r   copydeepcopytable_bbox_unscaledr   
table_bboxvertical_segmentshorizontal_segments)r0   rs   image_widthimage_heightimage_width_scalerimage_height_scalerpdf_width_scalerpdf_height_scalerpdf_scalersrw   vertical_maskr   horizontal_maskr   contoursr   rk   rr   s                    @r2   _generate_table_bboxzLattice._generate_table_bbox   sV   
	  
	  
	  
	  
	  &8N#6.%	&
 &
 &
"
DN j&q)z'*(5+@+@@*U4?-C-CC>E+,>,>> OeL.A.AA+-@$/R'):LI %	LG! :%+d&899/9$??0 0 0,M, 4>&??4 4 40O0 %]ODDH$X}oNNJJ/9$??	0 0 0,M, 4>&??	4 4 40O0  K 011E$UM?KKJ#'=#<#< LW)+>M
 M
I/1I1I1Ir4   c                 l   i }t          || j        | j                  \  }}t          || j                  |d<   t          || j                  |d<   |d                             d            |d                             d            || _        t          | j	        |          \  t                    t                    c                    |d         |d         g                               |d         |d	         g           t          t                    | j        
          t          t          d          | j        
          fdt          dt!                    dz
            D             fdt          dt!                    dz
            D             ||fS )Nry   rv   c                      | j          | j        fS r   )y0x0xs    r2   <lambda>z4Lattice._generate_columns_and_rows.<locals>.<lambda>E  s    !$ r4   )keyc                      | j         | j         fS r   )r   r   r   s    r2   r   z4Lattice._generate_columns_and_rows.<locals>.<lambda>F  s    qtadUm r4   r   r   r      )r(   T)reversec                 6    g | ]}|         |d z            fS r    )r:   rb   colss     r2   r<   z6Lattice._generate_columns_and_rows.<locals>.<listcomp>R  *    HHH1a$q1u+&HHHr4   c                 6    g | ]}|         |d z            fS r   r   )r:   rb   rowss     r2   r<   z6Lattice._generate_columns_and_rows.<locals>.<listcomp>S  r   r4   )r	   r   r   r
   horizontal_textvertical_textsortt_bboxzipr   listextendr   sortedr(   r^   r_   )r0   	table_idxtkr   v_sh_sr   r   s         @@r2   _generate_columns_and_rowsz"Lattice._generate_columns_and_rows<  s   #&(@
 
S  ,B0DEE|)"d.@AAz|!!&=&=!>>>z$;$;<<<$/"-.
d$ZZd
dRUBqEN###RUBqEN### FFF d!;!;!;dmTTTHHHHaTQ0G0GHHHHHHHaTQ0G0GHHHT3##r4   c           
      d   |                     d          }|                     d          }||'t          d                    | j                            t	          ||          }|                    ||| j                  }|                                }|                                }g }dD ]}	| j	        |	         D ]}
t          ||
|	| j        | j        | j                  \  }}|d d         dk    rX|                    |           t                              ||| j        	          }|D ]\  }}}||j        |         |         _        t)          d
|gg          }| j        !t                              || j                  }|j        }t1          j        |          |_        |j        j        |_        t9          |          }d|_        ||_        ||_        |dz   |_         tC          tD          j#        $                    | j                  %                    dd                    |_&        g }|'                    d | j(        D                        |'                    d | j)        D                        ||_*        | j+        | j,        f|_-        | j.        | j/        f|_0        d |_1        |S )Nr   r   zNo segments found on {})r)   )rv   ry   )r%   r&   r'   r   )r   )r$   d   )r#   latticer   zpage-r   c                 B    g | ]}|j         |j        |j        |j        fS r   r   r   rn   ro   r:   r   s     r2   r<   z+Lattice._generate_table.<locals>.<listcomp>  s)    MMM1qtQT14.MMMr4   c                 B    g | ]}|j         |j        |j        |j        fS r   r   r   s     r2   r<   z+Lattice._generate_table.<locals>.<listcomp>  s)    KKK1qtQT14.KKKr4   )2get
ValueErrorformatrootnamer   	set_edgesr)   
set_borderset_spanr   r   r%   r&   r'   rS   r   rZ   r$   rL   rX   r   r#   rd   datapd	DataFramedfr}   r   flavoraccuracy
whitespaceorderintospathbasenamereplacepager   r   r   _textr{   r   _imager   r   	_segments
_textedges)r0   r   r   r   r1   r   r   table
pos_errorsrx   r   rU   errorrV   rW   rX   r   r   r   r   s                       r2   _generate_tablezLattice._generate_tableW  s   jjjj 	N# 	N6==dmLLMMMdD!!SDNCC  ""  
 4 	> 	>I[+ > >!0#"n#" " " 2A2;(* >%%e,,,%33w4? 4  G /6 > >*ud9=E*5166>  $c:%6$788> 	Q///PPEz<%%hn'--
 !%!m))$-88@@"MMNN
 MM8LMMMNNNKK8JKKKLLL
D$<=143KLr4   c                    |                      ||           |sOt                              d                    t          j                            | j                                       | j        s| j	        rJt          j        d                    t          j                            | j                                       nIt          j        d                    t          j                            | j                                       g S | j                            | j        | j                   |                                  g }t#          t%          | j                                        d d                    D ]V\  }}|                     ||          \  }}}	}
|                     ||||	|
          }||_        |                    |           W|S )NzProcessing {}z:{} is image-based, camelot only works on text-based pages.zNo tables found on {}c                     | d         S )Nr   r   r   s    r2   r   z(Lattice.extract_tables.<locals>.<lambda>  s
    1 r4   T)r   r   )r   r   )_generate_layoutloggerinfor   r   r   r   r   r   imagesrF   rG   r/   r=   filenamerz   r   	enumerater   r   rD   r   r   _bboxrS   )r0   r   suppress_stdoutlayout_kwargs_tablesr   r   r   r   r   r   r   s               r2   extract_tableszLattice.extract_tables  s   h666 	QKK..rw/?/?/N/NOOPPP# 
	{ ))/0@0@0O0O)P)P   
 +22273C3CDM3R3RSS   IT]DN;;;!!###&4?''))~~tLLL
 
 	" 	"MIr $(#B#B9b#Q#Q D$S((D$CS(QQEEKNN5!!!!r4   r   )__name__
__module____qualname____doc__r3   staticmethodr.   rZ   rd   r   r   r   r   r   r4   r2   r   r   %   s       9 9z  :##5 #5 #5 #5J   \: * * \*X    \@H
 H
 H
T$ $ $6; ; ;z 8=B      r4   r   )!r   sysr   localeloggingrF   numpynppandasr   baser   corer   utilsr   r   r	   r
   r   r   r   r   image_processingr   r   r   r   backends.image_conversionr   	getLoggerr   r   r   r4   r2   <module>r      s   
			 



                        	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	            1 0 0 0 0 0 
	9	%	%N N N N Nj N N N N Nr4   