
    A j                     ~    d dl Z d dlZd dlmZmZ ddlmZ ddlmZm	Z	 ddl
mZmZmZmZmZmZ  G d de          ZdS )	    N)	PdfReader	PdfWriter   )	TableList)StreamLattice)TemporaryDirectoryget_page_layoutget_text_objectsget_rotationis_urldownload_urlc                   4    e Zd ZdZd
dZd Zd Zddi fd	ZdS )
PDFHandlera  Handles all operations like temp directory creation, splitting
    file into single page PDFs, parsing each PDF and then removing the
    temp directory.

    Parameters
    ----------
    filepath : str
        Filepath or URL of the PDF file.
    pages : str, optional (default: '1')
        Comma-separated page numbers.
        Example: '1,3,4' or '1,4-end' or 'all'.
    password : str, optional (default: None)
        Password for decryption.

    1Nc                 |   t          |          rt          |          }|| _        |                                                    d          st          d          |d| _        n<|| _        t          j        d         dk     r| j        	                    d          | _        | 
                    |          | _        d S )N.pdfzFile format not supported r      ascii)r   r   filepathlowerendswithNotImplementedErrorpasswordsysversion_infoencode
_get_pagespages)selfr   r    r   s       dC:\Users\Terasoftware\OneDrive\Desktop\faahhh\fyndo\fyndo\venv\Lib\site-packages\camelot/handlers.py__init__zPDFHandler.__init__%   s    ( 	.#H--H ~~((00 	C%&ABBB 	>DMM$DM"Q& > $ 4 4W = =__U++


    c                 v   g }|dk    r|                     ddd           nCt          | j        d          5 }t          |d          }|j        r|                    | j                   |dk    r+|                     dt          |j                  d           n|	                    d          D ]}d	|v re|	                    d	          \  }}|d
k    rt          |j                  }|                     t          |          t          |          d           k|                     t          |          t          |          d           ddd           n# 1 swxY w Y   g }|D ]4}	|                    t          |	d         |	d
         dz                        5t          t          |                    S )a~  Converts pages string to list of ints.

        Parameters
        ----------
        filepath : str
            Filepath or URL of the PDF file.
        pages : str, optional (default: '1')
            Comma-separated page numbers.
            Example: '1,3,4' or '1,4-end' or 'all'.

        Returns
        -------
        P : list
            List of int page numbers.

        r   r   )startendrbFstrictall,-r'   Nr&   )appendopenr   r   is_encrypteddecryptr   lenr    splitintextendrangesortedset)
r!   r    page_numbersfinfilerabPps
             r"   r   zPDFHandler._get_pages4   s   " C< 	R!A 6 67777dmT** Ra"1U333& 2NN4=111E> 
R ''!C<M<M(N(NOOOO"[[-- R R!8 R#$773<<DAq Ez 6$'$5$5(//#a&&Q0P0PQQQQ(//#a&&Q0P0PQQQQ!R R R R R R R R R R R R R R R$  	6 	6AHHU1W:qx!|445555c!ff~~s   DEE"Ec                 X   t          |d          5 }t          |d          }|j        r|                    | j                   t
          j                            |d| d          }t
          j                            |          \  }}|j	        |dz
           }	t                      }
|
                    |	           t          |d          5 }|
                    |           ddd           n# 1 swxY w Y   t          |          \  }}t          |d	
          }t          |d
          }t          |d
          }t          |||          }|dk    r;d                    |                    dd          d|g          }t          j        ||           t          |d          }t          |d          }|j        r|                    | j                   t                      }
|j	        d         }	|dk    r|	                    d           n|dk    r|	                    d           |
                    |	           t          |d          5 }|
                    |           ddd           n# 1 swxY w Y   |                                 ddd           dS # 1 swxY w Y   dS )a  Saves specified page from PDF into a temporary directory.

        Parameters
        ----------
        filepath : str
            Filepath or URL of the PDF file.
        page : int
            Page number.
        temp : str
            Tmp directory.

        r(   Fr)   page-r   r   wbNchar)ltypehorizontal_textvertical_textr   pager@   _rotatedr   anticlockwiseZ   	clockwisei)r/   r   r0   r1   r   ospathjoinsplitextr    r   add_pagewriter
   r   r   replacerenamerotateclose)r!   r   rH   tempfileobjr;   fpathfrootfextr@   outfiler:   layoutdimcharsrF   rG   rotation	fpath_newinstreams                       r"   
_save_pagezPDFHandler._save_pagea   sE    (D!! !	!Wwu555F" .t}---GLL'9t'9'9'9::E'**511KE4TAX&AkkGQeT"" !aa   ! ! ! ! ! ! ! ! ! ! ! ! ! ! !)%00KFC$V6:::E.v=NOOOO,V?KKKM#E?MJJH2~ !GGU]]63%?%?T$RSS		%+++	400"8E:::& 2NN4=111#++LO. "HHRLLLL, "HHSMMM  ###%&& %!MM!$$$% % % % % % % % % % % % % % %   C!	! !	! !	! !	! !	! !	! !	! !	! !	! !	! !	! !	! !	! !	! !	! !	! !	! !	!s[   B<JC/#J/C3	3J6C3	7EJI3'J3I7	7J:I7	;JJ#&J#latticeFc                   
 g }t                      5 
| j        D ]}|                     | j        |
           
fd| j        D             }|dk    rt	          di |nt          di |}|D ]/}|                    |||          }	|                    |	           0	 ddd           n# 1 swxY w Y   t          t          |                    S )a  Extracts tables by calling parser.get_tables on all single
        page PDFs.

        Parameters
        ----------
        flavor : str (default: 'lattice')
            The parsing method to use ('lattice' or 'stream').
            Lattice is used by default.
        suppress_stdout : str (default: False)
            Suppress logs and warnings.
        layout_kwargs : dict, optional (default: {})
            A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs.
        kwargs : dict
            See camelot.read_pdf kwargs.

        Returns
        -------
        tables : camelot.core.TableList
            List of tables found in PDF.

        c                 X    g | ]&}t           j                            d | d          'S )rB   r   )rM   rN   rO   ).0r@   tempdirs     r"   
<listcomp>z$PDFHandler.parse.<locals>.<listcomp>   s1    PPPRW\\'?1???;;PPPr$   rd   )suppress_stdoutlayout_kwargsN )
r	   r    rc   r   r   r   extract_tablesr5   r   r7   )r!   flavorrj   rk   kwargstablesr@   r    parsertrh   s             @r"   parsezPDFHandler.parse   sA   0 !! 		!WZ ; ;q'::::PPPPTZPPPE*0I*=SW&&v&&&6CSCSFCSCSF ! !))m *   a    	!		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! (((s   B
B))B-0B-)r   N)__name__
__module____qualname____doc__r#   r   rc   rs   rl   r$   r"   r   r      so          , , , ,+ + +Z.! .! .!b R#) #) #) #) #) #)r$   r   )rM   r   pypdfr   r   corer   parsersr   r   utilsr	   r
   r   r   r   r   objectr   rl   r$   r"   <module>r}      s    
			 



 & & & & & & & &       $ $ $ $ $ $ $ $               `) `) `) `) `) `) `) `) `) `)r$   