o
    j                     @   sp   d dl Z d dlZd dlmZmZ ddlmZ ddlmZm	Z	 ddl
mZmZmZmZmZmZ G dd deZdS )	    N)	PdfReader	PdfWriter   )	TableList)StreamLattice)TemporaryDirectoryget_page_layoutget_text_objectsget_rotationis_urldownload_urlc                   @   s:   e Zd ZdZdddZdd Zdd	 Zd
di fddZdS )
PDFHandlera  Handles all operations like temp directory creation, splitting
    file into single page PDFs, parsing each PDF and then removing the
    temp directory.

    Parameters
    ----------
    filepath : str
        Filepath or URL of the PDF file.
    pages : str, optional (default: '1')
        Comma-separated page numbers.
        Example: '1,3,4' or '1,4-end' or 'all'.
    password : str, optional (default: None)
        Password for decryption.

    1Nc                 C   sn   t |rt|}|| _| dstd|d u rd| _n|| _tjd dk r/| j	d| _| 
|| _d S )N.pdfzFile format not supported r      ascii)r   r   filepathlowerendswithNotImplementedErrorpasswordsysversion_infoencode
_get_pagespages)selfr   r   r    r   V/var/www/html/fyndo/pharma/fyndo/venv/lib/python3.10/site-packages/camelot/handlers.py__init__%   s   zPDFHandler.__init__c           
      C   s(  g }|dkr| ddd njt| jd\}t|dd}|jr%|| j |dkr5| dt|jd n5|	dD ]/}d	|v r]|	d	\}}|d
krPt|j}| t
|t
|d q:| t
|t
|d q:W d   n1 stw   Y  g }|D ]}	|t|	d |	d
 d  q}tt|S )a~  Converts pages string to list of ints.

        Parameters
        ----------
        filepath : str
            Filepath or URL of the PDF file.
        pages : str, optional (default: '1')
            Comma-separated page numbers.
            Example: '1,3,4' or '1,4-end' or 'all'.

        Returns
        -------
        P : list
            List of int page numbers.

        r   r   )startendrbFstrictall,-r#   Nr"   )appendopenr   r   is_encrypteddecryptr   lenr   splitintextendrangesortedset)
r   r   page_numbersfinfilerabPpr   r   r    r   4   s,   
zPDFHandler._get_pagesc              	   C   s  t |d}t|dd}|jr|| j tj|d| d}tj|\}}|j	|d  }	t
 }
|
|	 t |d}|
| W d   n1 sLw   Y  t|\}}t|d	d
}t|dd
}t|dd
}t|||}|dkrd|ddd|g}t|| t |d}t|dd}|jr|| j t
 }
|j	d }	|dkr|	d n	|dkr|	d |
|	 t |d}|
| W d   n1 sw   Y  |  W d   dS W d   dS 1 sw   Y  dS )a  Saves specified page from PDF into a temporary directory.

        Parameters
        ----------
        filepath : str
            Filepath or URL of the PDF file.
        page : int
            Page number.
        temp : str
            Tmp directory.

        r$   Fr%   page-r   r   wbNchar)ltypehorizontal_textvertical_textr   pager<   _rotatedr   anticlockwiseZ   	clockwisei)r+   r   r,   r-   r   ospathjoinsplitextr   r   add_pagewriter	   r
   r   replacerenamerotateclose)r   r   rC   tempfileobjr7   fpathfrootfextr<   outfiler6   layoutdimcharsrA   rB   rotation	fpath_newinstreamr   r   r    
_save_pagea   sL   





"zPDFHandler._save_pagelatticeFc           
         s   g }t  D | jD ]
}| | j|  q	 fdd| jD }|dkr)tdi |ntdi |}|D ]}|j|||d}	||	 q2W d   n1 sLw   Y  tt	|S )a  Extracts tables by calling parser.get_tables on all single
        page PDFs.

        Parameters
        ----------
        flavor : str (default: 'lattice')
            The parsing method to use ('lattice' or 'stream').
            Lattice is used by default.
        suppress_stdout : str (default: False)
            Suppress logs and warnings.
        layout_kwargs : dict, optional (default: {})
            A dict of `pdfminer.layout.LAParams <https://github.com/euske/pdfminer/blob/master/pdfminer/layout.py#L33>`_ kwargs.
        kwargs : dict
            See camelot.read_pdf kwargs.

        Returns
        -------
        tables : camelot.core.TableList
            List of tables found in PDF.

        c                    s"   g | ]}t j d | dqS )r=   r   )rH   rI   rJ   ).0r<   tempdirr   r    
<listcomp>   s   " z$PDFHandler.parse.<locals>.<listcomp>r_   )suppress_stdoutlayout_kwargsNr   )
r   r   r^   r   r   r   extract_tablesr1   r   r3   )
r   flavorrd   re   kwargstablesr<   r   parsertr   ra   r    parse   s   
$
zPDFHandler.parse)r   N)__name__
__module____qualname____doc__r!   r   r^   rl   r   r   r   r    r      s    
-1r   )rH   r   pypdfr   r   corer   parsersr   r   utilsr   r	   r
   r   r   r   objectr   r   r   r   r    <module>   s    
