o
    j]_                     @   s   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlZ	d dl
ZdZdZG dd deZG dd	 d	eZG d
d deZG dd deZG dd deZdS )    N)chain)
itemgetter   
   c                   @   s,   e Zd ZdZdddZdd Zddd	Zd
S )TextEdgeat  Defines a text edge coordinates relative to a left-bottom
    origin. (PDF coordinate space)

    Parameters
    ----------
    x : float
        x-coordinate of the text edge.
    y0 : float
        y-coordinate of bottommost point.
    y1 : float
        y-coordinate of topmost point.
    align : string, optional (default: 'left')
        {'left', 'right', 'middle'}

    Attributes
    ----------
    intersections: int
        Number of intersections with horizontal text rows.
    is_valid: bool
        A text edge is valid if it intersections with at least
        TEXTEDGE_REQUIRED_ELEMENTS horizontal text rows.

    leftc                 C   s(   || _ || _|| _|| _d| _d| _d S )Nr   F)xy0y1alignintersectionsis_valid)selfr   r	   r
   r    r   R/var/www/html/fyndo/pharma/fyndo/venv/lib/python3.10/site-packages/camelot/core.py__init__.   s   
zTextEdge.__init__c                 C   sL   t | jd}t | jd}t | jd}d| d| d| d| j d| j dS )N   z<TextEdge x=z y0= y1=z align=z valid=>)roundr   r	   r
   r   r   )r   r   r	   r
   r   r   r   __repr__6   s
   &zTextEdge.__repr__2   c                 C   sb   t j| j||dr-| j| j | t| jd  | _|| _|  jd7  _| jtkr/d| _dS dS dS )zdUpdates the text edge's x and bottom y coordinates and sets
        the is_valid attribute.
        atol   TN)npiscloser	   r   r   floatTEXTEDGE_REQUIRED_ELEMENTSr   )r   r   r	   edge_tolr   r   r   update_coords>   s    

zTextEdge.update_coordsN)r   r   )__name__
__module____qualname____doc__r   r   r    r   r   r   r   r      s
    
r   c                   @   sV   e Zd ZdZdddZedd Zdd Zd	d
 Zdd Z	dd Z
dd Zdd ZdS )	TextEdgeszDefines a dict of left, right and middle text edges found on
    the PDF page. The dict has three keys based on the alignments,
    and each key's value is a list of camelot.core.TextEdge objects.
    r   c                 C   s   || _ g g g d| _d S )Nr   rightmiddle)r   
_textedges)r   r   r   r   r   r   R   s   zTextEdges.__init__c                 C   s0   | j }| j}||| d  }|||d}|| S )zYReturns the x coordinate of a text row based on the
        specified alignment.
        g       @)r   r)   r(   )x0x1)textliner   x_leftx_rightx_middlex_coordr   r   r   get_x_coordV   s
   zTextEdges.get_x_coordc                 C   s6   t | j| D ]\}}tj|j|ddr|  S qdS )zkReturns the index of an existing text edge using
        the specified x coordinate and alignment.
        g      ?r   N)	enumerater*   r   r   r   )r   r1   r   iter   r   r   finda   s
   zTextEdges.findc                 C   s<   |  ||}|j}|j}t||||d}| j| | dS )z)Adds a new text edge to the current dict.)r   N)r2   r	   r
   r   r*   append)r   r-   r   r   r	   r
   r5   r   r   r   addj   s
   zTextEdges.addc                 C   sZ   dD ](}|  ||}| ||}|du r| || q| j| | j||j| jd qdS )z2Updates an existing text edge in the current dict.r'   N)r   )r2   r6   r8   r*   r    r	   r   )r   r-   r   r1   idxr   r   r   updater   s   
zTextEdges.updatec                 C   s,   |D ]}t |  dkr| | qdS )zMGenerates the text edges dict based on horizontal text
        rows.
        r   N)lenget_textstripr:   )r   	textlinestlr   r   r   generate~   s
   
zTextEdges.generatec                 C   sj   t dd | jd D t dd | jd D t dd | jd D d}t| td	d
d }| j| S )zReturns the list of relevant text edges (all share the same
        alignment) based on which list intersects horizontal text rows
        the most.
        c                 s       | ]	}|j r|jV  qd S Nr   r   .0r5   r   r   r   	<genexpr>       
z)TextEdges.get_relevant.<locals>.<genexpr>r   c                 s   rA   rB   rC   rD   r   r   r   rF      rG   r(   c                 s   rA   rB   rC   rD   r   r   r   rF      rG   r)   r'   r   keyr   )sumr*   maxitemsr   )r   intersections_sumrelevant_alignr   r   r   get_relevant   s   
zTextEdges.get_relevantc                 C   s  dd }|j dd d i }|D ]a}|jrq|s$d||j|j|j|jf< qd}|D ]}|j|d kr<|j|d kr<|} nq(|du rNd||j|j|j|jf< q|| |d	 t|j|d t|d
 |jt|d |jf}d||< qd	}	|D ]N}
|	|
j|
j 7 }	d}|D ]}|
j|d kr|
j|d kr|} nq|dur|| t|
j|d	 t|
j|d t|d
 |
j	t|d |
jf}d||< qv|	t
t| }i }|D ]	}d||||< q|S )zpReturns a dict of interesting table areas on the PDF page
        calculated using relevant text edges.
        c                 S   s@   | d t  }| d t  }| d t  }| d |d  }||||fS )Nr   r   r         )TABLE_AREA_PADDING)areaaverage_row_heightr+   r	   r,   r
   r   r   r   pad   s
   z&TextEdges.get_table_areas.<locals>.padc                 S   s   | j  | jfS rB   )r	   r   )r5   r   r   r   <lambda>   s    z+TextEdges.get_table_areas.<locals>.<lambda>rH   Nr   rP   r   r   )sortr   r   r	   r
   popminrK   r+   r,   r   r;   )r   r>   relevant_textedgesrU   table_areasr5   foundrS   updated_areasum_textline_heightr?   average_textline_heighttable_areas_paddedr   r   r   get_table_areas   s\   	

zTextEdges.get_table_areasNr!   )r"   r#   r$   r%   r   staticmethodr2   r6   r8   r:   r@   rO   ra   r   r   r   r   r&   L   s    


	r&   c                   @   sF   e Zd ZdZdd Zdd Zedd Zejdd Zed	d
 Z	dS )Cella\  Defines a cell in a table with coordinates relative to a
    left-bottom origin. (PDF coordinate space)

    Parameters
    ----------
    x1 : float
        x-coordinate of left-bottom point.
    y1 : float
        y-coordinate of left-bottom point.
    x2 : float
        x-coordinate of right-top point.
    y2 : float
        y-coordinate of right-top point.

    Attributes
    ----------
    lb : tuple
        Tuple representing left-bottom coordinates.
    lt : tuple
        Tuple representing left-top coordinates.
    rb : tuple
        Tuple representing right-bottom coordinates.
    rt : tuple
        Tuple representing right-top coordinates.
    left : bool
        Whether or not cell is bounded on the left.
    right : bool
        Whether or not cell is bounded on the right.
    top : bool
        Whether or not cell is bounded on the top.
    bottom : bool
        Whether or not cell is bounded on the bottom.
    hspan : bool
        Whether or not cell spans horizontally.
    vspan : bool
        Whether or not cell spans vertically.
    text : string
        Text assigned to cell.

    c                 C   sn   || _ || _|| _|| _||f| _||f| _||f| _||f| _d| _d| _	d| _
d| _d| _d| _d| _d S )NF )r,   r
   x2y2lbltrbrtr   r(   topbottomhspanvspan_textr   r,   r
   re   rf   r   r   r   r     s   




zCell.__init__c              	   C   sF   t | j}t | j}t | j}t | j}d| d| d| d| d	S )Nz	<Cell x1=r   z x2=z y2=r   )r   r,   r
   re   rf   rp   r   r   r   r   "  s
   



zCell.__repr__c                 C   s   | j S rB   )ro   r   r   r   r   text)  s   z	Cell.textc                 C   s   d | j|g| _d S )Nrd   )joinro   )r   tr   r   r   rr   -  s   c                 C   s   | j | j | j | j S )z1The number of sides on which the cell is bounded.)rk   rl   r   r(   rq   r   r   r   bound1  s   z
Cell.boundN)
r"   r#   r$   r%   r   r   propertyrr   setterru   r   r   r   r   rc      s    )

rc   c                   @   s   e Zd ZdZdd Zdd Zdd Zedd	 Zed
d Z	dd Z
d"ddZdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!S )#Tablea  Defines a table with coordinates relative to a left-bottom
    origin. (PDF coordinate space)

    Parameters
    ----------
    cols : list
        List of tuples representing column x-coordinates in increasing
        order.
    rows : list
        List of tuples representing row y-coordinates in decreasing
        order.

    Attributes
    ----------
    df : :class:`pandas.DataFrame`
    shape : tuple
        Shape of the table.
    accuracy : float
        Accuracy with which text was assigned to the cell.
    whitespace : float
        Percentage of whitespace in the table.
    order : int
        Table number on PDF page.
    page : int
        PDF page number.

    c                    sH    | _ || _ fdd|D | _d | _d| _d| _d| _d | _d | _d S )Nc                    s   g | ]  fd dD qS )c                    s*   g | ]}t |d   d |d  d  qS r   r   )rc   )rE   crr   r   
<listcomp>W  s   * z-Table.__init__.<locals>.<listcomp>.<listcomp>r   )rE   colsr{   r   r}   W  s    z"Table.__init__.<locals>.<listcomp>)r   r   r   )	r   rowscellsdfshapeaccuracy
whitespaceorderpage)r   r   r   r   r~   r   r   T  s   
zTable.__init__c                 C      d| j j d| j dS )N<z shape=r   )	__class__r"   r   rq   r   r   r   r   _     zTable.__repr__c                 C   s0   | j |j kr| j|jk rdS | j |j k rdS d S )NT)r   r   )r   otherr   r   r   __lt__b  s   zTable.__lt__c                 C   s(   g }| j D ]}|dd |D  q|S )z1Returns two-dimensional list of strings in table.c                 S   s   g | ]}|j  qS r   )rr   r=   )rE   cellr   r   r   r}   n  s    zTable.data.<locals>.<listcomp>)r   r7   )r   drowr   r   r   datai  s   
z
Table.datac                 C   s&   t | jdt | jd| j| jd}|S )zlReturns a parsing report with %accuracy, %whitespace,
        table number on page and page number.
        r   )r   r   r   r   )r   r   r   r   r   )r   reportr   r   r   parsing_reportq  s   

zTable.parsing_reportc                 C   s2   | j D ]}|D ]}d |_ |_ |_|_qq| S )zSets all table edges to True.T)r   r   r(   rk   rl   )r   r   r   r   r   r   set_all_edges  s
   
zTable.set_all_edgesr   c           
         s  |D ]fddt | jD }fddt | jD }fddt | jD }|s.q|d }|dgkrp|d }|rV|d }	||	k rUd| j| | _|d7 }||	k sEqt| j}	||	k rod| j| | _|d7 }||	k s_q|g krt| jd }|r|d }	||	k rd| j| | _|d7 }||	k sqt| j}	||	k rd| j| | _|d7 }||	k sq|d }|r|d }	||	k rd| j| | _d| j| |d  _|d7 }||	k sqt| j}	||	k rd| j| | _d| j| |d  _|d7 }||	k sq|D ]  fddt | jD } fd	dt | jD } fd
dt | jD }|s-q|d }|dgkru|d }|rY|d }	||	k rXd| j| | _|d7 }||	k sGqt| j}	||	k rtd| j| | _|d7 }||	k scq|g krt| jd }|r|d }	||	k rd| j| | _|d7 }||	k sqt| j}	||	k rd| j| | _|d7 }||	k sq|d }|r|d }	||	k rd| j| | _d| j|d  | _|d7 }||	k sqt| j}	||	k rd| j| | _d| j|d  | _|d7 }||	k sq| S )aN  Sets a cell's edges to True depending on whether the cell's
        coordinates overlap with the line's coordinates within a
        tolerance.

        Parameters
        ----------
        vertical : list
            List of detected vertical lines.
        horizontal : list
            List of detected horizontal lines.

        c                    s,   g | ]\}}t jd  |d   dr|qS r   r   r   r   rE   r4   rt   	joint_tolvr   r   r}         z#Table.set_edges.<locals>.<listcomp>c                    ,   g | ]\}}t jd  |d  dr|qS )rP   r   r   r   rE   jrt   r   r   r   r}     r   c                    r   r   r   r   r   rE   krt   r   r   r   r}     r   r   Tr   c                    ,   g | ]\}}t j d  |d dr|qS r   r   r   hr   r   r   r}     r   c                    s,   g | ]\}}t j d  |d  dr|qS r   r   r   r   r   r   r}     r   c                    r   )r   r   r   r   r   r   r   r   r}     r   )	r3   r   r   r   r   r;   r(   rk   rl   )
r   vertical
horizontalr   r4   r   r   JLKr   )r   r   r   r   	set_edges  s   




















zTable.set_edgesc                 C   s   t t| jD ]}d| j| d _d| j| t| jd  _qt t| jD ]}d| jd | _d| jt| jd  | _q&| S )z Sets table border edges to True.Tr   r   )	ranger;   r   r   r   r   r(   rk   rl   )r   r|   rz   r   r   r   
set_border  s   zTable.set_borderc                 C   s   | j D ]v}|D ]q}|j}|j}|j}|j}|jdkrq|jdkrP|s,|r,|r,|r,d|_q|s8|r8|r8|r8d|_q|sD|rD|rD|rDd|_q|sO|rO|rO|rOd|_q|jdkrm|ra|ra|sa|sad|_q|rl|rl|sl|sld|_q|jdv rxd|_d|_qq| S )zSets a cell's hspan or vspan attribute to True depending
        on whether the cell spans horizontally or vertically.
        r   rP   Tr   ry   )r   r   r(   rk   rl   ru   rm   rn   )r   r   r   r   r(   rk   rl   r   r   r   set_span  s<   




zTable.set_spanc                 K   s0   ddddd}| | | jj|fi | dS )zWrites Table to a comma-separated values (csv) file.

        For kwargs, check :meth:`pandas.DataFrame.to_csv`.

        Parameters
        ----------
        path : str
            Output filepath.

        utf-8Fr   )encodingindexheaderquotingN)r:   r   to_csv)r   pathkwargskwr   r   r   r   6  s   
zTable.to_csvc                 K   s^   ddi}| | | jjdi |}t|d}|| W d   dS 1 s(w   Y  dS )zWrites Table to a JSON file.

        For kwargs, check :meth:`pandas.DataFrame.to_json`.

        Parameters
        ----------
        path : str
            Output filepath.

        orientrecordswNr   )r:   r   to_jsonopenwrite)r   r   r   r   json_stringfr   r   r   r   E  s   
"zTable.to_jsonc                 K   sN   d| j  d| j dd}|| t|}| jj|fi | |  dS )zWrites Table to an Excel file.

        For kwargs, check :meth:`pandas.DataFrame.to_excel`.

        Parameters
        ----------
        path : str
            Output filepath.

        page--table-r   
sheet_namer   N)r   r   r:   pdExcelWriterr   to_excelsave)r   r   r   r   writerr   r   r   r   V  s   

zTable.to_excelc                 K   P   | j jdi |}t|ddd}|| W d   dS 1 s!w   Y  dS )zWrites Table to an HTML file.

        For kwargs, check :meth:`pandas.DataFrame.to_html`.

        Parameters
        ----------
        path : str
            Output filepath.

        r   r   r   Nr   )r   to_htmlr   r   )r   r   r   html_stringr   r   r   r   r   j     "zTable.to_htmlc                 K   r   )zWrites Table to a Markdown file.

        For kwargs, check :meth:`pandas.DataFrame.to_markdown`.

        Parameters
        ----------
        path : str
            Output filepath.

        r   r   r   Nr   )r   to_markdownr   r   )r   r   r   	md_stringr   r   r   r   r   y  r   zTable.to_markdownc                 K   s\   ddd}| | t|}d| j d| j }| jj||fi | |  |  dS )zWrites Table to sqlite database.

        For kwargs, check :meth:`pandas.DataFrame.to_sql`.

        Parameters
        ----------
        path : str
            Output filepath.

        replaceF)	if_existsr   r   r   N)	r:   sqlite3connectr   r   r   to_sqlcommitclose)r   r   r   r   conn
table_namer   r   r   	to_sqlite  s   


zTable.to_sqliteN)r   )r"   r#   r$   r%   r   r   r   rv   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rx   7  s(    


 
rx   c                   @   sd   e Zd ZdZdd Zdd Zdd Zdd	 Zed
d Z	e
dd ZdddZdd ZdddZdS )	TableListzDefines a list of camelot.core.Table objects. Each table can
    be accessed using its index.

    Attributes
    ----------
    n : int
        Number of tables in the list.

    c                 C   s
   || _ d S rB   _tables)r   tablesr   r   r   r        
zTableList.__init__c                 C   r   )Nr   z n=r   )r   r"   nrq   r   r   r   r     r   zTableList.__repr__c                 C   s
   t | jS rB   )r;   r   rq   r   r   r   __len__  r   zTableList.__len__c                 C   s
   | j | S rB   r   )r   r9   r   r   r   __getitem__  r   zTableList.__getitem__c                 C   s   t | d| S )Nto_)getattr)tabler   r   r   r   _format_func  s   zTableList._format_funcc                 C   s   t | S rB   )r;   rq   r   r   r   r     s   zTableList.nNc           
      K   sl   | d}| d}| d}| jD ]!}| d|j d|j | }tj||}| ||}	|	| qd S )Ndirnamerootext-page-r   )getr   r   r   osr   rs   r   )
r   r   r   r   r   r   r   filenamefilepath	to_formatr   r   r   _write_file  s   




zTableList._write_filec                 K   s   | d}| d}| d}| d}tjtj||d }tj|ddd.}| jD ]!}| d	|j d
|j	 | }	tj||	}
|
|
tj|
 q-W d    d S 1 sZw   Y  d S )Nr   r   r   r   .zipr   T
allowZip64r   r   )r   r   r   rs   r   zipfileZipFiler   r   r   r   basename)r   r   r   r   r   r   zipnamezr   r   r   r   r   r   _compress_dir  s   




"zTableList._compress_dircsvFc                 C   s  t j|}t j|}t j|\}}|rt }||||d}|dv r=| jdd|i| |r;| jdi | dS dS |dkrt j	||}	t
|	}
| jD ]}d|j d|j }|jj|
|dd qP|
  |rt j	t j||d	 }tj|d
dd}||	t j|	 W d   dS 1 sw   Y  dS dS |dkrt j	||}	| jD ]}||	 q|rt j	t j||d	 }tj|d
dd}||	t j|	 W d   dS 1 sw   Y  dS dS dS )aC  Exports the list of tables to specified file format.

        Parameters
        ----------
        path : str
            Output filepath.
        f : str
            File format. Can be csv, excel, html, json, markdown or sqlite.
        compress : bool
            Whether or not to add files to a ZIP archive.

        )r   r   r   r   )r   htmljsonmarkdownr   excelr   r   r   r   r   r   Tr   Nsqliter   )r   r   r   r   splitexttempfilemkdtempr   r   rs   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   compressr   r   r   r   r   r   r   r   r   r   r   r   r   r   export  sF   

"
"zTableList.exportrB   )r   F)r"   r#   r$   r%   r   r   r   r   rb   r   rv   r   r   r   r  r   r   r   r   r     s    




r   )r   r   r   r   	itertoolsr   operatorr   numpyr   pandasr   r   rR   objectr   r&   rc   rx   r   r   r   r   r   <module>   s$   7 P  g