
    ;3hO]                     l   d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZmZmZmZ ddlmZ ddlmZmZmZmZmZmZ dZdZdZdZeeeeef   f   Zed	eeef   f   Zerdd
lm Z  eefdedededefdZ!efdedededefdZ"dedededededefdZ#efdede$defdZ%efdede$defdZ&	 d0dedededefdZ'dede	e   fdZ(de	e   de	e	e      fd Z) G d! d"e*      Z+ G d# d$e+      Z, G d% d&e+      Z- G d' d(e*      Z.g d)Z/g d*Z0 G d+ d,e1      Z2 e2d       Z3e G d- d	             Z4 G d. d/e*      Z5y)1    N)	dataclass)
itemgetter)	TYPE_CHECKINGAnyDictListOptionalSetTupleTypeUnion   )utils)T_bboxT_numT_obj
T_obj_iter
T_obj_listT_point   TableSettings)Pageedgesx_tolerancey_tolerancereturnc                     g g d}| D ]  }||d      j                  |        t        j                  |d   d|      }t        j                  |d   d|      }||z   S )zs
    Given a list of edges, snap any within `tolerance` pixels of one another
    to their positional average.
    vhorientationr   x0r    top)appendr   snap_objects)r   r   r   by_orientatione	snapped_v	snapped_hs          P/var/www/html/audio-gradio/venv/lib/python3.12/site-packages/pdfplumber/table.py
snap_edgesr+      st     352,>N 3q'(//23 "">##6kJI"">##6{KIy      r!   	tolerancec                 B   |dk(  rd\  }}n|dk(  rd\  }}nt        d      t        t        | t        |                  }|d   g}|dd	 D ]P  }|d
   }||   ||   |z   k  r*||   ||   kD  s"t	        j
                  ||||         |d
<   @|j                  |       R |S )z
    Given a list of edges along the same infinite line, join those that
    are within `tolerance` pixels of one another.
    r    )r"   x1r   )r#   bottomzOrientation must be 'v' or 'h'keyr   r   N)
ValueErrorlistsortedr   r   resize_objectr$   )	r   r!   r-   min_propmax_propsorted_edgesjoinedr'   lasts	            r*   join_edge_groupr=   '   s     c'(		,(9::u*X*>?@L1oF!" bzX;4>I56{T(^+"00x8Mr
 MM! Mr,   snap_x_tolerancesnap_y_tolerancejoin_x_tolerancejoin_y_tolerancec                    dt         dt        t        t        f   fd}|dkD  s|dkD  rt	        | ||      } t        | |      }t        j                  ||      }fd|D        }t        t        j                  |       } | S )z|
    Using the `snap_edges` and `join_edge_group` methods above,
    merge a list of edges into a more "seamless" list.
    edger   c                 .    | d   dk(  rd| d   fS d| d   fS )Nr!   r    r#   r   r"    )rC   s    r*   	get_groupzmerge_edges.<locals>.get_groupP   s-    #%e%%d$$r,   r   r1   c              3   X   K   | ]!  \  }}t        ||d    |d    dk(  rn       # yw)r   r    N)r=   ).0kitemsr@   rA   s      r*   	<genexpr>zmerge_edges.<locals>.<genexpr>[   s=       Au 	1Q4adck*?O	
s   '*)
r   r   strr   r+   r6   	itertoolsgroupbyr5   chain)	r   r>   r?   r@   rA   rF   _sortededge_groupsedge_gens	      ``    r*   merge_edgesrS   D   s    % %%U
"3 % !/!35"24DEU	*G##G;K $	H (+,ELr,   wordsword_thresholdc           
         t        j                  | t        d      d      }t        fd|      }t	        t        t         j                  |            }t        |      dk(  rg S t        t        t        d      |            }t        t        t        d      |            }g }|D ])  }||||d   |d   ||z
  dd|||d	   |d	   ||z
  ddgz  }+ |S )
zi
    Find (imaginary) horizontal lines that connect the tops
    of at least `word_threshold` words.
    r#   r   c                      t        |       k\  S NlenxrU   s    r*   <lambda>z"words_to_edges_h.<locals>.<lambda>m       c!f&> r,   r   r"   r/   r    )r"   r/   r#   r0   widthr!   r0   )
r   cluster_objectsr   filterr5   mapobjects_to_rectrZ   minmax)	rT   rU   by_toplarge_clustersrectsmin_x0max_x1r   rs	    `       r*   words_to_edges_hrl   e   s     ""5*U*;Q?F>GNU**N;<E
5zQ	Z%u-.FZ%u-.FE 
 xE(&" {H+&"
 	

0 Lr,   c           
      r   t        j                  | t        d      d      }t        j                  | t        d      d      }dt        dt        fd}t        j                  | |d      }||z   |z   }t        |d       }t        fd	|      }t        t        t         j                  |            }	g }
|	D ]*  t        fd
|
D              }|r|
j                         , t        |
      dk(  rg S t        t         j                  |
      }t        t        |t        d                  }t        t        t        d      |            }t        t        t        d      |            }t        t        t        d      |            }|D cg c]  }|d   |d   ||||z
  dd c}||||||z
  ddgz   S c c}w )zy
    Find (imaginary) vertical lines that connect the left, right, or
    center of at least `word_threshold` words.
    r"   r   r/   wordr   c                 0    t        | d   | d   z         dz  S )Nr"   r/      )float)rn   s    r*   
get_centerz$words_to_edges_v.<locals>.get_center   s    T$Z$t*,-11r,   c                     t        |        S rX   rY   )r\   s    r*   r]   z"words_to_edges_v.<locals>.<lambda>   s    c!fW r,   r1   c                      t        |       k\  S rX   rY   r[   s    r*   r]   z"words_to_edges_v.<locals>.<lambda>   r^   r,   c              3   J   K   | ]  }t        j                  |        y wrX   )r   get_bbox_overlap)rH   cbboxs     r*   rK   z#words_to_edges_v.<locals>.<genexpr>   s     P!e,,T15Ps    #r   r#   r0   r   r"   r/   r#   r0   heightr!   )r   r`   r   r   r   r6   ra   r5   rb   objects_to_bboxanyr$   rZ   bbox_to_rectre   rd   )rT   rU   by_x0by_x1rr   	by_centerclusterssorted_clustersrg   bboxescondensed_bboxesoverlapcondensed_rectssorted_rectsrj   min_top
max_bottombrx   s    `                @r*   words_to_edges_vr      s    !!%D)91=E!!%D)91=E2 25 2 %%eZ;Iu}y(H X+<=O>PN #e++^<=F &( *P?OPP##D)*
 !	%,,.>?OJt4DEFLZ%|45F#j'67GSH-|<=J 
  D'D'  7*	

   7*	
		  
s   
F4c           	         i }dD cg c]  t        t        fd|              c}\  }}t        |t        dd            D ]  }t        |t        dd            D ]  }|d   |d   |z   k  s|d   |d   |z
  k\  s!|d   |d   |z
  k\  s0|d   |d   |z   k  s?|d   |d   f}	|	|vrg g d||	<   ||	   d   j	                  |       ||	   d	   j	                  |         |S c c}w )
zi
    Given a list of edges, return the points at which they intersect
    within `tolerance` pixels.
    r   c                     | d   k(  S )Nr!   rE   )r\   os    r*   r]   z(edges_to_intersections.<locals>.<lambda>   s    a.!3 r,   r"   r#   r1   r0   r/   r   r    )r5   ra   r6   r   r$   )
r   r   r   intersectionsr   v_edgesh_edgesr   r    vertexs
       `     r*   edges_to_intersectionsr      s'    &(MFPABV3U;<GW GD%!89 5Zt%<= 	5A5ah45x[QuX%;<tW4;!67tW4;!67D'1U8,.242,>M&)f%c*11!4f%c*11!4	55 !s   C(r   c                 L    dt         dt         dt        f fdt        t         j	                                     t              dt        t            dt        dt        t           f fdfdt        t                    D        }t        t        d	|            S )
a8  
    Given a list of points (`intersections`), return all rectangular "cells"
    that those points describe.

    `intersections` should be a dictionary with (x0, top) tuples as keys,
    and a list of edge objects as values. The edge objects should correspond
    to the edges that touch the intersection.
    p1p2r   c                 4   dt         dt        t           fd}| d   |d   k(  r5 ||    d         j                   ||   d               }t	        |      ry| d   |d   k(  r5 ||    d         j                   ||   d               }t	        |      ryy	)
Nr   r   c                 H    t        t        t        j                  |             S rX   )setrb   r   obj_to_bbox)r   s    r*   edges_to_setzCintersections_to_cells.<locals>.edge_connects.<locals>.edges_to_set   s    s5,,e455r,   r   r   Tr   r    F)r   r
   r   intersectionrZ   )r   r   r   commonr   s       r*   edge_connectsz-intersections_to_cells.<locals>.edge_connects   s    	6
 	6s6{ 	6 a5BqE>!-"3C"89FF]2.s34F 6{a5BqE>!-"3C"89FF]2.s34F 6{r,   pointsic                 f   |dz
  k(  ry | |   }| |dz   d  }|D cg c]  }|d   |d   k(  s| }}|D cg c]  }|d   |d   k(  s| }}|D ]U  } 
||      s|D ]D  } 
||      s|d   |d   f}	|	v s 
|	|      s& 
|	|      s0|d   |d   |	d   |	d   fc c S  W y c c}w c c}w )Nr   r   rE   )r   r   ptrestr\   belowrightbelow_ptright_ptbottom_rightr   r   n_pointss             r*   find_smallest_cellz2intersections_to_cells.<locals>.find_smallest_cell
  s   1AYa!eg 2qAaDBqEM22 2qAaDBqEM22 	LH X.! L$R2 (Xa[9 "]2%lH=%lH= qE2a5,q/<?KKL		L" ' 32s   B)B)B.B.c              3   0   K   | ]  } |        y wrX   rE   )rH   r   r   r   s     r*   rK   z)intersections_to_cells.<locals>.<genexpr>%  s     J!"61-Js   N)r   boolr5   r6   keysrZ   r   intr	   r   rangera   )r   cell_genr   r   r   r   s   ` @@@@r*   intersections_to_cellsr      s    ' w 4 & &++-./F6{H4= S Xf=M 6 KuS[7IJHtX&''r,   cellsc                 N   dt         dt        t        t        t        t        f   fd}t        |       }t	               g }g }t        |      rt        |      }t        |      D ]  } ||      }t        |      dk(  r1t	        |      z  |j                  |       |j                  |       Jt        fd|D              }|dkD  sdt	        |      z  |j                  |       |j                  |        t        |      |k(  r:|j                  t        |             j                          |j                          t        |      rt        |      r|j                  t        |             t        |d       }	|	D 
cg c]  }
t        |
      dkD  s|
 }}
|S c c}
w )	z
    Given a list of bounding boxes (`cells`), return a list of tables that
    hold those cells most simply (and contiguously).
    rx   r   c                 ,    | \  }}}}||f||f||f||ffS rX   rE   )rx   r"   r#   r/   r0   s        r*   bbox_to_cornersz(cells_to_tables.<locals>.bbox_to_corners/  s/    "CVS	B<"cRLAAr,   r   c              3   &   K   | ]  }|v  
 y wrX   rE   )rH   rw   current_cornerss     r*   rK   z"cells_to_tables.<locals>.<genexpr>H  s     "NA1#7"Ns   c                 &    t        d | D              S )Nc              3   0   K   | ]  }|d    |d   f  yw)r   r   NrE   )rH   rw   s     r*   rK   z4cells_to_tables.<locals>.<lambda>.<locals>.<genexpr>a  s     .G!ad|.Gs   )rd   )ts    r*   r]   z!cells_to_tables.<locals>.<lambda>a  s    3.GQ.G+G r,   r1   r   )r   r   r   r5   r   rZ   r$   removesumclearr6   )r   r   remaining_cellscurrent_cellstablesinitial_cell_countcellcell_cornerscorner_countrP   r   filteredr   s               @r*   cells_to_tablesr   )  s   Bf Bw'/Q)R B 5kO
 %(EO"$MF
o
 /) 	1D*40L=!Q&3|#44$$T*&&t,  #"N"NN  !##s<'88O!((.#**40#	1( }!33MM$}-.!!#!5 o
> =d=)* V!GHG"1ac!fqj1H1O 2s   F"F"c                   $    e Zd Zdeee      fdZy)	CellGroupr   c                 X   || _         t        t        t        d      t	        d |                  t        t        t        d      t	        d |                  t        t        t        d      t	        d |                  t        t        t        d      t	        d |                  f| _        y Nr   r   rp   r   )r   rd   rb   r   ra   re   rx   )selfr   s     r*   __init__zCellGroup.__init__g  sz    
JqM6$#678JqM6$#678JqM6$#678JqM6$#678	
	r,   N)__name__
__module____qualname__r   r	   r   r   rE   r,   r*   r   r   f  s    
d8F#34 
r,   r   c                       e Zd Zy)RowNr   r   r   rE   r,   r*   r   r   q      r,   r   c                       e Zd Zy)ColumnNr   rE   r,   r*   r   r   u  r   r,   r   c                       e Zd Zdddee   fdZedefd       Zdee	   dee	   fdZ
edee	   fd	       Zedee	   fd
       Zdedeeee         fdZy)Tablepager   r   c                      || _         || _        y rX   )r   r   )r   r   r   s      r*   r   zTable.__init__z  s    	
r,   r   c           
         | j                   }t        t        t        d      |            t        t        t        d      |            t	        t        t        d      |            t	        t        t        d      |            fS r   )r   rd   rb   r   re   )r   rw   s     r*   rx   z
Table.bbox~  sa    JJJqM1%&JqM1%&JqM1%&JqM1%&	
 	
r,   kindc                    |t         u rdnd}t        |       }t        | j                  t	        ||            }t        t        t        t        t	        |      | j                                          }t        j                  |t	        |            }g }|D ]N  \  }}	|	D 
ci c]  }
|
|   |

 }}
 ||D cg c]  }|j                  |       c}      }|j                  |       P |S c c}
w c c}w )Nr   r   r1   )r   r   r6   r   r   r5   r   rb   rM   rN   getr$   )r   r   axisantiaxisrP   xsgroupedrowsy	row_cellsr   xdictr\   rows                 r*   _get_rows_or_colszTable._get_rows_or_cols  s    CKqQ4x= Hd)CD &SD!14::>?@A ##GZ-AB# 	LAy2;<$T$Z%<E<b1		!12CKK	  =1s   !C*6C/
c                 ,    | j                  t              S rX   )r   r   r   s    r*   r   z
Table.rows  s    %%c**r,   c                 ,    | j                  t              S rX   )r   r   r   s    r*   columnszTable.columns  s    %%f--r,   kwargsc           	         | j                   j                  }g }dt        dt        dt        fd}| j
                  D ]  }g }|D cg c]  } |||j                        s| }}|j                  D ]z  }	|	d }
nb|D cg c]  } |||	      s| }}t        |      r<d|v r!|	d   |	d   z
  |d<   |	d	   |	d
   z
  |d<   |	|d<   t        j                  |fi |}
nd}
|j                  |
       | |j                  |        |S c c}w c c}w )Ncharrx   r   c                     | d   | d   z   dz  }| d   | d   z   dz  }|\  }}}}t        ||k\  xr ||k  xr ||k\  xr ||k        S )Nr#   r0   rp   r"   r/   )r   )r   rx   v_midh_midr"   r#   r/   r0   s           r*   char_in_bboxz#Table.extract.<locals>.char_in_bbox  sm    %[4>1Q6E$Z$t*,1E"&BR"V52:VESLVuv~ r,   layoutrp   r   layout_widthr   r   layout_heightlayout_bbox )r   charsr   r   r   r   rx   r   rZ   r   extract_textr$   )r   r   r   	table_arrr   r   arrr   	row_charsr   	cell_text
cell_charss               r*   extractzTable.extract  s<   				u 	F 	t 	 99 	"CC*/P$<chh3OPIP		 &< $I *3"!%l46N"J " :#v-59!WtAw5FF>26:1gQ6GF?348F=1$)$6$6z$LV$L	$&	

9%!&" S!+	". + Q"s   DD;D
DN)r   r   r   r   r   r   propertyrx   r   r   r   r   r   r   r	   rL   r   rE   r,   r*   r   r   y  s    V DL  
f 
 
d9o $y/ * +d9o + + .i . .$ $T(3--@(A $r,   r   )lineslines_stricttextexplicit)snap_tolerancer>   r?   join_tolerancer@   rA   edge_min_lengthmin_words_verticalmin_words_horizontalintersection_toleranceintersection_x_toleranceintersection_y_tolerancec                       e Zd Zy)
UnsetFloatNr   rE   r,   r*   r  r    r   r,   r  c                   `   e Zd ZU dZeed<   dZeed<   dZee	e
eef         ed<   dZee	e
eef         ed<   eZeed<   eZeed<   eZeed	<   eZeed
<   eZeed<   eZeed<   dZeed<   eZeed<   eZeed<   dZeed<   eZeed<   eZeed<   dZ ee!ee"f      ed<   ddZ#e$dee%   dd fd       Z&y)r   r  vertical_strategyhorizontal_strategyNexplicit_vertical_linesexplicit_horizontal_linesr  r>   r?   r  r@   rA   r   r  r  r	  r
  r  r  text_settingsr   c           	      $   t         D ]$  }t        | |      xs ddk  st        d| d       dD ]=  }t        | |dz         }|t        vst        | ddj	                  t               d       | j
                  i | _        d
D ]:  }|| j
                  vs| j
                  j                  dd      | j
                  |<   < d| j
                  v r| j
                  d= dD ]/  \  }}t        | |      t        u st        | |t        | |             1 y	)a  Clean up user-provided table settings.

        Validates that the table settings provided consists of acceptable values and
        returns a cleaned up version. The cleaned up version fills out the missing
        values with the default values in the provided settings.

        TODO: Can be further used to validate that the values are of the correct
            type. For example, raising a value error when a non-boolean input is
            provided for the key ``keep_blank_chars``.

        :param table_settings: User-provided table settings.
        :returns: A cleaned up version of the user-provided table settings.
        :raises ValueError: When an unrecognised key is provided.
        r   zTable setting 'z' cannot be negative)
horizontalvertical	_strategyz_strategy must be one of{,}N)r   r   r-   r   ))r>   r  )r?   r  )r@   r  )rA   r  )r  r
  )r  r
  )	NON_NEGATIVE_SETTINGSgetattrr4   TABLE_STRATEGIESjoinr  r   UNSETsetattr)r   settingr!   strategyattrfallbacks         r*   __post_init__zTableSettings.__post_init__  sH     - 	RGg&+!q0 ?7);O!PQQ	R 6 	Kt[;%>?H// "m $"234B8 	 %!#D 3 	RD4---+/+=+=+A+A+q+Q""4(	R $,,,"";/
 		=ND( tT"e+dGD($;<		=r,   settingsc                     | |        S t        ||       r|S t        |t              r?i }i }|j                         D ]  \  }}|d d dk(  r	|||dd  <   |||<    ||d<    | di |S t        d|       )N   text_r  zCannot resolve settings: rE   )
isinstancedictrJ   r4   )clsr&  core_settingsr  rI   r   s         r*   resolvezTableSettings.resolve+  s    5L#&O$'MM ( )1Ra5G#+,M!AB%('(M!$	)
 .;M/*'''8
CDDr,   )r   N)'r   r   r   r  rL   __annotations__r  r  r	   r   r   r   r   r  DEFAULT_SNAP_TOLERANCEr  r  r>   r?   DEFAULT_JOIN_TOLERANCEr  r@   rA   r  DEFAULT_MIN_WORDS_VERTICALr  r   DEFAULT_MIN_WORDS_HORIZONTALr	  r
  r  r  r  r   r   r%  classmethodT_table_settingsr.  rE   r,   r*   r   r     s   $s$&&CGXd5+>&?@GEIxU5%<-@(ABI2NE2#e##e#2NE2#e##e#OU88 <#<$%E%&+e+&+e+.2M8DcN+21=f Ex(89 Eo E Er,   c                   4    e Zd ZdZd	dddee   fdZdefdZy)
TableFindera0  
    Given a PDF page, find plausible table structures.

    Largely borrowed from Anssi Nurminen's master's thesis:
    http://dspace.cc.tut.fi/dpub/bitstream/handle/123456789/21520/Nurminen.pdf?sequence=3

    ... and inspired by Tabula:
    https://github.com/tabulapdf/tabula-extractor/issues/16
    Nr   r   r&  c                    || _         t        j                  |      | _        | j	                         | _        t        | j
                  | j                  j                  | j                  j                        | _	        t        | j                        | _        t        | j                        D cg c]  }t        | j                   |       c}| _        y c c}w rX   )r   r   r.  r&  	get_edgesr   r   r  r  r   r   r   r   r   r   )r   r   r&  
cell_groups       r*   r   zTableFinder.__init__J  s    	%--h7^^%
3JJMM22MM22

 ,D,>,>?
;J4::;V
-7E$))Z(
 
s   ,Cr   c           
         | j                   }dD ]I  }t        ||dz         }|dk(  st        |d|z   dz         }t        |      dk  s9t        d| d| d	       |j                  }|j
                  }|d
k(  s|d
k(  r* | j                  j                  di |j                  xs i }g }|j                  xs g D ]  }	t        |	t              r5t        j                  |	      D ]  }
|
d   dk(  s|j                  |
        H|j                  |	|	| j                  j                  d   | j                  j                  d   | j                  j                  d   | j                  j                  d   z
  dd        |dk(  r+t        j                   | j                  j"                  d      }nV|dk(  r-t        j                   | j                  j"                  dd      }n$|d
k(  rt%        |j&                        }n|dk(  rg }|z   }g }|j(                  xs g D ]  }	t        |	t              r5t        j                  |	      D ]  }
|
d   dk(  s|j                  |
        H|j                  | j                  j                  d   | j                  j                  d   | j                  j                  d   | j                  j                  d   z
  |	|	dd        |dk(  r+t        j                   | j                  j"                  d      }nV|dk(  r-t        j                   | j                  j"                  dd      }n$|d
k(  rt+        |j,                        }n|dk(  rg }|z   }t/        |      t/        |      z   }t1        ||j2                  |j4                  |j6                  |j8                        }t        j                   ||j:                        S )N)r  r  r  r  	explicit__linesrp   zIf z"_strategy == 'explicit', explicit_zD_lines must be specified as a list/tuple of two or more floats/ints.r  r!   r   r   r   ry   r  r  line)	edge_type)rU   r    r   )r"   r/   r_   r#   r0   r!   )r>   r?   r@   rA   )
min_lengthrE   )r&  r  rZ   r4   r  r  r   extract_wordsr  r  r*  r+  r   obj_to_edgesr$   rx   filter_edgesr   r   r  r  rl   r	  r5   rS   r>   r?   r@   rA   r  )r   r&  r!   r"  r  v_strath_stratrT   
v_explicitdescr'   v_baser   
h_explicith_baser    r   s                    r*   r9  zTableFinder.get_edgesX  s   ==5 
	Kx{)BCH:%+*Ch*NOu:>$k] +$$/= 1'( 
	 ,,..f6 1+DII++Mx/E/E/KME
44: 	D$%++D1 -A'3."))!,- !!""#yy~~a0"&)).."3"&)).."3diinnQ6G"G'*		" g''		=F&''		OF%eH<W<WXF
"FZ
66<" 	D$%++D1 -A'3."))!,- !!"iinnQ/"iinnQ/!%!2TYY^^A5F!F#"&'*		" g''		=F&''		OF%h&C&CF 
"FZQ$q'!%66%66%66%66
 !!%H4L4LMMr,   rX   )	r   r   r   __doc__r	   r5  r   r   r9  rE   r,   r*   r7  r7  ?  s0    
V 
x8H/I 
[N: [Nr,   r7  )r   r   )6rM   dataclassesr   operatorr   typingr   r   r   r   r	   r
   r   r   r   r   r   _typingr   r   r   r   r   r   r0  r1  r2  r3  rL   T_intersectionsr5  r   r   r+   r=   rS   r   rl   r   r   r   r   objectr   r   r   r   r  r  rq   r  r  r   r7  rE   r,   r*   <module>rR     s@    !  T T T  J J     wS*_ 556$sCx.89 
 0/!!! ! 	!& =S$'49:  	
  D .J(('*((X .H<<'*<<@ EF$)<A6<(/ <(d6l <(~:4< :Df,> :z
 
	) 		Y 	PF Pf A   	 	 	1 WE WE WEttN& tNr,   