
    ;3he              	       
   d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZ d dlmZm Z  d d	l!m"Z" d d
l#m$Z$ ddl%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/m0Z0 ddl1m2Z2m3Z3m4Z4m5Z5 ddl&m6Z6m7Z7m8Z8 ddl9m:Z:m;Z; ddl<m=Z=  ej|                  d      Z? e@g d      ZAerddlBmCZC ddlDmEZE ddddddZFdeGdeHfd ZId!eed"f   deeeeeJeKf   d"f      eeH   f   fd#ZLd!edeeeeeJeKf   d"f      eeH   f   fd$ZMd%eeHef   deeHef   fd&ZN G d' d(e      ZOd9d)e(d*e)de(fd+ZPd)e(d,e)de(fd-ZQ G d. d/e-      ZR G d0 d1eR      ZSd2e(d3e(ddfd4ZT G d5 d6eS      ZU G d7 d8eS      ZVy):    N)	lru_cache)
TYPE_CHECKINGAnyCallableDict	GeneratorListOptionalPatternTupleUnion)	normalize)warn)PDFPageAggregator)LTCharLTComponentLTContainerLTCurveLTItemLTPageLTTextContainer)PDFPageInterpreter	PDFStackT)PDFPage)	PSLiteral   )utils)T_bboxT_numT_obj
T_obj_list)	Container)PDFStructTreeStructTreeMissing)T_table_settingsTableTableFinderTableSettings)decode_textresolve_allresolve_and_decode)MalformedPDFExceptionPdfminerException)TextMapz^LT)advheight	linewidthptssizesrcsizewidthx0x1y0y1bitsmatrixuprightfontnametext	imagemask
colorspaceevenoddfillnon_stroking_colorstrokestroking_colorstreamnamemcidtag)	PageImage)PDFzSimSun,RegularzSimHei,RegularzSimKai,RegularzSimFang,RegularzSimLi,Regular)s   s   s   _GB2312s   _GB2312s   r=   returnc                     d| v r| j                  d      dz   }| d | | |d  }}nd| }}t        j                  |t        |      dd       }t        |      dd |z   S )N   +r          )indexCP936_FONTNAMESgetstr)r=   split_atprefixsuffix
suffix_news        O/var/www/html/audio-gradio/venv/lib/python3.12/site-packages/pdfplumber/page.pyfix_fontname_bytesr[   \   sp    x>>$'!+!)8,hxy.Ah $$VS[2->?Jv;qz))rO   color.c                 r    t        | d   t              r!| d d xs d t        | d   j                        fS | d fS )NrQ   )
isinstancer   r)   rG   )r\   s    rZ   separate_patternr_   g   s?     %)Y'cr
"d[r%@@@d{rO   c                     | yt        | t              r| }t        |      S t        | t              rt        |       }t        |      S | f}t        |      S )N)NN)r^   tuplelistr_   )r\   	tuplefieds     rZ   normalize_colorrd   p   s^     }	E5	!	
 I&&	 
E4	 %L	 I&& H	I&&rO   kwargsc           	          | j                         D ci c]#  \  }}|t        |t              rt        |      n|% c}}S c c}}w N)itemsr^   rb   ra   )re   keyvalues      rZ   tuplify_list_kwargsrk   ~   sB     !,,.C 	j5eEl5@  s   (A c                        e Zd ZU dZdZee   ed<   dZee	   ed<   dde
dee   ddfdZdd	Zdd
Zdef fdZd fdZd fdZ xZS )"PDFPageAggregatorWithMarkedContentzZExtract layout from a specific page, adding marked-content IDs to
    objects where found.Ncur_mcidcur_tagrI   propsrL   c                     t        |j                        | _        t        |t              rd|v r|d   | _        yd| _        y)z5Handle beginning of tag, setting current MCID if any.MCIDN)r)   rG   ro   r^   dictrn   )selfrI   rp   s      rZ   	begin_tagz,PDFPageAggregatorWithMarkedContent.begin_tag   s4    "388,eT"v!&MDM DMrO   c                      d| _         d| _        y)z/Handle beginning of tag, clearing current MCID.N)ro   rn   rt   s    rZ   end_tagz*PDFPageAggregatorWithMarkedContent.end_tag   s    rO   c                     | j                   j                  r<| j                   j                  d   }| j                  |_        | j                  |_        yy)z^Add current MCID to what we hope to be the most recent object created
        by pdfminer.six.rQ   N)cur_item_objsrn   rH   ro   rI   )rt   cur_objs     rZ   tag_cur_itemz/PDFPageAggregatorWithMarkedContent.tag_cur_item   s@     ==mm))"-G==GL,,GK rO   c                 F    t        |   |i |}| j                          |S )z;Hook for rendering characters, adding the `mcid` attribute.)superrender_charr}   )rt   argsre   r/   	__class__s       rZ   r   z.PDFPageAggregatorWithMarkedContent.render_char   s(    g!4262
rO   c                 D    t        |   |i | | j                          y)z7Hook for rendering images, adding the `mcid` attribute.N)r   render_imager}   rt   r   re   r   s      rZ   r   z/PDFPageAggregatorWithMarkedContent.render_image   s!    d-f-rO   c                 D    t        |   |i | | j                          y)zAHook for rendering lines and curves, adding the `mcid` attribute.N)r   
paint_pathr}   r   s      rZ   r   z-PDFPageAggregatorWithMarkedContent.paint_path   s!    D+F+rO   rg   rL   N)__name__
__module____qualname____doc__rn   r
   int__annotations__ro   rU   r   r   ru   rx   r}   floatr   r   r   __classcell__r   s   @rZ   rm   rm      sj     #Hhsm"!GXc]!!Y !x	/B !d !
'e 
 rO   rm   box_rawrotationc                     t        d | D              st        d|        t        | d   | d   f      \  }}t        | d   | d   f      \  }}|dv r||||fS ||||fS )Nc              3   P   K   | ]  }t        |t        j                           y wrg   )r^   numbersNumber.0xs     rZ   	<genexpr>z!_normalize_box.<locals>.<genexpr>   s     >z!W^^,>s   $&z0Bounding box contains non-number coordinate(s): r   rP   r      )Z   i  )allr,   sorted)r   r   r6   r7   r8   r9   s         rZ   _normalize_boxr      s    
 >g>>#>wiH
 	
 WQZ,-FBWQZ,-FB9BBBBrO   	mb_heightc                 (    | \  }}}}|||z
  |||z
  fS rg    )r   r   r6   r8   r7   r9   s         rZ   _invert_boxr      s&    NBB	BIN33rO   c                      e Zd ZU ej                  dgz   Zee   ed<   dZe	ed<   dZ
	 dEddded	ed
efdZdFdZedefd       Zedefd       Zedeeeef      fd       Zedefd       Zedefd       Zedefd       Zedeeef   fd       Zdeeef   deeef   fdZdedefdZ dee!   de"eddf   fdZ#deeef   fdZ$	 dGde%e&   de'fdZ(	 dGde%e&   dee)   fdZ*	 dGde%e&   de%e)   fdZ+	 dGde%e&   deeee%e            fd Z,	 dGde%e&   de%eee%e            fd!Z-d"ede.fd#Z/	 	 	 	 	 dHd$e0ee1e   f   d%e	d&e	d'ed(e	d)e	d"edeeeef      fd*Z2d"edefd+Z3d"edefd,Z4d"edefd-Z5	 dId.e	d(e	d"edefd/Z6	 dJd0e7d1e	d2e	dd3fd4Z8	 dJd0e7d1e	d2e	dd3fd5Z9	 dJd0e7d1e	d2e	dd3fd6Z:d7e;ege	f   dd8fd9Z<d"edd8fd:Z=	 	 	 	 	 dKd;e%e0ee>f      d<e%e0ee>f      d=e%e0ee>f      d>e	d?e	dd@fdAZ?dGdBe%ee      deeef   fdCZ@defdDZAy)LPage_layoutcached_propertiesTis_originalNpdfrK   page_objpage_numberinitial_doctopc                 
   || _         | | _        | _        || _        || _        ddt
        dt        dt        ffd} |dd      }|dz  | _        t         |d      | j                        }|d	   |d
   z
  }t        ||      | _
        dj                  v r,t        t         |d      | j                        |      | _        n| j                  | _        | j                  | _         t               | j                        | _        y )Nri   defaultrL   c                 X    t        j                  j                  |             }||S |S rg   )r*   attrsrT   )ri   r   rj   r   s      rZ   get_attrzPage.__init__.<locals>.get_attr   s+     2 23 78E#m766rO   Rotater   ih  MediaBoxr   r   CropBoxrg   )r   	root_pager   r   r   rU   r   r   r   r   mediaboxr   cropboxbboxr   _get_textmapget_textmap)	rt   r   r   r   r   r   	_rotationmb_rawr   s	     `      rZ   __init__zPage.__init__   s      &,	7# 	7 	7s 	7 Xq)	!C 4dmmD1Iq	)	#FI6&&x	2DMMBIDL  ==DL MM	 '9;t'8'89rO   rL   c                 X    | j                          | j                  j                          y rg   )flush_cacher   cache_clearrw   s    rZ   closez
Page.close   s     $$&rO   c                 @    | j                   d   | j                   d   z
  S )NrP   r   r   rw   s    rZ   r5   z
Page.width      yy|diil**rO   c                 @    | j                   d   | j                   d   z
  S )Nr   r   r   rw   s    rZ   r0   zPage.height  r   rO   c                     	 t        | j                  |       D cg c]  }|j                          c}S c c}w # t        $ r g cY S w xY w)z-Return the structure tree for a page, if any.)r#   r   to_dictr$   )rt   elems     rZ   structure_treezPage.structure_tree  s@    	/<TXXt/LMtDLLNMMM  	I	s   9 49 9 AAc                    t        | d      r| j                  S t        | j                  j                  | j
                  | j                  j                        }t        | j                  j                  |      }	 |j                  | j                         |j                         | _        | j                  S # t        $ r}t        |      d }~ww xY w)Nr   )pagenolaparams)hasattrr   rm   r   rsrcmgrr   r   r   process_pager   	Exceptionr-   
get_result)rt   deviceinterpreteres       rZ   layoutzPage.layout  s    4#<<3HH##XX&&

 ))9)96B	'$$T]]3  &002||  	'#A&&	's   :B6 6	C?C

Cc                 F    dt         t        t        f   dt        dt         t        t        f   f fddt        dt        f fd}t	         j
                  j                        xs g }t        t        ||            }t         t              r j                  |      S |S )NptrrL   c                     |dz  }t        |      D ].  }| \  }}||dz  k(  rj                  nj                  }|||z
  f} 0 | S )Nr   rP   )ranger5   r0   )r   r   turnsir   ycomprt   s          rZ   rotate_pointz!Page.annots.<locals>.rotate_point&  sU    GE5\ %1%&%!)^tzz$(_% IrO   annotc                    | d   \  }}}} ||fj                         } ||fj                         }j                  j                  }t        t	        g ||      |      \  }}	}
}| j                  di       }|j                  d      | j                  d      | j                  d      d}|j                         D ]  \  }}|		 |j                  d      ||<    j                  d|||z
  |
||	z
  j                  |	z   |	||
|z
  ||	z
  d}|j                  |       d| v r| d<   | |d<   |S # t        $ rQ 	 |j                  d      ||<   n8# t        $ r, j                  j                  r t        d	| d
| d       Y nw xY wY w xY w)NRectAURITContents)urititlecontentszutf-8zutf-16zCould not decode z of annotation. z will be missing.r   )r   object_typer6   r8   r7   r9   doctoptopbottomr5   r0   Pdata)r   r   r0   r   r   rT   rh   decodeUnicodeDecodeErrorr   raise_unicode_errorsr   r   r   update)r   _a_b_c_dpt0pt1rhr6   r   r7   r   aextraskvparsedr   rt   s                    rZ   parsezPage.annots.<locals>.parse.  s   "6]NBBBx7CBx7C&&B"-n\s\S\.JB"OBR		#r"AuuU|3!IIj1F
  1=$%HHW$5q	   $//&6k3h--3 b 3,F MM&! e|!c
"F6NM= . 	()(:F1I1 #xx<< % "3A3 7$$%3&7!9	s6   
D22	F<EF2FFFFF)r   r   r   r    r*   r   annotsrb   mapr^   CroppedPage_crop_fn)rt   r  rawr   r   s   `   @rZ   r  zPage.annots$  s    	U5%<0 	S 	U5%<=P 	/	 /	5 /	b $--../52c%o&dK(==((MrO   c                 L    | j                   D cg c]
  }|d   	| c}S c c}w )Nr   )r  )rt   r   s     rZ   
hyperlinkszPage.hyperlinksf  s#    ;;?a!E(*>???s   
!!c                 t    t        | d      r| j                  S | j                         | _        | j                  S N_objects)r   r  parse_objectsrw   s    rZ   objectszPage.objectsj  s0    4$== /3/A/A/C}}rO   r   c                 p    | j                   d   |d   z   | j                   d   | j                  z   |d   z
  fS )Nr   r   )r   r0   )rt   r   s     rZ   point2coordzPage.point2coordq  s:    a 2a5($--*:T[[*H2a5*PQQrO   objc           
         t        j                  t        d|j                  j                        j                         }dt        t        t        f   dt        t        t        t        f      fd}t        t        d t        ||j                  j                                           }||d<   | j                  |d<   dD ]1  }t!        ||      st#        t%        ||      j&                        ||<   3 dD ]!  \  }}||v st)        ||         \  ||<   ||<   # t+        |t,        t.        f      rK|j1                         }| j2                  j4                   t7        | j2                  j4                  |      n||d	<   t+        |t,              rm|j8                  }	t)        |	j:                        \  |d
<   |d<   t)        |	j<                        \  |d<   |d<   t+        |d   t>              rtA        |d         |d<   n~t+        |tB        f      rmtE        t        | jF                  |d               |d<   |jH                  D 
cg c]  ^}
}|
gt        | jF                  |        c}}
|d<   |jJ                  |d<   | jL                  d d \  }}d|v rE| jN                  |d   z
  |z   |d<   | jN                  |d   z
  |z   |d<   | jP                  |d   z   |d<   d|v r|dk7  r|d   |z   |d<   |d   |z   |d<   |S c c}}
w )N itemrL   c                 <    | \  }}|t         v rt        |      }||fS y rg   )	ALL_ATTRSr*   )r  r   r   ress       rZ   process_attrz)Page.process_object.<locals>.process_attrx  s'    DAqI~!!n3xrO   r   r   )ncsscs))rE   stroking_pattern)rC   non_stroking_patternr>   rE   r  rC   r  r=   r2   pathdashrP   r8   r9   r   r   r   r6   r   r7   ))resublt_patr   r   lowerr   rU   r   r
   rs   filterr  __dict__rh   r   r   r+   getattrrG   rd   r^   r   r   get_textr   unicode_normnormalize_unicodegraphicstatescolorncolorbytesr[   r   rb   r  original_pathdashing_styler   r0   r   )rt   r  kindr  attrcs
color_attrpattern_attrr>   gscmdr2   mb_x0mb_tops                 rZ   process_objectzPage.process_objectu  s   vvfb#--"8"89??A	uS#X 	8E#s(O3L 	 F4\3<<3E3E3G!HIJ"]"..]  	EB sB-gc2.>.C.CDR	E)
 	Y$J T!7FtJGW7X4Z $|"4	Y cFO45<<>D 88((4 "$(("7"7> L c6" !!B?N		@<D!"D);$< HW		HDD%&-C(D
 $z*E2#5d:6F#GZ gZ(s4#3#3T%[ABDK QTPaPab93S>3t'7'7#=>bDL,,DL
 bq)v4<;;d3v=DK"kkDJ6&@DN!004;>DN4<EQJde+DJde+DJ% cs   9#K.layout_objectsc              #      K   |D ]r  }t        |t              rM| j                  j                  | j	                  |       | j                  |j                        E d {    `| j	                  |       t y 7 wrg   )r^   r   r   r   r7  iter_layout_objectsr{   )rt   r8  r  s      rZ   r:  zPage.iter_layout_objects  sp      " 		/C#{+88$$0--c2233CII>>>))#..		/ ?s   AA=A; A=c                     i }| j                  | j                  j                        D ]6  }|d   }|dv r|j                  |      g ||<   ||   j	                  |       8 |S )Nr   )anno)r:  r   r{   rT   append)rt   r  r  r.  s       rZ   r  zPage.parse_objects  sq    )+++DKK,=,=> 	&C}%Dx{{4 ( "DM  %	& rO   table_settingsc                 D    t        j                  |      }t        | |      S rg   )r(   resolver'   rt   r>  tsets      rZ   debug_tablefinderzPage.debug_tablefinder  s!     $$^44&&rO   c                 X    t        j                  |      }t        | |      j                  S rg   )r(   r@  r'   tablesrA  s      rZ   find_tableszPage.find_tables  s'     $$^44&---rO   c                     t        j                  |      }| j                  |      }t        |      dk(  ry dt        dt
        t        t        t        f   fd}t        t        ||            d   }|S )Nr   r   rL   c                 h    t        | j                         | j                  d   | j                  d   fS )Nr   r   )lencellsr   r   s    rZ   sorterzPage.find_table.<locals>.sorter  s)    \M166!9affQi88rO   )ri   )
r(   r@  rF  rI  r&   r   r   r   rb   r   )rt   r>  rB  rE  rL  largests         rZ   
find_tablezPage.find_table  so     $$^4!!$'v;!	9e 	9c5%&7 8 	9 vf&1215rO   c           	          t        j                  |      }| j                  |      }|D cg c]"  } |j                  di |j                  xs i $ c}S c c}w Nr   )r(   r@  rF  extracttext_settings)rt   r>  rB  rE  tables        rZ   extract_tableszPage.extract_tables  sQ     $$^4!!$'IOP;!3!3!9r;PPPs   'Ac                     t        j                  |      }| j                  |      }|y  |j                  di |j                  xs i S rP  )r(   r@  rN  rQ  rR  )rt   r>  rB  rS  s       rZ   extract_tablezPage.extract_table  sI     $$^4%= 5==>D$6$6$<">>rO   re   c                     t        | j                        }d|vr|j                  d| j                  i       d|vr|j                  d| j                  i       i ||}t        j                  | j                  fi |S )N)layout_bboxlayout_width_charslayout_widthlayout_height_charslayout_height)rs   r   r   r5   r0   r   chars_to_textmapchars)rt   re   defaultsfull_kwargss       rZ   r   zPage._get_textmap  sx    #'		$
  v-OO^TZZ89 .OO_dkk:;&<&<V&<%%djj@K@@rO   patternregexcase
main_groupreturn_charsreturn_groupsc                 f     | j                   di t        |      }|j                  ||||||      S )N)rb  rc  rd  re  rf  r   )r   rk   search)	rt   ra  rb  rc  rd  re  rf  re   textmaps	            rZ   rh  zPage.search  sG     #$""A%8%@A~~!%'  
 	
rO   c                 L     | j                   di t        |      j                  S rP  )r   rk   	as_stringrt   re   s     rZ   extract_textzPage.extract_text,  s$    t>"5f"=>HHHrO   c                 B    t        j                  | j                  fi |S rg   )r   extract_text_simpler^  rl  s     rZ   ro  zPage.extract_text_simple/  s    ((>v>>rO   c                 B    t        j                  | j                  fi |S rg   )r   extract_wordsr^  rl  s     rZ   rq  zPage.extract_words2  s    ""4::888rO   stripc                 Z     | j                   di t        |      j                  ||      S )N)rr  re  r   )r   rk   extract_text_lines)rt   rr  re  re   s       rZ   rt  zPage.extract_text_lines5  s8      t>"5f"=>QQl R 
 	
rO   r   relativestrictr  c                      t        | |||      S )N)ru  rv  )r  rt   r   ru  rv  s       rZ   cropz	Page.crop<  s     4HHrO   c                 >    t        | |||t        j                        S zS
        Same as .crop, except only includes objects fully within the bbox
        )ru  rv  crop_fn)r  r   within_bboxrx  s       rZ   r}  zPage.within_bboxA  s"     $&%BSBS
 	
rO   c                 >    t        | |||t        j                        S r{  )r  r   outside_bboxrx  s       rZ   r  zPage.outside_bboxK  s"     $&%BTBT
 	
rO   test_functionFilteredPagec                     t        | |      S rg   )r  )rt   r  s     rZ   r"  zPage.filterU  s    D-00rO   c                     t        | d       }| j                  j                         D ci c]  \  }}||
 c}}|_        t	        j
                  | j                  fi ||j                  d<   |S c c}}w )u   
        Removes duplicate chars — those sharing the same text and positioning
        (within `tolerance`) as other characters in the set. Adjust extra_args
        to be more/less restrictive with the properties checked.
        c                      y)NTr   rK  s    rZ   <lambda>z#Page.dedupe_chars.<locals>.<lambda>^  s    rO   char)r  r  rh   r  r   dedupe_charsr^  )rt   re   pr.  objss        rZ   r  zPage.dedupe_charsX  sd     ~.37<<3E3E3GHZT4dDjH
"//

EfE

6 Is   A0
resolutionr5   r0   	antialiasforce_mediaboxrJ   c                     ddl m}m} t        d |||fD              }|dkD  rt	        d|       |d|z  | j
                  z  }n|d|z  | j                  z  } || |xs |||      S )z
        You can pass a maximum of 1 of the following:
        - resolution: The desired number pixels per inch. Defaults to 72.
        - width: The desired image width in pixels.
        - height: The desired image width in pixels.
        r   )DEFAULT_RESOLUTIONrJ   c              3   $   K   | ]  }|d u 
 y wrg   r   r   s     rZ   r   z Page.to_image.<locals>.<genexpr>s  s     K!Ks   zUOnly one of these arguments can be provided: resolution, width, height. You provided H   )r  r  r  )displayr  rJ   sum
ValueErrorr5   r0   )	rt   r  r5   r0   r  r  r  rJ   	num_specss	            rZ   to_imagezPage.to_imagec  s     	;K
E6/JKK	q=ghqgrs  edjj0Jft{{2J!7%7)	
 	
rO   object_typesc           	      L   |(t        | j                  j                               dgz   }n|}| j                  | j                  | j
                  | j                  | j                  | j                  | j                  | j                  d}|D ]  }t        | |dz         ||dz   <    |S )Nr   )r   r   r   r   r   r   r5   r0   s)rb   r  keysr   r   r   r   r   r   r5   r0   r$  )rt   r  _object_typesdts        rZ   r   zPage.to_dict  s     !2!2!45	AM(M++"11||IIZZkk	
  	0A q3w/Aa#gJ	0rO   c                 "    d| j                    dS )Nz<Page:>)r   rw   s    rZ   __repr__zPage.__repr__  s    (()++rO   r   r   rg   )TTr   TT)TT)FT)NNNFF)Br   r   r   r"   r   r	   rU   r   r   boolpagesr   r   r   r   r   propertyr5   r0   r   r   r   r   r   r!   r  r  r  r   r  r   r    r7  r   r   r:  r  r
   r%   r'   rC  r&   rF  rN  rT  rV  r.   r   r   r   rh  rm  ro  rq  rt  r   ry  r}  r  r   r"  r  r   r  r   r  r   rO   rZ   r   r      s   #,#>#>)#LtCyLKE !"':': ': 	':
 ':R' +u + + + + + T#s(^ 4       ?
 ? ?B @J @ @ c:o.  ReE5L1 ReE5L6I RO& OU Ob/";//	5$$	%/	tCO4 	 <@'&'78'	' <@.&'78.	e. <@&'78	%$ <@Q&'78Q	d4&'	(Q <@?&'78?	$tHSM*+	,?	AS 	AW 	A !"
sGCL()
 
 	

 
 
 
 
 
d38n	
(IS IS I?C ?C ?9c 9j 9 8<

04
GJ
	
 DHII&*I<@I	I DH

&*
<@
	
 DH

&*
<@
	
1HeWd]$; 1 1	S 	^ 	 37-1.2$
U3:./
 c5j)*
 sEz*+	

 
 
 

BHT#Y$7 4S> &,# ,rO   r   c                   (    e Zd ZU dZeed<   defdZy)DerivedPageFr   parent_pagec                    || _         |j                  | _        |j                  | _        |j                  | _        |j                  | _        |j
                  | _        |j                  | _        |j                  | _        |j                  | _        | j                  t        j                          t               | j                        | _        y rg   )r  r   r   r   r   r   r   r   r   r   r"   r   r   r   r   )rt   r  s     rZ   r   zDerivedPage.__init__  s    &$..??#,,&22)88#,,#,,"**445&9;t'8'89rO   N)r   r   r   r   r  r   r   r   r   rO   rZ   r  r    s    K:D :rO   r  r   parent_bboxc                     t        j                  |       }|dk(  rt        d|  d      t        j                  | |      }|t        d|  d|       t        j                  |      }||k  rt        d|  d|       y )Nr   zBounding box z has an area of zero.z. is entirely outside parent page bounding box z. is not fully within parent page bounding box )r   calculate_arear  get_bbox_overlap)r   r  	bbox_areaoverlapoverlap_areas        rZ   test_proposed_bboxr    s    $$T*IA~=.CDEE$$T;7GD6 "((3}6
 	

 ''0LiD6 "((3}6
 	
  rO   c                   ~     e Zd Zej                  ddfdededeeegef   de	de	f
 fdZ
ed	eeef   fd
       Z xZS )r  FTr  	crop_bboxr|  ru  rv  c                 :   |r*|j                   \  }}}}\  }	}
}}|	|z   |
|z   ||z   ||z   f|rt        |j                          dt        dt        ffd}t        |   |       || _        t        j                  u r|j                   | _         y | _         y )Nr  rL   c                      |       S rg   r   )r  r  r|  s    rZ   r  z&CroppedPage.__init__.<locals>._crop_fn  s    4++rO   )r   r  r!   r   r   r  r   r  )rt   r  r  r|  ru  rv  o_x0o_top_r6   r   r7   r   r  r   s     ``          rZ   r   zCroppedPage.__init__  s      + 0 0D%A"+BRdC%KdFUNKIy+*:*:;	,: 	,* 	, 	%  e(((#((DI!DIrO   rL   c                     t        | d      r| j                  S | j                  j                  j	                         D ci c]  \  }}|| j                  |       c}}| _        | j                  S c c}}w r
  )r   r  r  r  rh   r  rt   r   r   s      rZ   r  zCroppedPage.objects  se    4$== ,0,<,<,D,D,J,J,L0
$(AqAt}}Q0
 }}0
s    A1)r   r   r   r   crop_to_bboxr   r   r   r!   r  r   r  r   rU   r  r   r   s   @rZ   r  r    s}    
 ?D>P>P"" " :v.
:;	"
 " ": c:o.  rO   r  c                   T     e Zd Zdedeegef   f fdZede	e
ef   fd       Z xZS )r  r  	filter_fnc                 T    |j                   | _         || _        t        |   |       y rg   )r   r  r   r   )rt   r  r  r   s      rZ   r   zFilteredPage.__init__  s$    $$	"%rO   rL   c                 
   t        | d      r| j                  S | j                  j                  j	                         D ci c]%  \  }}|t        t        | j                  |            ' c}}| _        | j                  S c c}}w r
  )r   r  r  r  rh   rb   r"  r  r  s      rZ   r  zFilteredPage.objects  sq    4$==  ((006680
1 tF4>>1-..0
 }}	0
s    *A?)r   r   r   r   r   r    r  r   r  r   rU   r!   r  r   r   s   @rZ   r  r    sE    &D &Xugtm5L &
 c:o.  rO   r  r  )Wr   r  	functoolsr   typingr   r   r   r   r   r	   r
   r   r   r   unicodedatar   r'  warningsr   pdfminer.converterr   pdfminer.layoutr   r   r   r   r   r   r   pdfminer.pdfinterpr   r   pdfminer.pdfpager   pdfminer.psparserr   r  r   _typingr   r   r    r!   	containerr"   	structurer#   r$   rS  r%   r&   r'   r(   r)   r*   r+   utils.exceptionsr,   r-   
utils.textr.   compiler   setr  r  rJ   r   rK   rS   r+  rU   r[   r   r   r_   rd   rk   rm   r   r   r   r  r  r  r  r   rO   rZ   <module>r     s    	    7  0   = $ '  5 5   7 F F ? ? F 	F		B "
 *) 0 1(* *3 *c?
8E%s
+S012HSMAB''
8E%s
+S012HSMAB'S#X 4S> 0): 0f F  e  F  (4 4E 4f 4
G,9 G,T:$ :"
V 
& 
T 
(%+ %P; rO   