
    ;3h                     <   d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d d	l-m.Z.m/Z/ d d
l0m1Z1m2Z2 d dl3m4Z4 d dl5m6Z6 d dl7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZA  ej                  eC      ZD G d de*      ZE G d deE      ZF ede
ee8      ZG G d deEeeG         ZH G d deHe8         ZI G d deHe8         ZJ G d deHe8         ZK G d deHe8         ZLy)    N)BinaryIODictGenericListOptionalSequenceTextIOTupleTypeVarUnioncast)utils)ImageWriter)LAParamsLTAnnoLTCharLTComponentLTContainerLTCurveLTFigureLTImageLTItemLTLayoutContainerLTLineLTPageLTRectLTText	LTTextBoxLTTextBoxVerticalLTTextGroup
LTTextLineTextGroupElement)PDFColorSpace)PDFTextDevice)PDFValueError)PDFFontPDFUnicodeNotDefined)PDFGraphicStatePDFResourceManager)PDFPage)	PDFStream)
AnyIOMatrixPathSegmentPointRectapply_matrix_ptbbox2strencmake_compat_strmult_matrixc                      e Zd ZU eed<   eed<   	 	 d%dededee	   ddfdZ
d	ededdfd
Zd	eddfdZdedededdfdZdeddfdZdededdfdZdededededee   ddfdZdededededededed edefd!Zdededefd"Zd#eddfd$Zy)&PDFLayoutAnalyzercur_itemctmNrsrcmgrpagenolaparamsreturnc                 Z    t        j                  | |       || _        || _        g | _        y N)r$   __init__r;   r<   _stackselfr:   r;   r<   s       R/var/www/html/audio-gradio/venv/lib/python3.12/site-packages/pdfminer/converter.pyr@   zPDFLayoutAnalyzer.__init__D   s)     	tW- /1    pagec                     |j                   \  }}}}t        |||f      \  }}t        |||f      \  }}ddt        ||z
        t        ||z
        f}t        | j                  |      | _        y )Nr   )mediaboxr1   absr   r;   r8   )rC   rF   r9   x0y0x1y1rH   s           rD   
begin_pagezPDFLayoutAnalyzer.begin_pageO   sl    ==RR"3R1R"3R1Rq#b2g,BG5t{{H5rE   c                    | j                   r#J t        t        | j                                      t        | j                  t
              s#J t        t        | j                                     | j                  %| j                  j                  | j                         | xj                  dz  c_	        | j                  | j                         y )N   )rA   strlen
isinstancer8   r   typer<   analyzer;   receive_layout)rC   rF   s     rD   end_pagezPDFLayoutAnalyzer.end_pageV   s    ;;5C$4 55$--0J#d4==6I2JJ0==$MM!!$--0qDMM*rE   namebboxmatrixc                     | j                   j                  | j                         t        ||t	        || j
                              | _        y r?   )rA   appendr8   r   r5   r9   )rC   rX   rY   rZ   s       rD   begin_figurezPDFLayoutAnalyzer.begin_figure^   s3    4==) t[-JKrE   _c                 
   | j                   }t        | j                   t              s#J t        t	        | j                                      | j
                  j                         | _         | j                   j                  |       y r?   )r8   rS   r   rQ   rT   rA   popadd)rC   r^   figs      rD   
end_figurezPDFLayoutAnalyzer.end_figureb   sV    mm$--2LCT]]8K4LL2)#rE   streamc                 v   t        | j                  t              s#J t        t	        | j                                     t        ||| j                  j                  | j                  j                  | j                  j                  | j                  j                  f      }| j                  j                  |       y r?   )rS   r8   r   rQ   rT   r   rJ   rK   rL   rM   ra   )rC   rX   rd   items       rD   render_imagezPDFLayoutAnalyzer.render_imageh   s    $--2LCT]]8K4LL2]]t}}//1A1A4==CSCST

 	$rE   gstatestrokefillevenoddpathc                 p   dj                  d |D              }|dd dk7  ry|j                  d      dkD  rTt        j                  d|      D ]:  }||j	                  d      |j                  d       }| j                  |||||       < y|D 	cg c]%  }	t        t        |	d   dk7  r|	d	d n|d   d	d       ' }
}	|
D cg c]  }t        | j                  |       }}|D cg c]  }t        |d          }}|D cg c]U  }t        |ddd
   |d
dd
         D cg c]/  \  }}t        | j                  t        |      t        |      f      1 c}}W }}}}t        ||      D 	cg c]  \  }}	t        t        |g|	       }}}	t        |      dkD  r+|d	d dk(  r#|d	   |d   k(  r|dd	 dz   }|j!                          |dv r_t#        |j$                  |d   |d   ||||j&                  |j(                  ||j*                  
      }| j,                  j/                  |       y|dv r|\  \  }}\  }}\  }}\  }}}|d   |d   k(  }||k(  xr ||k(  xr ||k(  xr ||k(  xs ||k(  xr ||k(  xr ||k(  xr ||k(  }|rd|rbt1        |j$                  g |d   |d
   ||||j&                  |j(                  ||j*                  	      } | j,                  j/                  |        yt3        |j$                  |||||j&                  |j(                  ||j*                  	      }!| j,                  j/                  |!       yt3        |j$                  |||||j&                  |j(                  ||j*                  	      }!| j,                  j/                  |!       yc c}	w c c}w c c}w c c}}w c c}}}w c c}	}w )z@Paint paths described in section 4.4 of the PDF reference manual c              3   &   K   | ]	  }|d      yw)r   N ).0xs     rD   	<genexpr>z/PDFLayoutAnalyzer.paint_path.<locals>.<genexpr>z   s     +!+s   NrP   mzm[^m]+r   h      lh>   mlmlh)original_pathdashing_style>   mlllhmllll   )joincountrefinditerstartend
paint_pathr   r/   r1   r9   rQ   zipfloatr.   rR   r`   r   	linewidthscolorncolordashr8   ra   r   r   )"rC   rh   ri   rj   rk   rl   shapert   subpathpraw_ptsptpts	operation	operatorsoperand1operand2transformed_pointsotransformed_pathlinerJ   rK   rL   rM   x2y2x3y3r^   is_closed_loophas_square_coordinatesrectcurves"                                     rD   r   zPDFLayoutAnalyzer.paint_pathq   s    +d++!9 [[![[E2 HqwwqzAEE!H5gwGH OSIJUadckAbcFtAwrs|DG  <CCR?488R0CCC<@AyYq\*AIA "&" "
  /2)ADqD/9QTPQT?.S*( $DHHuXh.PQ" "  	+=> Aq [1'q'*    5zA~%*"4RCF9Jcr
S(	%
 $$FFMMMM"2"(++ !!$',,<?9R(2rHRhr2!$Q3q6!1"HCrCbBhC28*GBhE28EbER2X ' "&<!((*#a&*3q6*(
D MM%%d+#(((
E MM%%e,$$MMMM$KK
 !!%(o DA" s0   *N>N!N > N+4N%N++N2%N+fontfontsizescalingrisecidncsgraphicstatec	                 v   	 |j                  |      }	t        |	t              sJ t        t        |	                   	 |j                  |      }
|j                  |      }t        ||||||	|
|||
      }| j                  j                  |       |j                  S # t        $ r | j                  ||      }	Y zw xY wr?   )	to_unichrrS   rQ   rT   r'   handle_undefined_char
char_width	char_dispr   r8   ra   adv)rC   rZ   r   r   r   r   r   r   r   text	textwidthtextdisprf   s                rD   render_charzPDFLayoutAnalyzer.render_char   s    	9>>#&DdC(9#d4j/9( OOC(	>>#&
 	$xx# $ 	9--dC8D	9s   :B B87B8c                 :    t         j                  d||       d|z  S )Nzundefined: %r, %rz(cid:%d))logdebug)rC   r   r   s      rD   r   z'PDFLayoutAnalyzer.handle_undefined_char  s    		%tS1CrE   ltpagec                      y r?   rp   rC   r   s     rD   rV   z PDFLayoutAnalyzer.receive_layout  s    rE   rP   N) __name__
__module____qualname__r   __annotations__r-   r)   intr   r   r@   r*   rN   rW   rQ   r0   r]   rc   r+   rg   r(   boolr   r.   r   r&   r   r#   r   r   r   rV   rp   rE   rD   r7   r7   @   s   	K
 '+		2#	2 	2 8$		2
 
	26w 6V 6 6+W + +L LD L& LT LC D    i  D  y)y) y) 	y)
 y) {#y) 
y)v  	
     & 
B '      V  rE   r7   c            	       L    e Zd Z	 	 d
dededee   ddfdZdeddfdZ	defd	Z
y)PDFPageAggregatorNr:   r;   r<   r=   c                 D    t         j                  | |||       d | _        y N)r;   r<   )r7   r@   resultrB   s       rD   r@   zPDFPageAggregator.__init__  s"     	""4("S(,rE   r   c                     || _         y r?   r   r   s     rD   rV   z PDFPageAggregator.receive_layout  s	    rE   c                 6    | j                   J | j                   S r?   r   rC   s    rD   
get_resultzPDFPageAggregator.get_result"  s    {{&&&{{rE   r   )r   r   r   r)   r   r   r   r@   r   rV   r   rp   rE   rD   r   r     sY     '+	-#- - 8$	-
 
-V  F rE   r   IOTypec                   T    e Zd Z	 	 	 d
dededededee   ddfdZ	e
dedefd	       Zy)PDFConverterNr:   outfpcodecr;   r<   r=   c                     t         j                  | |||       || _        || _        | j	                  | j                        | _        y r   )r7   r@   r   r   _is_binary_streamoutfp_binary)rC   r:   r   r   r;   r<   s         rD   r@   zPDFConverter.__init__,  s@     	""4("S"

 224::>rE   c                     dt        | dd      v ryt        | d      ryt        | t        j                        ryt        | t        j
                        st        | t        j                        ryy)z"Test if an stream is binary or notbmodern   TF)getattrhasattrrS   ioBytesIOStringIO
TextIOBase)r   s    rD   r   zPDFConverter._is_binary_stream9  sW     '%,,UF#rzz*r{{+z%/OrE   )utf-8rP   N)r   r   r   r)   r   rQ   r   r   r   r@   staticmethodr,   r   r   rp   rE   rD   r   r   +  ss    
 '+?#? ? 	?
 ? 8$? 
?  4  rE   r   c                        e Zd Z	 	 	 	 	 ddededededee   de	dee
   d	df fd
Zded	dfdZded	dfdZdeded	dfdZdede	de	de	dee   d	dfdZ xZS )TextConverterNr:   r   r   r;   r<   
showpagenoimagewriterr=   c                 J    t         |   |||||       || _        || _        y )Nr   r;   r<   )superr@   r   r   )	rC   r:   r   r   r;   r<   r   r   	__class__s	           rD   r@   zTextConverter.__init__J  s,     	%uVhW$&rE   r   c                     t        j                  || j                  d      }| j                  r8t	        t
        | j                        j                  |j                                y t	        t        | j                        j                  |       y )Nignore)
r   compatible_encode_methodr   r   r   r   r   writeencoder	   rC   r   s     rD   
write_textzTextConverter.write_textX  s[    --dDJJI4::&,,T[[];$**40rE   r   c                      dt         dd f fd j                  r j                  d|j                  z          |        j                  d       y )Nrf   r=   c                 Z   t        | t              r| D ]
  } |        n/t        | t              rj                  | j	                                t        | t
              rj                  d       y t        | t              r)j                  j                  j                  |        y y y )N
)	rS   r   r   r   get_textr   r   r   export_image)rf   childrenderrC   s     rD   r   z,TextConverter.receive_layout.<locals>.render`  s    $,! "E5M"D&)0$	*%D'*##/$$11$7 0 +rE   zPage %s
)r   r   r   pageidrC   r   r   s   ` @rD   rV   zTextConverter.receive_layout_  sG    
	8 
	8D 
	8 ??OOK&--78vrE   rX   rd   c                 L    | j                   t        j                  | ||       y y r?   )r   r   rg   )rC   rX   rd   s      rD   rg   zTextConverter.render_imaget  s%    '%%dD&9 (rE   rh   ri   rj   rk   rl   c                      y r?   rp   )rC   rh   ri   rj   rk   rl   s         rD   r   zTextConverter.paint_pathx  s     	rE   )r   rP   NFN)r   r   r   r)   r,   rQ   r   r   r   r   r   r@   r   r   rV   r+   rg   r(   r   r.   r   __classcell__)r   s   @rD   r   r   I  s    
 '+ -1'#' ' 	'
 ' 8$' ' k*' 
'1s 1t 1V  *: :i :D :  	
  {# 
rE   r   c                       e Zd ZdddddddZddd	Z	 	 	 	 	 	 	 	 	 	 	 	 d6dedededede	e
   dededededede	e   dede	eeef      de	eeef      dd
fdZdedd
fdZd7dZd7dZdedd
fdZd ed!ed"ed#ed$ed%edd
fd&Zd ed!ed'edd
fd(Zd'ed!ed"ed#ed$ed%edd
fd)Zd eded"ed#ed*edd
fd+Z	 d8d ed!ed"ed#ed$ed%ed,edd
fd-Zd edd
fd.Zded/ed0edd
fd1Zd7d2Zd3edd
fd4Zd7d5Z y
)9HTMLConverteryellowmagentacyanredblackgray)figuretextlinetextbox	textgroupr   rF   blue)r  charNr:   r   r   r;   r<   scale	fontscale
layoutmoder   
pagemarginr   r   rect_colorstext_colorsr=   c                 F   t         j                  | |||||       | j                  r| j                  st	        d      | j                  s| j                  rt	        d      |ddi}|ddd}|| _        || _        || _        |	| _        |
| _	        || _
        || _        || _        |rJ| j                  j                  | j                         | j                  j                  | j                         | j                  | _        d | _        g | _        | j'                          y )Nr   )Codec is required for a binary I/O outputz1Codec must not be specified for a text I/O outputr  r  r  )r   rF   )r   r@   r   r   r%   r	  r
  r  r   r  r   r  r  updateRECT_COLORSTEXT_COLORS_yoffset_font
_fontstackwrite_header)rC   r:   r   r   r;   r<   r	  r
  r  r   r  r   r   r  r  s                  rD   r@   zHTMLConverter.__init__  s   " 	 	 	
 TZZ KLL  TZZ STT!7+K$+V<K
"$$$&&&##D$4$45##D$4$45#26
=?rE   r   c                     | j                   rCt        t        | j                        j	                  |j                  | j                                y t        t        | j                        j	                  |       y r?   r   r   r   r   r   r   r	   r   s     rD   r   zHTMLConverter.write  H    ::4::&,,T[[-DE$**40rE   c                     | j                  d       | j                  rd| j                  z  }nd}| j                  |       | j                  d       y )Nz<html><head>
zA<meta http-equiv="Content-Type" content="text/html; charset=%s">
z5<meta http-equiv="Content-Type" content="text/html">
z</head><body>
)r   r   )rC   ss     rD   r  zHTMLConverter.write_header  sL    

#$::!#'::. 
 IA

1

$%rE   c                     t        d| j                        D cg c]  }d| d| d }}ddj                  |      z  }| j                  |       | j                  d       y c c}w )NrP   z
<a href="#z">z</a>z8<div style="position:absolute; top:0px;">Page: %s</div>
z, z</body></html>
)ranger;   r   r   )rC   i
page_linksr  s       rD   write_footerzHTMLConverter.write_footer  sh    9>q$++9NOA
1#Rs$/O
OG$))K
 
 	

1

%& Ps   A"c                 8    | j                  t        |             y r?   )r   r3   r   s     rD   r   zHTMLConverter.write_text  s    

3t9rE   colorborderwidthrr   ywru   c                     | j                   j                  |      }|^d|||| j                  z  | j                  |z
  | j                  z  || j                  z  || j                  z  fz  }| j	                  |       y y )Nzn<span style="position:absolute; border: %s %dpx solid; left:%dpx; top:%dpx; width:%dpx; height:%dpx;"></span>
)r  getr	  r  r   )	rC   r#  r$  rr   r%  r&  ru   color2r  s	            rD   
place_rectzHTMLConverter.place_rect  s     !!%%e,K 

N]]Q&$**4

N

N	  JJqM rE   rf   c                     | j                  |||j                  |j                  |j                  |j                         y r?   )r*  rJ   rM   widthheight)rC   r#  r$  rf   s       rD   place_borderzHTMLConverter.place_border  s(    {DGGTWWdjj$++VrE   c                     | j                   | j                   j                  |      }dt        |      ||| j                  z  | j                  |z
  | j                  z  || j                  z  || j                  z  fz  }| j                  |       y y )Nzd<img src="%s" border="%d" style="position:absolute; left:%dpx; top:%dpx;" width="%d" height="%d" />
)r   r   r3   r	  r  r   )	rC   rf   r$  rr   r%  r&  ru   rX   r  s	            rD   place_imagezHTMLConverter.place_image   s     '##006DD I

N]]Q&$**4

N

N	  JJqM (rE   sizec                 :   | j                   j                  |      }|~d||| j                  z  | j                  |z
  | j                  z  || j                  z  | j                  z  fz  }| j                  |       | j                  |       | j                  d       y y )NzP<span style="position:absolute; color:%s; left:%dpx; top:%dpx; font-size:%dpx;"></span>
)r  r(  r	  r  r
  r   r   )rC   r#  r   rr   r%  r1  r)  r  s           rD   
place_textzHTMLConverter.place_text  s     !!%%e,. 

N]]Q&$**44::%6	  JJqMOOD!JJ{# rE   writing_modec           	         | j                   j                  | j                         d | _        d||||| j                  z  | j                  |z
  | j                  z  || j                  z  || j                  z  fz  }| j                  |       y )Nzv<div style="position:absolute; border: %s %dpx solid; writing-mode:%s; left:%dpx; top:%dpx; width:%dpx; height:%dpx;">)r  r\   r  r	  r  r   )	rC   r#  r$  rr   r%  r&  ru   r5  r  s	            rD   	begin_divzHTMLConverter.begin_div1  s     	tzz*
 DJJ"djj0DJJDJJ 	
 	

1rE   c                     | j                   | j                  d       | j                  j                         | _         | j                  d       y )N</span>z</div>)r  r   r  r`   )rC   r#  s     rD   end_divzHTMLConverter.end_divM  s8    ::!JJy!__((*


8rE   fontnamer   c                    ||f}|| j                   k7  rh| j                   | j                  d       |j                  d      d   }| j                  d||| j                  z  | j                  z  fz         || _         | j                  |       y )Nr9  +z.<span style="font-family: %s; font-size:%dpx">)r  r   splitr	  r
  r   )rC   r   r;  r   r   fontname_without_subset_tags         rD   put_textzHTMLConverter.put_textS  s    (#4::zz%

9%*2..*=b*A'JJ@.4::0E0VWX DJrE   c                 &    | j                  d       y )Nz<br>r   r   s    rD   put_newlinezHTMLConverter.put_newlinea  s    

6rE   r   c                      dt         t        t        f   dd f fddt        dd f fd |        xj                   j
                  z  c_        y )Nrf   r=   c                 l    t        | t              r#j                  dd|        | D ]
  } |        y y )Nr  rP   )rS   r    r.  rf   r   rC   
show_groups     rD   rH  z0HTMLConverter.receive_layout.<locals>.show_groupe  s;    $,!!+q$7! &Eu%& -rE   c           
      N   t        | t              r؉xj                  | j                  z  c_        j	                  dd|        j
                  rdj                  dj                  | j                  z
  j                  z  z         j                  d| j                   d| j                   d       | D ]
  } |        | j                  | j                  D ]
  } |        y y t        | t              rj	                  dd|        y t        | t              r_j                  dd| j                  | j                  | j                  | j                         | D ]
  } |        j!                  d       y t        | t"              r?j%                  | d| j                  | j                  | j                  | j                         y j&                  d	k(  rt        | t(              r#j	                  d
d|        | D ]
  } |        y t        | t*              rbj	                  dd|        j-                  dt/        | j0                  dz         | j                  | j                  d       | D ]
  } |        y t        | t2              rUj	                  dd|        j-                  d| j5                         | j                  | j                  | j6                         y y t        | t(              r0| D ]
  } |        j&                  dk7  rj9                          y y t        | t*              rnj                  dd| j                  | j                  | j                  | j                  | j;                                | D ]
  } |        j!                  d       y t        | t2              rAt=        | j>                        }jA                  | j5                         || j6                         y t        | tB              r jE                  | j5                                y y )NrF   rP   z*<div style="position:absolute; top:%dpx;">z	<a name="z">Page z</a></div>
r   r  exactr  r     r  loose)#rS   r   r  rM   r.  r   r   r	  r   groupsr   r   r7  rJ   r,  r-  r:  r   r0  r  r!   r   r4  rQ   indexr   r   r1  rD  get_writing_moder4   r;  rA  r   r   )rf   r   groupr;  r   rC   rH  s       rD   r   z,HTMLConverter.receive_layout.<locals>.renderk  s\   $'(!!&!T2??JJD MMDGG3tzzAC JJ#DKK=}LQ " "E5M";;*!% *"5)* + D'*!!'1d3D(+xDGGTWWdjj$++V! "E5M"X&D'*  q$''477DJJTG+dJ/%%j!T:!% &u&i0%%iD9OO!DJJN+ "& &u&f-%%fa6OO		 . D*-! "E5M"??g-$$& .D),GGGGJJKK))+ " "E5M"Y'D&)*4==9dmmoxCD&)0 *rE   )r   r    r"   r   r  r  rC   r   r   rH  s   ` @@rD   rV   zHTMLConverter.receive_layoutd  sS    	&U;0@#@A 	&d 	&J	1 J	1D J	1X 	v(rE   c                 $    | j                          y r?   r!  r   s    rD   closezHTMLConverter.close      rE   )r   rP   NrP   g      ?normalT2   Nr   NNr=   N)False)!r   r   r   r  r  r)   r,   rQ   r   r   r   r   r   r   r   r@   r   r  r!  r   r*  r   r.  r   r0  r4  r7  r:  rA  rD  r   rV   rT  rp   rE   rD   r   r     s   K K '+"-104043#3 3 	3
 3 8$3 3 3 3 3 3 k*3 3 d38n-3 d38n-3  
!3j1# 1$ 1
&'s t   	
    
2W# WC W{ Wt W  	
    
2$$ $ 	$
 $ $ 
$@ $  	
     
8S T S C 5 T T)V T) T)lrE   r   c                       e Zd Z ej                  d      Z	 	 	 	 	 ddededede	de
e   de
e   d	ed
dfdZded
dfdZddZddZded
dfdZded
dfdZddZy)XMLConverterz[ ---]Nr:   r   r   r;   r<   r   stripcontrolr=   c                     t         j                  | |||||       | j                  | j                   k(  rt	        d      || _        || _        | j                          y )Nr   r  )r   r@   r   r   r%   r   r\  r  )rC   r:   r   r   r;   r<   r   r\  s           rD   r@   zXMLConverter.__init__  sg     	 	 	
 TZZ0 KLL&(rE   r   c                     | j                   rCt        t        | j                        j	                  |j                  | j                                y t        t        | j                        j	                  |       y r?   r  r   s     rD   r   zXMLConverter.write  r  rE   c                     | j                   r| j                  d| j                   z         n| j                  d       | j                  d       y )Nz%<?xml version="1.0" encoding="%s" ?>
z<?xml version="1.0" ?>
z<pages>
r   r   r   s    rD   r  zXMLConverter.write_header  s8    ::JJ?$**LMJJ12

;rE   c                 &    | j                  d       y )Nz	</pages>
rC  r   s    rD   r!  zXMLConverter.write_footer  s    

< rE   c                     | j                   r| j                  j                  d|      }| j                  t	        |             y Nrn   )r\  CONTROLsubr   r3   r   s     rD   r   zXMLConverter.write_text  s1    <<##B-D

3t9rE   r   c                 X     dt         dd f fddt         dd f fd |       y )Nrf   r=   c                 >   t        | t              r4j                  d| j                  t	        | j
                        fz         y t        | t              rHj                  dt	        | j
                        z         | D ]
  } |        j                  d       y y )Nz<textbox id="%d" bbox="%s" />
z<textgroup bbox="%s">
z</textgroup>
)rS   r   r   rN  r2   rY   r    rG  s     rD   rH  z/XMLConverter.receive_layout.<locals>.show_group  s    $	*

5zz8DII#678 D+.

4x		7JJK! &Eu%&

+,	 /rE   c                    t        | t              rd| j                  t        | j                        | j
                  fz  }j                  |       | D ]
  } |        | j                  ;j                  d       | j                  D ]
  } |        j                  d       j                  d       y t        | t              r6d| j                  t        | j                        fz  }j                  |       y t        | t              r6d| j                  t        | j                        fz  }j                  |       y t        | t              rEd| j                  t        | j                        | j                         fz  }j                  |       y t        | t              rXd| j                   d	t        | j                         d
}j                  |       | D ]
  } |        j                  d       y t        | t              rHj                  dt        | j                        z         | D ]
  } |        j                  d       y t        | t               rkd}t        | t"              rd}d| j$                  t        | j                        |fz  }j                  |       | D ]
  } |        j                  d       y t        | t&              rdt)        | j*                        t        | j                        | j,                  j                  | j.                  j0                  | j2                  fz  }j                  |       j5                  | j7                                j                  d       y t        | t8              r#j                  d| j7                         z         y t        | t:              rj<                  Pj<                  j?                  |       }j                  dt)        |      | j@                  | jB                  fz         y j                  d| j@                  | jB                  fz         y J tE        d| f             )Nz%<page id="%s" bbox="%s" rotate="%d">
z	<layout>
z
</layout>
z</page>
z"<line linewidth="%d" bbox="%s" />
z"<rect linewidth="%d" bbox="%s" />
z+<curve linewidth="%d" bbox="%s" pts="%s"/>
z<figure name="z" bbox="z">
z
</figure>
z<textline bbox="%s">
z</textline>
rn   z wmode="vertical"z<textbox id="%d" bbox="%s"%s>
z</textbox>
zD<text font="%s" bbox="%s" colourspace="%s" ncolour="%s" size="%.3f">z</text>
z<text>%s</text>
z*<image src="%s" width="%d" height="%d" />
z!<image width="%d" height="%d" />
	Unhandled)#rS   r   r   r2   rY   rotater   rM  r   r   r   r   get_ptsr   rX   r!   r   r   rN  r   r3   r;  r   r   r   r1  r   r   r   r   r   r   r,  r-  rQ   )	rf   r  r   rP  wmoderX   r   rC   rH  s	         rD   r   z+XMLConverter.receive_layout.<locals>.render  s   $'<KKTYY'KK@ 
 

1! "E5M";;*JJ|,!% *"5)*JJ}-

;'D&)9NNTYY'=  

1D&)9NNTYY'=  

1D'*BNNTYY'LLNF 
 

1D(+$TYYKx8K7LDQ

1! "E5M"

=)D*-

3htyy6IIJ! "E5M"

?+D),d$56/E5JJTYY'9 
 

1! "E5M"

>*D&)0 DMM* +))00		  

10

;'D&)

.@AD'*##/++88>DJJEt9djj$++>?
 JJ<::t{{34
 7c;"566urE   r   rQ  s   ` @@rD   rV   zXMLConverter.receive_layout  s6    
	-V 
	- 
	-Z	7 Z	7D Z	7x 	vrE   c                 $    | j                          y r?   rS  r   s    rD   rT  zXMLConverter.close\  rU  rE   )r   rP   NNFrX  )r   r   r   r   compilerd  r)   r,   rQ   r   r   r   r   r   r@   r   r  r!  r   r   rV   rT  rp   rE   rD   r[  r[    s    bjj89G '+-1"#  	
  8$ k*  
61# 1$ 1 !s t 
iV i iVrE   r[  c                       e Zd ZdZ ej
                  d      Z	 	 	 	 ddedede	de
dee   d	efd
Zdede	fdZde	ddfdZddZddZde	ddfdZddZdeddfdZddZy)HOCRConverterzKExtract an hOCR representation from explicit text information within a PDF.z[\x00-\x08\x0b-\x0c\x0e-\x1f]Nr:   r   r   r;   r<   r\  c                 v    t         j                  | |||||       || _        d| _        | j	                          y )Nr   F)r   r@   r\  within_charsr  )rC   r:   r   r   r;   r<   r\  s          rD   r@   zHOCRConverter.__init__s  sG     	 	 	
 )!rE   rY   r=   c                     |\  }}}}t        |      }t        | j                  d   |z
        }t        |      }t        | j                  d   |z
        }	d| d| d| d|	 S )Nrx   zbbox  )r   	page_bbox)
rC   rY   in_x0in_y0in_x1in_y1out_x0out_y0out_x1out_y1s
             rD   	bbox_reprzHOCRConverter.bbox_repr  sq    '+$ueUT^^A&./UT^^A&./vhaxq&::rE   r   c                     | j                   rE|j                  | j                         }t        t        | j                        j                  |       y t        t        | j                        j                  |       y r?   )r   r   r   r   r   r   r	   )rC   r   encoded_texts      rD   r   zHOCRConverter.write  sM    ::;;tzz2L4::&,,\:$**40rE   c                 j   | j                   r| j                  d| j                   z         n| j                  d       | j                  d       | j                  d       | j                  d       | j                  d       | j                  d       | j                  d       | j                  d	       y )
NzQ<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en' charset='%s'>
zD<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en'>
z<head>
z<title></title>
zE<meta http-equiv='Content-Type' content='text/html;charset=utf-8' />
zA<meta name='ocr-system' content='pdfminer.six HOCR Converter' />
zR  <meta name='ocr-capabilities' content='ocr_page ocr_block ocr_line ocrx_word'/>
z</head>
z<body>
r`  r   s    rD   r  zHOCRConverter.write_header  s    ::JJ:<@JJG
 JJ- 	

:

&'

T	
 	

P	
 	

C	
 	

;

:rE   c                 H    | j                  d       | j                  d       y )Nz0<!-- comment in the following line to debug -->
zD<!--script src='https://unpkg.com/hocrjs'></script--></body></html>
rC  r   s    rD   r!  zHOCRConverter.write_footer  s    

FG

S	
rE   c                 v    | j                   r| j                  j                  d|      }| j                  |       y rc  )r\  rd  re  r   r   s     rD   r   zHOCRConverter.write_text  s-    <<##B-D

4rE   c                 t   t        | j                        dkD  rd}d| j                  v rd}d| j                  v r|dz  }| j                  d| j                  | j                  || j                  | j                        | j                  | j                  | j                  j                         fz         d| _        y )	Nr   rn   Italiczfont-style: italic; Boldzfont-weight: bold; zg<span style='font:"%s"; font-size:%d; %s' class='ocrx_word' title='%s; x_font %s; x_fsize %d'>%s</span>F)	rR   working_textworking_fontr   working_sizer  working_bboxstriprs  )rC   bold_and_italic_styless     rD   
write_wordzHOCRConverter.write_word  s    t  !A%%'"4,,,)?&***&*??&JJ(
 )))).t'8'89))))))//1	  "rE   r   c                 6     dt         dd f fd |       y )Nrf   r=   c                 d   j                   r t        | t              rj                          t        | t              rm| j
                  _        j                  d| j                  dj                  | j
                        d       | D ]
  } |        j                  d       y t        | t              rNj                  dj                  | j
                        z         | D ]
  } |        j                  d       y t        | t              rZj                  d| j                  j                  | j
                        fz         | D ]
  } |        j                  d       y t        | t              rj                   sPd_         | j                         _        | j
                  _        | j"                  _        | j&                  _        y t+        | j                         j-                               d	k(  r0j                          j                  | j                                y j                   d
   | j
                  d
   k7  s2j$                  | j"                  k7  sj(                  | j&                  k7  rCj                          | j
                  _        | j"                  _        | j&                  _        xj                  | j                         z  c_        j                   d	   j                   d
   | j
                  d   j                   d   f_        y y )Nz<div class='ocr_page' id='z	' title='z'>
z</div>
z"<span class='ocr_line' title='%s'>r3  z+<div class='ocr_block' id='%d' title='%s'>
Tr   rP   rw   rx   )rs  rS   r   r  r   rY   rv  r   r   r  r!   r   rN  r   r   r  r  r;  r  r1  r  rR   r  )rf   r   
child_liner   rC   s      rD   r   z,HOCRConverter.receive_layout.<locals>.render  sy     Zf%=!$'!%

{{DNN499$=? " "E5M"

:&D*-

8DNN499<UV #' 'J:&'

;'D),

Bzz4>>$))#<=> " "E5M"

:&D&)(((,D%(,D%(,		D%(,D%(,		D%..01Q6OO%JJt}}/ ))!,		!<,,=,,		9),0II),0MM),0II)%%8%))!,))!,		!))!,	)D%+ *rE   rm  r   s   ` @rD   rV   zHOCRConverter.receive_layout  s     5	 5	D 5	n 	vrE   c                 $    | j                          y r?   rS  r   s    rD   rT  zHOCRConverter.close  rU  rE   )utf8rP   NFrX  )r   r   r   __doc__r   ro  rd  r)   r,   rQ   r   r   r   r   r@   r0   r  r   r  r!  r   r  r   rV   rT  rp   rE   rD   rq  rq  `  s    U  bjj9:G '+"#  	
  8$ *;d ;s ;1# 1$ 14
s t 
"28V 8 8trE   rq  )Mr   loggingr   typingr   r   r   r   r   r   r	   r
   r   r   r   pdfminerr   pdfminer.imager   pdfminer.layoutr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   pdfminer.pdfcolorr#   pdfminer.pdfdevicer$   pdfminer.pdfexceptionsr%   pdfminer.pdffontr&   r'   pdfminer.pdfinterpr(   r)   pdfminer.pdfpager*   pdfminer.pdftypesr+   pdfminer.utilsr,   r-   r.   r/   r0   r1   r2   r3   r4   r5   	getLoggerr   r   r7   r   r   r   r   r   r[  rq  rp   rE   rD   <module>r     s   	  	     &     * , , 0 : B $ '   g!R Rj) & 
68U	3$gfo <7L' 7txL' xv	_<& _DqL' qrE   