
    ;3h8                        d Z ddlZddlZddlZddlZddlmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZ ddlZddlmZmZmZ ddlmZmZmZmZ ddlmZ ddlmZ dd	l m!Z!m"Z"m#Z#m$Z$ dd
l%m&Z&m'Z'm(Z( ddl)m*Z*  ejV                           ejX                  e-      Z. ej^                  d      Z0dee1e2f   de1fdZ3d(dede4dee1   ddfdZ5	 d)dedede6ddfdZ7	 	 d*dededee1   de6ddf
dZ8	 	 	 	 d+dede1de	de
e9   de1de6dee1   dee1   ddfdZ: e&d       Z; e&d!      Z<de1de1de1ddfd"Z=	 	 	 	 	 d,dede1dee9   de
e9   de1de6dee1   dee1   de6ddfd#Z>defd$Z?d(d%eee1      ddfd&Z@e-d'k(  r e@        yy)-z#Extract pdf structure in XML format    N)ArgumentParser)	Any	ContainerDictIterableListOptionalTextIOUnioncast)PDFDocumentPDFNoOutlinesPDFXRefFallback)
PDFIOErrorPDFObjectNotFoundPDFTypeErrorPDFValueError)PDFPage)	PDFParser)	PDFObjRef	PDFStreamresolve1stream_value)LIT	PSKeyword	PSLiteral)isnumberz&[\000-\037&<>()"\042\047\134\177-\377]sreturnc                 n    t        | t              rt        | d      }n| }t        j	                  d |      S )Nzlatin-1c                 <    dt        | j                  d            z  S )Nz&#%d;r   )ordgroup)ms    T/var/www/html/audio-gradio/venv/lib/python3.12/site-packages/../../../bin/dumppdf.py<lambda>zescape.<locals>.<lambda>$   s    3qwwqz?!:     )
isinstancebytesstrESC_PATsub)r   uss     r%   escaper.      s/    !UI;;:B??r'   outobjcodecc                    || j                  d       y t        |t              r| j                  dt        |      z         |j	                         D ]G  \  }}| j                  d|z         | j                  d       t        | |       | j                  d       I | j                  d       y t        |t              rS| j                  dt        |      z         |D ]  }t        | |       | j                  d       ! | j                  d	       y t        |t        t        f      r)| j                  d
t        |      t        |      fz         y t        |t              r|dk(  r | j                  |j                                y |dk(  r | j                  |j                                y | j                  d       t        | |j                         | j                  d       |dk(  r8|j                         }| j                  dt        |      t        |      fz         | j                  d       y t        |t              r| j                  d|j                  z         y t        |t               r| j                  d|j"                  z         y t        |t$              r| j                  d|j"                  z         y t'        |      r| j                  d|z         y t)        |      )Nz<null />z<dict size="%d">
z<key>%s</key>
z<value>z	</value>
z</dict>z<list size="%d">

z</list>z<string size="%d">%s</string>rawbinaryz<stream>
<props>
z

</props>
textz<data size="%d">%s</data>
z	</stream>z<ref id="%d" />z<keyword>%s</keyword>z<literal>%s</literal>z<number>%s</number>)writer(   dictlenitemsdumpxmllistr*   r)   r.   r   get_rawdataget_dataattrsr   objidr   namer   r   r   )r/   r0   r1   kvdatas         r%   r;   r;   '   sZ   
{		*#t		&S12IIK 	$DAqII'!+,IIi COIIl#		$
 			)#t		&S12 	ACOIIdO	 			)#U|$		1SXvc{4KKL#y!E>IIcoo'( 	 hIIclln% 	 II+,C#IIn%||~		73t9fTl:SSTIIk"#y!		#cii/0#y!		)CHH45#y!		)CHH45}		'#-.
s
r'   docshow_fallback_xrefc                 6   |j                   D ]Q  }t        |t              r|s| j                  d       t	        | |j                                | j                  d       S t        d |j                   D              }|r|sd}t        j                  |       y y y )Nz
<trailer>
z
</trailer>

c              3   <   K   | ]  }t        |t                y wN)r(   r   ).0xrefs     r%   	<genexpr>zdumptrailers.<locals>.<genexpr>r   s     K:dO4Ks   zThis PDF does not have an xref. Use --show-fallback-xref if you want to display the content of a fallback xref that contains all objects.)	xrefsr(   r   r7   r;   get_trailerallloggerwarning)r/   rE   rF   rK   no_xrefsmsgs         r%   dumptrailersrT   h   s    
 		 *$04FIIm$C))+,II()	*
 KKKH*$ 	
 	s +xr'   c                    t               }| j                  d       |j                  D ]u  }|j                         D ]`  }||v r|j	                  |       	 |j                  |      }|.| j                  d|z         t        | ||       | j                  d       b w t        | ||       | j                  d       y # t        $ r}t        d|z         Y d }~d }~ww xY w)Nz<pdf>z<object id="%d">
r1   z
</object>

znot found: %rz</pdf>)
setr7   rM   
get_objidsaddgetobjr;   r   printrT   )	r/   rE   r1   rF   visitedrK   r@   r0   es	            r%   dumpallobjsr^   |   s     eGIIg		 +__& 	+EKK+jj';		.67S.		+,	++ c-.IIh % +o)**+s   B?)3B??	C CC outfpfnameobjidspagenospassworddumpall
extractdirc                    t        |d      }t        |      }	t        |	|      t        t	        j
                        d      D 
ci c]  \  }
}|j                  |
 }}
}dt        dt        ffd}	 j                         }| j                  d       |D ]  \  }}}}}d }
|r ||      }||d   j                     }
nc|ra|}t        |t              rO|j                  d      }|r<t        |      d	k(  r.|j                  d
      r ||d
         }||d   j                     }
t!        |      }| j                  d|d| d       |.| j                  d       t#        | |       | j                  d       |
| j                  d|
z         | j                  d        | j                  d       |	j'                          |j'                          y c c}}
w # t$        $ r Y 2w xY w)Nrb   destr   c                 <   t        | t        t        f      rt        j	                  |             } n4t        | t
              r$t        j	                  | j                              } t        | t              r| d   } t        | t              r| j                         } | S )ND)
r(   r*   r)   r   get_destr   rA   r8   r   resolve)ri   rE   s    r%   resolve_destz!dumpoutline.<locals>.resolve_dest   sr    dS%L)CLL./Di(CLL34DdD!9DdI&<<>Dr'   z<outlines>
r   Sz/'GoTo'rk   z<outline level="z	" title="z">
z<dest>z</dest>
z<pageno>%r</pageno>
z</outline>
z</outlines>
)openr   r   	enumerater   create_pagespageidobjectr   get_outlinesr7   r@   r(   r8   getreprr.   r;   r   close)r_   r`   ra   rb   rc   rd   r1   re   fpparserpagenopagepagesrn   outlinesleveltitleri   aseactionsubtyper   rE   s                          @r%   dumpoutliner      s    
eT	Br]F
fh
'C ((<(<S(A1EVT 	VE 
	6 	c 	##%N#)1 	(%E5$2F#D)tAw}}-fd+$jjoG4=I#=&**S/+F3K8!&tAw}}!5uAKK*5)9QCtDEH%t$K(!3f<=KK')	(* 	O$ LLNHHJYR  s   G4D=G 	G$#G$FilespecEmbeddedFilec                   
 dt         dt        t        t        f   dd f
fd}t	        | d      5 }t        |      }t        ||      
t               }
j                  D ]p  }|j                         D ][  }
j                  |      }	||vst        |	t              s*|	j                  d      t        u sB|j                  |        |||	       ] r 	 d d d        y # 1 sw Y   y xY w)Nr@   r0   r   c                 r   t         j                  j                  |j                  d      xs- t	        t
        |j                  d            j                               }|d   j                  d      xs |d   j                  d      }j                  |j                        }t        |t              sd|z  }t        |      |j                  d      t        urt        d|z        t         j                  j                  	d| |fz        }t         j                  j                  |      rt        d|z        t!        d	|z         t        j"                  t         j                  j%                  |      d
       t'        |d      }|j)                  |j+                                |j-                          y )NUFFEFz:unable to process PDF: reference for %r is not a PDFStreamTypez>unable to process PDF: reference for %r is not an EmbeddedFilez%.6d-%szfile exists: %rzextracting: %rT)exist_okwb)ospathbasenamerv   r   r)   decoderZ   r@   r(   r   r   LITERAL_EMBEDDEDFILEjoinexistsr   r[   makedirsdirnamerp   r7   r>   rx   )
r@   r0   filenamefilereffileobj	error_msgr   r/   rE   re   s
           r%   extract1z!extractembedded.<locals>.extract1   s[   77##CGGDM$WT%5N5U5U5WXd)--%;Ts);**W]]+'9-&'   	**;;v&::),46  ww||J	UH4E(EF77>>$.566%&
BGGOOD)D94		'""$%		r'   rg   r   )intr   r*   r   rp   r   r   rW   rM   rX   rZ   r(   r8   rv   LITERAL_FILESPECrY   )r`   rc   re   r   ry   rz   extracted_objidsrK   r@   r0   rE   s     `       @r%   extractembeddedr      s     $sCx. T 0 
eT	 )b2&(+5II 		)D* )jj'!11"3-+;;$((/UC()		)	) ) )s   ACCC4CC%c	                    t        |d      }	t        |	      }
t        |
|      }|r&|D ]!  }|j                  |      }t	        | ||       # |rnt        t        j                  |            D ]M  \  }}||v s|r+|j                  D ]  }t        |      }t	        | ||        8t	        | |j                         O |rt        | |||       |s|s|st        | ||       |	j                          |dvr| j                  d       y y )Nrg   rV   )r4   r5   r3   )rp   r   r   rZ   r;   rq   r   rr   contentsr   r?   r^   rT   rx   r7   )r_   r`   ra   rb   rc   rd   r1   re   rF   ry   rz   rE   r@   r0   r{   r|   s                   r%   dumppdfr      s    
eT	Br]F
fh
'C 	-E**U#CE3e,	- %g&:&:3&?@ 	/LFD #}} 9*3/s%89 E4::.	/ E3'9:WwUC!34HHJ%%D &r'   c                     t        t        d      } | j                  dt        d dd       | j                  ddd	d
t        j
                          | j                  ddddd       | j                         }|j                  ddddd       |j                  ddt        d       | j                  dd      }|j                  dt        d dd       |j                  ddt        d        |j                  d!d"t        d#       |j                  d$d%ddd&       |j                  d'dd()       |j                  d*d+t        d,d-.       | j                  d/d0      }|j                  d1d2t        d3d4.       |j                         }|j                  d5d6ddd7       |j                  d8d9ddd:       |j                  d;d<ddd=       | S )>NT)descriptionadd_helpfiles+zOne or more paths to PDF files.)typedefaultnargshelpz	--versionz-vversionzpdfminer.six v)r   r   z--debugz-dF
store_truezUse debug logging level.)r   r   r   z--extract-tocz-TzExtract structure of outlinez--extract-embeddedz-EzExtract embedded files)r   r   ParserzUsed during PDF parsing)r   z--page-numbersz0A space-seperated list of page numbers to parse.z	--pagenosz-pzA comma-separated list of page numbers to parse. Included for legacy applications, use --page-numbers for more idiomatic argument entry.z	--objectsz-iz1Comma separated list of object numbers to extractz--allz-az3If the structure of all objects should be extractedz--show-fallback-xrefzAdditionally show the fallback xref. Use this if the PDF has zero or only invalid xref's. This setting is ignored if --extract-toc or --extract-embedded is used.)r   r   z
--passwordz-P z,The password to use for decrypting PDF file.)r   r   r   OutputzUsed during output generation.z	--outfilez-o-zJPath to file where output is written. Or "-" (default) to write to stdout.z--raw-streamz-rz%Write stream objects without encodingz--binary-streamz-bz)Write stream objects with binary encodingz--text-streamz-tz"Write stream objects as plain text)	r   __doc__add_argumentr*   pdfminer__version__add_mutually_exclusive_groupadd_argument_groupr   )rz   procedure_parserparse_paramsoutput_paramscodec_parsers        r%   create_parserr   #  s   $?F
.    !5!5 67	   '   ::<!!+ "  !!%	 "  ,,- - L ?   	   @	   B   7   ;   --4 . M    !==?L4   8   1   Mr'   argvc                 N   t               }|j                  |       }|j                  r1t        j                         j                  t        j                         |j                  dk(  rt        j                  }nt        |j                  d      }|j                  r2|j                  j                  d      D cg c]  }t        |       }}ng }|j                  r|j                  D ch c]  }|dz
  	 }}nK|j                  r5|j                  j                  d      D ch c]  }t        |      dz
   }}n
t!               }|j"                  }|j$                  rd}n |j&                  rd}n|j(                  rd}nd }|j*                  D ]y  }	|j,                  rt/        ||	||||j0                  |d 	       -|j2                  rt5        |	||j2                  
       Rt7        ||	||||j0                  |d |j8                  	       { |j;                          y c c}w c c}w c c}w )N)argsr   w,rh   r4   r5   r6   )rc   rd   r1   re   )rc   re   )rc   rd   r1   re   rF   )r   
parse_argsdebuglogging	getLoggersetLevelDEBUGoutfilesysstdoutrp   objectssplitr   page_numbersrb   rW   rc   
raw_streambinary_streamtext_streamr   extract_tocr   rO   extract_embeddedr   r   rF   rx   )
r   rz   r   r_   xra   rb   rc   r1   r`   s
             r%   mainr     s   _F$'Dzz$$W]]3||s

T\\3'||"&,,"4"4S"9:Q#a&::"&"3"34Q1q544	'+||'9'9#'>?!3q6A:??%}}H$						 !	 ""EHAVAVW!#'#:#:
6 
KKMc ;
 5?s   9H*H"H"__main__rI   )F)NF)r   FNN)r   FNNF)Ar   r   os.pathr   rer   argparser   typingr   r   r   r   r   r	   r
   r   r   r   pdfminer.pdfdocumentr   r   r   pdfminer.pdfexceptionsr   r   r   r   pdfminer.pdfpager   pdfminer.pdfparserr   pdfminer.pdftypesr   r   r   r   pdfminer.psparserr   r   r   pdfminer.utilsr   basicConfigr   __name__rP   compiler+   r*   r)   r.   rt   r;   boolrT   r^   r   r   r   r   r   r   r   r    r'   r%   <module>r      s   )   	 
 # V V V  L L  % ( J J 7 7 #    			8	$
"**>
?@eCJ @C @> >f >Xc] >d >H  %		  
	.  $			 C= 	
 
>  $999 9 s^	9
 9 9 C=9 9 
9x z? >* &)3 &)# &)3 &)4 &)\  $$!!! SM! s^	!
 ! ! C=! ! ! 
!Hx~ xv>xS	" >d >B zF r'   