
    ;3hg              #          d Z ddlZddlZddlmZ ddlmZmZmZm	Z	m
Z
mZ ddlmZmZmZmZmZ ddlmZ ddlmZmZ ddlmZmZ dd	lmZ dd
lmZmZ ddlm Z  ddl!m"Z"m#Z#m$Z$ 	 	 	 	 	 	 	 	 	 	 	 	 	 d#dede"de%de%de
e   de&de
ee&      de%de'de&de%de
e%   de(de(de(deddf"dZ)	 	 	 	 	 	 d$de#de%de
ee&      de&d e(de%de
e   de%fd!Z*	 	 	 	 	 d%de#de%de
ee&      de&d e(de
e   de	e   fd"Z+y)&zIFunctions that can be used for the most common use-cases for pdfminer.six    N)StringIO)AnyBinaryIO	ContainerIteratorOptionalcast)HOCRConverterHTMLConverterPDFPageAggregatorTextConverterXMLConverter)ImageWriter)LAParamsLTPage)	PDFDeviceTagExtractor)PDFValueError)PDFPageInterpreterPDFResourceManager)PDFPage)AnyIO
FileOrNameopen_filenameinfoutfpoutput_typecodeclaparamsmaxpagespage_numberspasswordscalerotation
layoutmode
output_dirstrip_controldebugdisable_cachingkwargsreturnc           	         |r1t        j                         j                  t         j                         d}|rt	        |      }t        |       }d}|dk7  r-|t        j                  k(  rt        j                  j                  }|dk(  rt        |||||      }nw|dk(  rt        ||||||      }n`|dk(  rt        |||||
||      }nH|d	k(  rt        |||||
      }n2|dk(  rt        |t        t        |      |      }nd| }t!        |      |J t#        ||      }t%        j&                  | ||||       D ]*  }|j(                  |	z   dz  |_        |j+                  |       , |j-                          y)ak  Parses text from inf-file and writes to outfp file-like object.

    Takes loads of optional arguments but the defaults are somewhat sane.
    Beware laparams: Including an empty LAParams is not the same as passing
    None!

    :param inf: a file-like object to read PDF structure from, such as a
        file handler (using the builtin `open()` function) or a `BytesIO`.
    :param outfp: a file-like object to write the text to.
    :param output_type: May be 'text', 'xml', 'html', 'hocr', 'tag'.
        Only 'text' works properly.
    :param codec: Text decoding codec
    :param laparams: An LAParams object from pdfminer.layout. Default is None
        but may not layout correctly.
    :param maxpages: How many pages to stop parsing after
    :param page_numbers: zero-indexed page numbers to operate on.
    :param password: For encrypted PDFs, the password to decrypt.
    :param scale: Scale factor
    :param rotation: Rotation factor
    :param layoutmode: Default is 'normal', see
        pdfminer.converter.HTMLConverter
    :param output_dir: If given, creates an ImageWriter for extracted images.
    :param strip_control: Does what it says on the tin
    :param debug: Output more logging data
    :param disable_caching: Does what it says on the tin
    :param other:
    :return: nothing, acting as it does on two streams. Use StringIO to get
        strings.
    Ncachingtext)r   r   imagewriterxml)r   r   r0   stripcontrolhtml)r   r#   r%   r   r0   hocr)r   r   r2   tag)r   z1Output type can be text, html, xml or tag but is r    r"   r.   ih  )logging	getLoggersetLevelDEBUGr   r   sysstdoutbufferr   r   r   r
   r   r	   r   r   r   r   	get_pagesrotateprocess_pageclose)r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r0   rsrcmgrdevicemsginterpreterpages                         S/var/www/html/audio-gradio/venv/lib/python3.12/site-packages/pdfminer/high_level.pyextract_text_to_fprH      s   ^ $$W]]3K!*- _)<=G"&Ff#**!4

!!f#
 
	#&
 
	!#
 
	&
 
	gtHe'<EJ B+OC  $Wf5K!!## ' {{X-4  &' LLN    pdf_filer.   c           	         |
t               }t        | d      5 }t               5 }t        t        |      }t        |      }	t        |	|||      }
t        |	|
      }t        j                  |||||      D ]  }|j                  |        |j                         cddd       cddd       S # 1 sw Y   nxY wddd       y# 1 sw Y   yxY w)aw  Parse and return the text contained in a PDF file.

    :param pdf_file: Either a file path or a file-like object for the PDF file
        to be worked on.
    :param password: For encrypted PDFs, the password to decrypt.
    :param page_numbers: List of zero-indexed page numbers to extract.
    :param maxpages: The maximum number of pages to parse
    :param caching: If resources should be cached
    :param codec: Text decoding codec
    :param laparams: An LAParams object from pdfminer.layout. If None, uses
        some default settings that often work well.
    :return: a string containing all of the text extracted.
    Nrbr-   )r   r   r6   )r   r   r   r	   r   r   r   r   r   r>   r@   getvalue)rJ   r"   r!   r    r.   r   r   fpoutput_stringrB   rC   rE   rF   s                rG   extract_textrP      s    , :	x	& ("hj (M(B$W5wUXV(&9%%
 	+D $$T*	+ %%'( ( ( ( ( ( (s#   CA7B.	C.B7	3CCc              #   Z  K   |
t               }t        | d      5 }t        t        |      }t	        |      }t        ||      }t        ||      }	t        j                  |||||      D ]'  }
|	j                  |
       |j                         }| ) 	 ddd       y# 1 sw Y   yxY ww)a  Extract and yield LTPage objects

    :param pdf_file: Either a file path or a file-like object for the PDF file
        to be worked on.
    :param password: For encrypted PDFs, the password to decrypt.
    :param page_numbers: List of zero-indexed page numbers to extract.
    :param maxpages: The maximum number of pages to parse
    :param caching: If resources should be cached
    :param laparams: An LAParams object from pdfminer.layout. If None, uses
        some default settings that often work well.
    :return: LTPage objects
    NrL   r-   )r   r6   )r   r   r	   r   r   r   r   r   r>   r@   
get_result)rJ   r"   r!   r    r.   r   rN   resource_managerrC   rE   rF   layouts               rG   extract_pagesrU      s     ( :	x	& "(B-g>"#3hG()96B%%
 		D $$T*&&(FL		  s   B+A:B	B+B($B+)r/   utf-8Nr   N g      ?r   normalNFFF)rW   Nr   TrV   N)rW   Nr   TN),__doc__r7   r;   ior   typingr   r   r   r   r   r	   pdfminer.converterr
   r   r   r   r   pdfminer.imager   pdfminer.layoutr   r   pdfminer.pdfdevicer   r   pdfminer.pdfexceptionsr   pdfminer.pdfinterpr   r   pdfminer.pdfpager   pdfminer.utilsr   r   r   strintfloatboolrH   rP   rU    rI   rG   <module>ri      s   O  
  E E  ' , 6 0 E $ ; ; #'-1 $!w	ww w 	w
 x w w 9S>*w w w w w w w w w  !w" 
#wx -1#'(((((( 9S>*(( 	((
 (( (( x (( 	((Z -1#'%%% 9S>*% 	%
 % x % f%rI   