
    ;3hx                         d dl Z d dlZd dlmZmZmZmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZmZmZ d dlmZmZ d dlmZ d dlmZmZmZmZ d dlmZ d d	lmZm Z   ejB                  e"      Z# ed
      Z$ ed      Z% G d d      Z&y)    N)	AnyBinaryIO	ContainerDictIteratorListOptionalSetTuple)settings)PDFDocumentPDFNoPageLabelsPDFTextExtractionNotAllowed)PDFObjectNotFoundPDFValueError)	PDFParser)
dict_value	int_value
list_valueresolve1)LIT)Rect
parse_rectPagePagesc                       e Zd ZdZdedededee   ddf
dZdefd	Z	h d
Z
ededed    fd       Ze	 	 	 	 	 ddedeee      dededededed    fd       ZdedefdZdededefdZdedee   fdZy)PDFPageaz  An object that holds the information about a page.

    A PDFPage object is merely a convenience class that has a set
    of keys and values, which describe the properties of a page
    and point to its contents.

    Attributes
    ----------
      doc: a PDFDocument object.
      pageid: any Python object that can uniquely identify the page.
      attrs: a dictionary of page attributes.
      contents: a list of PDFStream objects that represents the page content.
      lastmod: the last modified time of the page.
      resources: a dictionary of resources used by the page.
      mediabox: the physical size of the page.
      cropbox: the crop rectangle of the page.
      rotate: the page rotation (in degree).
      annots: the page annotations.
      beads: a chain that represents natural reading order.
      label: the page's label (typically, the logical page number).

    docpageidattrslabelreturnNc                    || _         || _        t        |      | _        || _        t        | j                  j                  d            | _        t        | j                  j                  dt                           | _	        | j                  | j                  j                  d            | _        | j                  | j                  j                  d      | j                        | _        | j                  | j                  j                  d            | _        t!        | j                  j                  dd            dz   dz  | _        | j                  j                  d	      | _        | j                  j                  d
      | _        y)zInitialize a page object.

        doc: a PDFDocument object.
        pageid: any Python object that can uniquely identify the page.
        attrs: a dictionary of page attributes.
        label: page label string.
        LastModified	ResourcesMediaBoxCropBoxContentsRotater   ih  AnnotsBN)r   r   r   r    r!   r   getlastmoddict	resources_parse_mediaboxmediabox_parse_cropboxcropbox_parse_contentscontentsr   rotateannotsbeads)selfr   r   r    r!   s        P/var/www/html/audio-gradio/venv/lib/python3.12/site-packages/pdfminer/pdfpage.py__init__zPDFPage.__init__0   s    &



~ >?/7JJNN;/0
 ,,TZZ^^J-GH**4::>>)+DdmmT,,TZZ^^J-GH !!<=CsJjjnnX.ZZ^^C(
    c                 <    d| j                   d| j                  dS )Nz<PDFPage: Resources=z, MediaBox=>)r/   r1   )r9   s    r:   __repr__zPDFPage.__repr__O   s"    %dnn%7{4==BSSTUUr<   >   r)   r'   r&   r%   documentc              #      	K   	 d
dt         dt        t        t         f   dt        t        t               dt
        t        t        t        t         t        t         t         f   f   f      f 	fd		 j                         }d}dj                  v rB 	j                  d   j                        }|D ]  \  }}  ||t        |             d} |svj                  D ]f  }|j                         D ]Q  }	 j!                  |      }t#        |t$              r-|j'                  d	      t(        u r  ||t        |             S h y y # t        $ r t        j                  d       }Y w xY w# t*        $ r Y w xY ww)Nobjparentvisitedr"   c              3     K   t        | t              r+| }t        j                  |            j	                         }n%| j
                  }t        |       j	                         }|
t               }||v ry |j                  |       |j                         D ]  \  }}|	j                  v s||vs|||<     |j                  d      }|!t        j                  s|j                  d      }|t        u rCd|v r?t        j                  d|d          t!        |d         D ]  } 
|||      E d {     y |t"        u rt        j                  d|       ||f y y 7 -w)NTypetypeKidszPages: Kids=%rzPage: %r)
isinstanceintr   getobjcopyobjidsetadditemsINHERITABLE_ATTRSr,   r   STRICTLITERAL_PAGESlogdebugr   LITERAL_PAGE)rB   rC   rD   	object_idobject_propertieskvobject_typechildclsdepth_first_searchr@   s            r:   r^   z0PDFPage.create_pages.<locals>.depth_first_searchV   sd    
 #s#	$.xy/I$J$O$O$Q!  II	$.sO$8$8$:! %G#KK	" -1---!;L2L+,%a(- ,//7K"8??/33F;m+:K0K		*,=f,EF'(9&(AB UE1%9JGTTTU ,		*&78 "344 - Us   B'E+E0A>E.E/.EFr   TrF   N)r   r   strr	   r
   r   r   rJ   get_page_labelsr   	itertoolsrepeatcatalognextxrefs
get_objidsrK   rI   r.   r,   rV   r   )
r]   r@   page_labelspagesobjectsrM   treexrefrB   r^   s
   ``       @r:   create_pageszPDFPage.create_pagesT   s    
 +/$	5$	5cN$	5 c#h'$	5 eCc4S>&9!::;<	$	5L	13;3K3K3MK h&&&()9)9')BHDTDTUG& t(E4k1BCC   !__. E&ooe4%c40SWWV_5T"%hsD<M"NN	   	1#**40K	1" - sP   A,F 2E A6F 9AE1F E.+F -E..F 1	E=:F <E==F fppagenosmaxpagespasswordcachingcheck_extractablec              #   "  K   t        |      }t        |||      }|j                  s,|rd|z  }	t        |	      d|z  }
t        j                  |
       t        | j                  |            D ]  \  }}|r||vr| |s||dz   k  s y  y w)N)rq   rr   z"Text extraction is not allowed: %rzThe PDF %r contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this case   )r   r   is_extractabler   rT   warning	enumeraterm   )r]   rn   ro   rp   rq   rr   rs   parserr   	error_msgwarning_msgpagenopages                r:   	get_pageszPDFPage.get_pages   s      2&8WE !! @2E	1)<<A DF	F  K(%c&6&6s&;< 	LFDF'1JH
2	s   A?BBBvaluec                     d}|t         j                  d       |S 	 t        d t        |      D              S # t        $ r t         j                  d       |cY S w xY w)N)        r   g      @g     @zHMediaBox missing from /Page (and not inherited), defaulting to US Letterc              3   2   K   | ]  }t        |        y wr_   r   .0vals     r:   	<genexpr>z*PDFPage._parse_mediabox.<locals>.<genexpr>        GhsmG   z2Invalid MediaBox in /Page, defaulting to US LetterrT   rw   r   r   r   )r9   r   	us_letters      r:   r0   zPDFPage._parse_mediabox   sa    ,	=KK* 	GxGGG 	KKLM	s   8  AAr1   c                     |t         j                  d       |S 	 t        d t        |      D              S # t        $ r t         j                  d       |cY S w xY w)Nz2CropBox missing from /Page, defaulting to MediaBoxc              3   2   K   | ]  }t        |        y wr_   r   r   s     r:   r   z)PDFPage._parse_cropbox.<locals>.<genexpr>   r   r   z0Invalid CropBox in /Page, defaulting to MediaBoxr   )r9   r   r1   s      r:   r2   zPDFPage._parse_cropbox   sT    =KKLMO	GxGGG 	KKJKO	s   6  AAc                 J    g }|t        |      }t        |t              s|g}|S r_   )r   rI   list)r9   r   r5   s      r:   r4   zPDFPage._parse_contents   s,     Hh-$:r<   )Nr    TF)__name__
__module____qualname____doc__r   objectr	   r`   r;   r?   rQ   classmethodr   rm   r   r   rJ   boolr~   r   r   r0   r2   r   r4    r<   r:   r   r      s9   .)) ) 	)
 }) 
)>V# V G;K ;HY4G ; ;z  -1"'"" )C.)" 	"
 " "  " 
)	" "HS T "
C 
4 
D 
S T#Y r<   r   )'rb   loggingtypingr   r   r   r   r   r   r	   r
   r   pdfminerr   pdfminer.pdfdocumentr   r   r   pdfminer.pdfexceptionsr   r   pdfminer.pdfparserr   pdfminer.pdftypesr   r   r   r   pdfminer.psparserr   pdfminer.utilsr   r   	getLoggerr   rT   rV   rS   r   r   r<   r:   <module>r      sh      W W W  
 D ( I I ! +g! 6{GB Br<   