
    ;3h;                        d Z ddlZddlZddlZddlZddlZddlZddlZddlm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZmZ ddlmZmZ ddlmZm Z m!Z!m"Z"m#Z# ddl$m%Z%m&Z&  ejN                  e(      Z) G d	 d
e      Z* G d d      Z+ G d de+      Z, G d de+      Z- G d de-      Z. G d de+      Z/ G d de/      Z0 G d de,      Z1 G d de/      Z2 G d de,      Z3 G d de/      Z4 G d d       Z5 G d! d"e"e          Z6y)#zAdobe character mapping (CMap) support.

CMaps provide the mapping between character codes and Unicode
code-points to character ids (CIDs).

More information is available on:

  https://github.com/adobe-type-tools/cmap-resources

    N)AnyBinaryIODictIterableIteratorListMutableMappingOptionalSetTextIOTupleUnioncast)name2unicode)PDFExceptionPDFTypeError)PSEOFPSSyntaxError)KWD	PSKeyword	PSLiteralPSStackParserliteral_name)choplistnunpackc                       e Zd Zy)	CMapErrorN__name__
__module____qualname__     O/var/www/html/audio-gradio/venv/lib/python3.12/site-packages/pdfminer/cmapdb.pyr   r   ,   s    r#   r   c                       e Zd ZdZdeddfdZdefdZdededdfd	Z	d
ede
ddfdZde
d
eeee
f   ddfdZddZd
edee
   fdZy)CMapBaser   kwargsreturnNc                 .    |j                         | _        y N)copyattrsselfr'   s     r$   __init__zCMapBase.__init__3   s    28++-
r#   c                 @    | j                   j                  dd      dk7  S )NWModer   r,   getr.   s    r$   is_verticalzCMapBase.is_vertical6   s    zz~~gq)Q..r#   kvc                 "    || j                   |<   y r*   )r,   )r.   r6   r7   s      r$   set_attrzCMapBase.set_attr9   s    

1r#   codecidc                      y r*   r"   )r.   r:   r;   s      r$   add_code2cidzCMapBase.add_code2cid<       r#   c                      y r*   r"   )r.   r;   r:   s      r$   add_cid2unichrzCMapBase.add_cid2unichr?   r>   r#   c                      y r*   r"   )r.   cmaps     r$   use_cmapzCMapBase.use_cmapB   r>   r#   c                     t         r*   )NotImplementedError)r.   r:   s     r$   decodezCMapBase.decodeE   s    !!r#   )rB   r&   r(   N)r   r    r!   debugobjectr/   boolr5   strr9   intr=   r   r   bytesr@   rC   r   rF   r"   r#   r$   r&   r&   0   s    E@ @D @/T /# & T  3 4 # U9eS3H-I d "5 "Xc] "r#   r&   c            	           e Zd Zdeeef   ddfdZdefdZdeddfdZ	de
dee   fd	Zej                  dd
fdedeeeef      deedf   ddfdZy)CMapr'   r(   Nc                 >    t        j                  | fi | i | _        y r*   )r&   r/   code2cidr-   s     r$   r/   zCMap.__init__J   s    $)&)+-r#   c                 >    d| j                   j                  d      z  S )Nz
<CMap: %s>CMapNamer2   r4   s    r$   __repr__zCMap.__repr__N   s    djjnnZ888r#   rB   c                     t        |t              sJ t        t        |                   dt        t
        t        f   dt        t
        t        f   dd ffd | j                  |j                         y )Ndstsrcr(   c                     |j                         D ]+  \  }}t        |t              ri }|| |<    ||       '|| |<   - y r*   )items
isinstancedict)rU   rV   r6   r7   dr+   s        r$   r+   zCMap.use_cmap.<locals>.copyT   sF    		 1a&+-ACFAJCFr#   )rY   rN   rJ   typer   rK   rH   rP   )r.   rB   r+   s     @r$   rC   zCMap.use_cmapQ   s`    $%6s4:6%	d3;' 	d3;.? 	D 	 	T]]DMM*r#   r:   c              #     K   t         j                  d| |       | j                  }t        |      D ]V  }||v rD||   }t	        |t
              r| | j                  }-t        t        t
        t        f   |      }K| j                  }X y w)Nzdecode: %r, %r)	logrG   rP   iterrY   rK   r   r   rH   )r.   r:   r[   ixs        r$   rF   zCMap.decode_   sy     		"D$/MMd 		"AAvaDa%GAT#v+.2AMM		"s   B	Br"   outrP   .c           	         || j                   }d}t        |j                               D ]a  \  }}||fz   }t        |t              r|j                  d||fz         3| j                  |t        t        t        t        f   |      |       c y )Nr"   zcode %r = cid %d
)rb   rP   r:   )
rP   sortedrX   rY   rK   writedumpr   r   rH   )r.   rb   rP   r:   r6   r7   cs          r$   rf   z	CMap.dumpm   s     }}HD8>>+, 	PDAqtA!S!		.!Q78		cDc6k1BA,FQ	O	Pr#   )r   r    r!   r   rJ   rK   r/   rS   r&   rC   rL   r   rF   sysstdoutr   r
   r   rH   r   rf   r"   r#   r$   rN   rN   I   s    .sCx .T .9# 9+X +$ +"5 "Xc] "  jj04 "	PP 4V,-P CHo	P
 
Pr#   rN   c                   &    e Zd Zdedeedf   fdZy)IdentityCMapr:   r(   .c                 V    t        |      dz  }|rt        j                  d|z  |      S y)N   z>%dHr"   lenstructunpackr.   r:   ns      r$   rF   zIdentityCMap.decode   s*    IN==!T22r#   Nr   r    r!   rL   r   rK   rF   r"   r#   r$   rk   rk   ~       5 U38_ r#   rk   c                   &    e Zd Zdedeedf   fdZy)IdentityCMapByter:   r(   .c                 P    t        |      }|rt        j                  d|z  |      S y)Nz>%dBr"   rn   rr   s      r$   rF   zIdentityCMapByte.decode   s&    I==!T22r#   Nrt   r"   r#   r$   rw   rw      ru   r#   rw   c                   j    e Zd Zdeeef   ddfdZdefdZdedefdZe	j                  fdeddfd	Zy)

UnicodeMapr'   r(   Nc                 >    t        j                  | fi | i | _        y r*   )r&   r/   
cid2unichrr-   s     r$   r/   zUnicodeMap.__init__   s    $)&)*,r#   c                 >    d| j                   j                  d      z  S )Nz<UnicodeMap: %s>rR   r2   r4   s    r$   rS   zUnicodeMap.__repr__   s    !DJJNN:$>>>r#   r;   c                 N    t         j                  d| |       | j                  |   S )Nget_unichr: %r, %r)r^   rG   r|   r.   r;   s     r$   
get_unichrzUnicodeMap.get_unichr   s"    		&c2s##r#   rb   c                     t        | j                  j                               D ]  \  }}|j                  d||fz          y )Nzcid %d = unicode %r
)rd   r|   rX   re   )r.   rb   r6   r7   s       r$   rf   zUnicodeMap.dump   s=    4??0023 	8DAqII-A67	8r#   )r   r    r!   r   rJ   rK   r/   rS   r   rh   ri   r   rf   r"   r#   r$   rz   rz      sX    -sCx -T -?# ?$c $c $ "% 8 8 8r#   rz   c                       e Zd ZdedefdZy)IdentityUnicodeMapr;   r(   c                 F    t         j                  d| |       t        |      S )z+Interpret character id as unicode codepointr   )r^   rG   chrr   s     r$   r   zIdentityUnicodeMap.get_unichr   s    		&c23xr#   N)r   r    r!   rK   rJ   r   r"   r#   r$   r   r      s    c c r#   r   c                        e Zd ZdededdfdZy)FileCMapr:   r;   r(   Nc                 P   t        |t              rt        |t              s$J t        t        |      t        |      f             | j                  }|d d D ];  }t        |      }||v r!t        t        t        t        f   ||         }3i }|||<   |}= t        |d         }|||<   y )N)	rY   rJ   rK   r\   rP   ordr   r   rH   )r.   r:   r;   r[   rg   cits          r$   r=   zFileCMap.add_code2cid   s    $$C)= 	
s$Zc#@
 	
= MMcr 	AQBQwc6k*AbE2')"	 b]"r#   )r   r    r!   rJ   rK   r=   r"   r#   r$   r   r      s     3 4 r#   r   c                   ,    e Zd Zdedeeeef   ddfdZy)FileUnicodeMapr;   r:   r(   Nc                    t        |t              sJ t        t        |                   t        |t              r2t        |j
                  t              sJ t        |j
                        }nJt        |t              r|j                  dd      }n't        |t              rt        |      }nt        |      |dk(  r| j                  j                  |      dk(  ry || j                  |<   y )NzUTF-16BEignore     )rY   rK   rJ   r\   r   namer   rL   rF   r   r   r|   r3   )r.   r;   r:   unichrs       r$   r@   zFileUnicodeMap.add_cid2unichr   s    #s#3Sc^3#dI&dii---!$)),Fe$[[X6Fc"YFt$$ X$//"5"5c":c"A%r#   )r   r    r!   rK   r   r   rL   r@   r"   r#   r$   r   r      s(    &# &U9eS3H-I &d &r#   r   c                   ,     e Zd Zdededdf fdZ xZS )PyCMapr   moduler(   Nc                     t         |   |       |j                  | _        |j                  rd| j
                  d<   y y N)rR      r1   )superr/   CODE2CIDrP   IS_VERTICALr,   )r.   r   r   	__class__s      r$   r/   zPyCMap.__init__   s:    $'"#DJJw r#   )r   r    r!   rJ   r   r/   __classcell__r   s   @r$   r   r      s"    $S $# $$ $ $r#   r   c                   0     e Zd Zdedededdf fdZ xZS )PyUnicodeMapr   r   verticalr(   Nc                     t         |   |       |r!|j                  | _        d| j                  d<   y |j
                  | _        y r   )r   r/   CID2UNICHR_Vr|   r,   CID2UNICHR_H)r.   r   r   r   r   s       r$   r/   zPyUnicodeMap.__init__   s>    $'$11DO"#DJJw$11DOr#   )r   r    r!   rJ   r   rI   r/   r   r   s   @r$   r   r      s)    2S 2# 2 2$ 2 2r#   r   c                       e Zd ZU i Zeeef   ed<   i Zeee	e
   f   ed<    G d de      Zededefd       Zededefd       Zedded	edefd
       Zy)CMapDB_cmap_cache_umap_cachec                       e Zd Zy)CMapDB.CMapNotFoundNr   r"   r#   r$   CMapNotFoundr      s    r#   r   r   r(   c           	         |j                  dd      }d|z  }t        j                  d|       t        j                  j                  dd      t        j                  j                  t        j                  j                  t              d      f}|D ]  }t        j                  j                  ||      }t        j                  j                  |      sCt        j                  |      }	 t        t        |      dt        j                   |j#                                     |j%                          c S  t&        j)                  |      # |j%                          w xY w)	N  z%s.pickle.gzzloading: %r	CMAP_PATHz/usr/share/pdfminer/rB   r"   )replacer^   rG   osenvironr3   pathjoindirname__file__existsgzipopenr\   rJ   pickleloadsreadcloser   r   )clsr   filename
cmap_paths	directoryr   gzfiles          r$   
_load_datazCMapDB._load_data   s    ||D"%!D(		-&JJNN;(>?GGLL2F;

 $ 	#I77<<	84Dww~~d#4#D	2v||FKKM/JKLLN	# !!$'' LLNs   *6E		Ec                 $   |dk(  rt        d      S |dk(  rt        d      S |dk(  rt        d      S |dk(  rt        d      S 	 | j                  |   S # t        $ r Y nw xY w| j	                  |      }t        ||      x| j                  |<   }|S )Nz
Identity-Hr   )r1   z
Identity-Vr   OneByteIdentityHOneByteIdentityV)rk   rw   r   KeyErrorr   r   )r   r   datarB   s       r$   get_cmapzCMapDB.get_cmap   s    <a((\!a((''#!,,''#!,,	??4(( 		~~d#'-dD'99s   A 	A! A!r   c                     	 | j                   |   |   S # t        $ r Y nw xY w| j                  d|z        }dD cg c]  }t        |||       nc c}w c}| j                   |<   | j                   |   |   S )Nzto-unicode-%s)FT)r   r   r   r   )r   r   r   r   r7   s        r$   get_unicode_mapzCMapDB.get_unicode_map  sy    	??4(22 		~~o45FS TdD!!< T Tt$X..s    	  AN)F)r   r    r!   r   r   rJ   r   __annotations__r   r   r   r   r   classmethodr   r   r&   r   rI   rz   r   r"   r#   r$   r   r      s    %'Kc6k"'13Kc4--.3y  (c (c ( ($ C H  " /3 /$ /: / /r#   r   c                   L   e Zd ZdededdfdZddZ ed      Z ed      Z	 ed	      Z
 ed
      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      Z ed      ZdededdfdZdeddfdZy)
CMapParserrB   fpr(   Nc                 j    t        j                  | |       || _        d| _        t	               | _        y )NT)r   r/   rB   _in_cmapset	_warnings)r.   rB   r   s      r$   r/   zCMapParser.__init__  s)    tR(	#&5r#   c                 D    	 | j                          y # t        $ r Y y w xY wr*   )
nextobjectr   r4   s    r$   runzCMapParser.run   s#    	OO 		s    	s	   begincmaps   endcmaps   usecmaps   defs   begincodespaceranges   endcodespaceranges   begincidranges   endcidranges   begincidchars
   endcidchars   beginbfranges
   endbfranges   beginbfchars	   endbfchars   beginnotdefranges   endnotdefrangepostokenc                 4   || j                   u rd| _        | j                          y|| j                  u rd| _        y| j                  sy|| j                  u rA	 | j                  d      \  \  }}\  }}| j                  j                  t        |      |       y|| j                  u rO	 | j                  d      \  \  }}| j                  j                  t        j                  t        |                   y|| j                  u r| j                          y|| j                   u r| j                          y|| j"                  u r| j                          y|| j$                  u rj| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]7  \  }
}}t)        |
t*              s| j-                  d       *t)        |t*              s| j-                  d       Lt)        |t.              s| j-                  d	       nt1        |
      t1        |      k7  r| j-                  d
       |
dd }|dd }||k7  r| j-                  d       |
dd }|dd }t3        |      }t3        |      }t1        |      }t5        ||z
  dz         D ]A  }|t7        j8                  d||z         | d z   }| j                  j;                  ||z   |       C : y|| j<                  u r| j                          y|| j>                  u ru| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]C  \  }}t)        |t*              st)        |t.              s(| j                  j;                  ||       E y|| j@                  u r| j                          y|| jB                  u r| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]  \  }
}}t)        |
t*              s| j-                  d       *t)        |t*              s| j-                  d       Lt1        |
      t1        |      k7  r| j-                  d       ut3        |
      }t3        |      }t)        |tD              rct1        |      ||z
  dz   k7  r| j-                  d       tG        t5        ||dz         |      D ]!  \  }}| j                  j;                  ||       # t)        |t*              sJ |dd }t3        |      }|dd }t1        |      }t5        ||z
  dz         D ]A  }|t7        j8                  d||z         | d z   }| j                  j;                  ||z   |       C  y|| jH                  u r| j                          y|| jJ                  u r~| j                         D cg c]  \  }}|	 }	}}t'        d|	      D ]L  \  }}t)        |t*              st)        |t*              s(| j                  j;                  t3        |      |       N y|| jL                  u r| j                          y|| jN                  u r| j                          y| jQ                  ||f       y# t        $ r Y yw xY w# t        $ r Y yt        j                  $ r Y yw xY wc c}}w c c}}w c c}}w c c}}w )z[ToUnicode CMaps

        See Section 5.9.2 - ToUnicode CMaps of the PDF Reference.
        TNFrm   r      z0The start object of begincidrange is not a byte.z.The end object of begincidrange is not a byte.z.The cid object of begincidrange is not a byte.z?The start and end byte of begincidrange have different lengths.zGThe prefix of the start and end byte of begincidrange are not the same.z>LzThe start object is not a byte.zThe end object is not a byte.z.The start and end byte have different lengths.zPThe difference between the start and end offsets does not match the code length.))KEYWORD_BEGINCMAPr   popallKEYWORD_ENDCMAPKEYWORD_DEFpoprB   r9   r   r   KEYWORD_USECMAPrC   r   r   r   KEYWORD_BEGINCODESPACERANGEKEYWORD_ENDCODESPACERANGEKEYWORD_BEGINCIDRANGEKEYWORD_ENDCIDRANGEr   rY   rL   
_warn_oncerK   ro   r   rangerp   packr@   KEYWORD_BEGINCIDCHARKEYWORD_ENDCIDCHARKEYWORD_BEGINBFRANGEKEYWORD_ENDBFRANGElistzipKEYWORD_BEGINBFCHARKEYWORD_ENDBFCHARKEYWORD_BEGINNOTDEFRANGEKEYWORD_ENDNOTDEFRANGEpush)r.   r   r   _r6   r7   cmapname__objobjs
start_byteend_byter;   start_prefix
end_prefixsvarevarstartendvlenr`   ra   r:   unicode_valuevarbaseprefixs                              r$   
do_keywordzCMapParser.do_keyword7  s0   
 D*** DMKKMd***!DM}}D$$$#'88A; !Q!Q		""<?A6 D(((#'88A; !X		""6??<3I#JK
 D444KKMD222KKMD...KKMD,,,)-7IRC7D7-5a-> 9)
Hc!*e4OO$VW!(E2OO$TU!#s+OO$TUz?c(m3OO- )#2%cr]
:-OO: !"#}dm4ysU{Q/ 9A$v{{4'CTEF'KKAII,,S1Wa89;9@ D---KKMD+++)-7IRC7D7%a. 8	TdE*z#s/CII,,S$78 D---KKMD+++)-7IRC7D7.6q$.? ?*
Hd!*e4OO$EF!(E2OO$CDz?c(m3OO$TU
+h'dD)4yC%K!O3F /2%sQw2G.N E*]		00mDE &dE222rs)C"3<D!#2YFs8D"3;?3 ?"V[[tax%@$%HH		00A>?5?: D,,,KKMD***)-7IRC7D7%a. A	Tc5)ju.EII,,WS\4@A D111KKMD///KKM		3,A !  !   &&   8P 8 8J 8sC   ?Y (AY 5ZZ/ZZ	YY	Y?*Y?>Y?msgc                     || j                   vr6| j                   j                  |       d}t        j                  ||z          yy)z!Warn once for each unique messagezIgnoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. N)r   addr^   warning)r.   r  base_msgs      r$   r   zCMapParser._warn_once  sA    dnn$NNs#/ 
 KK3' %r#   )r(   N)r   r    r!   r&   r   r/   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rK   r   r
  rJ   r   r"   r#   r$   r   r     s   )X )8 ) ) L)*oO*oOf+K"%&<"= #$8 9 01n-/]+/]+n-L)"#67 !23U c U ) U  U n	(c 	(d 	(r#   r   )7__doc__r   loggingr   os.pathr   rp   rh   typingr   r   r   r   r   r   r	   r
   r   r   r   r   r   pdfminer.encodingdbr   pdfminer.pdfexceptionsr   r   pdfminer.psexceptionsr   r   pdfminer.psparserr   r   r   r   r   pdfminer.utilsr   r   	getLoggerr   r^   r   r&   rN   rk   rw   rz   r   r   r   r   r   r   r   r"   r#   r$   <module>r     s   	   	    
     - = 6 T T ,g!	 	" "22P8 2Pj8 | 8 8" t $&Z &*$T $2: 24/ 4/n(y) (r#   