o
    hǐ                     @   s  d dl Z d dlZd dlZd dlZd dlmZmZmZmZ d dl	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZmZmZ d dlmZ d dlm Z  d dl!m"Z" d d	l#m$Z$m%Z%m&Z&m'Z' d d
l(m)Z)m*Z*m+Z+ d dl,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5 d dl6m7Z7 d dl8m9Z9m:Z:m;Z; d dl<m=Z=m>Z>m?Z?m@Z@mAZA eBeCZDG dd de+ZEG dd deFZGG dd de$ZHG dd de$ZIG dd de$ZJG dd de$ZKG dd deKZLG dd deMZNG dd  d eMZOG d!d" d"eKZPe:d#ZQe:d$ZRe:d%ZSG d&d' d'ZTG d(d) d)eTZUG d*d+ d+eUZVG d,d- d-eTZWG d.d/ d/ZXG d0d1 d1eXZYG d2d3 d3eYZZG d4d5 d5Z[G d6d7 d7e"Z\dS )8    N)md5sha256sha384sha512)AnyCallableDictIterableIteratorKeysViewListOptionalSequenceTupleTypeUnioncast)default_backend)Cipher
algorithmsmodes)settings)Arcfour)
NumberTree)PDFExceptionPDFKeyErrorPDFObjectNotFoundPDFTypeError)	PDFParserPDFStreamParserPDFSyntaxError)	DecipherCallable	PDFStreamdecipher_all
dict_value	int_value
list_value	str_valuestream_value
uint_value)PSEOF)KWDLITliteral_name)choplistdecode_textformat_int_alphaformat_int_romannunpackc                   @      e Zd ZdS )PDFNoValidXRefN__name__
__module____qualname__ r9   r9   G/var/www/html/venv/lib/python3.10/site-packages/pdfminer/pdfdocument.pyr4   ;       r4   c                   @      e Zd ZdZdS )PDFNoValidXRefWarningztLegacy warning for missing xref.

    Not used anymore because warnings.warn is replaced by logger.Logger.warn.
    Nr6   r7   r8   __doc__r9   r9   r9   r:   r=   ?       r=   c                   @   r3   )PDFNoOutlinesNr5   r9   r9   r9   r:   rA   F   r;   rA   c                   @   r3   )PDFNoPageLabelsNr5   r9   r9   r9   r:   rB   J   r;   rB   c                   @   r3   )PDFDestinationNotFoundNr5   r9   r9   r9   r:   rC   N   r;   rC   c                   @   r3   )PDFEncryptionErrorNr5   r9   r9   r9   r:   rD   R   r;   rD   c                   @   r3   )PDFPasswordIncorrectNr5   r9   r9   r9   r:   rE   V   r;   rE   c                   @   r<   )PDFEncryptionWarningzyLegacy warning for failed decryption.

    Not used anymore because warnings.warn is replaced by logger.Logger.warn.
    Nr>   r9   r9   r9   r:   rF   Z   r@   rF   c                   @   r<   )"PDFTextExtractionNotAllowedWarningzLegacy warning for PDF that does not allow extraction.

    Not used anymore because warnings.warn is replaced by logger.Logger.warn.
    Nr>   r9   r9   r9   r:   rG   a   r@   rG   c                   @   r3   )PDFTextExtractionNotAllowedNr5   r9   r9   r9   r:   rH   h   r;   rH   ZObjStmZXRefCatalogc                   @   sf   e Zd Zdeeef fddZdee fddZ	dede
ee eef fddZd	edd
fddZd
S )PDFBaseXRefreturnc                 C      t NNotImplementedErrorselfr9   r9   r:   get_trailers      zPDFBaseXRef.get_trailerc                 C   s   g S rM   r9   rP   r9   r9   r:   
get_objidsv   rS   zPDFBaseXRef.get_objidsobjidc                 C   s   t |rM   )r   rQ   rU   r9   r9   r:   get_pos|   s   zPDFBaseXRef.get_posparserNc                 C   rL   rM   rN   )rQ   rX   r9   r9   r:   load   rS   zPDFBaseXRef.load)r6   r7   r8   r   strr   rR   r	   intrT   r   r   rW   r   rY   r9   r9   r9   r:   rJ   r   s
     rJ   c                   @   s   e Zd ZdddZdefddZdeddfdd	Zdeddfd
dZde	ee
f fddZdee fddZdedeee eef fddZdS )PDFXRefrK   Nc                 C   s   i | _ i | _d S rM   )offsetstrailerrP   r9   r9   r:   __init__   s   
zPDFXRef.__init__c                 C      d| j   S )Nz<PDFXRef: offsets=%r>r]   keysrP   r9   r9   r:   __repr__      zPDFXRef.__repr__rX   c              	   C   st  	 z|  \}}| }|sW q W n ty   tdw |dr(|| n|d}t|dkr?d|d|}t|z	tt	|\}}W n t
y[   d|d|}t|w t||| D ]G}z|  \}	}| }W n ty|   tdw |d}t|d	krd
|d|}t||\}
}}|dkrqcd t	|
t	|f| j|< qcqtd| j | | d S )NTz Unexpected EOF - file corrupted?   trailer       zTrailer not found: z: line=zInvalid line:    zInvalid XRef format: z, line=   nzxref objects: %r)nextlinestripr*   r4   
startswithseeksplitlenmapr[   
ValueErrorranger]   logdebugload_trailer)rQ   rX   poslinef	error_msgstartnobjsrU   _Zpos_bZgenno_bZuse_br9   r9   r:   rY      sR   




"zPDFXRef.loadc                 C   s   z|  \}}|tdu sJ t|| \}}W n ty4   |d}|s,td|d \}}Y nw | jt	| t
d| j d S )Nre      zUnexpected EOF - file corruptedr   z
trailer=%r)	nexttokenr+   rZ   
nextobjectr*   popr4   r^   updater$   rs   rt   )rQ   rX   r|   kwdZdicxr9   r9   r:   ru      s   
zPDFXRef.load_trailerc                 C      | j S rM   r^   rP   r9   r9   r:   rR         zPDFXRef.get_trailerc                 C   s
   | j  S rM   ra   rP   r9   r9   r:   rT         
zPDFXRef.get_objidsrU   c                 C   s
   | j | S rM   )r]   rV   r9   r9   r:   rW      r   zPDFXRef.get_posrK   N)r6   r7   r8   r_   rZ   rc   r   rY   ru   r   r   rR   r   r[   rT   r   r   rW   r9   r9   r9   r:   r\      s    
&$r\   c                   @   s6   e Zd ZdefddZedZdeddfddZ	dS )	PDFXRefFallbackrK   c                 C   r`   )Nz<PDFXRefFallback: offsets=%r>ra   rP   r9   r9   r:   rc      rd   zPDFXRefFallback.__repr__z^(\d+)\s+(\d+)\s+obj\brX   Nc                 C   s  | d 	 z| \}}W n
 ty   Y d S w |dr1| | | | td| j d S |d}| j	
|}|s?q| \}}t|}t|}	d ||	f| j|< | | | \}
}t|tr|dtu rt|}z|d }W n ty   tjrtd| d}Y nw t| }g }z	 | \}
}|tt| q ty   Y nw t|t|d	 }t|D ]}||d	  }||df| j|< qq)
Nr   r}   re   trailer: %rzlatin-1r   NN is not defined: %rrg   )rm   rj   r*   rl   ru   rs   rt   r^   decode
PDFOBJ_CUEmatchgroupsr[   r]   r   
isinstancer"   getLITERAL_OBJSTMr(   KeyErrorr   STRICTr    r   get_dataappendr   minro   rr   )rQ   rX   rv   Z
line_bytesrw   mZobjid_sZgenno_srU   gennor|   objstreamnZparser1objsindexobjid1r9   r9   r:   rY      s\   





zPDFXRefFallback.load)
r6   r7   r8   rZ   rc   recompiler   r   rY   r9   r9   r9   r:   r      s    
r   c                   @   s~   e Zd ZdddZdefddZdeddfdd	Zdeee	f fd
dZ
dee fddZdedeee eef fddZdS )PDFXRefStreamrK   Nc                 C   s(   d | _ d | _d | _d | _d | _g | _d S rM   )dataentlenfl1fl2fl3rangesrP   r9   r9   r:   r_      s   
zPDFXRefStream.__init__c                 C   s
   d| j  S )Nz<PDFXRefStream: ranges=%r>)r   rP   r9   r9   r:   rc     r   zPDFXRefStream.__repr__rX   c           	      C   s*  |  \}}|  \}}|  \}}| \}}t|tr$|dtur(td|d }|dd|f}t|d dkr@td| j	
tttttf  td| |d \| _| _| _| jd urk| jd urk| jd usmJ | | _| j| j | j | _|j| _td	d
tt| j	| j| j| j d S )Nr   zInvalid PDF stream spec.SizeIndexr   rg   zInvalid index numberWz&xref stream: objid=%s, fields=%d,%d,%dz, )r~   r   r   r"   r   LITERAL_XREFr4   ro   r    r   extendr   r
   r   r[   r.   r   r   r   r   r   r   attrsr^   rs   rt   joinrp   repr)	rQ   rX   r|   rU   r   r   r   sizeZindex_arrayr9   r9   r:   rY     s.   $"
zPDFXRefStream.loadc                 C   r   rM   r   rP   r9   r9   r:   rR      r   zPDFXRefStream.get_trailerc                 c   s    | j D ]?\}}t|D ]6}| jd usJ | jd usJ | j| }| j||| j  }t|d | j d}|dks=|dkrB|| V  qqd S )Nr}   rg   )r   rr   r   r   r2   r   )rQ   rz   r{   ioffsetentf1r9   r9   r:   rT   #  s   

zPDFXRefStream.get_objidsrU   c           
      C   s  d}| j D ]\}}||kr||| k r||| 7 } n	||7 }qt|| jd us+J | jd us2J | jd urA| jd urA| jd usCJ | j| }| j||| j  }t|d | j d}t|| j| j| j  }t|| j| j d  }	|dkr~d ||	fS |dkr||	dfS t|)Nr   r}   rg   )r   r   r   r   r   r   r   r2   )
rQ   rU   r   rz   r{   r   r   r   f2Zf3r9   r9   r:   rW   .  s(   
"


zPDFXRefStream.get_posr   )r6   r7   r8   r_   rZ   rc   r   rY   r   r   rR   r
   r[   rT   r   r   rW   r9   r9   r9   r:   r      s    
$r   c                   @   sX  e Zd ZU dZdZeedf ed< 	d.dee	 de
eef ded	d
fddZd/ddZd/ddZd/ddZd	efddZd	efddZd	efddZde	d	e	fddZde	d	e	fddZded	ee	 fddZde	d	ee	 fd d!Zde	d	efd"d#Zde	d	ee	 fd$d%Z	
d0d&ed'ed(e	d)ee
eef  d	e	f
d*d+Zd&ed'ed(e	d	e	fd,d-Zd
S )1PDFStandardSecurityHandlers    (N^NuAd NV.. h>/dSiz)rg   rh   .supported_revisions docidparampasswordrK   Nc                 C   s   || _ || _|| _|   d S rM   )r   r   r   init)rQ   r   r   r   r9   r9   r:   r_   P  s   z#PDFStandardSecurityHandler.__init__c                 C   s2   |    | j| jvrd| j }t||   d S )NzUnsupported revision: param=%r)init_paramsrr   r   rD   init_key)rQ   ry   r9   r9   r:   r   [  s
   
zPDFStandardSecurityHandler.initc                 C   sn   t | jdd| _t | jd | _t| jd d| _t| jd | _t| jd | _	t | jdd	| _
d S )
NVr   RP    OUZLength(   )r%   r   r   vr   r)   pr'   oulengthrP   r9   r9   r:   r   b  s   z&PDFStandardSecurityHandler.init_paramsc                 C   s    |  | j| _| jd u rtd S rM   )authenticater   keyrE   rP   r9   r9   r:   r   j  s   
z#PDFStandardSecurityHandler.init_keyc                 C      t | jd@ S )N   boolr   rP   r9   r9   r:   is_printableo  rd   z'PDFStandardSecurityHandler.is_printablec                 C   r   )N   r   rP   r9   r9   r:   is_modifiabler  rd   z(PDFStandardSecurityHandler.is_modifiablec                 C   r   )N   r   rP   r9   r9   r:   is_extractableu  rd   z)PDFStandardSecurityHandler.is_extractabler   c                    s   | j dkrt|| jS t| j}|| jd  t|| }tddD ] d	 fddt
|D }t||}q(||7 }|S )Nrg   r   r}          c                 3       | ]
}t | A fV  qd S rM   bytes.0cr   r9   r:   	<genexpr>      z7PDFStandardSecurityHandler.compute_u.<locals>.<genexpr>)r   r   ZencryptPASSWORD_PADDINGr   r   r   digestrr   r   iter)rQ   r   hashresultkr9   r   r:   	compute_ux  s   

z$PDFStandardSecurityHandler.compute_uc                 C   s   || j  d d }t|}|| j |td| j || jd  | jdkr5t	t
| js5|d | }d}| jdkrV| jd }td	D ]}t|d |  }qI|d | S )
Nr   <Lr   r   s      rh   r   2   )r   r   r   r   structpackr   r   r   r   PDFStandardSecurityHandlerV4encrypt_metadatar   r   rr   )rQ   r   r   r   r   r|   r9   r9   r:   compute_encryption_key  s   



z1PDFStandardSecurityHandler.compute_encryption_keyc                 C   s*   | d}| |}|d u r| |}|S )Nlatin1)encodeauthenticate_user_passwordauthenticate_owner_password)rQ   r   Zpassword_bytesr   r9   r9   r:   r     s
   


z'PDFStandardSecurityHandler.authenticatec                 C   s   |  |}| |r|S d S rM   )r   verify_encryption_key)rQ   r   r   r9   r9   r:   r     s   

z5PDFStandardSecurityHandler.authenticate_user_passwordc                 C   s8   |  |}| jdkr|| jkS |d d | jd d kS )Nrg   r   )r   r   r   )rQ   r   r   r9   r9   r:   r     s   


z0PDFStandardSecurityHandler.verify_encryption_keyc                    s   || j  d d }t|}| jdkrtdD ]}t| }qd}| jdkr+| jd }| d | }| jdkrAt|| j}n!| j}tdddD ] d		 fd
dt
|D }t||}qJ| |S )Nr   rh   r   r   r   rg      r   c                 3   r   rM   r   r   r   r9   r:   r     r   zIPDFStandardSecurityHandler.authenticate_owner_password.<locals>.<genexpr>)r   r   r   rr   r   r   r   decryptr   r   r   r   )rQ   r   r   r|   r   r   Zuser_passwordr   r9   r   r:   r     s    




z6PDFStandardSecurityHandler.authenticate_owner_passwordrU   r   r   r   c                 C   s   |  |||S rM   )decrypt_rc4)rQ   rU   r   r   r   r9   r9   r:   r     s   z"PDFStandardSecurityHandler.decryptc                 C   sl   | j d usJ | j td|d d  td|d d  }t|}| d tt|d }t||S )Nr   rh   rg   r   )	r   r   r   r   r   r   ro   r   r   )rQ   rU   r   r   r   r   r9   r9   r:   r     s
   .z&PDFStandardSecurityHandler.decrypt_rc4r   r   rM   )r6   r7   r8   r   r   r   r[   __annotations__r   r   r   rZ   r   r_   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r9   r9   r9   r:   r   I  sN   
 





	r   c                       s   e Zd ZU dZeedf ed< d fddZdede	e
eeegef  fd	d
Z		ddededede	eeef  de	e defddZdedededefddZdedededefddZ  ZS )r   )r   .r   rK   Nc                    s   t    d| _t| jd| _t| jd | _t| jd | _	t
| jdd| _| j| j	kr:d| j }t|i | _| j D ]\}}| t|d }|d u r\d	| j }t||| j|< qB| j| jd
< | j	| jvrwd| j }t|d S )N   ZCFZStmFZStrFZEncryptMetadataTz"Unsupported crypt filter: param=%rZCFMz%Unknown crypt filter method: param=%rZIdentityz Undefined crypt filter: param=%r)superr   r   r$   r   r   cfr-   Zstmfstrfr   r   rD   cfmitemsget_cfmdecrypt_identity)rQ   ry   r   r   rx   	__class__r9   r:   r     s*   



z(PDFStandardSecurityHandlerV4.init_paramsnamec                 C   s    |dkr| j S |dkr| jS d S )NZV2ZAESV2)r   decrypt_aes128rQ   r  r9   r9   r:   r    s
   z$PDFStandardSecurityHandlerV4.get_cfmrU   r   r   r   c                 C   sP   | j s|d ur|d}|d urt|dkr|S |d u r| j}| j| |||S )Nr   ZMetadata)r   r   r-   r  r  )rQ   rU   r   r   r   r  tr9   r9   r:   r     s   
z$PDFStandardSecurityHandlerV4.decryptc                 C   s   |S rM   r9   )rQ   rU   r   r   r9   r9   r:   r    rS   z-PDFStandardSecurityHandlerV4.decrypt_identityc           	      C   s   | j d usJ | j td|d d  td|d d  d }t|}| d tt|d }|d d }|dd  }tt	|t
|t d}| |S )Nr   rh   rg   s   sAlTr   backend)r   r   r   r   r   r   ro   r   r   AESr   CBCr   	decryptorr   )	rQ   rU   r   r   r   r   initialization_vector
ciphertextcipherr9   r9   r:   r	    s&   z+PDFStandardSecurityHandlerV4.decrypt_aes128r   )NN)r6   r7   r8   r   r   r[   r   r   rZ   r   r   r   r  r   r   r   r  r	  __classcell__r9   r9   r  r:   r     s*   
 $
"r   c                	       s  e Zd ZdZd" fddZdedeeeee	ge	f  fddZ
d	edee	 fd
dZd	ede	fddZ	d#d	e	de	dee	 de	fddZ	d#d	e	de	dee	 de	fddZ	d#d	e	de	dee	 de	fddZede	defddZde	de	de	de	fddZdedede	de	fd d!Z  ZS )$PDFStandardSecurityHandlerV5)r      rK   Nc                    s   t    d| _t| jd | _t| jd | _| jd d | _| jdd | _	| jdd  | _
| jd d | _| jdd | _| jdd  | _d S )N   ZOEZUEr   r   )r   r   r   r'   r   oeuer   o_hasho_validation_salt
o_key_saltr   u_hashu_validation_salt
u_key_saltrP   r  r9   r:   r     s   
z(PDFStandardSecurityHandlerV5.init_paramsr  c                 C   s   |dkr| j S d S )NZAESV3)decrypt_aes256r
  r9   r9   r:   r  +  s   z$PDFStandardSecurityHandlerV5.get_cfmr   c                 C   s   |  |}| || j| j}|| jkr2| || j| j}tt|t	
dt d}| | jS | || j}|| jkr[| || j}tt|t	
dt d}| | jS d S )Ns                   r  )_normalize_password_password_hashr  r   r  r  r   r   r  r   r  r   r  r   r  r  r  r  r  )rQ   r   Z
password_br   r  r9   r9   r:   r   1  s(   


z)PDFStandardSecurityHandlerV5.authenticatec                 C   s8   | j dkr|s	dS ddlm} ||}|dd d S )Nr  r   r   )saslprepzutf-8   )r   Zpdfminer._saslprepr#  r   )rQ   r   r#  r9   r9   r:   r!  G  s   
z0PDFStandardSecurityHandlerV5._normalize_passwordsaltvectorc                 C   s.   | j dkr| |||S | ||dd |S )z2Compute password hash depending on revision numberr   r   r   )r   _r5_password_r6_password)rQ   r   r%  r&  r9   r9   r:   r"  Q  s   
z+PDFStandardSecurityHandlerV5._password_hashc                 C   s,   t |}|| |dur|| | S )z#Compute the password for revision 5N)r   r   r   )rQ   r   r%  r&  r   r9   r9   r:   r'  \  s
   

z)PDFStandardSecurityHandlerV5._r5_passwordc                 C   s   t |}|| |dur|| | }t ttf}d }}|dk s)||d krj|| |p/d d }	| j|dd |dd |	d}
|| |
dd  }||
 }|
t|
d  }|d7 }|dk s)||d ks)|dd S )	z#Compute the password for revision 6Nr   @   r   r   r   )r   ivr   r}   )r   r   r   r   r   _aes_cbc_encrypt_bytes_mod_3ro   )rQ   r   r%  r&  Zinitial_hashr   hashesZround_noZlast_byte_valk1eZ	next_hashr9   r9   r:   r(  i  s    


 	z)PDFStandardSecurityHandlerV5._r6_passwordinput_bytesc                 C   s   t dd | D d S )Nc                 s   s    | ]}|d  V  qdS )rh   Nr9   )r   br9   r9   r:   r     s    z<PDFStandardSecurityHandlerV5._bytes_mod_3.<locals>.<genexpr>rh   )sum)r0  r9   r9   r:   r,    s   z)PDFStandardSecurityHandlerV5._bytes_mod_3r   r*  r   c                 C   s0   t t|t|}| }|||  S rM   )r   r   r  r   r  	encryptorr   finalize)rQ   r   r*  r   r  r3  r9   r9   r:   r+    s   z-PDFStandardSecurityHandlerV5._aes_cbc_encryptrU   r   c                 C   sR   |d d }|dd  }| j d usJ tt| j t|t d}| |S )Nr   r  )	r   r   r   r  r   r  r   r  r   )rQ   rU   r   r   r  r  r  r9   r9   r:   r     s   
z+PDFStandardSecurityHandlerV5.decrypt_aes256r   rM   )r6   r7   r8   r   r   rZ   r   r   r[   r   r  r   r!  r"  r'  r(  staticmethodr,  r+  r   r  r9   r9   r  r:   r    sP    $


"r  c                   @   st  e Zd ZU dZeeeedZee	e
e f ed< 			d.dededed	ed
df
ddZedZd/ded
dfddZdede	de	d
efddZded
eee e	f fddZde	de	d
efddZde	d
efddZeeeeeef Zd
ee fddZd
ee fdd Zd!ed"e ee!f d
efd#d$Z"d%e ee!f d
efd&d'Z#ded
e	fd(d)Z$ded*e	d+ee% d
dfd,d-Z&dS )0PDFDocumentaP  PDFDocument object represents a PDF document.

    Since a PDF file can be very big, normally it is not loaded at
    once. So PDF document has to cooperate with a PDF parser in order to
    dynamically import the data as processing goes.

    Typical usage:
      doc = PDFDocument(parser, password)
      obj = doc.getobj(objid)

    )r}   rg   r   r   security_handler_registryr   TrX   r   cachingfallbackrK   Nc           
      C   sn  || _ g | _g | _i | _d| _d| _d| _i | _i | _|| _| j	|  d | _
 | _| _z| |}| ||| j W n tyX   |rVd|_t }|| | j| Y nw | jD ]C}| }|seq\d|v rd|v rtt|d }	nd}	|	t|d f| _| | d|v r| jt|d  d|v rt|d | _ nq\td| jd	turtjrtd
dS dS )z1Set the document to use a given PDFParser object.NTZEncryptID)r   r   ZInfoZRootz(No /Root object! - Is this really a PDF?r   zCatalog not found!)r8  xrefsinfocatalog
encryptiondecipher_parser_cached_objs_parsed_objsset_documentr   r   r   	find_xrefread_xref_fromr4   r9  r   rY   r   rR   r&   r$   _initialize_passwordr    r   LITERAL_CATALOGr   r   )
rQ   rX   r   r8  r9  rv   Znewxrefxrefr^   Zid_valuer9   r9   r:   r_     sZ   



zPDFDocument.__init__   objc                 C   s   | j d usJ | j \}}t|ddkrtd| t|dd}| j|}|d u r3td| ||||}|j| _| | _|	 | _	|
 | _
| jd usSJ d| j_d S )NFilterZStandardzUnknown filter: param=%rr   r   zUnknown algorithm: param=%rF)r>  r-   r   rD   r%   r7  r   r?  r   r   r   r@  r9  )rQ   r   r   r   r   factoryhandlerr9   r9   r:   rF    s   



z PDFDocument._initialize_passwordr   r   rU   c                 C   s   |j | jv r| j|j  \}}n| |\}}| jr(|j d us J ||f| j|j < |d | }z|| }W |S  tyB   td| w )Nrg   zindex too big: %r)rU   rB  _get_objectsr8  
IndexErrorr    )rQ   r   r   rU   r   r   r   r   r9   r9   r:   _getobj_objstm  s   
zPDFDocument._getobj_objstmc                 C   s   | dturtjrtd| z	tt|d }W n ty-   tjr)td| d}Y nw t|	 }|
|  g }z	 | \}}|| q= tyT   Y ||fS w )Nr   zNot a stream object: %rr   r   r   )r   r   r   r   r    r   r[   r   r   r   rC  r   r   r*   )rQ   r   r   rX   r   r|   r   r9   r9   r:   rM    s,   

zPDFDocument._get_objectsrv   c           	      C   s   | j d usJ | j | | j  \}}| j  \}}| j  \}}||krHg }|| jur>| j  \}}|| || jus-t|dkrH|d }||krVtd|d||tdkrbtd| | j  \}}|S )Nrg   zobjid mismatch: =rI  zInvalid object spec: offset=%r)	r@  rm   r~   KEYWORD_OBJr   ro   r    r+   r   )	rQ   rv   rU   r|   r   r   r   r   r   r9   r9   r:   _getobj_parse   s&   


zPDFDocument._getobj_parsec              
   C   s  | j stdtd| || jv r| j| \}}|S | j D ]S}z
||\}}}W n	 ty3   Y qw z2|durHt| |}| 	|||}n| 
||}| jrYt| j|||}t|trd||| W  n ttfyq   Y qw t|td|| | jr||f| j|< |S )zGet object from PDF

        :raises PDFException if PDFDocument is not initialized
        :raises PDFObjectNotFound if objid does not exist in PDF
        zPDFDocument is not initializedzgetobj: objid=%rNzregister: objid=%r: %r)r;  r   rs   rt   rA  rW   r   r(   getobjrO  rS  r?  r#   r   r"   Z	set_objidr*   r    r   r8  )rQ   rU   r   r   rH  Zstrmidr   r   r9   r9   r:   rT  <  s<   


zPDFDocument.getobjc                    s>   d| j vrtdtdtdttj f fdd  | j d dS )NZOutlinesentrylevelrK   c                 3   s    t | } d| v r0d| v sd| v r0tt| d }| d}| d}| d}|||||fV  d| v rDd| v rD | d |d E d H  d| v rT | d |E d H  d S d S )	NZTitleAZDestZSEZFirstZLastr}   ZNext)r$   r/   r'   r   )rU  rV  titledestactionsesearchr9   r:   r]  h  s   


z(PDFDocument.get_outlines.<locals>.searchr   )r=  rA   objectr[   r
   r6  OutlineTyperP   r9   r\  r:   get_outlinesd  s   
 zPDFDocument.get_outlinesc              	   C   s<   | j dusJ zt| j d }W |jS  ttfy   tw )zGenerate page label strings for the PDF document.

        If the document includes page labels, generates strings, one per page.
        If not, raises PDFNoPageLabels.

        The resulting iteration is unbounded.
        N
PageLabels)r=  ra  r   r   rB   labels)rQ   Zpage_labelsr9   r9   r:   get_page_labelsx  s   zPDFDocument.get_page_labelscatr   c              	      sh   z	t | jd }W n ttfy   t fw t |  }dtttf dtf fdd|S )NNamesdrK   c                    s   d| v rt | d \}}|k s|k rd S d| v r8t | d }tttttttf tf  t	d|}| S d| v rQt | d D ]}t
|}|rP|  S qBt f)NZLimitsre  rg   ZKids)r&   dictr   r
   r   r   rZ   r   r   r.   r$   r   )rf  r.  Zk2r   namesr   r   rd  r   lookupr9   r:   rj    s"   "z'PDFDocument.lookup_name.<locals>.lookup)r$   r=  r   r   r   r   rZ   r   )rQ   rd  r   rh  d0r9   ri  r:   lookup_name  s   "zPDFDocument.lookup_namer  c                 C   s`   z	|  d|}W |S  ty/   d| jvrt|t| jd }||vr(t||| }Y |S w )NZDests)rl  r   r=  rC   r$   )rQ   r  r   rk  r9   r9   r:   get_dest  s   	

zPDFDocument.get_destc                 C   s   d}|  D ]8}| }td| |dkr:td| | s'td|t|}|dks6td| |  S |r>|}qtd)	z0Internal function used to locate the first XRef.r   zfind_xref: %rs	   startxrefzxref found: pos=%rzInvalid xref position: r   z Invalid negative xref position: Unexpected EOF)Zrevreadlinesrk   rs   rt   isdigitr4   r[   )rQ   rX   prevrw   rz   r9   r9   r:   rD    s    zPDFDocument.find_xrefrz   r;  c                 C   s  | | |  z| \}}W n ty   tdw td|| t|tr;| | |  t	 }|
| n||ju rD|  t }|
| || | }td| d|v rlt|d }| ||| d|v rt|d }| ||| dS dS )z$Reads XRefs from the given location.rn  z"read_xref_from: start=%d, token=%rr   ZXRefStmZPrevN)rm   resetr~   r*   r4   rs   rt   r   r[   r   rY   ZKEYWORD_XREFrj   r\   r   rR   r%   rE  )rQ   rX   rz   r;  rv   tokenrH  r^   r9   r9   r:   rE    s6   





zPDFDocument.read_xref_from)r   TTr   )'r6   r7   r8   r?   r   r   r  r7  r   r[   r   r   r   rZ   r   r_   r+   rR  rF  r"   r^  rO  r   r   rM  rS  rT  r   r_  r
   r`  rc  r   r   rl  rm  rD  rJ   rE  r9   r9   r9   r:   r6    sT   
 

;&r6  c                   @   s@   e Zd ZdZedee fddZede	de
defddZd	S )
ra  zWPageLabels from the document catalog.

    See Section 8.3.1 in the PDF Reference.
    rK   c                 c   s    | j }t|dks|d d dkr!tjrtd|ddi f t|dD ]N\}\}}t|}|d}t	t
|dd}t|dd}|t|krSt|}	n|| \}
}|
| }t||| }	|	D ]}| ||}|| V  qfq&d S )Nr   z"PageLabels is missing page index 0r}   Sr   r   ZSt)valuesro   r   r   r    insert	enumerater$   r   r/   r'   r%   	itertoolscountrr   _format_page_label)rQ   r   nextrz   Zlabel_dict_uncheckedZ
label_dictstyleprefixZfirst_valuert  endr|   Zrange_lengthvaluelabelr9   r9   r:   rb    s*   
zPageLabels.labelsr~  r{  c                 C   s   |du rd}|S |t du rt| }|S |t du r"t|  }|S |t du r.t| }|S |t du r<t|  }|S |t du rHt| }|S td| d}|S )	z+Format page label value in a specific styleNr   Dr   r   rW  azUnknown page label style: %r)r,   rZ   r1   upperr0   rs   warning)r~  r{  r  r9   r9   r:   ry    s*   
zPageLabels._format_page_labelN)r6   r7   r8   r?   propertyr
   rZ   rb  r5  r[   r   ry  r9   r9   r9   r:   ra    s    ra  )]rw  loggingr   r   hashlibr   r   r   r   typingr   r   r   r	   r
   r   r   r   r   r   r   r   r   Zcryptography.hazmat.backendsr   Z&cryptography.hazmat.primitives.ciphersr   r   r   Zpdfminerr   Zpdfminer.arcfourr   Zpdfminer.data_structuresr   Zpdfminer.pdfexceptionsr   r   r   r   Zpdfminer.pdfparserr   r   r    Zpdfminer.pdftypesr!   r"   r#   r$   r%   r&   r'   r(   r)   Zpdfminer.psexceptionsr*   Zpdfminer.psparserr+   r,   r-   Zpdfminer.utilsr.   r/   r0   r1   r2   	getLoggerr6   rs   r4   SyntaxWarningr=   rA   rB   rC   rD   rE   UserWarningrF   rG   rH   r   r   rG  rJ   r\   r   r   r   r   r  r6  ra  r9   r9   r9   r:   <module>   sV    <,
E3N H|  ^