o
    hx                      @   s   d dl Z d dlZd dlmZmZmZmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZmZmZ d dlmZmZ d dlmZ d dlmZmZmZmZ d dlmZ d d	lmZm Z  e!e"Z#ed
Z$edZ%G dd dZ&dS )    N)	AnyBinaryIO	ContainerDictIteratorListOptionalSetTuple)settings)PDFDocumentPDFNoPageLabelsPDFTextExtractionNotAllowed)PDFObjectNotFoundPDFValueError)	PDFParser)
dict_value	int_value
list_valueresolve1)LIT)Rect
parse_rectZPagePagesc                   @   s   e Zd ZdZdedededee ddf
dd	Zdefd
dZ	h dZ
ededed  fddZe					d$dedeee  dededededed  fddZdedefddZdededefd d!Zdedee fd"d#ZdS )%PDFPageaz  An object that holds the information about a page.

    A PDFPage object is merely a convenience class that has a set
    of keys and values, which describe the properties of a page
    and point to its contents.

    Attributes
    ----------
      doc: a PDFDocument object.
      pageid: any Python object that can uniquely identify the page.
      attrs: a dictionary of page attributes.
      contents: a list of PDFStream objects that represents the page content.
      lastmod: the last modified time of the page.
      resources: a dictionary of resources used by the page.
      mediabox: the physical size of the page.
      cropbox: the crop rectangle of the page.
      rotate: the page rotation (in degree).
      annots: the page annotations.
      beads: a chain that represents natural reading order.
      label: the page's label (typically, the logical page number).

    docpageidattrslabelreturnNc                 C   s   || _ || _t|| _|| _t| jd| _t| jdt | _	| 
| jd| _| | jd| j| _| | jd| _t| jddd d | _| jd	| _| jd
| _dS )zInitialize a page object.

        doc: a PDFDocument object.
        pageid: any Python object that can uniquely identify the page.
        attrs: a dictionary of page attributes.
        label: page label string.
        ZLastModified	ResourcesMediaBoxCropBoxZContentsRotater   ih  ZAnnotsBN)r   r   r   r   r   r   getZlastmoddict	resources_parse_mediaboxmediabox_parse_cropboxZcropbox_parse_contentscontentsr   rotateZannotsZbeads)selfr   r   r   r    r/   C/var/www/html/venv/lib/python3.10/site-packages/pdfminer/pdfpage.py__init__0   s   
zPDFPage.__init__c                 C   s   d| j d| jdS )Nz<PDFPage: Resources=z, MediaBox=>)r'   r)   )r.   r/   r/   r0   __repr__O   s   zPDFPage.__repr__>   r"   r#   r!   r    documentc           	      #   s6   	 ddt dttt f dttt   dttttt tt t f f f  f fddz }W n t	y>   t
d }Y nw d}djv rbjd j}|D ]\}} ||t|V  d	}qQ|sjD ]1}| D ](}z|}t|tr|d
tu r ||t|V  W qm ty   Y qmw qgd S d S )Nobjparentvisitedr   c           	      3   s   t | tr| }t| }n	| j}t|  }|d u r"t }||v r(d S || | D ]\}}| j	v rB||vrB|||< q1|
d}|d u rTtjsT|
d}|tu rxd|v rxtd|d  t|d D ]}|||E d H  qjd S |tu rtd| ||fV  d S d S )NTypetypeZKidszPages: Kids=%rzPage: %r)
isinstanceintr   getobjcopyobjidsetadditemsINHERITABLE_ATTRSr%   r   ZSTRICTLITERAL_PAGESlogdebugr   LITERAL_PAGE)	r5   r6   r7   Z	object_idZobject_propertieskvZobject_typechildclsdepth_first_searchr4   r/   r0   rL   V   s6   



z0PDFPage.create_pages.<locals>.depth_first_searchFr   Tr8   N)r   r   strr   r	   r   r
   r;   Zget_page_labelsr   	itertoolsrepeatcatalognextZxrefsZ
get_objidsr<   r:   r&   r%   rF   r   )	rK   r4   Zpage_labelsZpagesobjectsr>   treeZxrefr5   r/   rJ   r0   create_pagesT   sH   

&


zPDFPage.create_pagesr    TFfppagenosmaxpagespasswordcachingcheck_extractablec                 c   s    t |}t|||d}|js"|rd| }	t|	d| }
t|
 t| |D ]\}}|r4||vr4q)|V  |rB||d krB d S q)d S )N)rZ   r[   z"Text extraction is not allowed: %rzThe PDF %r contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this case   )r   r   Zis_extractabler   rD   warning	enumeraterU   )rK   rW   rX   rY   rZ   r[   r\   parserr   	error_msgZwarning_msgZpagenopager/   r/   r0   	get_pages   s(   
zPDFPage.get_pagesvaluec                 C   sT   d}|d u rt d |S ztdd t|D W S  ty)   t d | Y S w )N)        re   g      @g     @zHMediaBox missing from /Page (and not inherited), defaulting to US Letterc                 s       | ]}t |V  qd S rM   r   .0valr/   r/   r0   	<genexpr>       z*PDFPage._parse_mediabox.<locals>.<genexpr>z2Invalid MediaBox in /Page, defaulting to US LetterrD   r^   r   r   r   )r.   rd   Z	us_letterr/   r/   r0   r(      s   
zPDFPage._parse_mediaboxr)   c                 C   sP   |d u rt d |S ztdd t|D W S  ty'   t d | Y S w )Nz2CropBox missing from /Page, defaulting to MediaBoxc                 s   rf   rM   rg   rh   r/   r/   r0   rk      rl   z)PDFPage._parse_cropbox.<locals>.<genexpr>z0Invalid CropBox in /Page, defaulting to MediaBoxrm   )r.   rd   r)   r/   r/   r0   r*      s   

zPDFPage._parse_cropboxc                 C   s(   g }|d urt |}t|ts|g}|S rM   )r   r:   list)r.   rd   r,   r/   r/   r0   r+      s   
zPDFPage._parse_contents)Nr   rV   TF)__name__
__module____qualname____doc__r   objectr   rN   r1   r3   rB   classmethodr   rU   r   r   r;   boolrc   r   r   r(   r*   r   r+   r/   r/   r/   r0   r      sR    
=
$r   )'rO   loggingtypingr   r   r   r   r   r   r   r	   r
   Zpdfminerr   Zpdfminer.pdfdocumentr   r   r   Zpdfminer.pdfexceptionsr   r   Zpdfminer.pdfparserr   Zpdfminer.pdftypesr   r   r   r   Zpdfminer.psparserr   Zpdfminer.utilsr   r   	getLoggerro   rD   rF   rC   r   r/   r/   r/   r0   <module>   s    ,
