o
    hE                     @   s   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
mZmZ ddlmZ dedee fd	d
Zdee de jfddZdedeeee	f  ddfddZejdd fdee ddfddZedkroe  dS dS )    N)defaultdictdeque)chain)AnyDefaultDictDictList   )PDFp_strreturnc                 C   s8   d| v rt t| d\}}tt||d S t| gS )N-r	   )mapintsplitlistrange)r   startend r   A/var/www/html/venv/lib/python3.10/site-packages/pdfplumber/cli.pyparse_page_spec   s   
r   args_rawc                 C   s   t d}|jddt dtjjd | }|jdddd	 |jd
ddd	 |jdg ddd |jddd |jdddd |jdddd |jdtj	d |jdt
d |jddtd |jdt
dd || }|jd urytt|j |_|S ) NZ
pdfplumberinfile?rb)nargstypedefaultz--structurezoWrite the structure tree as JSON.  All other arguments except --pages, --laparams, and --indent will be ignored
store_true)helpactionz--structure-textzWrite the structure tree as JSON including text contents.  All other arguments except --pages, --laparams, and --indent will be ignoredz--format)csvjsontextr"   )choicesr   z--types+)r   z--include-attrsz1Include *only* these object attributes in output.)r   r    z--exclude-attrsz,Exclude these object attributes from output.z
--laparams)r   z--precisionz--pages)r   r   z--indentz&Indent level for JSON pretty-printing.)r   r    )argparseArgumentParseradd_argumentFileTypesysstdinbufferadd_mutually_exclusive_groupr#   loadsr   r   
parse_argspagesr   r   )r   parsergroupargsr   r   r   r0      sJ   


r0   pdfdatac           	         s   t dd }| jD ]!}||j  |jD ]}|d}|d u rq |  |d 7  < qq	t|}|rc| }d|v r@||d  |d}|d u rJq/||  d|v r_ fdd	|d D |d< |s1d S d S )
Nc                   S   s   t tS )N)r   strr   r   r   r   <lambda>I   s    z#add_text_to_mcids.<locals>.<lambda>mcidr$   childrenpage_numberZmcidsc                    s   g | ]} | qS r   r   ).0r9   Ztext_contentsr   r   
<listcomp>[   s    z%add_text_to_mcids.<locals>.<listcomp>)r   r1   r;   charsgetr   popleftextend)	r5   r6   Zpage_contentspagecr9   delZpagenor   r=   r   add_text_to_mcidsH   s*   




rG   c              	   C   sD  t | }tj|j|j|jd}|jrttj	|j
|jd nY|jr6|j
}t|| ttj	||jdd nJ|jdkrK|jtj|j|j|j|jd n=|jdkr_|jD ]
}t|jdd	 qSn1|jtj|j|j|j|j|jd
 W d    d S W d    d S W d    d S W d    d S W d    d S 1 sw   Y  d S )N)r1   laparams)indentF)rI   ensure_asciir"   )	precisioninclude_attrsexclude_attrsr$   T)Zlayout)rK   rL   rM   rI   )r0   r
   openr   r1   rH   Z	structureprintr#   dumpsZstructure_treerI   Zstructure_textrG   formatZto_csvr+   stdouttypesrK   rL   rM   Zextract_textto_json)r   r4   r5   treerC   r   r   r   main^   sJ   



"rV   __main__)r'   r#   r+   collectionsr   r   	itertoolsr   typingr   r   r   r   r5   r
   r7   r   r   	Namespacer0   rG   argvrV   __name__r   r   r   r   <module>   s   "4$ 
