
    Eh0                         d dl mZ d dlmZmZ d dlZd dlmZ d dlZd dl	Z	dZ
dZddZd Zd Zdd	Zd
 Zd Ze
fdZd Ze
efdZe
efdZeZddZd Zd Zd ZddZddZd Ze
efdZd Zd Z ddZ!d Z"y)     PDFDocEncoding)DecimalROUND_HALF_UPN)
itemgetter   c                    t        |      }|dk(  rt        |       D cg c]  }|g c}S t        |       dk  rt        |       D cg c]  }|g c}S g }t        t        |             } | d   g}| d   }| dd  D ]2  }|||z   k  r|j	                  |       n|j	                  |       |g}|}4 |j	                  |       |S c c}w c c}w )Nr         )
decimalizesortedlenlistappend)xs	tolerancexgroupscurrent_grouplasts         X/var/www/html/bid_assistant/venv/lib/python3.12/site-packages/pdfplumber/utils-backup.pycluster_listr      s    9%IA~VBZ999
2w{&*6QaS66F	fRj	BUGMa5DV 	!"  #MM-(CM MM- M :6s   
C
Cc           
          t        |      }t        t        |       |      }t        |      D cg c]  \  }}|D cg c]  }||f c} }}}}t	        t        j                  |       }|S c c}w c c}}}w N)r   r   set	enumeratedict	itertoolschain)valuesr   clustersivalue_clustervalnested_tuplescluster_dicts           r   make_cluster_dictr'      s}    9%ICK3H !*( 36 6A} -:;Sa; 6M 6 	78L	 < 6s   A2A-A2-A2c                     | j                  d      rt        j                  | dd dd      S d | D        }dj                  d |D              S )	zg
    Decodes a PDFDocEncoding string to Unicode.
    Adds py3 compatability to pdfminer's version.
    s   r
   Nzutf-16beignorec              3   Z   K   | ]#  }t        |      t        k(  rt        |      n| % y wr   )typestrord).0cs     r   	<genexpr>zdecode_text.<locals>.<genexpr>0   s#     ;A$q'S.Aa/;s   )+ c              3   .   K   | ]  }t         |     y wr   r   )r.   os     r   r0   zdecode_text.<locals>.<genexpr>1   s     7Q~a(7s   )
startswithsix	text_typejoin)sordss     r   decode_textr:   (   sH    
 	||K }}QqrUJ99;;ww7$777    c                 >   t        | t        j                        rt        t	        |             S t        | t        j
                        rT|d k7  r;t        t        |             j                  t        t        |            t              S t        t        |             S | S )N)rounding)	
isinstancenumbersIntegralr   intRealreprquantizer   )vqs     r   r   r   3   sz    !W%%&s1v!W\\"947#,,WT!W-=& - ( ( 47##Hr;   c                 r    | j                   }dj                  |j                  |j                  g      }|dk(  S )N.zpandas.core.frame.DataFrame)	__class__r7   
__module____name__)
collectionclsnames      r   is_dataframerO   >   s4    


C88cnncll45D000r;   c                 >    t        |       r| j                  d      S | S )Nrecords)rO   to_dict)rL   s    r   to_listrS   C   s!    J!!),,r;   c                     t        |      }d}d }t        | t        d            D ]$  }|d k7  r|d   ||z   kD  r|dz  }|d   }||d   z  }& |S )Nr1   x0key x1text)r   r   r   )
line_charsr   colllast_x1chars        r   collate_liner_   I   so    9%IDGzz$'78 tO$t*)0C"DCKDt*V	
 Kr;   c           
          t        t        t        d      |             t        t        t        d      |             t        t        t        d      |             t        t        t        d      |             fS )NrU   toprY   bottom)minmapr   max)objss    r   get_bboxrg   T   s\    C
4 $'(C
5!4()C
4 $'(C
8$d+,	 r;   c           
         t        |      }t        |      }d t        ffd	}t        |       } t        t	        d      |       }t        ||      fd| D        }t	        d      }t	        d      }t        ||      }t        j                  ||      }	|	D 
cg c]  \  }
} |t        ||      |       }}
}t        t        j                  |       }|S c c}}
w )	Nc                 v    t        |       \  }}}}||||dj                  t        t        d      |             dS )Nr1   rZ   )rU   rY   ra   rb   rZ   )rg   r7   rd   r   )charsrU   ra   rY   rb   s        r   process_word_charsz)extract_words.<locals>.process_word_charsc   sB    &uoCVGGC
6 2E:;
 	
r;   c                    t        | t        d            }g }g }|D ]  }|d   dk(  r$t        |      dkD  r|j                  |       g }n	 /t        |      dk(  r|j                  |       O|d   }|d   |d   |z   kD  r|j                  |       g }|j                  |        t        |      dkD  r|j                  |       t	        t        |            }|S )NrU   rV   rZ   rX   r   rY   )r   r   r   r   r   rd   )	rj   r   chars_sortedwordscurrent_wordr^   	last_charprocessed_wordsrk   s	           r   get_line_wordsz%extract_words.<locals>.get_line_wordsn   s    eD)9:  	*DF|s"|$q(LL.#%L\"a'##D)(,	:49!<=LL.#%L##D)	*  |q LL&s#5u=>r;   doctopc              3   J   K   | ]  }|j                  |d          f  ywrt   Ngetr.   r^   doctop_clusterss     r   r0   z extract_words.<locals>.<genexpr>   -       ?..tH~>?     #r   r   rV   )r   )r   DEFAULT_X_TOLERANCErS   rd   r   r'   r   r   groupbyr   r   )rj   x_tolerancey_tolerancers   doctopswith_clusterget_0get_1with_cluster_sortedgroupedkr[   nestedro   rz   rk   s                 @@r   extract_wordsr   \   s     [)K[)K
 )< 4 ENE*X&.G'=OL qMEqME 59 3?G %'Az c%4L 'F ' &)*EL	's   Cc                 B  	
 t        |       dk(  ry t        d      
t        d      }t        |       } t        t        d      |       }t	        ||      		fd| D        }t        j                  t        ||      |      }
fd|D        }dj                  |      }|S )Nr   r   rt   c              3   J   K   | ]  }|j                  |d          f  ywrv   rw   ry   s     r   r0   zextract_text.<locals>.<genexpr>   r{   r|   rV   c              3   P   K   | ]  \  }}t        t        |              y wr   )r_   rd   )r.   r   itemsr   r   s      r   r0   zextract_text.<locals>.<genexpr>   s+      !Au #eU+[9 !s   #&
)	r   r   rS   rd   r'   r   r~   r   r7   )rj   r   r   r   r   r   r   linesr\   rz   r   s    `       @@r   extract_textr      s     5zQqMEqMEENE*X&.G'=OL | ?UKG!!E 99UDKr;   c                 t   |dvrt        d      t        |       dk(  rt        d      |dk(  rdnd}|dk(  rdnd	}t        |      }t        |      }d
 }t        t	        ||             }t        t        ||            }	t        t        ||            }
t        t        t        d t        |	|
      D                          }t        |
      }d t        ||dd       D        }|D cg c]  }|d   |k\  r|d   |d   dz  z    }}|	d   |d   k  r	|	d   g|z   }||d   kD  r||t        d      z   gz   }|S c c}w )z
    The size of a gutter is the distance between the beginning
    of the current character and the beginning of the next character.
    )hrE   z!`orientation` must be "h" or "v".r   z	No chars.rE   rU   ra   rY   rb   c                     | d   dk7  S )NrZ   rX    )r   s    r   <lambda>zfind_gutters.<locals>.<lambda>   s    AfI, r;   c              3   2   K   | ]  \  }}||z   d z    yw)r
   Nr   )r.   startends      r   r0   zfind_gutters.<locals>.<genexpr>   s%      -E3 "CK1, -s   c              3   0   K   | ]  \  }}|||z
  f  y wr   r   )r.   p1p2s      r   r0   zfind_gutters.<locals>.<genexpr>   s$      +B R"W +s   r   Nr
   rm   z0.001)
ValueErrorr   r   r   filterrd   r   r   zipre   r   )rj   orientationmin_size
start_propend_prop	get_startget_endis_nonspacenonspace_charsstartsendsmidsend_maxmid_gapsggutterss                   r   find_guttersr      sq   
 *$<==
5zQ%%$+J"c)txH:&I"G,K&e45N#i01FG^,-Ds -fd+- - . /D$iG+$QR)+H
 "tx !qtAv "G " ay71:1I-')g(88::N"s   &D5c                     t        | t              r"t        fd| j                         D              S t        |       }t	        |       } t        |       } ||      S )Nc              3   @   K   | ]  \  }}|t        |      f  y wr   )filter_objects)r.   r   rE   fns      r   r0   z!filter_objects.<locals>.<genexpr>   s)      %! q"-. %s   )r>   r   r   r+   rS   r   )rf   r   initial_typefiltereds    `  r   r   r      sX    $ %zz|% % 	% :L4=Db$H!!r;   c                 j    | \  }}t        t        |      \  }}}}||k\  xr ||k  xr ||k\  xr ||k  S r   )rd   r   )pointbboxpxpybx0by0bx1by1s           r   point_inside_bboxr      sE    FBZ.Cc3#IFB#IFB#IFB#IFr;   c                 |    | d   | d   f| d   | d   f| d   | d   f| d   | d   ff}t        fd|D              }|S )NrU   ra   rb   rY   c              3   6   K   | ]  }t        |        y wr   )r   )r.   r/   r   s     r   r0   z(obj_inside_bbox_score.<locals>.<genexpr>   s     <q!!T*<s   )sum)objr   cornersscores    `  r   obj_inside_bbox_scorer      sa    	TCJ	TCM"	TCJ	TCM"	G <G<<ELr;   c                    |d k(  rt        | |      }|dk(  ry |dk(  r| S t        t        |      \  }}}}t        |       }d}d}	|d   |k  r||d<   d}|d   |kD  r||d<   d}|d   |k  r%||d   z
  }
||d<   |d   |
z   |d<   |d	   |
z
  |d	<   d}	|d
   |kD  r||d
   z
  }
||d
<   |d   |
z   |d<   d}	|r|d   |d   z
  |d<   |	r|d
   |d   z
  |d<   |S )Nr      FrU   TrY   ra   rt   y1rb   y0widthheight)r   rd   r   r   )r   r   r   rU   ra   rY   rb   copy	x_changed	y_changeddiffs              r   crop_objr      s@   }%c40z$z#:j$/BR9DIIDzBT
	DzBT
	E{ST%[ Uh$.X$Z$&T
	H~X&X$Z$&T
	T
T$Z/Wh$u+5XKr;   c           	         t        | t              r$t        fd| j                         D              S t        |       }t	        |       } fd| D        }r6|D cg c]*  }|d   dkD  r |d   dk  rt        |d   |d         n|d   , }}n9r|D cg c]  }|d   dk(  s|d    }}n|D cg c]  }|d   dkD  s|d    }} ||      S c c}w c c}w c c}w )zu
    strict: Include only objects that are fully within the box?
    crop: Crop lines and rectangles to the box?
    c              3   F   K   | ]  \  }}|t        |       f  yw))strictcropN)within_bbox)r.   r   rE   r   r   r   s      r   r0   zwithin_bbox.<locals>.<genexpr>"  s-      %! AtFFG %s   !c              3   :   K   | ]  }|t        |      f  y wr   )r   )r.   r   r   s     r   r0   zwithin_bbox.<locals>.<genexpr>(  s     F#s)#t45Fs   r   r   r   )r>   r   r   r+   rS   r   )rf   r   r   r   r   scoresr8   matchings    ```    r   r   r     s    
 $ %zz|% % 	% :L4=DFFF*qtax 56aD1HhqtT1Q40!A$F * *	#)8aQqTQYQqT88#)7aQqTAXQqT77!!* 97s   /CC%C3CCc                 2    t        t        | | dd              S )Nr   )r   r   )dividerss    r   dividers_to_boundsr   4  s    Hhqrl+,,r;   c                    t        |       }t        |       } t        |      }t        |      }g }|D ]  fd}	t        t	        |	|             }
g }|D ]W  fd}t        t	        ||
            }t        |      rt        |||      j                         }nd }|j                  |       Y |j                  |         ||      S )Nc                 D    | d   | d   z   dz  }|d   k\  xr |d   k  S )Nra   rb   r
   r   r   r   )r/   midhbs     r   h_testzextract_table.<locals>.h_testC  s5    U8ak)Q.C2a5L3sRU{3r;   c                 D    | d   | d   z   dz  }|d   k\  xr |d   k  S )NrU   rY   r
   r   r   r   )r/   r   vbs     r   v_testzextract_table.<locals>.v_testJ  s5    w4(A-r!u73A;7r;   )r   r   )	r+   rS   r   r   r   r   r   stripr   )rj   rE   r   r   r   r   v_boundsh_bounds	table_arrr   rowrow_arrr   cell
cell_valuer   r   s                  @@r   extract_tabler   7  s     ;LENE!!$H!!$HI "	4 6&%() 	'B8 vs+,D4y)$ + +--2UW  "
NN:&	' 	!)", 	""r;   c           	         t        d      D cg c]  }t        |        c}\  }}}}|j                  dt        d      | d   | d   dd       |j                  dt        d      | d   | d   | d	   z   | d
   | d	   z   dd       |j                  dt        d      | d   dd       |j                  dt        d      | d   dd       ||||gS c c}w )Nr   	rect_edger   r   ra   r   )object_typer   r   rb   r   r   r   rt   )r   r   r   ra   rt   r   rU   rE   )r   r   rY   r   rY   )r   r   rU   r   )ranger   updater   )rectr   ra   rb   leftrights         r   rect_to_edgesr   Z  s    5:1X?d?CuJJ"Q-4ju+  MM"Q-4jE{T(^+x.4>1  	KK"A4j	  
LL"A4j	  &$''9  @s   Cc                 @    t        |       }| d   | d   k(  rdnd|d<   |S )Nr   r   r   rE   r   )r   )lineedges     r   line_to_edger   y  s+    :D"&t*T
":#DKr;   c           	         t        |       dk(  r| S t        t        t        t	        d      |                   }	 t        |      dk(  sJ 	 |d   }|dk(  r
d}ddd	 }n	d}dd
d }t	        |      }t        j                  t        | |      |      }fd}|D 	
cg c]  \  }	}
 ||
       }}	}
t        t        j                  |       S #  t        d      xY wc c}
}	w )Nr   r   r   z5.simplify_edges only takes one orientation at a time.r   ra   rU   rY   c                     | S r   r   r   s    r   recalczsimplify_edges.<locals>.recalc      Kr;   rb   c                     | S r   r   r   s    r   r   zsimplify_edges.<locals>.recalc  r   r;   rV   c                     t        t        | t                          }|d   g}|dd  D ]/  }|d   }|   |   z   k  s|   |   kD  s#|   |<   ||d<   1 |S )NrV   r   r   rm   )r   r   r   )edgessorted_edges
simplifieder   max_propmin_propr   s        r   simplify_edge_groupz+simplify_edges.<locals>.simplify_edge_group  s    F5j.BCD#A(
ab! 	*Ab>D{tH~	9:X;h/%&x[DN%)JrN	* r;   )
r   r   r   rd   r   	Exceptionr   r~   r   r   )r  r   orientationsorient
order_propr   	get_orderedge_groupsr  r   r   simplified_edge_groupsr  r  s    `          @@r   simplify_edgesr  ~  s   
5zQuC
= 95ABCLQ< A%&% !_F}
	 
	 :&I##F5i$@iPK
 $&Au  359 & & 	!7899GQOPP>&s   C #C#C c                 d    t        |       } t        |      }| D ]  }|D ]  }|d   |d   k\  s  y )Nra   )r  )v_edgesh_edgesrE   r   s       r   find_verticesr    sF    W%GW%G , 	,A%AeH$t	,,r;   )r   r   )r   )TF)#pdfminer.utilsr   decimalr   r   r?   operatorr   r   r5   r}   DEFAULT_Y_TOLERANCEr   r'   r:   r   rO   rS   r_   rg   r   r   collate_charsr   r   r   r   r   r   r   r   r   r   r  r  r   r;   r   <module>r     s    ) *    
  $	8	1
 (; 	 $#?D $#2 %N	"G
!F"0- $#!#F(>
(:T,r;   