B
    .Kc\                 @   s  d Z ddlZddlZddlZddlZddlmZ ddlmZmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZmZmZmZmZmZmZmZ ddlmZm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z'm(Z(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5 ddl6m7Z7m8Z8m$Z$ ddl9m:Z:m;Z;m<Z<m=Z=m>Z> ddl?m@Z@ e8AeBZCeDdZEG dd deZFG dd deZGG dd deZHG dd  d eZIeeGeeJeKeKeLf f ZMd!d"iZNdZOd#ZPd$ZQG d%d& d&eZRe$j$j%eKeSd'd(d)ZTG d*d+ d+e-ZUG d,d- d-ZVG d.d/ d/eZWG d0d1 d1e5ZXe+eKeeK d2d3d4ZYe+e/dd5d6d7ZZe+eeKef d8d9d:Z[dS );z$The CheckExternalLinksBuilder class.    N)deepcopy)datetimetimezone)parsedate_to_datetime)
HTMLParser)path)PriorityQueueQueue)Thread)	AnyDict	GeneratorList
NamedTupleOptionalTupleUnioncast)unquoteurlparse
urlunparse)nodes)Response)ConnectionError	HTTPErrorTooManyRedirects)Sphinx)DummyBuilder)Config)BuildEnvironment)__)SphinxPostTransform)
encode_uriloggingrequests)darkgray	darkgreenpurplered	turquoise)get_node_linez([a-z]+:)?//c               @   s*   e Zd ZU eed< eed< ee ed< dS )	HyperlinkuridocnamelinenoN)__name__
__module____qualname__str__annotations__r   int r5   r5   8lib/python3.7/site-packages/sphinx/builders/linkcheck.pyr+   $   s   
r+   c               @   s"   e Zd ZU eed< ee ed< dS )CheckRequest
next_check	hyperlinkN)r/   r0   r1   floatr3   r   r+   r5   r5   r5   r6   r7   *   s   
r7   c               @   s>   e Zd ZU eed< eed< eed< eed< eed< eed< dS )CheckResultr,   r-   r.   statusmessagecodeN)r/   r0   r1   r2   r3   r4   r5   r5   r5   r6   r;   /   s   
r;   c               @   s   e Zd ZU eed< eed< dS )	RateLimitdelayr8   N)r/   r0   r1   r:   r3   r5   r5   r5   r6   r?   8   s   
r?   ZAcceptz/text/html,application/xhtml+xml;q=0.9,*/*;q=0.8   g      N@c                   s:   e Zd ZdZedd fddZeeddddZ  ZS )	AnchorCheckParserz9Specialized HTML parser that looks for a specific anchor.N)search_anchorreturnc                s   t    || _d| _d S )NF)super__init__rC   found)selfrC   )	__class__r5   r6   rF   K   s    
zAnchorCheckParser.__init__)tagattrsrD   c             C   s0   x*|D ]"\}}|dkr|| j krd| _P qW d S )N)idnameT)rC   rG   )rH   rJ   rK   keyvaluer5   r5   r6   handle_starttagQ   s    z!AnchorCheckParser.handle_starttag)	r/   r0   r1   __doc__r2   rF   r   rP   __classcell__r5   r5   )rI   r6   rB   H   s   rB   )responseanchorrD   c             C   sR   t |}x:| jdddD ](}t|tr.| }|| |jrP qW |  |jS )zReads HTML data from a response object `response` searching for `anchor`.
    Returns True if anchor was found, False otherwise.
    i   T)Z
chunk_sizeZdecode_unicode)rB   Ziter_content
isinstancebytesdecodeZfeedrG   close)rS   rT   parserchunkr5   r5   r6   check_anchorX   s    

r[   c               @   sp   e Zd ZdZdZedZddddZeddd	d
Z	e
e
e
ee
ddddZeddddZddddZdS )CheckExternalLinksBuilderz+
    Checks for broken external links.
    	linkcheckzCLook for any errors in the above output or in %(outdir)s/output.txtN)rD   c             C   s   d| _ i | _td d S )Nr   g      @)broken_hyperlinks
hyperlinkssocketZsetdefaulttimeout)rH   r5   r5   r6   initr   s    zCheckExternalLinksBuilder.init)resultrD   c             C   s  | j |jd}||j|j|j|j|jd}| | |jdkrDd S |jdkr\|jdkr\d S |jrxt	j
d|j|jdd |jd	kr|jrt	
td
|j d |j  nt	
td
|j  n |jdkrt	
td|j  | d|j||j|j n|jdkr$t	
td|j |j  n|jdkr| jjsD| jjrht	jtd|j|j|j|jfd n"t	
td|j td|j   | d|j||j|jd |j  |  jd7  _n|jdkry2dtfdtfdtfdtfdtfd|j \}}W n  tk
r   dt }}Y nX ||d< | jjr`t	jd|j d | d |j |j|jfd n*t	
|d|j |d| d |j   | d| |j||j|jd |j  ntd |j d S )!NF)filenamer.   r<   r>   r,   info	uncheckedworkingoldz(%16s: line %4d) T)Znonlignoredz
-ignored- z: Zlocalz
-local-   z
ok        brokenzbroken link: %s (%s))locationz
broken    z - rA   
redirectedZpermanentlyz
with Foundzwith See OtherZtemporarily)i-  i.  i/  i3  i4  zwith unknown codetextz
redirect  z to zredirected zUnknown status %s.)envdoc2pathr-   r.   r<   r>   r,   r=   write_linkstatloggerrd   r%   write_entryr&   appquietZwarningiserrorwarningr    r(   r^   r'   r)   KeyErrorconfiglinkcheck_allowed_redirects
ValueError)rH   rb   rc   Zlinkstatrl   Zcolorr5   r5   r6   process_resultx   s^    



 
"

z(CheckExternalLinksBuilder.process_result)whatr-   rc   liner,   rD   c             C   s   | j d||||f  d S )Nz%s:%s: [%s] %s
)txt_outfilewrite)rH   rz   r-   rc   r{   r,   r5   r5   r6   rq      s    z%CheckExternalLinksBuilder.write_entry)datarD   c             C   s"   | j t| | j d d S )N
)json_outfiler}   jsondumps)rH   r~   r5   r5   r6   ro      s    z(CheckExternalLinksBuilder.write_linkstatc          
   C   s   t | j| j}td t| jd}t| jd}t|dddD| _	t|ddd(| _
x|| jD ]}| | qfW W d Q R X W d Q R X | jrd| j_d S )N z
output.txtzoutput.jsonwzutf-8)encodingrA   )HyperlinkAvailabilityCheckerrm   rv   rp   rd   r   joinZoutdiropenr|   r   checkr_   ry   r^   rr   Z
statuscode)rH   ZcheckerZoutput_textZoutput_jsonrb   r5   r5   r6   finish   s    
"z CheckExternalLinksBuilder.finish)r/   r0   r1   rQ   rM   r    epilogra   r;   ry   r2   r4   rq   dictro   r   r5   r5   r5   r6   r\   j   s   8r\   c               @   sl   e Zd ZeeddddZddddZdddd	Zee	e
f eeddf d
ddZe	edddZdS )r   N)rm   rv   rD   c             C   s@   || _ || _i | _t | _g | _t | _dd | j jD | _	d S )Nc             S   s   g | ]}t |qS r5   )recompile).0xr5   r5   r6   
<listcomp>   s    z9HyperlinkAvailabilityChecker.__init__.<locals>.<listcomp>)
rv   rm   rate_limitsr	   rqueueworkersr   wqueuelinkcheck_ignore	to_ignore)rH   rm   rv   r5   r5   r6   rF      s    z%HyperlinkAvailabilityChecker.__init__)rD   c             C   sH   xBt | jjD ]2}t| j| j| j| j| j}|  | j	
| qW d S )N)rangerv   linkcheck_workers HyperlinkAvailabilityCheckWorkerrm   r   r   r   startr   append)rH   Z_iZthreadr5   r5   r6   invoke_threads   s    
z+HyperlinkAvailabilityChecker.invoke_threadsc             C   s2   | j   x"| jD ]}| j ttd d qW d S )NF)r   r   r   putr7   CHECK_IMMEDIATELY)rH   Z_workerr5   r5   r6   shutdown_threads   s    
z-HyperlinkAvailabilityChecker.shutdown_threads)r_   rD   c             c   s   |    d}xT| D ]H}| |jrBt|j|j|jdddV  q| jt	t
|d |d7 }qW d}x ||k r| j V  |d7 }qhW |   d S )Nr   rh   r   FrA   )r   valuesis_ignored_urir,   r;   r-   r.   r   r   r7   r   r   getr   )rH   r_   Ztotal_linksr9   Zdoner5   r5   r6   r      s    
z"HyperlinkAvailabilityChecker.check)r,   rD   c                s   t  fdd| jD S )Nc             3   s   | ]}|  V  qd S )N)match)r   Zpat)r,   r5   r6   	<genexpr>   s    z>HyperlinkAvailabilityChecker.is_ignored_uri.<locals>.<genexpr>)anyr   )rH   r,   r5   )r,   r6   r      s    z+HyperlinkAvailabilityChecker.is_ignored_uri)r/   r0   r1   r   r   rF   r   r   r   r2   r+   r   r;   r   boolr   r5   r5   r5   r6   r      s
   
"r   c                   sZ   e Zd ZdZeeddeeef dd fddZ	ddd	d
Z
eee dddZ  ZS )r   z;A worker class for checking the availability of hyperlinks.zQueue[CheckResult]zQueue[CheckRequest]N)rm   rv   r   r   r   rD   c                sl   || _ || _|| _|| _|| _dd | j jD | _dd | j jD | _dd | j j	D | _
t jdd d S )Nc             S   s   g | ]}t |qS r5   )r   r   )r   r   r5   r5   r6   r     s   z=HyperlinkAvailabilityCheckWorker.__init__.<locals>.<listcomp>c             S   s   g | ]}t |qS r5   )r   r   )r   docr5   r5   r6   r     s   c             S   s   g | ]\}}t ||fqS r5   )r   r   )r   pattern	auth_infor5   r5   r6   r     s    T)Zdaemon)rv   rm   r   r   r   linkcheck_anchors_ignoreanchors_ignorelinkcheck_exclude_documentsdocuments_excludelinkcheck_authauthrE   rF   )rH   rm   rv   r   r   r   )rI   r5   r6   rF      s    z)HyperlinkAvailabilityCheckWorker.__init__)rD   c       
   	      s  i j jrj jd< tttf dfddttttf d fddtttdfdd	 tttttf d
fdd}xj }y |\}d krP \}}W n  t	k
r   |\}}}Y nX d krP t
j}yj| j}W n tk
r   Y nX |t krVtt jt|d j  q||\}}}	|dkrttd td  njt|||||	 j  qW d S )NZtimeout)rD   c                 sh   t } d| j| jf d| j| jf dg}x6|D ].}| jjkr2tt}| jj|  |S q2W i S )Nz%s://%sz%s://%s/*)r   Zschemenetlocrv   linkcheck_request_headersr   DEFAULT_REQUEST_HEADERSupdate)url
candidatesuheaders)rH   r,   r5   r6   get_request_headers  s    
zAHyperlinkAvailabilityCheckWorker.run.<locals>.get_request_headersc           
      s  dkr: dd\} }x(jD ]}||r d }P q W n} d }y| d W n tk
rl   t| } Y nX x"jD ]\}}|rvP qvW d } d< y|rjjrt	j
| fdj|d}|  t|t|}|sttd| ny(t	j| fdj|d}|  W nj tttfk
r } zDt|trP|jjd	krP t	j
| fdj|d}|  W d d }~X Y nX W n tk
r2 } z|jjd
krdS |jjd	kr|j}|d k	rjt|d dS dt|dfS |jjdkrdt|dfS dt|dfS W d d }~X Y n\ tk
r^ } zdt|dfS d }~X Y n0X t| j}	yj|	= W n tk
r   Y nX |j !d| !dkrdS |j }
|r|
d| 7 }
 | |
rdS |j"r|j"d j}d|
|fS d|
dfS d S )N#rA   asciir   T)streamrv   r   zAnchor '%s' not found)Zallow_redirectsrv   r   i  i  )rf   z - unauthorizedr   F)zrate-limitedr   r   ri   r   i  rh   /)rf   r   r   rk   )#splitr   r   encodeUnicodeErrorr"   r   rv   linkcheck_anchorsr$   r   Zraise_for_statusr[   r   	Exceptionr    headr   r   r   rU   rS   Zstatus_code
limit_rater   r   r7   r2   r   r   r   ru   r   rstriphistory)Zreq_urlrT   Zrexr   r   rS   rG   errr8   r   new_urlr>   )allowed_redirectr   r9   kwargsrH   r,   r5   r6   	check_uri  s    








z7HyperlinkAvailabilityCheckWorker.run.<locals>.check_uri)r   r   rD   c                s6   x0 j j D ] \}}|| r||rdS qW dS )NTF)rv   rw   itemsr   )r   r   Zfrom_urlZto_url)rH   r5   r6   r   ~  s    z>HyperlinkAvailabilityCheckWorker.run.<locals>.allowed_redirect)r-   rD   c                s   x4j D ]*}|| r|  d|j d}d|dfS qW tdksLdrPdS dstrhdS tj	| }t
t|rdS d	S x*tjjD ]}  \}}}|d
krP qW |||fS )Nz	 matched z! from linkcheck_exclude_documentsrh   r   )r   zmailto:ztel:)re   r   r   )zhttp:zhttps:)rf   r   r   )ri   r   r   ri   )r   r   r   len
startswithuri_rer   dirnamerm   rn   existsr   r   rv   linkcheck_retries)r-   Zdoc_matcherrd   Zsrcdir_r<   r>   )r   rH   r,   r5   r6   r     s$    


z3HyperlinkAvailabilityCheckWorker.run.<locals>.checkFzrate-limitedz-rate limited-   z | sleeping...)rv   linkcheck_timeoutr   r2   r   r4   r   r   r   rx   r   r   r   r8   ru   timeZsleepQUEUE_POLL_SECSr   r7   Z	task_donerp   rd   r%   r   r;   )
rH   r   Zcheck_requestr8   r-   r.   r   r<   rd   r>   r5   )r   r   r   r9   r   rH   r,   r6   run
  sB    &`" 




z$HyperlinkAvailabilityCheckWorker.run)rS   rD   c       
      C   s  d }|j d}|ryt|}W nX tk
rx   yt|}W n ttfk
rT   Y n X t|}|tt	j
  }Y nX t | }t|jj}|d kr| jj}y| j| }W n tk
r   t}Y n$X |j}	d|	 }||kr|	|k r|}||krd S t | }t||| j|< |S )NzRetry-Afterg       @)r   r   r:   rx   r   	TypeErrorr   Z	timestampZnowr   ZutcZtotal_secondsr   r   r   r   rv   linkcheck_rate_limit_timeoutr   ru   DEFAULT_DELAYr@   r?   )
rH   rS   r8   Zretry_afterr@   Zuntilr   Z	max_delayZ
rate_limitZlast_wait_timer5   r5   r6   r     s:    


z+HyperlinkAvailabilityCheckWorker.limit_rate)r/   r0   r1   rQ   r   r   r   r2   r?   rF   r   r   r   r:   r   rR   r5   r5   )rI   r6   r      s    Ar   c               @   s$   e Zd ZdZdZeddddZdS )HyperlinkCollector)r]   i   N)r   rD   c                s   t tjj}|j ttjd d fdd}x2j	tj
D ] }d|krLq>|d }||| q>W x:j	tjD ](}|d d}|rrd|krr||| qrW x6j	tjD ]$}|d}|rd|kr||| qW d S )	N)r,   noderD   c                sB   j d| }|r|} t|}t| jj|}|  kr>| | < d S )Nzlinkcheck-process-uri)rr   Zemit_firstresultr*   r+   rm   r-   )r,   r   Znewurir.   Zuri_info)r_   rH   r5   r6   add_uri  s    z'HyperlinkCollector.run.<locals>.add_uriZrefurir   ?z://source)r   r\   rr   builderr_   r2   r   ZElementZdocumentfindallZ	referenceZimager   raw)rH   r   r   r   Zrefnoder,   ZimgnodeZrawnoder5   )r_   rH   r6   r     s     
zHyperlinkCollector.run)r/   r0   r1   ZbuildersZdefault_priorityr   r   r5   r5   r5   r6   r     s   r   )rr   r,   rD   c             C   sH   t |}|jdkrD|jrD|jd}|sDd|j }t|j|dS dS )zRewrite anchor name of the hyperlink to github.com

    The hyperlink anchors in github.com are dynamically generated.  This rewrites
    them before checking and makes them comparable.
    z
github.comzuser-content-)fragmentN)r   Zhostnamer   r   r   _replace)rr   r,   ZparsedZprefixedr   r5   r5   r6   rewrite_github_anchor  s    r   )rr   rv   rD   c             C   s   xt | jj D ]v\}}z\yt|| jjt|< W n: tjk
rr } ztt	d|j
|j W dd}~X Y nX W d| jj| X qW dS )zFCompile patterns in linkcheck_allowed_redirects to the regexp objects.z=Failed to compile regex in linkcheck_allowed_redirects: %r %sN)listrv   rw   r   r   r   errorrp   rt   r    r   msgpop)rr   rv   r   r   excr5   r5   r6   #compile_linkcheck_allowed_redirects&  s    
"r   )rr   rD   c             C   s   |  t | t | dg d | dg d | di d | dg d | di d | ddd | d	d dtg | d
dd | ddd | ddgd | ddd | d | jdtdd ddddS )Nr   Fr   rw   r   r   r   rA   r   r      r   Tr   z^!r   g     r@zlinkcheck-process-urizconfig-initedi   )priorityZbuiltin)versionZparallel_read_safeZparallel_write_safe)	Zadd_builderr\   Zadd_post_transformr   Zadd_config_valuer4   Z	add_eventZconnectr   )rr   r5   r5   r6   setup3  s$    


r   )\rQ   r   r   r`   r   copyr   r   r   Zemail.utilsr   Zhtml.parserr   osr   Zqueuer   r	   Z	threadingr
   typingr   r   r   r   r   r   r   r   r   Zurllib.parser   r   r   Zdocutilsr   r$   r   Zrequests.exceptionsr   r   r   Zsphinx.applicationr   Zsphinx.builders.dummyr   Zsphinx.configr   Zsphinx.environmentr   Zsphinx.localer    Z!sphinx.transforms.post_transformsr!   Zsphinx.utilr"   r#   Zsphinx.util.consoler%   r&   r'   r(   r)   Zsphinx.util.nodesr*   Z	getLoggerr/   rp   r   r   r+   r7   r;   r?   r:   r2   r4   ZCheckRequestTyper   r   r   r   rB   r   r[   r\   r   r   r   r   r   r   r5   r5   r5   r6   <module>   s\   ,

	]/ |&