B
    1dU              	   @   s   d dl mZ d dlZd dlZd dlmZ d dlmZ d dlmZ e	dZ
ddgdgdgd	Ze	d
Zddd eed deddeddD Zede d ejZdZG dd deZG dd dZdd ZG dd dejZdS )    )chainN)unescape)html5lib_shim)
parse_shim)aabbracronymbZ
blockquotecodeZemiZliolstrongZulhreftitle)r   r   r   )httphttpsmailto c             C   s   g | ]}t |qS  )chr).0cr   r   ]/work/yifan.wang/ringdown/master-ringdown-env/lib/python3.7/site-packages/bleach/sanitizer.py
<listcomp>*   s    r   	                []?c               @   s   e Zd ZdS )NoCssSanitizerWarningN)__name__
__module____qualname__r   r   r   r   r"   5   s   r"   c               @   s0   e Zd ZdZeeeddddfddZdd ZdS )	Cleanera  Cleaner for cleaning HTML fragments of malicious content

    This cleaner is a security-focused function whose sole purpose is to remove
    malicious content from a string such that it can be displayed as content in
    a web page.

    To use::

        from bleach.sanitizer import Cleaner

        cleaner = Cleaner()

        for text in all_the_yucky_things:
            sanitized = cleaner.clean(text)

    .. Note::

       This cleaner is not designed to use to transform content to be used in
       non-web-page contexts.

    .. Warning::

       This cleaner is not thread-safe--the html parser has internal state.
       Create a separate cleaner per thread!


    FTNc       
      C   s   || _ || _|| _|| _|| _|p$g | _|| _tj| j | jddd| _	t
d| _tjddddddd| _|dkrg }t|tr|}n8t|trg }x(| D ]}	t|	ttfr||	 qW d|krtjd	td
 dS )a:  Initializes a Cleaner

        :arg set tags: set of allowed tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg bool strip: whether or not to strip disallowed elements

        :arg bool strip_comments: whether or not to strip HTML comments

        :arg list filters: list of html5lib Filter classes to pass streamed content through

            .. seealso:: http://html5lib.readthedocs.io/en/latest/movingparts.html#filters

            .. Warning::

               Using filters changes the output of ``bleach.Cleaner.clean``.
               Make sure the way the filters change the output are secure.

        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
            sanitizing style attribute values and style text; defaults to None

        F)tagsstripZconsume_entitiesZnamespaceHTMLElementsetreealwaysT)Zquote_attr_valuesZomit_optional_tagsZescape_lt_in_attrsZresolve_entitiessanitizeZalphabetical_attributesNstylez7'style' attribute specified, but css_sanitizer not set.)category)r'   
attributes	protocolsr(   strip_commentsfilterscss_sanitizerr   ZBleachHTMLParserparserZgetTreeWalkerwalkerZBleachHTMLSerializer
serializer
isinstancelistdictvaluestupleextendwarningswarnr"   )
selfr'   r.   r/   r(   r0   r1   r2   Zattributes_valuesr9   r   r   r   __init__V   sB    &




zCleaner.__init__c          	   C   s   t |ts&d|jjdd }t||s.dS | j|}t| || j	| j
| j| j| j| jd}x| jD ]}||d}qjW | j|S )zCleans text and returns sanitized result as unicode

        :arg str text: text to be cleaned

        :returns: sanitized text as unicode

        :raises TypeError: if ``text`` is not a text type

        zargument cannot be of z type, zmust be of text typer   )sourceallowed_tagsr.   strip_disallowed_tagsstrip_html_commentsr2   allowed_protocols)r@   )r6   str	__class__r#   	TypeErrorr3   ZparseFragmentBleachSanitizerFilterr4   r'   r.   r(   r0   r2   r/   r1   r5   render)r>   textmessagedomfilteredZfilter_classr   r   r   clean   s$    


zCleaner.clean)	r#   r$   r%   __doc__ALLOWED_TAGSALLOWED_ATTRIBUTESALLOWED_PROTOCOLSr?   rN   r   r   r   r   r&   9   s   Mr&   c                sL   t  r S t tr& fdd}|S t tr@ fdd}|S tddS )a0  Generates attribute filter function for the given attributes value

    The attributes value can take one of several shapes. This returns a filter
    function appropriate to the attributes value. One nice thing about this is
    that there's less if/then shenanigans in the ``allow_token`` method.

    c                s`   |  kr0 |  }t |r$|| ||S ||kr0dS d kr\ d }t |rT|| ||S ||kS dS )NT*F)callable)tagattrvalueZattr_val)r.   r   r   _attr_filter   s    z.attribute_filter_factory.<locals>._attr_filterc                s   | kS )Nr   )rU   rV   rW   )r.   r   r   rX      s    z3attributes needs to be a callable, a list or a dictN)rT   r6   r8   r7   
ValueError)r.   rX   r   )r.   r   attribute_filter_factory   s    

rZ   c            	   @   sr   e Zd ZdZeeeejej	ej
dddf	ddZdd Zd	d
 Zdd Zdd Zdd Zdd Zdd Zdd ZdS )rH   zmhtml5lib Filter that sanitizes text

    This filter can be used anywhere html5lib filters can be used.

    FTNc             C   sT   t j| | t|| _t|| _t|| _|| _|	| _	|| _
|| _|
| _|| _dS )a_  Creates a BleachSanitizerFilter instance

        :arg source: html5lib TreeWalker stream as an html5lib TreeWalker

        :arg set allowed_tags: set of allowed tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list allowed_protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg attr_val_is_uri: set of attributes that have URI values

        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
            references

        :arg svg_allow_local_href: set of SVG elements that can have local
            hrefs

        :arg bool strip_disallowed_tags: whether or not to strip disallowed
            tags

        :arg bool strip_html_comments: whether or not to strip HTML comments

        :arg CSSSanitizer css_sanitizer: instance with a "sanitize_css" method for
            sanitizing style attribute values and style text; defaults to None

        N)r   Filterr?   	frozensetrA   rD   rZ   attr_filterrB   rC   attr_val_is_urisvg_attr_val_allows_refr2   svg_allow_local_href)r>   r@   rA   r.   rD   r^   r_   r`   rB   rC   r2   r   r   r   r?     s    0


zBleachSanitizerFilter.__init__c             c   s>   x8|D ]0}|  |}|sqt|tr0|E d H  q|V  qW d S )N)sanitize_tokenr6   r7   )r>   token_iteratortokenretr   r   r   sanitize_streamA  s    


z%BleachSanitizerFilter.sanitize_streamc             c   s   g }xn|D ]f}|rR|d dkr,| | q
qjddd |D dd}g }|V  n|d dkrj| | q
|V  q
W ddd |D dd}|V  dS )	z/Merge consecutive Characters tokens in a streamtype
Charactersr   c             S   s   g | ]}|d  qS )datar   )r   
char_tokenr   r   r   r   [  s    z:BleachSanitizerFilter.merge_characters.<locals>.<listcomp>)rh   rf   c             S   s   g | ]}|d  qS )rh   r   )r   ri   r   r   r   r   i  s    N)appendjoin)r>   rb   Zcharacters_bufferrc   Z	new_tokenr   r   r   merge_charactersM  s$    



z&BleachSanitizerFilter.merge_charactersc             C   s   |  | tj| S )N)rl   re   r   r[   __iter__)r>   r   r   r   rm   n  s    zBleachSanitizerFilter.__iter__c             C   s   |d }|dkr>|d | j kr(| |S | jr2dS | |S nJ|dkrr| jsltj|d ddd	d
|d< |S dS n|dkr| |S |S dS )a  Sanitize a token either by HTML-encoding or dropping.

        Unlike sanitizer.Filter, allowed_attributes can be a dict of {'tag':
        ['attribute', 'pairs'], 'tag': callable}.

        Here callable is a function with two arguments of attribute name and
        value. It should return true of false.

        Also gives the option to strip tags instead of encoding.

        :arg dict token: token to sanitize

        :returns: token or list of tokens

        rf   )StartTagEndTagEmptyTagnameNCommentrh   z&quot;z&#x27;)"')entitiesrg   )rA   allow_tokenrB   disallowed_tokenrC   r   escapesanitize_characters)r>   rc   
token_typer   r   r   ra   s  s     

z$BleachSanitizerFilter.sanitize_tokenc             C   s   | dd}|s|S tt|}||d< d|kr4|S g }xt|D ]}|sNqD|drt|}|dk	r|dkr|ddd n|d|d	 |t	|d
 d }|rD|d|d qD|d|d qDW |S )a  Handles Characters tokens

        Our overridden tokenizer doesn't do anything with entities. However,
        that means that the serializer will convert all ``&`` in Characters
        tokens to ``&amp;``.

        Since we don't want that, we extract entities here and convert them to
        Entity tokens so the serializer will let them be.

        :arg token: the Characters token to work on

        :returns: a list of tokens

        rh   r   &Namprg   )rf   rh   Entity)rf   rq      )
getINVISIBLE_CHARACTERS_REsubINVISIBLE_REPLACEMENT_CHARr   Znext_possible_entity
startswithZmatch_entityrj   len)r>   rc   rh   Z
new_tokenspartentity	remainderr   r   r   ry     s.    

z)BleachSanitizerFilter.sanitize_charactersc             C   s   t |}tdd|}|dd}| }yt|}W n tk
rN   dS X |j	rf|j	|kr|S n@|
drt|S d|kr|dd |kr|S d|ksd	|kr|S dS )
zChecks a uri value to see if it's allowed

        :arg value: the uri value to sanitize
        :arg allowed_protocols: list of allowed protocols

        :returns: allowed value or None

        z[`\000-\040\177-\240\s]+r   u   �N#:r   r   r   )r   Zconvert_entitiesrer   replacelowerr   urlparserY   schemer   split)r>   rW   rD   Znormalized_uriparsedr   r   r   sanitize_uri_value  s&    


z(BleachSanitizerFilter.sanitize_uri_valuec       	      C   s  d|kri }x|d   D ]\}}|\}}| |d ||s>q|| jkrd| || j}|dkr`q|}|| jkrtddt|}|	 }|sqn|}d|d f| j
kr|dtjd dfgkrtd	|rq|d
kr| jr| j|}nd}|||< qW ||d< |S )z-Handles the case where we're allowing the tagrh   rq   Nzurl\s*\(\s*[^#\s][^)]+?\) )Nr   Zxlinkr   z
^\s*[^#\s])Nr,   r   )itemsr]   r^   r   rD   r_   r   r   r   r(   r`   r   
namespacessearchr2   Zsanitize_css)	r>   rc   attrsnamespaced_nameval	namespacerq   	new_valuenew_valr   r   r   rv     s<    

z!BleachSanitizerFilter.allow_tokenc             C   s  |d }|dkr&d|d  d|d< n|d r|dks:t g }xr|d  D ]b\\}}}|rj|sj|| }}|d ks||tjkr|}ntj|  d| }|d	| d
| d qLW d|d  d| d|d< nd|d  d|d< |dr|d d d  d|d< d|d< |d= |S )Nrf   ro   z</rq   >rh   )rn   rp   r   r   z="rs   <r   ZselfClosingz/>rg   )AssertionErrorr   r   prefixesrj   rk   r   )r>   rc   rz   r   nsrq   vr   r   r   r   rw   Z  s(    
 z&BleachSanitizerFilter.disallowed_token)r#   r$   r%   rO   rP   rQ   rR   r   r^   r_   r`   r?   re   rl   rm   ra   ry   r   rv   rw   r   r   r   r   rH      s$   3!+=:ErH   )	itertoolsr   r   r<   Zxml.sax.saxutilsr   Zbleachr   r   r\   rP   rQ   rR   rk   rangeZINVISIBLE_CHARACTERScompileUNICODEr   r   UserWarningr"   r&   rZ   ZSanitizerFilterrH   r   r   r   r   <module>   s(   
* +