B
    }dH                 @   s  d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
 ddlZddlmZ ddlmZ d	d
lmZ d	dlmZ d	dlmZmZ d	dlmZ dZedZedZedZedZedZ ed!d"dZ#ed!d"dZ$d5ddZ%dd Z&dd Z'dd Z(d d! Z)d"d# Z*d$d% Z+d&d' Z,e(d6d*d+Z-e(d7d-d.Z.e(d8d/d0Z/e(d9d1d2Z0e(d:d3d4Z1dS );a[  Utilities for auto-discovery of GW data files.

Automatic discovery of file paths for both LIGO and Virgo index solutions
(``gwdatafind`` or ``ffl``, respectvely) is supported.

The functions in this module are highly reliant on having local access to
files (either directly, or via NFS, or CVMFS).

Data discovery using the DataFind service requires the `gwdatafind` Python
package (a dependency of ``gwpy``), and either the ``GW_DATAFIND_SERVER``
(or legacy ``LIGO_DATAFIND_SERVER``) environment variable to be set,
or the ``host`` keyword must be passed to :func:`find_urls` and friends.

Data discovery using the Virgo FFL system requires the ``FFLPATH`` environment
variable to point to the directory containing FFL files, **or** the
``VIRGODATA`` environment variable to point to a directory containing an
``ffl` subdirectory, which contains FFL files.
    N)defaultdict)wraps)mock)segment   )to_gps   )ffldatafind)cache_segments)num_channelsiter_channel_names)	file_pathz(Duncan Macleod <duncan.macleod@ligo.org>z^[A-Z][0-9]$z\A(.*_)?T\Zz\A(.*_)?M\Zz\A(.*_)?[Tt]rend\Zz^(?!.*_GRB\d{6}([A-Z])?$)z({})|)z$\A[A-Z]\d_HOFT_C\d\d(_T\d{7}_v\d)?\Zz\AV1Online\Zz'\AV1O[0-9]+([A-Z]+)?Repro[0-9]+[A-Z]+\Z)z_GRB\d{6}([A-Z])?\Zz_bck\Zz"\AT1200307_V4_EARLY_RECOLORED_V2\Zc             C   s   x@dt fdtfdtfgD ]&\}}||kr||rdt|fS qW xHtdtddtdt dtdtdi	 D ]\}}|
|rj|t|fS qjW dt|fS )	zPrioritise the given GWF type based on its name or trend status.

    This is essentially an ad-hoc ordering function based on internal knowledge
    of how LIGO does GWF type naming.
    zm-trendzs-trendr   r   z	[A-Z]\d_C   
      )LIGO_MINUTE_TREND_TYPELIGO_SECOND_TREND_TYPEVIRGO_SECOND_TREND_TYPEmatchlenHIGH_PRIORITY_TYPErecompileLOW_PRIORITY_TYPEitemssearch)ifoftypetrendZ	trendnameZtrend_regexregprio r"   ]/work/yifan.wang/ringdown/master-ringdown-env/lib/python3.7/site-packages/gwpy/io/datafind.py_type_priorityO   s     

r$   c           	   G   sD   x>| D ]6}t |}y|jdkr$dS W q tk
r:   dS X qW dS )a  Determine whether any of the given files are on tape

    Parameters
    ----------
    *files : `str`
        one or more paths to GWF files

    Returns
    -------
    True/False : `bool`
        `True` if any of the files are determined to be on tape,
        otherwise `False`
    r   TF)osstat	st_blocksAttributeError)filespathr&   r"   r"   r#   on_tapem   s    



r+   c              K   sP   t dst ds| dr"tS yt  W n tk
rJ   tdY nX tS )zReturn the appropriate GWDataFind-like API based on the environment

    This allows switching to the hacky `gwpy.io.ffldatafind` replacement
    module to enable a GWDataFind-like interface for direct FFL data
    discovery at Virgo.
    ZGWDATAFIND_SERVERZLIGO_DATAFIND_SERVERhostz4unknown datafind configuration, cannot discover data)r%   getenvget
gwdatafindr	   Z_get_ffl_basedirKeyErrorRuntimeError)Zdatafind_kwr"   r"   r#   _gwdatafind_module   s    	


r2   c                s   t   fdd}|S )zDecorate a function to see the right ``gwdatafind`` API.

    This exists only to allow on-the-fly replacing of the actual `gwdatafind`
    with :mod:`gwpy.io.ffldatafind` if it looks like we are trying to find
    data from FFL files.
    c           	      s4   t j j tf | jd<  | |S Q R X d S )Nr/   )r   patchdict__globals__r2   )argskwargs)funcr"   r#   wrapped   s    z'_select_gwdatafind_mod.<locals>.wrapped)r   )r8   r9   r"   )r8   r#   _select_gwdatafind_mod   s    r:   c          	   C   sl   ddl m} t }xT| D ]L}||}y||jd |jf W q tk
rb   td| dY qX qW |S )z<Parse ``(ifo, trend)`` pairs from this list of channels
    r   )Channelr   z6Cannot parse interferometer prefix from channel name 'z', cannot proceed with find())detectorr;   setaddr   type	TypeError
ValueError)Zchansr;   foundnameZchanr"   r"   r#   _parse_ifos_and_trends   s    
rD   c             C   s@   |dkrdS t | |f|d|i}t|}tdt|t| S )zDiscover gaps in a datafind/ffl archive for the given ifo/type

    Returns
    -------
    gaps : `int`
        the cumulative size of all gaps in the relevant archive
    Nr   on_gaps)	find_urlsr
   maxabs)r   	frametyper   rE   cacheZcsegsr"   r"   r#   
_find_gaps   s    rK   c             C   sX   t | t | }|sdS d}|r.|d| 7 }|dd| 7 }|sL|d7 }t|dS )z:Raise an exception if required channels are not found
    Nz=Cannot locate the following channel(s) in any known frametypez at GPS=z:
    z
    zQ
[files on tape have not been checked, use allow_tape=True for a complete search])r=   joinrA   )requiredrB   gpstime
allow_tapemissingmsgr"   r"   r#   _error_missing_channels   s    rR   c                sN   t  fdd D }dd |D x$ D ]} | jfddd q*W dS )	zRank and sort the matched frametypes according to some criteria

    ``matches`` is a dict of (channel, [(type, gwf, gapsize), ...])
    entries.
    c             3   s$   | ]} | D ]}|d  V  qqdS )r   Nr"   ).0keyZ	typetuple)r   r"   r#   	<genexpr>   s    z_rank_types.<locals>.<genexpr>c             S   s   i | ]}t |t|f|qS r"   )r+   r   )rS   r*   r"   r"   r#   
<dictcomp>   s    z_rank_types.<locals>.<dictcomp>c                s   | d  f | d   S )Nr   r   r"   )x)rankr"   r#   <lambda>       z_rank_types.<locals>.<lambda>)rT   N)r=   sort)r   pathsrT   r"   )r   rX   r#   _rank_types   s    
r]   Ferrorc                sZ  ddl m  t| ttfr | }n| g} fdd|D }dd | D }	t|ttfrjt| }
|
d }nd}
|dk	rtt|}|dkr|dk	r|dkrt	}t
tt }xFt|D ]8\}}t|||d	}x|D ]}||f|krqyt||||d
d}W n tttfk
r    wY nX t|||
|}d}t|	}y^xXt|D ]L}||	krH|d7 }|	|  |||f |s|	| ||krHP qHW W n> tk
r } ztd|d| d wW dd}~X Y nX |||f |	sP qW qW t| || t fddD }|s:dd | D }t| ttfrN|S |t|  S )aD  Find the frametype(s) that hold data for a given channel

    Parameters
    ----------
    channel : `str`, `~gwpy.detector.Channel`
        the channel to be found

    gpstime : `int`, optional
        target GPS time at which to find correct type

    frametype_match : `str`, optional
        regular expression to use for frametype `str` matching

    host : `str`, optional
        name of datafind host to use

    port : `int`, optional
        port on datafind host to use

    return_all : `bool`, optional, default: `False`
        return all found types, default is to return to 'best' match

    allow_tape : `bool`, optional, default: `False`
        do not test types whose frame files are stored on tape (not on
        spinning disk)

    on_gaps : `str`, optional
        action to take when gaps are discovered in datafind coverage,
        default: ``'error'``, i.e. don't match frametypes with gaps.
        Select ``'ignore'`` to ignore gaps, or ``'warn'`` to display
        warnings when gaps are found in a datafind `find_urls` query

    Returns
    -------
    If a single name is given, and `return_all=False` (default):

    frametype : `str`
        name of best match frame type

    If a single name is given, and `return_all=True`:

    types : `list` of `str`
        the list of all matching frame types

    If multiple names are given, the above return types are wrapped into a
    `dict` of `(channel, type_or_list)` pairs.

    Examples
    --------
    >>> from gwpy.io import datafind as io_datafind
    >>> io_datafind.find_frametype('H1:IMC-PWR_IN_OUTPUT', gpstime=1126259462)
    'H1_R'
    >>> io_datafind.find_frametype('H1:IMC-PWR_IN_OUTPUT', gpstime=1126259462,
    ...                            return_all=True)
    ['H1_R', 'H1_C']
    >>> io_datafind.find_frametype(
    ...     ('H1:IMC-PWR_IN_OUTPUT', 'H1:OMC-DCPD_SUM_OUTPUT',
    ...      'H1:GDS-CALIB_STRAIN'),
    ...     gpstime=1126259462, return_all=True))"
    {'H1:GDS-CALIB_STRAIN': ['H1_HOFT_C00'],
     'H1:OMC-DCPD_SUM_OUTPUT': ['H1_R', 'H1_C'],
     'H1:IMC-PWR_IN_OUTPUT': ['H1_R', 'H1_C']}
    r   )r;   c                s   i | ]} |j |qS r"   )rC   )rS   c)r;   r"   r#   rV   H  s    z"find_frametype.<locals>.<dictcomp>c             S   s   i | ]\}}||qS r"   r"   )rS   rT   valr"   r"   r#   rV   I  s    r   Ni*4)r   r   ignore)rN   rO   
on_missingr   z!failed to read channels for type z: :c                s&   i | ]}t t t |  d  |qS )r   )listzip)rS   rT   )r   r"   r#   rV     s    c             S   s   i | ]\}}|d  |qS )r   r"   )rS   rT   r`   r"   r"   r#   rV     s    )r<   r;   
isinstancerd   tupler   LigoSegmentintr   GRB_TYPEr   r=   rD   
find_typesfind_latestr1   IOError
IndexErrorrK   r   r   appendpopwarningswarnr>   rR   keysr]   str)channelrN   frametype_matchr,   port
return_allrO   rE   ZchannelsnamesZ
gpssegmentZsearchedr   r   typesr   r*   ZgapsrB   Znchannexcoutputr"   )r;   r   r#   find_frametype   sx    F




r~   Tc                s   yt | ||f||d||dS  tk
r   t | ||f|d|d||d y$t trd fdd D S  d S  tk
r   td	Y nX Y nX d
S )a  Intelligently select the best frametype from which to read this channel

    Parameters
    ----------
    channel : `str`, `~gwpy.detector.Channel`
        the channel to be found

    start : `~gwpy.time.LIGOTimeGPS`, `float`, `str`
        GPS start time of period of interest,
        any input parseable by `~gwpy.time.to_gps` is fine

    end : `~gwpy.time.LIGOTimeGPS`, `float`, `str`
        GPS end time of period of interest,
        any input parseable by `~gwpy.time.to_gps` is fine

    host : `str`, optional
        name of datafind host to use

    port : `int`, optional
        port on datafind host to use

    frametype_match : `str`, optiona
        regular expression to use for frametype `str` matching

    allow_tape : `bool`, optional
        do not test types whose frame files are stored on tape (not on
        spinning disk)

    Returns
    -------
    frametype : `str`
        the best matching frametype for the ``channel`` in the
        ``[start, end)`` interval

    Raises
    ------
    ValueError
        if no valid frametypes are found

    Examples
    --------
    >>> from gwpy.io.datafind import find_best_frametype
    >>> find_best_frametype('L1:GDS-CALIB_STRAIN', 1126259460, 1126259464)
    'L1_HOFT_C00'
    r^   )rN   rv   rO   rE   r,   rw   Tra   )rN   rv   rx   rO   rE   r,   rw   c                s   i | ]} | d  |qS )r   r"   )rS   rT   )ftoutr"   r#   rV     s    z'find_best_frametype.<locals>.<dictcomp>r   z/Cannot find any valid frametypes for channel(s)N)r~   r1   rf   r4   rn   rA   )ru   startendrv   rO   r,   rw   r"   )r   r#   find_best_frametype  s     1



r   c                s(   t tj fd|i| fdddS )zpFind the available data types for a given observatory.

    See also
    --------
    gwdatafind.find_types
    r   c                s   t  | dS )N)r   )r$   )rW   )observatoryr   r"   r#   rY     rZ   zfind_types.<locals>.<lambda>)rT   )sortedr/   rk   )r   r   r   r7   r"   )r   r   r#   rk     s    rk   c             K   s   t j| |||fd|i|S )zwFind the URLs of files of a given data type in a GPS interval.

    See also
    --------
    gwdatafind.find_urls
    rE   )r/   rF   )r   rI   r   r   rE   r7   r"   r"   r#   rF     s    rF   c          	   K   s   t | r| d } yH|dk	rDtt|}t| |||d ddd }ntj| |ddd }W n, ttfk
r   td|  d	| Y nX t	|}|st
|rtd
|  d	| d| |S )zqFind the path of the latest file of a given data type.

    See also
    --------
    gwdatafind.find_latest
    r   Nr   ra   )rE   )rb   zno files found for -zLatest frame file for z- is on tape (pass allow_tape=True to force): )SINGLE_IFO_OBSERVATORYr   ri   r   rF   r/   rl   rn   r1   r   r+   rm   )r   rI   rN   rO   r7   r*   r"   r"   r#   rl     s0    

rl   )N)NNNNFFr^   )NTNN)NN)r^   )NF)2__doc__r%   os.pathr   rq   collectionsr   	functoolsr   Zunittestr   r/   Zligo.segmentsr   rh   timer    r	   rJ   r
   Zgwfr   r   utilsr   
__author__r   r   r   r   r   rj   formatrL   r   r   r$   r+   r2   r:   rD   rK   rR   r]   r~   r   rk   rF   rl   r"   r"   r"   r#   <module>$   s^   







   5 @ 