B
    {d
B                 @   s   d dl mZmZmZmZ d dlmZmZ d dlm	Z	 e	
  d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlmZmZ d dlmZ G dd	 d	eZdS )
    )divisionprint_functionabsolute_importunicode_literals)intopen)standard_libraryN)BaseEstimator)check_random_state)standardize_nd_sampleshift_and_scale_nd_samplec               @   s   e Zd ZdZd#ddZedd Zejd	d Zed
d Zedd Z	dd Z
d$ddZdd Zd%ddZdd Zdd Zed&ddZdd Zdd  Zd!d" ZdS )'GaussianKDEa  
    GaussianKDE

    Kernel denstiy estimate using gaussian kernels and a local kernel bandwidth.
    Implements the ``sklearn.BaseEstimator`` class and can be used in a cross-
    validation gridsearch (``sklearn.model_selection``).

    Parameters
    ----------
    glob_bw : float or str, optional
        The global bandwidth of the kernel, must be a float ``> 0`` or one of
        ``['silverman'|'scott']``. If ``alpha`` is not ``None``, this is the
        bandwidth used for the first estimate KDE from which the local bandwidth
        is calculated. ``If ['silverman'|'scott']`` a rule of thumb is used to
        estimate the global bandwidth. (default: 'silverman')
    alpha : float or None, optional
        If ``None``, only the global bandwidth ``glob_bw`` is used. If
        ``0 <= alpha <= 1``, an adaptive local kernel bandwith is used as
        described in [1]_. (default: 0.5)
    diag_cov : bool, optional
        If ``True``, scale fit sample by variance only, which means using a
        diagonal covariance matrix. (default: False)

    Notes
    -----
    The unweighted kernel density estimator is defined as

    .. math:

      \hat{f}(x) = \sum_i rac{1}{h\lambda_i}\cdot
                     K\left(rac{x - X_i}{h\lambda_i}ight)


    where the product :math:`h\lambda_i` takes the role of a local
    variance :math`\sigma_i^2`.

    The kernel bandwith is choosen locally to account for variations in the
    density of the data.
    Areas with large density gets smaller kernels and vice versa.
    This smoothes the tails and gets high resolution in high statistics regions.
    The local bandwidth parameter is defined as

    .. math: \lambda_i = (\hat{f}(X_i) / g)^{-lpha}

    where :math:`\log g = n^{-1}\sum_i \log\hat{f}(X_i)` is some normalization
    and :math:`\hat{f}(X_i)` the KDE estimate at the data point :math:`X_i`.
    The local bandwidth is multiplied to the global bandwidth for each kernel.

    Furthermore different scales in data is accounted for by scaling it via its
    covariance matrix to an equal spread.
    First a global kernel bandwidth is applied to the transformed data and then
    based on that density a local bandwidth parameter is applied.

    All credit for the method goes to [1]_ and to S. Schoenen and L. Raedel for
    huge parts of the implementation :+1:.
    For information on Silverman or Scott rule, see [2]_ or [3]_.

    References
    ----------
    .. [1] B. Wang and X. Wang, "Bandwidth Selection for Weighted Kernel Density
           Estimation", Sep. 2007, DOI: 10.1214/154957804100000000.
    .. [2] D.W. Scott, "Multivariate Density Estimation: Theory, Practice, and
           Visualization", John Wiley & Sons, New York, Chicester, 1992.
    .. [3] B.W. Silverman, "Density Estimation for Statistics and Data
           Analysis", Vol. 26, Monographs on Statistics and Applied Probability,
           Chapman and Hall, London, 1986.
    	silverman      ?Fc             C   st   t |tkr|dkr.tdn|dkr.tdd | _d | _d | _d | _d | _d | _d | _	d | _
|| _|| _|| _d S )N)r   scottz,glob_bw can be one of ['silverman'|'scott'].r   zGlobal bandwidth must be > 0.)typestr
ValueError
_n_kernels_n_features_std_X_mean_cov_kde_values_inv_loc_bw	_adaptivealpha_glob_bw	_diag_cov)selfglob_bwr   diag_cov r"   X/work/yifan.wang/ringdown/master-ringdown-env/lib/python3.7/site-packages/awkde/awkde.py__init__X   s$    zGaussianKDE.__init__c             C   s   | j S )N)_alpha)r   r"   r"   r#   r   q   s    zGaussianKDE.alphac             C   sP   |dkrd| _ n|dk s |dkr(tdd| _ || _| jdk	rL| j rL|   dS )z
        The adaptive width can easily be changed after the model has been fit,
        because the computation only needs the cached ``_kde_values``.
        NFr      zalpha must be in [0, 1]T)r   r   r%   r   _calc_local_bandwidth)r   r   r"   r"   r#   r   u   s    c             C   s   | j S )N)r   )r   r"   r"   r#   r       s    zGaussianKDE.glob_bwc             C   s   | j S )N)r   )r   r"   r"   r#   r!      s    zGaussianKDE.diag_covc             C   s   | j j| jj_|  |S )N)predict__doc____call____func__)r   Xr"   r"   r#   r*      s    zGaussianKDE.__call__Nc             C   s   |dk	rt d|dk	r t dt|jdkr6td|j\| _| _t|dd| jd\| _| _	| _
| | j| _| jr| j| jdd	| _|   | j	| j
fS )
a  
        Prepare KDE to describe the data.

        Data is transformed via global covariance matrix to equalize scales in
        different features.
        Then a symmetric kernel with cov = diag(1) is used to describe the pdf
        at each point.

        Parameters
        -----------
        X : array-like, shape (n_samples, n_features)
            Data points defining each kernel position. Each row is a point, each
            column is a feature.
        bounds : array-like, shape (n_features, 2)
            Boundary condition for each dimension. The method of mirrored points
            is used to improve prediction close to bounds. If no bound shall be
            given in a specific direction use ``None``, eg.
            ``[[0, None], ...]``. If ``bounds`` is ``None`` no bounds are used
            in any direction. (default: ``None``)
        weights : array-like, shape (n_samples), optional
            Per event weights to consider for ``X``. If ``None`` all weights are
            set to one. (default: ``None``)

        Returns
        -------
        mean : array-like, shape (n_features)
            The (weighted) mean of the given data.
        cov : array-like, shape (n_features, n_features)
            The (weighted) covariance matrix of the given data.

        Raises
        ------
        ``NotImplementedError`` if ``bounds`` or ``weights`` are not ``None``.
        NzTODO: Boundary conditions.z$TODO: Implement weighted statistics.   z,`X` must have shape (n_samples, n_features).T)cholesky	ret_statsdiagF)adaptive)NotImplementedErrorlenshaper   r   r   r   r   r   r   r   _get_glob_bwr   r   	_evaluater   r'   )r   r,   Zboundsweightsr"   r"   r#   fit   s    #zGaussianKDE.fitc             C   sb   | j dkrtdt|}|j\}}|| jkr8tdt|| j| jdd| j	d}| j
|| jdS )a  
        Evaluate KDE at given points X.

        Parameters
        -----------
        X : array-like, shape (n_samples, n_features)
            Data points we want to evaluate the KDE at. Each row is a point,
            each column is a feature.

        Returns
        -------
        prob : array-like, shape (len(X))
            The probability from the KDE pdf for each point in X.
        Nz$KDE has not been fitted to data yet.z/Dimensions of given points and KDE don't match.TF)ZmeanZcovr.   r/   r0   )r1   )r   r   np
atleast_2dr4   r   r   r   r   r   r6   r   )r   r,   _n_featr"   r"   r#   r(      s    




zGaussianKDE.predictc             C   s   | j dkrtdt|}|jd| j|d}| j | }t|| j }| jrZ|| j	| 9 }|
|d}t||d| }t|| j| jS )a  
        Get random samples from the KDE model.

        Parameters
        ----------
        n_samples : int, optional
            Number of samples to generate. (default: 1)
        random_state : RandomState, optional
            Turn seed into a `np.random.RandomState` instance. Method from
            `sklearn.utils`. Can be None, int or RndState. (default: None)

        Returns
        -------
        X : array_like, shape (n_samples, n_features)
            Generated samples from the fitted model.
        Nz$KDE has not been fitted to data yet.r   )sizer&   g      ?)r   r   r
   randintr   r9   onesr   r   r   Zreshaper:   normalr   r   r   )r   Z	n_samplesZrandom_stateZrndgenidxZmeansinvbwsampler"   r"   r#   rC      s    

zGaussianKDE.samplec             C   sr   | j dkrtdt|}|j\}}|| jkr8td| t|}t|dkr^tj S t	t
|S dS )a  
        Compute the total ln-probability of points X under the KDE model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Data points included in the score calculation. Each row is a point,
            each column is a feature.

        Returns
        -------
        lnprob : float
            Total ln-likelihood of the data ``X`` given the KDE model.
        Nz$KDE has not been fitted to data yet.z/Dimensions of given points and KDE don't match.r   )r   r   r9   r:   r4   r   r(   anyinfsumlog)r   r,   r;   r<   Zprobsr"   r"   r#   score  s    



zGaussianKDE.scorec          	   C   s   | j dkrtd|  }dd | j D |d< t| j|d< dd | jD |d< | jdk	rjt| j|d	< nd|d	< tjd
 dk rd}nd}t	t
j||}tj||dd W dQ R X dS )a  
        Write out the relevant parameters for the KDE model as a JSON file,
        which can be used to reconstruct the whole model with ``from_json``.

        Parameters
        ----------
        fpath : string
            File path where to save the JSON dump.
        Nz$KDE has not been fitted to data yet.c             S   s   g | ]}t |qS r"   )list).0Xir"   r"   r#   
<listcomp>C  s    z'GaussianKDE.to_json.<locals>.<listcomp>	kde_X_std
kde_X_meanc             S   s   g | ]}t |qS r"   )rI   )rJ   rK   r"   r"   r#   rL   E  s    	kde_X_covkde_Yr      wbwr-   )objfpindent)r   r   
get_paramsrI   r   r   r   sysversion_infor   ospathabspathjsondump)r   fpathoutmodefr"   r"   r#   to_json5  s    


zGaussianKDE.to_jsonc          	   C   sr  t tj|d}t|}W dQ R X | |d |d |d d}t|d |_|jj	\|_
|_t|d |_t|d	 |_t|j|jkrtd
|jj	|j|jfkrtd|d dk	r|d dkrtdt|d |_|d |_t|j|j
kr
td|rntd| td|j td|j td|j td|j
 td|j |S )an  
        Build a awKDE object from a JSON dict with the needed parts.

        Parameters
        ----------
        fpath : string
            Path to the JSON file. Must have keys:

            - 'alpha', 'diag_cov', 'glob_bw': See GaussianKDE docstring.
            - 'kde_Y': KDE function values at points 'kde_X_std' used for the
                       adaptive kernel computation.
            - 'kde_X_std': Standardized sample in shape
                           ``(nsamples, nfeatures)``.
            - 'kde_X_mean': Mean vector of the standardized sample.
            - 'kde_X_cov': Covariance matrix of the stadardized sample.
        verb : bool, optional
            If ``True`` print model summary. (default: ``False``)

        Returns
        -------
        kde : KDE.GaussianKDE
            KDE object in fitted state, ready to evaluate or sample from.
        rbNr    r   r!   )r    r   r!   rM   rN   rO   z8'kde_X_mean' has not the same dimension as the X values.z3'kde_X_cov' has not shape (n_features, n_features).rP   z*Saved 'alpha' is None, but 'kde_Y' is not.z/'kde_Y' has not the same length as 'kde_X_std'.zLoaded KDE model from {}z- glob_bw         : {:.3f}z- alpha           : {:.3f}z- adaptive        : {}z- Nr. of kernels  : {:d}z- Nr. of data dim : {:d})r   rZ   r[   r\   r]   loadr9   r:   r   r4   r   r   Z
atleast_1dr   r   r3   r   r   r   printformatr   r%   r   )clsr_   verbrb   dZkder"   r"   r#   	from_jsonV  s6    
zGaussianKDE.from_jsonc             C   sf   | j }| j}t|| j }|r*|| j9 }|| ttjdtj	 | j
  | }t| j|||S )a-  
        Evaluate KDE at given points, returning the log-probability.

        Parameters
        -----------
        X : array-like, shape (n_samples, n_features)
            Data points we want to evaluate the KDE at. Each row is a point,
            each column is a feature.
        adaptive : bool, optional
            Wether to evaluate with fixed or with adaptive kernel.
            (default: True)

        Returns
        -------
        prob : array-like, shape (len(X))
            The probability from the KDE PDF for each point in X.
        r-   )r   r   r9   r?   r   r   sqrtZlinalgZdetpir   backendZ
kernel_sumr   )r   r,   r1   nrj   rB   Znormr"   r"   r#   r6     s    
*zGaussianKDE._evaluatec             C   sZ   | j }| j}|dkr4t||d  d d|d  S |dkrPt|d|d  S | jS dS )z9Simple wrapper to handle string args given for global bw.r   g       @g      @g         r   N)r   r   r9   powerr   )r   r    dimZnsamr"   r"   r#   r5     s     zGaussianKDE._get_glob_bwc             C   s4   t t t | j| j }| j| | j | _dS )z7 Build the local bandwidth from cached ``_kde_values``.N)r9   exprF   rG   r   r   r%   r   )r   gr"   r"   r#   r'     s    z!GaussianKDE._calc_local_bandwidth)r   r   F)NN)N)F)__name__
__module____qualname__r)   r$   propertyr   setterr    r!   r*   r8   r(   rC   rH   rc   classmethodrk   r6   r5   r'   r"   r"   r"   r#   r      s"   C

>
%!@r   )
__future__r   r   r   r   builtinsr   r   futurer   Zinstall_aliasesrZ   rX   r]   numpyr9   Zsklearn.baser	   Zsklearn.utilsr
   Zawkde.toolsr   r   Zawkde.backendrn   r   r"   r"   r"   r#   <module>   s   