Source code for pyxpcm.stat

#!/bin/env python
# -*coding: UTF-8 -*-
#
# Methods to be accessed through the xarray accessor or pcm "stat" space:
# m.stat.<method>
# ds.pyxpcm.<method>
#
# Created by gmaze on 2017/12/05
__author__ = 'gmaze@ifremer.fr'

import os
import sys
import xarray as xr
import numpy as np
import dask.array
import warnings
from .utils import docstring

[docs]def quantile(ds, q=0.5, of=None, using='PCM_LABELS', outname='PCM_QUANT', keep_attrs=False): """Compute q-th quantile of a :class:`xarray.DataArray` for each PCM components Parameters ---------- q: float in the range of [0,1] (or sequence of floats) Quantiles to compute, which must be between 0 and 1 inclusive. of: str Name of the :class:`xarray.Dataset` variable to compute quantiles for. using: str Name of the :class:`xarray.Dataset` variable with classification labels to use. Use 'PCM_LABELS' by default. outname: 'PCM_QUANT' or str Name of the :class:`xarray.DataArray` with quantile keep_attrs: boolean, False by default Preserve ``of`` :class:`xarray.Dataset` attributes or not in the new quantile variable. Returns ------- :class:`xarray.Dataset` with shape (K, n_quantiles, N_z=n_features) or :class:`xarray.DataArray` with shape (K, n_quantiles, N_z=n_features) """ # Fill in the dataset, otherwise the xarray.quantile doesn't work if isinstance(ds[of].data, dask.array.Array): warnings.warn("quantile does not work for arrays stored as dask arrays. Loading array data via .load() ") ds[of].compute() ds[using].compute() if using not in ds.data_vars: raise ValueError(("Variable '%s' not found in this dataset") % (using)) if of not in ds.data_vars: raise ValueError(("Variable '%s' not found in this dataset") % (of)) # ID sampling dimensions for this array (all dimensions but those of LABELS) sampling_dims = ds[using].dims ds = ds.stack({'sampling': sampling_dims}) qlist = [] # list of quantiles to compute for label, group in ds.groupby(using): da = group[of] if isinstance(da.data, dask.array.Array): da = da.compute() v = da.quantile(q, dim='sampling', keep_attrs=True) qlist.append(v) # Try to infer the dimension of the class components: # The dimension surely has the unique value in labels: l = ds[using].where(ds[using].notnull(), drop=True).values.flatten() uniquelabels = np.unique(l[~np.isnan(l)]) found_class = False for thisdim in ds.dims: if len(ds[thisdim].values) == len(uniquelabels) and \ np.array_equal(ds[thisdim].values, uniquelabels): dim_class = thisdim found_class = True if not found_class: dim_class = ("pcm_class_%s") % (outname) # Create xarray with all quantiles: if found_class: da = xr.concat(qlist, dim=ds[dim_class]) else: da = xr.concat(qlist, dim=dim_class) da = da.rename(outname) da = da.unstack() # Output: if keep_attrs: da.attrs = ds[of].attrs return da
[docs]def robustness(ds, name='PCM_POST', classdimname='pcm_class', outname='PCM_ROBUSTNESS'): """ Compute classification robustness Parameters ---------- name: str, default is 'PCM_POST' Name of the :class:`xarray.DataArray` with prediction probability (posteriors) classdimname: str, default is 'pcm_class' Name of the dimension holding classes outname: 'PCM_ROBUSTNESS' or str Name of the :class:`xarray.DataArray` with robustness inplace: boolean, False by default If False, return a :class:`xarray.DataArray` with robustness If True, return the input :class:`xarray.Dataset` with robustness added as a new :class:`xarray.DataArray` Returns ------- :class:`xarray.Dataset` if inplace=True or :class:`xarray.DataArray` if inplace=False """ maxpost = ds[name].max(dim=classdimname) K = len(ds[classdimname]) robust = (maxpost - 1. / K) * K / (K - 1.) id = dict() id[classdimname] = 0 da = ds[name][id].rename(outname) da.values = robust da.attrs['long_name'] = 'PCM classification robustness' da.attrs['units'] = '' da.attrs['valid_min'] = 0 da.attrs['valid_max'] = 1 da.attrs['llh'] = ds[name].attrs['llh'] # return da
[docs]def robustness_digit(ds, name='PCM_POST', classdimname='pcm_class', outname='PCM_ROBUSTNESS_CAT'): """ Digitize classification robustness Parameters ---------- ds: :class:`xarray.Dataset` Input dataset name: str, default is 'PCM_POST' Name of the :class:`xarray.DataArray` with prediction probability (posteriors) classdimname: str, default is 'pcm_class' Name of the dimension holding classes outname: 'PCM_ROBUSTNESS_CAT' or str Name of the :class:`xarray.DataArray` with robustness categories inplace: boolean, False by default If False, return a :class:`xarray.DataArray` with robustness If True, return the input :class:`xarray.Dataset` with robustness categories added as a new :class:`xarray.DataArray` Returns ------- :class:`xarray.Dataset` if inplace=True or :class:`xarray.DataArray` if inplace=False """ maxpost = ds[name].max(dim=classdimname) K = len(ds[classdimname]) robust = (maxpost - 1. / K) * K / (K - 1.) Plist = [0, 0.33, 0.66, 0.9, .99, 1] rowl0 = ('Unlikely', 'As likely as not', 'Likely', 'Very Likely', 'Virtually certain') robust_id = np.digitize(robust, Plist) - 1 id = dict() id[classdimname] = 0 da = ds[name][id].rename(outname) da.values = robust_id da.attrs['long_name'] = 'PCM classification robustness category' da.attrs['units'] = '' da.attrs['valid_min'] = 0 da.attrs['valid_max'] = 4 da.attrs['llh'] = ds[name].attrs['llh'] da.attrs['bins'] = Plist da.attrs['legend'] = rowl0 # Add labels to the dataset: return da
class _StatMethods(object): """ Enables use of pyxpcm.stat functions as attributes on a PCM object. """ def __init__(self, m): self._pcm = m def __call__(self, **kwargs): raise ValueError("pyxpcm.stat cannot be called directly. Use one of the statistics methods: quantile, robustness, robustness_digit") @docstring(quantile.__doc__) def quantile(self, *ags, **kwargs): return quantile(*ags, **kwargs) @docstring(robustness.__doc__) def robustness(self, *ags, **kwargs): return robustness(*ags, **kwargs) @docstring(robustness_digit.__doc__) def robustness_digit(self, *ags, **kwargs): return robustness_digit(*ags, **kwargs)