Source code for skyborn.interp.triple_to_grid

import typing
import warnings

import numpy as np
import xarray as xr
from dask.array.core import map_blocks

from .errors import ChunkError, CoordinateError, DimensionError
from .fortran.grid2triple import grid2triple as grid2triple_fort
from .fortran.triple2grid import triple2grid1
from .missing_values import (
    complex_dtypes,
    float_dtypes,
    fort2py_msg,
    msg_dtype,
    py2fort_msg,
)

supported_types = typing.Union[xr.DataArray, np.ndarray]

# Fortran Wrappers _<funcname>()
# These wrappers are executed within dask processes (if any), and could/should
# do anything that can benefit from parallel execution.


def _default_missing_value_for_dtype(ndtype):
    if ndtype in float_dtypes:
        return np.nan
    if ndtype in complex_dtypes:
        return np.nan + np.nan * 1j
    return msg_dtype[ndtype]


def _grid_to_triple(x, y, z, msg_py):
    # Transpose z before Fortran function call
    z = np.transpose(z, axes=(1, 0))
    ndtype = z.dtype.type
    msg_fort = msg_dtype[ndtype]
    if msg_py is None:
        msg_py = _default_missing_value_for_dtype(ndtype)

    # Avoid the generic missing-value conversion path when the data is already
    # compatible with the Fortran sentinel contract. This saves an extra full
    # scan plus the restore pass on the transposed input.
    needs_restore = False
    if ndtype in float_dtypes or ndtype in complex_dtypes:
        msg_is_nan = bool(np.isnan(msg_py))
    else:
        msg_is_nan = False

    if msg_is_nan:
        if np.isnan(z).any():
            z, msg_py, msg_fort = py2fort_msg(z, msg_py=msg_py, msg_fort=msg_fort)
            needs_restore = True
    elif msg_py != msg_fort:
        if (z == msg_py).any():
            z, msg_py, msg_fort = py2fort_msg(z, msg_py=msg_py, msg_fort=msg_fort)
            needs_restore = True

    # Fortran call
    # num_elem is the total number of elements from beginning of each column in the array,
    # which are non missing-value
    out, num_elem = grid2triple_fort(x, y, z, msg_fort)

    # Transpose output to correct dimension order before returning it to outer wrapper
    # As well as get rid of indices corresponding to missing values
    out = np.asarray(out)[:num_elem, :].T

    # Only the input grid can contain the temporary Fortran sentinel. The packed
    # triple output excludes missing cells, so restoring `out` is unnecessary.
    if needs_restore:
        fort2py_msg(z, msg_fort=msg_fort, msg_py=msg_py)

    return out


def _triple_to_grid(
    data,
    x_in,
    y_in,
    x_out,
    y_out,
    shape,
    method=None,
    distmx=None,
    domain=None,
    msg_py=None,
):
    ndtype = data.dtype.type
    msg_fort = msg_dtype[ndtype]
    if msg_py is None:
        msg_py = _default_missing_value_for_dtype(ndtype)

    # Avoid the generic missing-value conversion path when the input already
    # matches the Fortran sentinel contract.
    needs_restore = False
    if ndtype in float_dtypes or ndtype in complex_dtypes:
        msg_is_nan = bool(np.isnan(msg_py))
    else:
        msg_is_nan = False

    if msg_is_nan:
        if np.isnan(data).any():
            data, msg_py, msg_fort = py2fort_msg(data, msg_py=msg_py, msg_fort=msg_fort)
            needs_restore = True
    elif msg_py != msg_fort:
        if (data == msg_py).any():
            data, msg_py, msg_fort = py2fort_msg(data, msg_py=msg_py, msg_fort=msg_fort)
            needs_restore = True

    # Fortran function call
    grid = triple2grid1(
        x_in,
        y_in,
        data,
        x_out,
        y_out,
        zmsg=msg_fort,
        domain=domain,
        method=method,
        distmx=distmx,
    )

    # Reshape output to correct the dimensionality  before returning it to the outer wrapper
    grid = np.asarray(grid)
    grid = grid.reshape(shape)

    # Only restore the input when a temporary sentinel rewrite happened.
    if needs_restore:
        fort2py_msg(data, msg_fort=msg_fort, msg_py=msg_py)

    # Output arrays can legitimately contain the temporary Fortran sentinel.
    if msg_py != msg_fort:
        fort2py_msg(grid, msg_fort=msg_fort, msg_py=msg_py)

    return grid


def _triple_to_grid_eager(
    data,
    x_in,
    y_in,
    x_out,
    y_out,
    method=None,
    distmx=None,
    domain=None,
    msg_py=None,
):
    grid_size = y_out.shape[0] * x_out.shape[0]
    out_shape = data.shape[:-1] + (y_out.shape[0], x_out.shape[0])
    flat_data = data.reshape((-1, data.shape[-1]))

    first = _triple_to_grid(
        flat_data[0],
        x_in,
        y_in,
        x_out,
        y_out,
        (grid_size,),
        method=method,
        distmx=distmx,
        domain=domain,
        msg_py=msg_py,
    )

    flat_grid = np.empty((flat_data.shape[0], grid_size), dtype=first.dtype)
    flat_grid[0] = first

    for idx in range(1, flat_data.shape[0]):
        flat_grid[idx] = _triple_to_grid(
            flat_data[idx],
            x_in,
            y_in,
            x_out,
            y_out,
            (grid_size,),
            method=method,
            distmx=distmx,
            domain=domain,
            msg_py=msg_py,
        )

    return flat_grid.reshape(out_shape)


# TODO: Revisit for implementing this function after deprecating legacy ncomp-style aliases
def _triple_to_grid_2d(x_in, y_in, data, x_out, y_out, msg_py):
    # ''' signature:  grid = _triple2grid(x_in, y_in,data,x_out,y_out,msg_py)
    pass


# Outer Wrappers <funcname>()
# These Wrappers are excecuted in the __main__ python process, and should be
# used for any tasks which would not benefit from parallel execution.


# TODO: This function requires the input to have the coordinates in a particular order,
#  but xarray.DataArrray inputs with coordinates anywhere could/should actually be fine
[docs] def grid_to_triple( data: supported_types, x_in: supported_types = None, y_in: supported_types = None, msg_py: supported_types = None, ) -> supported_types: """Converts a two-dimensional grid with one-dimensional coordinate variables to an array where each grid value is associated with its coordinates. Parameters ---------- data : :class:`xarray.DataArray`, :class:`numpy.ndarray` Two-dimensional input array of size ny x mx containing the data values. Missing values may be present in ``data``, but they are ignored. x_in : :class:`xarray.DataArray`, :class:`numpy.ndarray` A one-dimensional array that specifies the the right dimension coordinates of the input (``data``). Note: It should only be explicitly provided when the input (``fi``) is ``numpy.ndarray``; otherwise, it should come from ``fi.coords``. y_in : :class:`xarray.DataArray`, :class:`numpy.ndarray` A one-dimensional array that specifies the the left dimension coordinates of the input (``data``). Note: It should only be explicitly provided when the input (``fi``) is ``numpy.ndarray``; otherwise, it should come from ``fi.coords``. msg_py : :obj:`numpy.number` A numpy scalar value that represent a missing value in ``data``. This argument allows a user to use a missing value scheme other than NaN or masked arrays, similar to what NCL allows. Returns ------- out : :class:`xarray.DataArray`, :class:`numpy.ndarray` The maximum size of the returned array will be ``3 x ld``, where ``ld <= ny x mx``. If no missing values are encountered in ``data``, then ``ld = ny x mx``. If missing values are encountered in ``data``, they are not returned and hence ``ld`` will be equal to ``ny x mx`` minus the number of missing values found in ``data``. Examples -------- Example 1: Using grid_to_triple with :class:`xarray.DataArray` input .. code-block:: python import numpy as np import xarray as xr from skyborn.interp import grid_to_triple # Open a netCDF data file using xarray default engine and load the data stream ds = xr.open_dataset("./NETCDF_FILE.nc") # [INPUT] Grid & data info on the source curvilinear data = ds.DIST_236_CBL[:] x_in = ds.gridlat_236[:] y_in = ds.gridlon_236[:] output = grid_to_triple(data, x_in, y_in) """ # ''' Start of boilerplate is_input_xr = True # If the input is numpy.ndarray, convert it to xarray.DataArray if not isinstance(data, xr.DataArray): if (x_in is None) | (y_in is None): raise CoordinateError( "grid_to_triple: Argument `x_in` and `y_in` must be provided " "explicitly unless `data` is an xarray.DataArray." ) is_input_xr = False # Pre-validate numpy inputs before wrapping into xarray to ensure # consistent DimensionError types rather than backend-specific errors if np.asarray(data).ndim != 2: raise DimensionError("grid_to_triple: `data` must have two dimensions !\n") if np.asarray(x_in).ndim != 1: raise DimensionError("grid_to_triple: `x_in` must have one dimension !\n") if np.asarray(y_in).ndim != 1: raise DimensionError("grid_to_triple: `y_in` must have one dimension !\n") if np.asarray(x_in).shape[0] != np.asarray(data).shape[1]: raise DimensionError( "grid_to_triple: `x_in` must have the same size (call it `mx`) as the " "right dimension of `data`. !\n" ) if np.asarray(y_in).shape[0] != np.asarray(data).shape[0]: raise DimensionError( "grid_to_triple: `y_in` must have the same size (call it `ny`) as the " "left dimension of `data`. !\n" ) data = xr.DataArray(data) data = data.assign_coords({data.dims[-1]: x_in, data.dims[-2]: y_in}) # x_in and y_in should be coming from xarray input coords or assigned # as coords while xarray being initiated from numpy input above x_in = data.coords[data.dims[-1]] y_in = data.coords[data.dims[-2]] # Basic validity checks if data.ndim != 2: raise DimensionError("grid_to_triple: `data` must have two dimensions !\n") if x_in.ndim != 1: raise DimensionError("grid_to_triple: `x_in` must have one dimension !\n") elif x_in.shape[0] != data.shape[1]: raise DimensionError( "grid_to_triple: `x_in` must have the same size (call it `mx`) as the " "right dimension of `data`. !\n" ) if y_in.ndim != 1: raise DimensionError("grid_to_triple: `y_in` must have one dimension !\n") elif y_in.shape[0] != data.shape[0]: raise DimensionError( "grid_to_triple: `y_in` must have the same size (call it `ny`) as the " "left dimension of `data`. !\n" ) # ''' end of boilerplate # Inner Fortran wrapper call out = _grid_to_triple(x_in.data, y_in.data, data.data, msg_py) # If input was xarray.DataArray, convert output to xarray.DataArray as well if is_input_xr: out = xr.DataArray(out, attrs=data.attrs) return out
# TODO: This function requires the input to have the coordinates in the rightmost two dimensions, # but xarray.DataArrray inputs with coordinates anywhere could/should actually be fine
[docs] def triple_to_grid( data: supported_types, x_in: supported_types, y_in: supported_types, x_out: supported_types, y_out: supported_types, method: int = 1, domain: float = 1.0, distmx: float = None, missing_value: np.number = None, meta: bool = False, ) -> supported_types: """Places unstructured (randomly-spaced) data onto the nearest locations of a rectilinear grid. This function puts unstructured data (randomly-spaced) onto the nearest locations of a rectilinear grid. A default value of ``domain`` option is now set to 1.0 instead of 0.0. This function does not perform interpolation; rather, each individual data point is assigned to the nearest grid point. It is possible that upon return, grid will contain grid points set to missing value if no ``x_in(n)``, ``y_in(n)`` are nearby. Parameters ---------- data : :class:`xarray.DataArray`, :class:`numpy.ndarray` A multi-dimensional array, whose rightmost dimension is the same length as ``x_in`` and ``y_in``, containing the values associated with the "x" and "y" coordinates. Missing values may be present but will be ignored. x_in : :class:`xarray.DataArray`, :class:`numpy.ndarray` A one-dimensional array that specifies the x-coordinate associated with the input (``data``). y_in : :class:`xarray.DataArray`, :class:`numpy.ndarray` A one-dimensional array that specifies the y-coordinate associated with the input (``data``). x_out : :class:`xarray.DataArray`, :class:`numpy.ndarray` A one-dimensional array of length ``M`` containing the x-coordinates associated with the returned two-dimensional grid. The coordinate values must be monotonically increasing. y_out : :class:`xarray.DataArray` or :class:`numpy.ndarray` A one-dimensional array of length ``N`` containing the y-coordinates associated with the returned two-dimensional grid. The coordinate values must be monotonically increasing. method : :obj:`int`, optional An integer value that can be 0 or 1. The default value is 1. A value of 1 means to use the great circle distance formula for distance calculations. Warning: ``method = 0``, together with ``domain = 1.0``, could result in many of the target grid points to be set to the missing value if the number of grid points is large (ie: a high resolution grid) and the number of observations relatively small. domain : :obj:`float`, optional A float value that should be set to a value ``>= 0``. The default value is 1.0. If present, the larger this factor, the wider the spatial domain allowed to influence grid boundary points. Typically, ``domain`` is 1.0 or 2.0. If ``domain <= 0.0``, then values located outside the grid domain specified by ``x_out`` and ``y_out`` arguments will not be used. distmx : :obj:`float`, optional Setting ``distmx`` allows the user to specify a search radius (km) beyond which observations are not considered for nearest neighbor. Only applicable when ``method = 1``. The default ``distmx=1e20 (km)`` means that every grid point will have a nearest neighbor. It is suggested that users specify a reasonable value for ``distmx``. missing_value : :obj:`numpy.number`, optional A numpy scalar value that represent a missing value in ``data``. The default value is ``np.nan``. If specified explicitly, this argument allows the user to use a missing value scheme other than NaN or masked arrays. meta : :obj:`bool`, optional If set to True and the input array is an Xarray, the metadata from the input array will be copied to the output array; default is False. Warning: This option is not yet supported for this function. Returns ------- grid : :class:`xarray.DataArray`, :class:`numpy.ndarray` The returned array will be ``K`` x ``N`` x ``M``, where ``K`` represents the leftmost dimensions of ``data``, N represent the size of ``y_out``, and M represent the size of ``x_out`` coordinate vectors. Examples -------- Example 1: Using triple_to_grid with :class:`xarray.DataArray` input .. code-block:: python import numpy as np import xarray as xr from skyborn.interp import triple_to_grid # Open a netCDF data file using xarray default engine and load the data stream ds = xr.open_dataset("./ruc.nc") # [INPUT] Grid & data info on the source curvilinear data = ds.DIST_236_CBL[:] x_in = ds.gridlat_236[:] y_in = ds.gridlon_236[:] x_out = ds.gridlat_236[:] y_out = ds.gridlon_236[:] # [OUTPUT] Grid on destination points grid (or read the 1D lat and lon from # another .nc file. newlat1D_points=np.linspace(lat2D_curv.min(), lat2D_curv.max(), 100) newlon1D_points=np.linspace(lon2D_curv.min(), lon2D_curv.max(), 100) output = triple_to_grid(data, x_in, y_in, x_out, y_out) """ if (x_in is None) | (y_in is None): raise CoordinateError( "triple_to_grid: Arguments `x_in` and `y_in` must always be " "explicitly provided!" ) # ''' Start of boilerplate is_input_xr = True is_input_dask = False # If the input is numpy.ndarray, convert it to xarray.DataArray if not isinstance(data, xr.DataArray): is_input_xr = False data = xr.DataArray(data) # Basic validity checks if x_in.shape[0] != y_in.shape[0] or x_in.shape[0] != data.shape[data.ndim - 1]: raise DimensionError( "triple_to_grid: The length of `x_in` and `y_in` must be the same " "as the rightmost dimension of `data`!" ) if x_in.ndim > 1 or y_in.ndim > 1: raise DimensionError( "triple_to_grid: `x_in` and `y_in` arguments must be one-dimensional arrays!\n" ) if x_out.ndim > 1 or y_out.ndim > 1: raise DimensionError( "triple_to_grid: `x_out` and `y_out` arguments must be one-dimensional array!\n" ) if not isinstance(method, int): raise TypeError( "triple_to_grid: `method` arg must be an integer! Set it to either 1 or 0." ) if (method != 0) and (method != 1): raise TypeError("triple_to_grid: `method` arg accepts either 0 or 1!") # `distmx` is only applicable when `method`==1 if method: if np.asarray(distmx).size != 1: raise ValueError("triple_to_grid: Provide a scalar value for `distmx`!") else: if distmx is not None: raise ValueError( "triple_to_grid: `distmx` is only applicable when `method`==1!" ) if np.asarray(domain).size != 1: raise ValueError("triple_to_grid: Provide a scalar value for `domain`!") x_in_values = x_in.data if hasattr(x_in, "data") else np.asarray(x_in) y_in_values = y_in.data if hasattr(y_in, "data") else np.asarray(y_in) x_out_values = x_out.data if hasattr(x_out, "data") else np.asarray(x_out) y_out_values = y_out.data if hasattr(y_out, "data") else np.asarray(y_out) # If input data is already chunked if data.chunks is not None: is_input_dask = True # Ensure the rightmost dimension of `data` is not chunked if list(data.chunks)[-1:] != [x_in.shape]: raise ChunkError( "triple_to_grid: Data must be unchunked along the rightmost dimension!" ) else: grid = _triple_to_grid_eager( data.data, x_in_values, y_in_values, x_out_values, y_out_values, method=method, distmx=distmx, domain=domain, msg_py=missing_value, ) if meta: warnings.warn( "WARNING triple_to_grid: Retention of metadata is not yet supported; " "it will thus be ignored in the output!" ) if is_input_xr: return xr.DataArray(grid) return grid # NOTE: Auto-chunking, regardless of what chunk sizes were given by the user, seems # to be explicitly needed in this function because: # The Fortran routine for this function is implemented assuming it would be looped # across the leftmost dimensions of the input (`data`), i.e. on one-dimensional # chunks of size that is equal to the rightmost dimension of `data`, which is the # same as length of `x_in` or `y_in`. # Generate chunks of {'dim_0': 1, 'dim_1': 1, ..., 'dim_n': x_in.shape} data_chunks = list(data.dims) data_chunks[:-1] = [ (k, 1) for (k, v) in zip(list(data.dims)[:-1], list(data.shape)[:-1]) ] data_chunks[-1:] = [ (k, v) for (k, v) in zip(list(data.dims)[-1:], list(data.shape)[-1:]) ] data_chunks = dict(data_chunks) data = data.chunk(data_chunks) # grid data structure elements grid_chunks = list(data.chunks) grid_chunks[-1] = (y_out.shape[0] * x_out.shape[0],) grid_chunks = tuple(grid_chunks) dask_grid_shape = tuple(a[0] for a in list(grid_chunks)) grid_coords = {k: v for (k, v) in data.coords.items()} grid_coords[data.dims[-1]] = x_out # Only add y_out coordinate if data has more than 1 dimension if data.ndim >= 2: grid_coords[data.dims[-2]] = y_out # ''' end of boilerplate # Inner Fortran wrapper call grid = map_blocks( _triple_to_grid, data.data, x_in_values, y_in_values, x_out_values, y_out_values, dask_grid_shape, method=method, distmx=distmx, domain=domain, msg_py=missing_value, chunks=grid_chunks, dtype=data.dtype, drop_axis=[data.ndim - 1], new_axis=[data.ndim - 1], ) # Reshape grid to its final shape grid_shape = data.shape[:-1] + (y_out.shape[0],) + (x_out.shape[0],) grid = grid.reshape(grid_shape) if meta: # grid = xr.DataArray(grid, attrs=data.attrs, dims=data.dims, coords=grid_coords) warnings.warn( "WARNING triple_to_grid: Retention of metadata is not yet supported; " "it will thus be ignored in the output!" ) # else: # grid = xr.DataArray(grid, coords=grid_coords) # If input was xarray.DataArray, convert output to xarray.DataArray as well if is_input_xr: grid = xr.DataArray(grid) # Else if input was numpy.ndarray, convert Dask output to numpy.ndarray with `.compute() else: grid = grid.compute() return grid
# TODO: Revisit for implementing this function after deprecating legacy ncomp-style aliases def triple_to_grid_2d(x_in, y_in, data, x_out, y_out, msg_py): warnings.warn("triple_to_grid_2d function not yet implemented!!! ") # Transparent wrappers for legacy ncomp-style backwards compatibility def grid2triple(x_in, y_in, data, msg_py): warnings.warn( "grid2triple function name and signature will be deprecated soon " "in a future version. Use `grid_to_triple` instead!", PendingDeprecationWarning, ) return grid_to_triple(data, x_in, y_in, msg_py) # Transparent wrappers for legacy ncomp-style backwards compatibility def triple2grid(x_in, y_in, data, x_out, y_out, **kwargs): warnings.warn( "triple2grid function name and signature will be deprecated soon " "in a future version. Use `triple_to_grid` instead!", PendingDeprecationWarning, ) return triple_to_grid(data, x_in, y_in, x_out, y_out, **kwargs)