Module ocean_science_utilities.interpolate.dataarray

Expand source code
import numpy as np
import xarray

from typing import Dict, Optional, List, Tuple

from ocean_science_utilities.interpolate.nd_interp import NdInterpolator


def interpolate_track_data_arrray(
    data_array: xarray.DataArray,
    tracks: Dict[str, np.ndarray],
    independent_variable: Optional[str] = None,
    periodic_coordinates: Optional[Dict[str, float]] = None,
    period_data: Optional[float] = None,
    discont: Optional[float] = None,
) -> xarray.DataArray:
    """
    Interpolate a data array from a dataset at given points in N-dimensions.

    The interpolation function takes care of the following edge cases not
    covered by standard xarray interpolation:

    1) Interpolation along cyclic dimensions (e.g. Longitude interpolating
       across prime- or anti-meridian)
    2) Interpolation of cyclic data. E.g. directions.
    3) Interpolation near non-existing values (e.g. NaN values representing
       land).

    When these are covered by standard xarray interpolation we should
    definitely switch.

    :param data_array: xarray data array.

    :param _points: dictionary with data_array coordinate names as keys and
                    a np array with N entries. With coordiantes lat/lon:
                    {
                        lat: [ lat0, lat1 ,lat2, .., 10]
                        lon: [ [lon0 ,lon1,lon2, .., lonN]
                        time: [t0,t1,t2, ... tN]
                    }
                    where the points we are interolating are formed by the
                    tuples [  (t[0],lat[0],lon[0]),
                                 ....
                              (t[1],lat[N],lon[N]),
                    ]

    :param independent_variable: coordinate that is used to parametrize the
        points, usually time. This variable is used as the output coordinate
        for the returned data array. If None and 'time' is a valid coordinate
        'time' is used, otherwise it defaults to whatever the first returned
        coordinate is from data_array.dims

    :param periodic_coordinates: Dictonary that contains as keys the coordinate
        names of cyclic coordinates, and as value the cyclic length of the
        coordinate.

    :param period_data: Cyclic length of the data. If data is not cyclic
        this is set to None (default).

    :param fractional_cyclic_range: range of the cyclic data expressed as a
        fraciton of the cyclic lenght. E.g for angles in [-pi,pi] with
        cyclic length 2*pi we would give [-0.5,0.5]. Defaults to [0,1].

    :return: A data array of interpolated values with as coordinates the
        specified independent coordinate (usually time).
    """

    # Get dimensions of the data array
    dimensions = data_array.dims

    if periodic_coordinates is not None:
        for wrapped_coordinate in periodic_coordinates:
            if wrapped_coordinate not in dimensions:
                raise ValueError(
                    f"Cyclic coordinate {wrapped_coordinate} is"
                    f" not a valid coordinate of the dataset."
                )
    else:
        periodic_coordinates = {}

    if independent_variable is None:
        if "time" in dimensions:
            chosen_independent_variable = "time"
        else:
            chosen_independent_variable = str(dimensions[0])

    # Ensure that each of the coordinate lists indicated in points is at least
    # 1D
    for coordinate_name in tracks:
        tracks[coordinate_name] = np.atleast_1d(tracks[coordinate_name])

    if len(tracks) != len(dimensions):
        # The number of dimensions does not match the number of dimensions of the
        # points we are interpolating over. If the dataset has a number of singleton
        # dimensions (e.g. for Hycon the surface velocities have a depth dimension
        # that is always located at z=0) we may be able to add
        # these to the track points.
        dim_str = [str(dim) for dim in dimensions]

        track_key = list(tracks.keys())[0]
        for dim in dim_str:
            if dim not in tracks:
                # Singleton dimension is ok to interpolate over. Add points to the
                # tracks that correspond to the singleton dimension.
                if data_array[dim].shape[0] == 1:
                    tracks[dim] = (
                        np.ones(tracks[track_key].shape) * data_array[dim].values[0]
                    )
                else:
                    raise ValueError(
                        f" Points expected to have {len(dimensions)} "
                        f'coordinates corresponding to: {", ".join(dim_str)}'
                    )

    for coordinate_name in tracks:
        dim_str = [str(dim) for dim in dimensions]
        if coordinate_name not in dim_str:
            raise ValueError(
                f" Coordinate {coordinate_name} not in data array"
                f'coordinates: {", ".join(dim_str)}'
            )

    coordinates: List[Tuple[str, np.ndarray]] = []
    for index, c_name in enumerate(data_array.dims):
        # Get weights and indices
        coordinates.append((str(c_name), data_array[c_name].values))

    def _get_data(indices, _dummy):
        return data_array[tuple([xarray.DataArray(x) for x in indices])].values

    interpolator = NdInterpolator(
        get_data=_get_data,
        data_coordinates=coordinates,
        data_shape=data_array.shape,
        interp_coord_names=list(tracks.keys()),
        interp_index_coord_name=chosen_independent_variable,
        data_periodic_coordinates=periodic_coordinates,
        data_period=period_data,
        data_discont=discont,
    )
    interpolated_values = interpolator.interpolate(tracks)

    return xarray.DataArray(
        interpolated_values,
        coords={chosen_independent_variable: tracks[chosen_independent_variable]},
        dims=chosen_independent_variable,
        name=data_array.name,
    )

Functions

def interpolate_track_data_arrray(data_array: xarray.core.dataarray.DataArray, tracks: Dict[str, numpy.ndarray], independent_variable: Optional[str] = None, periodic_coordinates: Optional[Dict[str, float]] = None, period_data: Optional[float] = None, discont: Optional[float] = None) ‑> xarray.core.dataarray.DataArray

Interpolate a data array from a dataset at given points in N-dimensions.

The interpolation function takes care of the following edge cases not covered by standard xarray interpolation:

1) Interpolation along cyclic dimensions (e.g. Longitude interpolating across prime- or anti-meridian) 2) Interpolation of cyclic data. E.g. directions. 3) Interpolation near non-existing values (e.g. NaN values representing land).

When these are covered by standard xarray interpolation we should definitely switch.

:param data_array: xarray data array.

:param _points: dictionary with data_array coordinate names as keys and a np array with N entries. With coordiantes lat/lon: { lat: [ lat0, lat1 ,lat2, .., 10] lon: [ [lon0 ,lon1,lon2, .., lonN] time: [t0,t1,t2, … tN] } where the points we are interolating are formed by the tuples [ (t[0],lat[0],lon[0]), .... (t[1],lat[N],lon[N]), ]

:param independent_variable: coordinate that is used to parametrize the points, usually time. This variable is used as the output coordinate for the returned data array. If None and 'time' is a valid coordinate 'time' is used, otherwise it defaults to whatever the first returned coordinate is from data_array.dims

:param periodic_coordinates: Dictonary that contains as keys the coordinate names of cyclic coordinates, and as value the cyclic length of the coordinate.

:param period_data: Cyclic length of the data. If data is not cyclic this is set to None (default).

:param fractional_cyclic_range: range of the cyclic data expressed as a fraciton of the cyclic lenght. E.g for angles in [-pi,pi] with cyclic length 2*pi we would give [-0.5,0.5]. Defaults to [0,1].

:return: A data array of interpolated values with as coordinates the specified independent coordinate (usually time).

Expand source code
def interpolate_track_data_arrray(
    data_array: xarray.DataArray,
    tracks: Dict[str, np.ndarray],
    independent_variable: Optional[str] = None,
    periodic_coordinates: Optional[Dict[str, float]] = None,
    period_data: Optional[float] = None,
    discont: Optional[float] = None,
) -> xarray.DataArray:
    """
    Interpolate a data array from a dataset at given points in N-dimensions.

    The interpolation function takes care of the following edge cases not
    covered by standard xarray interpolation:

    1) Interpolation along cyclic dimensions (e.g. Longitude interpolating
       across prime- or anti-meridian)
    2) Interpolation of cyclic data. E.g. directions.
    3) Interpolation near non-existing values (e.g. NaN values representing
       land).

    When these are covered by standard xarray interpolation we should
    definitely switch.

    :param data_array: xarray data array.

    :param _points: dictionary with data_array coordinate names as keys and
                    a np array with N entries. With coordiantes lat/lon:
                    {
                        lat: [ lat0, lat1 ,lat2, .., 10]
                        lon: [ [lon0 ,lon1,lon2, .., lonN]
                        time: [t0,t1,t2, ... tN]
                    }
                    where the points we are interolating are formed by the
                    tuples [  (t[0],lat[0],lon[0]),
                                 ....
                              (t[1],lat[N],lon[N]),
                    ]

    :param independent_variable: coordinate that is used to parametrize the
        points, usually time. This variable is used as the output coordinate
        for the returned data array. If None and 'time' is a valid coordinate
        'time' is used, otherwise it defaults to whatever the first returned
        coordinate is from data_array.dims

    :param periodic_coordinates: Dictonary that contains as keys the coordinate
        names of cyclic coordinates, and as value the cyclic length of the
        coordinate.

    :param period_data: Cyclic length of the data. If data is not cyclic
        this is set to None (default).

    :param fractional_cyclic_range: range of the cyclic data expressed as a
        fraciton of the cyclic lenght. E.g for angles in [-pi,pi] with
        cyclic length 2*pi we would give [-0.5,0.5]. Defaults to [0,1].

    :return: A data array of interpolated values with as coordinates the
        specified independent coordinate (usually time).
    """

    # Get dimensions of the data array
    dimensions = data_array.dims

    if periodic_coordinates is not None:
        for wrapped_coordinate in periodic_coordinates:
            if wrapped_coordinate not in dimensions:
                raise ValueError(
                    f"Cyclic coordinate {wrapped_coordinate} is"
                    f" not a valid coordinate of the dataset."
                )
    else:
        periodic_coordinates = {}

    if independent_variable is None:
        if "time" in dimensions:
            chosen_independent_variable = "time"
        else:
            chosen_independent_variable = str(dimensions[0])

    # Ensure that each of the coordinate lists indicated in points is at least
    # 1D
    for coordinate_name in tracks:
        tracks[coordinate_name] = np.atleast_1d(tracks[coordinate_name])

    if len(tracks) != len(dimensions):
        # The number of dimensions does not match the number of dimensions of the
        # points we are interpolating over. If the dataset has a number of singleton
        # dimensions (e.g. for Hycon the surface velocities have a depth dimension
        # that is always located at z=0) we may be able to add
        # these to the track points.
        dim_str = [str(dim) for dim in dimensions]

        track_key = list(tracks.keys())[0]
        for dim in dim_str:
            if dim not in tracks:
                # Singleton dimension is ok to interpolate over. Add points to the
                # tracks that correspond to the singleton dimension.
                if data_array[dim].shape[0] == 1:
                    tracks[dim] = (
                        np.ones(tracks[track_key].shape) * data_array[dim].values[0]
                    )
                else:
                    raise ValueError(
                        f" Points expected to have {len(dimensions)} "
                        f'coordinates corresponding to: {", ".join(dim_str)}'
                    )

    for coordinate_name in tracks:
        dim_str = [str(dim) for dim in dimensions]
        if coordinate_name not in dim_str:
            raise ValueError(
                f" Coordinate {coordinate_name} not in data array"
                f'coordinates: {", ".join(dim_str)}'
            )

    coordinates: List[Tuple[str, np.ndarray]] = []
    for index, c_name in enumerate(data_array.dims):
        # Get weights and indices
        coordinates.append((str(c_name), data_array[c_name].values))

    def _get_data(indices, _dummy):
        return data_array[tuple([xarray.DataArray(x) for x in indices])].values

    interpolator = NdInterpolator(
        get_data=_get_data,
        data_coordinates=coordinates,
        data_shape=data_array.shape,
        interp_coord_names=list(tracks.keys()),
        interp_index_coord_name=chosen_independent_variable,
        data_periodic_coordinates=periodic_coordinates,
        data_period=period_data,
        data_discont=discont,
    )
    interpolated_values = interpolator.interpolate(tracks)

    return xarray.DataArray(
        interpolated_values,
        coords={chosen_independent_variable: tracks[chosen_independent_variable]},
        dims=chosen_independent_variable,
        name=data_array.name,
    )