# Source code for veranda.raster.mosaic.base

""" Collection of base classes for managing multi-dimensional I/O of raster data. """

import os
import copy
import gc
import abc
import warnings
import rioxarray  # this import is needed as an extension for xarray
import xarray as xr
import pandas as pd
import numpy as np
from affine import Affine
from typing import List, Tuple, Sequence

from geospade.tools import any_geom2ogr_geom
from geospade.tools import rel_extent
from geospade.crs import SpatialRef
from geospade.raster import RasterGeometry
from geospade.raster import MosaicGeometry
from geospade.raster import Tile
from geospade.raster import find_congruent_tile_id_from_tiles


class RasterAccess:
    """
    Helper class linking the pixel indices of a source array (access) with the pixel indices of a target array
    (assignment).

    Both windows are stored as 4-tuples `(min_row, min_col, max_row, max_col)`; the slice properties treat the
    maximum indices as inclusive.

    """

    def __init__(self, src_raster_geom, dst_raster_geom, src_root_raster_geom=None):
        """
        Constructor of `RasterAccess`.

        Parameters
        ----------
        src_raster_geom : geospade.raster.RasterGeometry
            Raster geometry representing the extent and indices of the data to access.
        dst_raster_geom : geospade.raster.RasterGeometry
            Raster geometry representing the extent and indices of the data to assign.
        src_root_raster_geom : geospade.raster.RasterGeometry, optional
            Raster geometry representing the origin to which `src_raster_geom` should be referred to.
            Defaults to None, i.e. the root parent of `src_raster_geom` is used.

        """
        root_geom = src_root_raster_geom or src_raster_geom.parent_root
        # where the source extent sits relative to the upper-left corner of its root geometry
        self.src_window = self._window_from_origin(src_raster_geom, (root_geom.ul_x, root_geom.ul_y))
        # where the very same source extent sits relative to the upper-left corner of the destination geometry
        self.dst_window = self._window_from_origin(src_raster_geom, (dst_raster_geom.ul_x, dst_raster_geom.ul_y))

    @staticmethod
    def _window_from_origin(raster_geom, origin) -> tuple:
        """
        Expresses the coordinate extent of `raster_geom` as a pixel window relative to `origin`.

        Parameters
        ----------
        raster_geom : geospade.raster.RasterGeometry
            Raster geometry providing the coordinate extent and the pixel sizes.
        origin : 2-tuple
            X and Y coordinate the pixel window refers to.

        Returns
        -------
        4-tuple :
            Pixel window reordered to (min_row, min_col, max_row, max_col).

        """
        min_col, min_row, max_col, max_row = rel_extent(origin, raster_geom.coord_extent,
                                                        raster_geom.x_pixel_size, raster_geom.y_pixel_size)
        return min_row, min_col, max_row, max_col

    @property
    def src_row_slice(self) -> slice:
        """ Indices for the rows of the data to access. """
        first_row, _, last_row, _ = self.src_window
        return slice(first_row, last_row + 1)

    @property
    def src_col_slice(self) -> slice:
        """ Indices for the columns of the data to access. """
        _, first_col, _, last_col = self.src_window
        return slice(first_col, last_col + 1)

    @property
    def dst_row_slice(self) -> slice:
        """ Indices for the rows of the data to assign. """
        first_row, _, last_row, _ = self.dst_window
        return slice(first_row, last_row + 1)

    @property
    def dst_col_slice(self) -> slice:
        """ Indices for the columns of the data to assign. """
        _, first_col, _, last_col = self.dst_window
        return slice(first_col, last_col + 1)
class RasterData(metaclass=abc.ABCMeta):
    """
    Combines spatial information represented as a mosaic with raster data given as 3D array data in memory or as
    geospatial files on disk.

    """

    def __init__(self, file_register, mosaic, data=None, stack_dimension='layer_id', stack_coords=None,
                 tile_dimension='tile_id', **kwargs):
        """
        Constructor of `RasterData`.

        Parameters
        ----------
        file_register : pd.DataFrame
            Data frame managing a stack/list of files containing the following columns:
                - 'filepath' : str
                    Full file path to a geospatial file.
                - `stack_dimension` : object
                    Specifies an ID to which layer a file belongs to, e.g. a layer counter or a timestamp.
                - `tile_dimension` : str
                    Tile name or ID to which tile a file belongs to.
            A 'file_id' column is added to this data frame (in place) if it does not exist yet.
        mosaic : geospade.raster.MosaicGeometry
            Mosaic representing the spatial allocation of the given files. The tiles of the mosaic have to match the
            ID's/names of the `tile_dimension` column.
        data : xr.Dataset, optional
            Raster data stored in memory. It must match the spatial sampling and CRS of the mosaic, but not its
            spatial extent or tiling. Moreover, the dimension of the data along the first dimension (stack
            dimension), must match the entries/filepaths in `file_register`.
        stack_dimension : str, optional
            Dimension/column name of the dimension, where to stack files or data over the same spatial region along
            (first axis), e.g. time, bands etc. Defaults to 'layer_id', i.e. the layer ID's are used as the main
            coordinates to stack the files.
        stack_coords : list, optional
            Additional columns of `file_register` to use as coordinates. Defaults to None, i.e. only coordinates
            along `stack_dimension` are used.
        tile_dimension : str, optional
            Dimension/column name of the dimension containing tile ID's in correspondence with the tiles in `mosaic`.
            Defaults to 'tile_id'.
        **kwargs
            Accepted, but not used by this constructor.

        """
        self._file_register = file_register
        self._files = dict()  # maps file IDs to open file handles
        self._mosaic = mosaic
        self._data = data
        self._data_geom = None if data is None else self.raster_geom_from_data(data, sref=mosaic.sref, name='0')
        self._file_dim = stack_dimension
        self._tile_dim = tile_dimension
        self._file_coords = [self._file_dim] if stack_coords is None else stack_coords

        if 'file_id' not in self._file_register.columns:
            self._file_register['file_id'] = [None] * len(self._file_register)

    @property
    def mosaic(self) -> MosaicGeometry:
        """ Mosaic geometry of the raster mosaic files. """
        return self._mosaic

    @property
    def data_geom(self) -> RasterGeometry:
        """ Raster/tile geometry of the raster data stored in memory (None if there is no such data). """
        return self._data_geom

    @property
    def layer_ids(self) -> list:
        """ Sorted, unique layer IDs found in the file register. """
        return sorted(self._file_register[self._file_dim].unique())

    @property
    def n_layers(self) -> int:
        """ Number of unique layers in the file register. """
        return len(self.layer_ids)

    @property
    def n_tiles(self) -> int:
        """ Number of tiles. """
        return len(self._mosaic.all_tiles)

    @property
    def file_register(self) -> pd.DataFrame:
        """ File register of the raster data object without the internal 'file_id' column. """
        return self._file_register.drop(columns=['file_id']) \
            if 'file_id' in self._file_register.columns else self._file_register

    @property
    def filepaths(self) -> List[str]:
        """ Unique list of file paths stored in the file register (order is not defined). """
        return list(set(self._file_register['filepath']))

    @property
    def data_view(self) -> xr.Dataset:
        """ View on internal raster data. """
        return self._view_data()

    @abc.abstractmethod
    def load(self, *args, **kwargs):
        """ An abstract method for loading data either from disk or RAM. """
        pass

    @staticmethod
    def _sdims_from_data(data) -> List[str]:
        """
        Collects spatial dimensions of an xr.Dataset, which are assumed to be the last two.

        Parameters
        ----------
        data : xr.Dataset
            Raster data.

        Returns
        -------
        list :
            Spatial dimensions of the given xr.Dataset.

        """
        return list(data.dims)[-2:]

    @staticmethod
    def _pixel_sizes_from_data(data) -> Tuple[float, float]:
        """
        Collects pixel sizes of an xr.Dataset from the first two coordinate values along each spatial dimension.

        Parameters
        ----------
        data : xr.Dataset
            Raster data.

        Returns
        -------
        x_pixel_size, y_pixel_size :
            Pixel size in X and Y direction.

        """
        sdims = RasterData._sdims_from_data(data)
        # first spatial dimension: first minus second coordinate; second spatial dimension: second minus first
        y_pixel_size = data[sdims[0]].data[0] - data[sdims[0]].data[1]
        x_pixel_size = data[sdims[1]].data[1] - data[sdims[1]].data[0]
        return x_pixel_size, y_pixel_size

    @staticmethod
    def _extent_from_data(data) -> Tuple[float, float, float, float]:
        """
        Computes the extent/bounding box of an xr.Dataset.

        Parameters
        ----------
        data : xr.Dataset
            Raster data.

        Returns
        -------
        4-tuple :
            Extent/bounding box of the given data (lower left x, lower left y, upper right x, upper right y).

        """
        sdims = RasterData._sdims_from_data(data)
        x_pixel_size, y_pixel_size = RasterData._pixel_sizes_from_data(data)
        # the last coordinates are extended by one pixel to obtain the outer boundary
        return data[sdims[1]].data[0], data[sdims[0]].data[-1] - y_pixel_size, \
               data[sdims[1]].data[-1] + x_pixel_size, data[sdims[0]].data[0]

    @staticmethod
    def raster_geom_from_data(data, sref=None, **kwargs) -> RasterGeometry:
        """
        Creates a raster geometry from an xarray dataset.

        Parameters
        ----------
        data : xr.Dataset
            Raster data.
        sref : geospade.crs.SpatialRef, optional
            CRS of the data. It is only used if no coordinate of `data` carries a 'spatial_ref' attribute.
        kwargs : dict
            Key-word arguments for `RasterGeometry.from_extent`.

        Returns
        -------
        raster_geom : geospade.raster.RasterGeometry
            Raster geometry representing the spatial extent of the xarray dataset.

        Raises
        ------
        ValueError
            If neither the data nor `sref` provide CRS information.

        """
        coord_names = list(data.coords.keys())
        sref_coord = None
        for coord_name in coord_names:
            if data[coord_name].attrs.get('spatial_ref'):
                sref_coord = data[coord_name]
                break

        if sref_coord is None and sref is None:
            err_msg = "Neither the data contains CRS information nor the keyword."
            raise ValueError(err_msg)

        # CRS information attached to the data takes precedence over the keyword
        if sref_coord is not None:
            sref = SpatialRef(sref_coord.attrs.get('spatial_ref'))

        x_pixel_size, y_pixel_size = RasterData._pixel_sizes_from_data(data)
        extent = RasterData._extent_from_data(data)
        raster_geom = RasterGeometry.from_extent(extent, sref,
                                                 x_pixel_size=x_pixel_size, y_pixel_size=y_pixel_size,
                                                 **kwargs)
        return raster_geom

    def apply_nan(self, nodatavals=None):
        """
        Converts no data values given as an attribute '_FillValue' or keyword `nodatavals` to np.nan.

        Parameters
        ----------
        nodatavals : dict, optional
            Data variable name to no data value map. An existing '_FillValue' attribute of a data variable takes
            precedence over this map; if neither is available, 0 is used as no data value.

        Notes
        -----
        This replacement implicitly converts the data format to float.

        """
        nodatavals = nodatavals or dict()
        if self._data is not None:
            for dvar in self._data.data_vars:
                dar = self._data[dvar]
                nodataval = dar.attrs.get('_FillValue', nodatavals.get(dvar, 0))
                self._data[dvar] = dar.where(dar != nodataval)

    def select(self, cmds, inplace=False) -> "RasterData":
        """
        Executes several select operations from a dict/JSON compatible set of commands.

        Parameters
        ----------
        cmds : list of 3-tuple
            List of tuples containing the name of the select method to execute, its positional arguments, and its
            key-word arguments. A given 'sref' key-word argument is converted to a `SpatialRef` instance before the
            method is called. The commands themselves are left unmodified.
        inplace : bool, optional
            If True, the current raster data object is modified.
            If False, a new raster data instance will be returned (default).

        Returns
        -------
        RasterData :
            Raster data object with a mosaic and a file register in compliance with the provided select operations.

        """
        if not inplace:
            new_raster_data = copy.deepcopy(self)
            return new_raster_data.select(cmds, inplace=True)

        for cmd in cmds:
            fun_name = cmd[0]
            args = cmd[1]
            # work on a copy: writing the converted CRS back into the caller's dictionary would break re-using
            # the same command list for a second call
            kwargs = dict(cmd[2])
            sref = kwargs.get('sref')
            if sref is not None:
                kwargs['sref'] = SpatialRef(sref)
            getattr(self, fun_name)(*args, inplace=True, **kwargs)

        return self

    def select_tiles(self, tile_names, inplace=False) -> "RasterData":
        """
        Selects certain tiles from a raster data object.

        Parameters
        ----------
        tile_names : list of str
            Tile names/IDs.
        inplace : bool, optional
            If True, the current raster data object is modified.
            If False, a new raster data instance will be returned (default).

        Returns
        -------
        RasterData :
            Raster data object with a mosaic and a file register only consisting of the given tiles.

        """
        if not inplace:
            new_raster_data = copy.deepcopy(self)
            return new_raster_data.select_tiles(tile_names, inplace=True)

        self._file_register = self._file_register.loc[self._file_register[self._tile_dim].isin(tile_names)]
        self._mosaic.select_by_tile_names(tile_names, inplace=True)

        return self

    def select_layers(self, layer_ids, inplace=False) -> "RasterData":
        """
        Selects layers according to the given layer IDs.

        Parameters
        ----------
        layer_ids : list
            Layer IDs to select.
        inplace : bool, optional
            If True, the current raster data object is modified.
            If False, a new raster data instance will be returned (default).

        Returns
        -------
        RasterData :
            Raster data object with a file register only consisting of the given layer IDs.

        """
        if not inplace:
            new_raster_data = copy.deepcopy(self)
            return new_raster_data.select_layers(layer_ids, inplace=True)

        # release the file handles of all layers that are dropped from the file register
        layer_ids_close = set(self._file_register[self._file_dim]) - set(layer_ids)
        self.close(layer_ids=layer_ids_close)
        self._file_register = self._file_register[self._file_register[self._file_dim].isin(layer_ids)]

        return self

    def select_px_window(self, row, col, height=1, width=1, inplace=False) -> "RasterData":
        """
        Selects the pixel coordinates according to the given pixel window.

        Parameters
        ----------
        row : int
            Top-left row number of the pixel window anchor.
        col : int
            Top-left column number of the pixel window anchor.
        height : int, optional
            Number of rows/height of the pixel window. Defaults to 1.
        width : int, optional
            Number of columns/width of the pixel window. Defaults to 1.
        inplace : bool, optional
            If True, the current raster data object is modified.
            If False, a new raster data instance will be returned (default).

        Returns
        -------
        RasterData :
            Raster data object with a data and a mosaic geometry only consisting of the intersected tile with the
            pixel window.

        Notes
        -----
        The mosaic will be only sliced if it consists of one tile to prevent ambiguities in terms of the definition
        of the pixel window.

        """
        if not inplace:
            new_raster_data = copy.deepcopy(self)
            return new_raster_data.select_px_window(row, col, height=height, width=width, inplace=True)

        if self._data_geom is not None:
            self._data_geom.slice_by_rc(row, col, height=height, width=width, inplace=True, name='0')
            # NOTE(review): the in-place call above does not rebind `self._data_geom`, so this check presumably
            # never fires - confirm how `slice_by_rc` reports a window outside the extent
            if self._data_geom is None:
                wrn_msg = "Pixels are outside the extent of the raster mosaic."
                warnings.warn(wrn_msg)

        if len(self._mosaic.tiles) == 1:
            tile_oi = self._mosaic.tiles[0]
            tile_oi.slice_by_rc(row, col, height=height, width=width, inplace=True, name='0')
            tile_oi.active = True
            self._mosaic = self._mosaic.from_tile_list([tile_oi])

        return self

    def select_xy(self, x, y, sref=None, inplace=False) -> "RasterData":
        """
        Selects a pixel according to the given coordinate tuple.

        Parameters
        ----------
        x : number
            Coordinate in X direction.
        y : number
            Coordinate in Y direction.
        sref : geospade.crs.SpatialRef, optional
            CRS of the given coordinate tuple. Defaults to the CRS of the mosaic.
        inplace : bool, optional
            If True, the current raster data object is modified.
            If False, a new raster data instance will be returned (default).

        Returns
        -------
        RasterData :
            Raster data object with a file register and a mosaic only consisting of the intersected tile containing
            information on the location of the time series. If the coordinates do not intersect with any tile of the
            mosaic, a warning is issued and None is returned.

        """
        if not inplace:
            new_raster_data = copy.deepcopy(self)
            return new_raster_data.select_xy(x, y, sref=sref, inplace=True)

        if self._data_geom is not None:
            row, col = self._data_geom.xy2rc(x, y, sref=sref)
            self._data_geom.slice_by_rc(row, col, inplace=True, name='0')
            # NOTE(review): presumably unreachable, since the in-place call does not rebind `self._data_geom`
            if self._data_geom is None:
                wrn_msg = "Coordinates are outside the spatial extent of the raster mosaic."
                warnings.warn(wrn_msg)

        tile_oi = self._mosaic.xy2tile(x, y, sref=sref)
        if tile_oi is not None:
            row, col = tile_oi.xy2rc(x, y, sref=sref)
            tile_oi.slice_by_rc(row, col, inplace=True, name='0')
            tile_oi.active = True
            self._mosaic = self._mosaic.from_tile_list([tile_oi])
            self._file_register = self._file_register[self._file_register[self._tile_dim] == tile_oi.parent_root.name]
        else:
            wrn_msg = "Coordinates are outside the spatial extent of the raster mosaic files."
            warnings.warn(wrn_msg)
            return

        return self

    def select_bbox(self, bbox, sref=None, inplace=False) -> "RasterData":
        """
        Selects tile and pixel coordinates according to the given bounding box.

        Parameters
        ----------
        bbox : list of 2 2-tuple
            Bounding box to select, i.e. [(x_min, y_min), (x_max, y_max)]
        sref : geospade.crs.SpatialRef, optional
            CRS of the given bounding box coordinates. Defaults to the CRS of the mosaic.
        inplace : bool, optional
            If True, the current raster data object is modified.
            If False, a new raster data instance will be returned (default).

        Returns
        -------
        RasterData :
            Raster data object with a file register and a mosaic only consisting of the intersected tiles (see
            `select_polygon` for the case of no intersection).

        """
        if not inplace:
            new_raster_data = copy.deepcopy(self)
            return new_raster_data.select_bbox(bbox, sref=sref, inplace=True)

        # the CRS has to be forwarded, otherwise the bounding box would always be interpreted in the mosaic CRS
        return self.select_polygon(bbox, sref=sref, apply_mask=False, inplace=inplace)

    def select_polygon(self, polygon, sref=None, apply_mask=True, inplace=False) -> "RasterData":
        """
        Selects tile and pixel coordinates according to the given polygon.

        Parameters
        ----------
        polygon : ogr.Geometry
            Polygon specifying the pixels to collect.
        sref : geospade.crs.SpatialRef, optional
            CRS of the given polygon coordinates. Defaults to the CRS of the mosaic.
        apply_mask : bool, optional
            True if pixels outside the polygon should be set to a no data value (default).
            False if every pixel within the bounding box of the polygon should be included.
        inplace : bool, optional
            If True, the current raster data object is modified.
            If False, a new raster data instance will be returned (default).

        Returns
        -------
        RasterData :
            Raster data object with a file register and a mosaic only consisting of the intersected tiles. If the
            polygon does not intersect with the mosaic, a warning is issued and None is returned.

        """
        if not inplace:
            new_raster_data = copy.deepcopy(self)
            return new_raster_data.select_polygon(polygon, sref=sref, apply_mask=apply_mask, inplace=True)

        sref = sref or self.mosaic.sref
        polygon = any_geom2ogr_geom(polygon, sref=sref)

        if self._data_geom is not None:
            self._data_geom.slice_by_geom(polygon, inplace=True, name='0')
            # NOTE(review): presumably unreachable, since the in-place call does not rebind `self._data_geom`
            if self._data_geom is None:
                wrn_msg = "Polygon is outside the spatial extent of the raster mosaic."
                warnings.warn(wrn_msg)

        sliced_mosaic = self._mosaic.slice_by_geom(polygon, sref=sref, active_only=False, apply_mask=apply_mask,
                                                   inplace=False, name='0')
        if sliced_mosaic is None:
            wrn_msg = "Polygon is outside the spatial extent of the raster mosaic files."
            warnings.warn(wrn_msg)
            return

        self._mosaic = sliced_mosaic
        tile_names_oi = [tile.parent_root.name for tile in self._mosaic.tiles]
        self._file_register = self._file_register.loc[self._file_register[self._tile_dim].isin(tile_names_oi)]

        return self

    def _view_data(self) -> xr.Dataset:
        """
        Returns a subset of the data according to the current data geometry and the layer ID's of the file register.

        Returns
        -------
        xr.Dataset :
            Subset of the internal data or None if no data is stored in memory.

        """
        data = self._data
        if data is not None:
            # pixel window of the current data geometry relative to the geometry the data was created with
            origin = (self._data_geom.parent_root.ul_x, self._data_geom.parent_root.ul_y)
            min_col, min_row, max_col, max_row = rel_extent(origin, self._data_geom.coord_extent,
                                                            x_pixel_size=self._data_geom.x_pixel_size,
                                                            y_pixel_size=self._data_geom.y_pixel_size)
            xrars = dict()
            for dvar in data.data_vars:
                xrars[dvar] = data[dvar][..., min_row: max_row + 1, min_col:max_col + 1]
            data = xr.Dataset(xrars)

            if self._file_dim in data.coords:
                data = data.sel({self._file_dim: list(np.unique(self._file_register[self._file_dim]))})

        return data

    def _add_grid_mapping(self):
        """ Adds grid mapping information (CRS and affine transformation) to the internal xr.Dataset. """
        if self._data is not None and self._data_geom is not None:
            self._data.rio.write_crs(self._data_geom.sref.wkt, inplace=True)
            self._data.rio.write_transform(Affine(*self._data_geom.geotrans), inplace=True)

    def close(self, layer_ids=None):
        """
        Closes open file handles and resets the corresponding file IDs in the file register.

        Parameters
        ----------
        layer_ids : list, optional
            Layer IDs indicating the file handles which should be closed. Defaults to None, i.e. all file handles
            are closed.

        """
        if layer_ids is not None:
            bool_idxs = self._file_register[self._file_dim].isin(layer_ids)
            layer_file_ids = set(self._file_register.loc[bool_idxs, 'file_id'])
            self._file_register.loc[bool_idxs, 'file_id'] = None
            # a handle must stay open as long as any remaining entry of the file register refers to it
            file_ids_in_use = set(self._file_register['file_id'])
            file_ids = [file_id for file_id in list(self._files)
                        if file_id in layer_file_ids and file_id not in file_ids_in_use]
        else:
            self._file_register['file_id'] = None
            file_ids = list(self._files.keys())

        self.__close_file_handles(file_ids)

    def clear_ram(self):
        """ Releases memory allocated by the internal data object. """
        self._data = None

    def __close_file_handles(self, file_ids):
        """
        Closes stored file handles and removes them from the internal dictionary.

        Parameters
        ----------
        file_ids : list of int
            List of file IDs.

        """
        for file_id in file_ids:
            self._files[file_id].close()
            del self._files[file_id]

    def __enter__(self):
        return self

    def __exit__(self, *args, **kwargs):
        self.close()

    def __deepcopy__(self, memo):
        """
        Deepcopy method of the `RasterData` class. Open file handles are not copied.

        Parameters
        ----------
        memo : dict

        Returns
        -------
        RasterData
            Deepcopy of raster data.

        """
        cls = self.__class__
        result = cls.__new__(cls)
        memo[id(self)] = result
        for k, v in self.__dict__.items():
            if k == '_files':  # skip existing file pointers, can't be copied
                setattr(result, k, dict())
            else:
                setattr(result, k, copy.deepcopy(v, memo))
        result._file_register['file_id'] = None  # remove existing file IDs
        return result

    def _repr_html_(self) -> str:
        """ HTML table representation of the file register of a raster data instance. """
        return self.file_register.style.set_properties(subset=['filepath'], **{'text-align': 'right'})._repr_html_()

    def __repr__(self) -> str:
        """ General string representation of a raster data instance. """
        return f"{self.__class__.__name__}({self._file_dim}, {self.mosaic.__class__.__name__}):\n\n" \
               f"{repr(self.file_register)}"
class RasterDataReader(RasterData):
    """ Base class for reading and managing a stack of raster data files. """

    def __init__(self, file_register, mosaic, stack_dimension='layer_id', stack_coords=None, tile_dimension='tile_id'):
        """
        Constructor of `RasterDataReader`.

        Parameters
        ----------
        file_register : pd.DataFrame
            Data frame managing a stack/list of files containing the following columns:
                - 'filepath' : str
                    Full file path to a geospatial file.
                - `stack_dimension` : object
                    Specifies an ID to which layer a file belongs to, e.g. a layer counter or a timestamp.
                - `tile_dimension` : str
                    Tile name or ID to which tile a file belongs to.
        mosaic : geospade.raster.MosaicGeometry
            Mosaic representing the spatial allocation of the given files. The tiles of the mosaic have to match the
            ID's/names of the `tile_dimension` column.
        stack_dimension : str, optional
            Dimension/column name of the dimension, where to stack the files along (first axis), e.g. time, bands
            etc. Defaults to 'layer_id', i.e. the layer ID's are used as the main coordinates to stack the files.
        stack_coords : list, optional
            Additional columns of `file_register` to use as coordinates. Defaults to None, i.e. only coordinates
            along `stack_dimension` are used.
        tile_dimension : str, optional
            Dimension/column name of the dimension containing tile ID's in correspondence with the tiles in `mosaic`.
            Defaults to 'tile_id'.

        """
        super().__init__(file_register, mosaic,
                         stack_dimension=stack_dimension,
                         stack_coords=stack_coords,
                         tile_dimension=tile_dimension)

    @abc.abstractmethod
    def read(self, *args, auto_decode=False, decoder=None, decoder_kwargs=None, **kwargs):
        """
        Read data from disk.

        Parameters
        ----------
        auto_decode : bool, optional
            True if data should be decoded according to the information available in its metadata.
            Defaults to False.
        decoder : callable, optional
            Function allowing to decode data read from disk.
        decoder_kwargs : dict, optional
            Keyword arguments for the decoder.

        """
        pass

    @abc.abstractmethod
    def _to_xarray(self, *args, **kwargs):
        """
        Converts data read from disk to an xarray dataset.

        Returns
        -------
        xr.Dataset

        """
        pass

    def load(self, *args, **kwargs):
        """
        Loads data either from RAM (if data and a data geometry are available) or from disk.

        Parameters
        ----------
        args :
            Positional arguments for the `read` method.
        kwargs :
            Key-word arguments for the `read` method.

        """
        if self._data is None or self._data_geom is None:
            # nothing in memory yet, thus everything has to come from disk
            self.read(*args, **kwargs)
            return

        # keep only the current view in memory and detach the data geometry from its parent
        self._data = self.data_view
        self._data_geom.parent = None

    @staticmethod
    def _create_tile_and_layer_info_from_files(filepaths, tile_class, file_class, file_class_kwargs) \
            -> Tuple[List[Tile], list, list]:
        """
        Opens each file to derive its tile, and assigns a tile ID and a layer ID to it.

        Parameters
        ----------
        filepaths : list of str
            List of file paths.
        tile_class : class
            Class constructor for a tile class.
        file_class : class
            Class constructor for a file class. Instances are used as context managers and need to provide the
            attributes `sref_wkt`, `geotrans`, and `raster_shape`.
        file_class_kwargs : dict
            Keyword arguments for calling `file_class`.

        Returns
        -------
        tiles : list of Tile
            Unique set of tiles representing all input file paths.
        tile_ids : list of str
            List of tile ids containing one ID per file.
        layer_ids : list
            List of layer ids containing one ID per file.

        """
        file_class_kwargs = file_class_kwargs or dict()
        tiles, tile_ids, layer_ids = [], [], []
        n_unique_tiles = 0
        for filepath in filepaths:
            with file_class(filepath, 'r', **file_class_kwargs) as src:
                sref_wkt = src.sref_wkt
                geotrans = src.geotrans
                n_rows, n_cols = src.raster_shape

            candidate = tile_class(n_rows, n_cols, sref=SpatialRef(sref_wkt), geotrans=geotrans,
                                   name=str(n_unique_tiles))
            matched_tile_id = find_congruent_tile_id_from_tiles(candidate, tiles)
            if matched_tile_id is None:
                # no congruent tile collected so far, thus register the candidate as a new tile
                tiles.append(candidate)
                matched_tile_id = str(n_unique_tiles)
                n_unique_tiles += 1
            tile_ids.append(matched_tile_id)

            # NOTE(review): the current file is already part of `tile_ids` here, so the first file of a tile
            # gets the layer ID 2 - confirm this is intended
            n_files_in_tile = sum(np.array(tile_ids) == matched_tile_id)
            layer_ids.append(n_files_in_tile + 1)

        return tiles, tile_ids, layer_ids
class RasterDataWriter(RasterData):
    """ Allows to write and manage a stack of raster data. """

    def __init__(self, mosaic, file_register=None, data=None, stack_dimension='layer_id', stack_coords=None,
                 tile_dimension='tile_id', dirpath=None, fn_pattern='{layer_id}.xyz', fn_formatter=None):
        """
        Constructor of `RasterDataWriter`.

        Parameters
        ----------
        mosaic : geospade.raster.MosaicGeometry
            Mosaic representing the spatial allocation of the given files. The tiles of the mosaic have to match the
            ID's/names of the `tile_dimension` column.
        file_register : pd.DataFrame, optional
            Data frame managing a stack/list of files containing the following columns:
                - 'filepath' : str
                    Full file path to a geospatial file.
                - `stack_dimension` : object
                    Specifies an ID to which layer a file belongs to, e.g. a layer counter or a timestamp.
                - `tile_dimension` : str
                    Tile name or ID to which tile a file belongs to.
            Missing columns are created from the other arguments, i.e. `mosaic`, `data`, `dirpath`, `fn_pattern`,
            and `fn_formatter`. If it is None, the whole file register is created from `data`.
        data : xr.Dataset, optional
            Raster data stored in memory. It must match the spatial sampling and CRS of the mosaic, but not its
            spatial extent or tiling. Moreover, the dimension of the data along the first dimension (stack
            dimension), must match the entries/filepaths in `file_register`.
        stack_dimension : str, optional
            Dimension/column name of the dimension, where to stack the files along (first axis), e.g. time, bands
            etc. Defaults to 'layer_id', i.e. the layer ID's are used as the main coordinates to stack the files.
        stack_coords : list, optional
            Additional columns of `file_register` to use as coordinates. Defaults to None, i.e. only coordinates
            along `stack_dimension` are used.
        tile_dimension : str, optional
            Dimension/column name of the dimension containing tile ID's in correspondence with the tiles in `mosaic`.
            Defaults to 'tile_id'.
        dirpath : str, optional
            Directory path to the folder where the files should be written to. Defaults to None, i.e. the current
            working directory is used.
        fn_pattern : str, optional
            Pattern for the filename of the new files. To fill in specific parts of the new file name with
            information from the file register, you can specify the respective file register column names in curly
            brackets and add them to the pattern string as desired. Defaults to '{layer_id}.xyz'.
        fn_formatter : dict, optional
            Dictionary mapping file register column names with functions allowing to encode their values as strings.

        Raises
        ------
        ValueError
            If neither `file_register` nor `data` is given.

        """
        fn_formatter = fn_formatter or dict()

        if file_register is None:
            if data is None:
                err_msg = "Either a file register ('file_register') or an xarray dataset ('data') has to be provided."
                raise ValueError(err_msg)
            file_register = RasterDataWriter._file_register_from_data(data, stack_dimension)

        # complete the file register column by column
        if tile_dimension not in file_register.columns:
            file_register = RasterDataWriter._add_tile_names_to_file_register(file_register, mosaic, tile_dimension)

        if stack_dimension not in file_register.columns:
            file_register = RasterDataWriter._add_stack_dims_to_file_register(file_register, stack_dimension, data)

        if 'filepath' not in file_register.columns:
            file_register = RasterDataWriter._add_filepaths_to_file_register(file_register, dirpath, fn_pattern,
                                                                             fn_formatter)

        super().__init__(file_register, mosaic, data=data, stack_dimension=stack_dimension,
                         stack_coords=stack_coords, tile_dimension=tile_dimension)

    @abc.abstractmethod
    def write(self, data, encoder=None, encoder_kwargs=None, overwrite=False, **kwargs):
        """
        Writes a certain chunk of data to disk.

        Parameters
        ----------
        data : xr.Dataset
            Data chunk to be written to disk or being appended to existing data.
        encoder : callable, optional
            Function allowing to encode data before writing it to disk.
        encoder_kwargs : dict, optional
            Keyword arguments for the encoder.
        overwrite : bool, optional
            True if data should be overwritten, False if not (default).

        """
        pass

    @abc.abstractmethod
    def export(self, apply_tiling=False, encoder=None, encoder_kwargs=None, overwrite=False, **kwargs):
        """
        Writes all the internally stored data to disk.

        Parameters
        ----------
        apply_tiling : bool, optional
            True if the internal data should be tiled according to the mosaic.
            False if the internal data composes a new tile and should not be tiled (default).
        encoder : callable, optional
            Function allowing to encode data before writing it to disk.
        encoder_kwargs : dict, optional
            Keyword arguments for the encoder.
        overwrite : bool, optional
            True if data should be overwritten, False if not (default).

        """
        pass

    @classmethod
    def from_xarray(cls, data, file_register=None, mosaic=None, **kwargs) -> "RasterDataWriter":
        """
        Converts an xarray dataset and a file register to a `RasterDataWriter` instance.

        Parameters
        ----------
        data : xr.Dataset
            Raster data.
        file_register : pd.DataFrame, optional
            Data frame managing a stack/list of files containing the following columns:
                - 'filepath' : str
                    Full file path to a geospatial file.
                - `stack_dimension` : object
                    Specifies an ID to which layer a file belongs to, e.g. a layer counter or a timestamp.
                - `tile_dimension` : str
                    Tile name or ID to which tile a file belongs to.
            Defaults to None, i.e. the constructor creates the file register from `data`.
        mosaic : geospade.raster.MosaicGeometry, optional
            Mosaic representing the spatial allocation of the given files. The tiles of the mosaic have to match the
            ID's/names of the `tile_dimension` column. If it is None, a one-tile mosaic will be created from the
            given data.
        kwargs :
            Key-word arguments for the `RasterDataWriter` constructor.

        Returns
        -------
        RasterDataWriter

        """
        if mosaic is None:
            mosaic = cls._mosaic_from_data(data)
        return cls(mosaic, file_register=file_register, data=data, **kwargs)

    def load(self, *args, **kwargs):
        """ Loads data from RAM, i.e. replaces the internal data with its current view. """
        if self._data is not None and self._data_geom is not None:
            self._data = self.data_view
            self._data_geom.parent = None

    @staticmethod
    def _mosaic_from_data(data, sref=None) -> MosaicGeometry:
        """
        Creates a default mosaic from a given xarray dataset.

        Parameters
        ----------
        data : xr.Dataset
            Raster data.
        sref : geospade.crs.SpatialRef, optional
            CRS of the mosaic if not given under the 'spatial_ref' attribute of a coordinate of `data`.

        Returns
        -------
        MosaicGeometry :
            Mosaic with one tile '0'.

        """
        raster_geom = RasterDataWriter.raster_geom_from_data(data, sref=sref)
        n_rows, n_cols = raster_geom.shape
        tile = Tile(n_rows, n_cols, raster_geom.sref, geotrans=raster_geom.geotrans, name='0')
        return MosaicGeometry.from_tile_list([tile], check_consistency=False)

    @staticmethod
    def _file_register_from_data(data, stack_dimension) -> pd.DataFrame:
        """
        Creates a file register with stack dimension coordinates in one column.

        Parameters
        ----------
        data : xr.Dataset
            Raster data.
        stack_dimension : str
            Dimension name representing the stack dimension of the file register.

        Returns
        -------
        pd.DataFrame :
            File register with stack dimension coordinates in one column.

        """
        layers = data[stack_dimension]
        file_register_dict = dict()
        file_register_dict[stack_dimension] = layers
        return pd.DataFrame(file_register_dict)

    @staticmethod
    def _add_tile_names_to_file_register(file_register, mosaic, tile_dimension='tile_id') -> pd.DataFrame:
        """
        Adds all tiles of a mosaic to the given file register under the column `tile_dimension`.

        Parameters
        ----------
        file_register : pd.DataFrame
            File register to add the tile names to.
        mosaic : MosaicGeometry
            Mosaic to extract tile information from.
        tile_dimension : str, optional
            Dimension/column name of the dimension containing tile ID's in correspondence with the tiles in `mosaic`.
            Defaults to 'tile_id'.

        Returns
        -------
        file_register : pd.DataFrame
            New file register containing each original entry once per tile name of the given mosaic.

        """
        n_entries = len(file_register)
        tile_names = mosaic.all_tile_names
        n_tiles = len(tile_names)
        # each entry is repeated `n_tiles` times in a row ...
        file_register = pd.DataFrame(np.repeat(file_register.values, n_tiles, axis=0), columns=file_register.columns)
        # ... and the full sequence of tile names is cycled once per original entry
        file_register[tile_dimension] = np.repeat([tile_names], n_entries, axis=0).flatten()
        return file_register

    @staticmethod
    def _add_stack_dims_to_file_register(file_register, stack_dimension, data=None):
        """
        Adds coordinate values along the stack dimension of the data to the given file register. If no data is
        given, then a simple increment starting at 1 is used to represent the stack dimension.

        Parameters
        ----------
        file_register : pd.DataFrame
            File register to add the stack dimension coordinates to.
        stack_dimension : str
            Dimension name representing the stack dimension of the file register.
        data : xr.Dataset, optional
            Raster data.

        Returns
        -------
        file_register : pd.DataFrame
            File register with stack dimension coordinates added to.

        """
        n_entries = len(file_register)
        if data is not None:
            layers = data[stack_dimension]
            n_layers = len(layers)
            # each entry is repeated once per layer and the layer sequence is cycled once per original entry
            file_register = pd.DataFrame(np.repeat(file_register.values, n_layers, axis=0),
                                         columns=file_register.columns)
            file_register[stack_dimension] = np.repeat([layers], n_entries, axis=0).flatten()
        else:
            layers = list(range(1, n_entries + 1))
            file_register[stack_dimension] = layers

        return file_register

    @staticmethod
    def _add_filepaths_to_file_register(file_register, dirpath=None, fn_pattern='{layer_id}.rd', fn_formatter=None) \
            -> pd.DataFrame:
        """
        Adds a column containing the file paths to new datasets following the naming convention derived from
        `dirpath`, `fn_pattern`, and `fn_formatter` to the given file register.

        Parameters
        ----------
        file_register : pd.DataFrame
            File register to add the file paths to (modified in place).
        dirpath : str, optional
            Directory path to the folder where the files should be written to. Defaults to None, i.e. the current
            working directory is used.
        fn_pattern : str, optional
            Pattern for the filename of the new files. To fill in specific parts of the new file name with
            information from the file register, you can specify the respective file register column names in curly
            brackets and add them to the pattern string as desired. Defaults to '{layer_id}.rd'.
        fn_formatter : dict, optional
            Dictionary mapping file register column names with functions allowing to encode their values as strings.
            Values of columns without a formatter are handed over to `str.format` as they are.

        Returns
        -------
        file_register : pd.DataFrame
            File register with an additional column containing the file paths to new datasets following the naming
            convention derived from `dirpath`, `fn_pattern`, and `fn_formatter`.

        """
        dirpath = dirpath or os.getcwd()
        fn_formatter = fn_formatter or dict()

        filepaths = []
        for _, row in file_register.iterrows():
            fn_entries = dict()
            for k, v in row.items():
                if k in fn_formatter:
                    v = fn_formatter[k](v)
                # non-string values (e.g. integer layer IDs) are kept as well, otherwise a pattern referring to
                # such a column - like the default one - could not be resolved
                fn_entries[k] = v
            filename = fn_pattern.format(**fn_entries)
            filepaths.append(os.path.join(dirpath, filename))

        file_register['filepath'] = filepaths

        return file_register
# Script entry guard: running this module directly intentionally does nothing.
if __name__ == '__main__': pass