# Source code for tctrack.tempest_extremes.tempest_extremes

"""Module providing a class that binds the Tempest Extremes code.

References
----------
- Tempest Extremes code on GitHub: https://github.com/ClimateGlobalChange/tempestextremes
- Tempest Extremes User Guide: https://climate.ucdavis.edu/tempestextremes.php
- GMD paper on Tempest Extremes v2.1: https://doi.org/10.5194/gmd-14-5023-2021
"""

import csv
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import TypedDict

import cf

from tctrack.core import TCTracker, TCTrackerMetadata, TCTrackerParameters, Trajectory


def lod_to_te(inputs: list[dict]) -> str:
    """
    Flatten a list of dicts into the argument string Tempest Extremes expects.

    Only the values of each dictionary are used, in the dictionary's own
    (insertion) order. Values belonging to one dictionary are joined with `,`
    and the resulting groups are joined with `;`.

    Parameters
    ----------
    inputs : list[dict]
        The dictionaries to flatten.

    Returns
    -------
    str
        One string with values separated by `,` and dictionaries separated by `;`.

    Examples
    --------
    >>> lod_to_te([{"a": 1, "b": 2, "c": 3}, {"d": 4, "e": 5, "f": 6}])
    '1,2,3;4,5,6'
    """
    groups = []
    for entry in inputs:
        # One comma-separated group per dictionary; the keys are discarded.
        groups.append(",".join(map(str, entry.values())))
    return ";".join(groups)


class TEContour(TypedDict):
    """
    Closed-contour criterion for a single variable, checked during detection.

    Candidate points that fail this criterion are eliminated from the detection
    search. The closed contour is determined by breadth first search: starting
    from the candidate point (or from nearby minima/maxima if ``minmaxdist`` is
    specified), if any path reaches the specified distance before achieving the
    specified delta then no closed contour is present.

    Each contour is a ``dict`` with keys ``"var"``, ``"delta"``, ``"dist"``, and
    ``"minmaxdist"``.

    See Also
    --------
    TETracker.detect : The DetectNodes call from the TETracker object
    TEDetectParameters : The detection parameter class

    References
    ----------
    `TempestExtremes Documentation
    <https://climate.ucdavis.edu/tempestextremes.php#DetectNodes>`__ and the
    `DetectNodes Source
    <https://github.com/ClimateGlobalChange/tempestextremes/blob/master/src/nodes/DetectNodes.cpp>`_

    Examples
    --------
    A contour requirement on ``"psl"`` with a change of ``200.0`` within ``5.5``
    degrees of the candidate is created as follows:

    >>> TEContour(var="psl", delta=200.0, dist=5.5, minmaxdist=0.0)
    {'var': 'psl', 'delta': 200.0, 'dist': 5.5, 'minmaxdist': 0.0}
    """

    var: str
    """Name of the variable to contour in NetCDF files."""

    delta: float
    """
    Amount by which the field must change from the pivot value. If positive
    (negative) the field must increase (decrease) by this value along the contour.
    """

    dist: float
    """
    Lesser-circle radius (degrees) from the pivot within which the criteria must be
    satisfied.
    """

    minmaxdist: float
    """
    Lesser-circle radius away from the candidate to search for the minima/maxima.
    If delta is positive (negative), the pivot is a local minimum (maximum).
    """
class TEOutputCommand(TypedDict):
    """
    Data required to specify an additional column in the detection output.

    Each output command takes the form of a ``dict`` with keys ``"var"``,
    ``"operator"``, and ``"dist"``. The declaration order of the keys matters:
    the values are written to the DetectNodes command line in that order.

    See Also
    --------
    TETracker.detect : The DetectNodes call from the TETracker object
    TEDetectParameters : The detection parameter class

    References
    ----------
    `TempestExtremes Documentation
    <https://climate.ucdavis.edu/tempestextremes.php#DetectNodes>`__ and the
    `DetectNodes Source
    <https://github.com/ClimateGlobalChange/tempestextremes/blob/master/src/nodes/DetectNodes.cpp>`_

    Examples
    --------
    To add an output column with the minimum ``"psl"`` at the candidate point:

    >>> TEOutputCommand(var="psl", operator="min", dist=0.0)
    {'var': 'psl', 'operator': 'min', 'dist': 0.0}
    """

    var: str
    """Name of the variable to write in output files."""

    operator: str
    """
    Operator that is applied over all points within the specified distance of the
    candidate (options include ``"max"``, ``"min"``, ``"avg"``, ``"maxdist"``, and
    ``"mindist"``).
    """

    dist: float
    """
    Lesser-circle radius (degrees) from the candidate within which the operator is
    applied.
    """
[docs] class TEThreshold(TypedDict): """Data required for a threshold filter for a track trajectory when stitching. Any storm track trajectories that do not satisfy the threshold value for a given number of points will be filtered out. Each condition is of the form "var,op,value,count" and multiple conditions are separated by ";". See Also -------- TETracker.stitch : The StitchNodes call from the TETracker object. TEStitchParameters : The stitching parameter class. References ---------- `TempestExtremes Documentation <https://climate.ucdavis.edu/tempestextremes.php#StitchNodes>`__ and the `StitchNodes Source <https://github.com/ClimateGlobalChange/tempestextremes/blob/master/src/nodes/StitchNodes.cpp>`_ Examples -------- To add a filter requiring latitude ``"lat"`` to be less than 40 degrees for 10 or more points in each track trajectory: >>> TEThreshold(var="lat", op="<=", value=40, count=10) {"var": "lat", "op": "<=", "value": 40, "count": 10}, """ var: str """Name of the variable being tested. Called "col" in TempestExtremes.""" op: str """Operator used for the comparison (options include >,>=,<,<=,=,!=,|>=,|<=).""" value: float """Value on the right-hand-side of the comparison.""" count: int | str """ Either the minimum number of points where the threshold must be satisfied or the instruction ``"all"``, ``"first"``, or ``"last"``. ``"all"`` for all points along the path, ``"first"`` for just the first point, and ``"last"`` for only the last point. """
@dataclass(repr=False)
class TEDetectParameters(TCTrackerParameters):
    """
    Dataclass containing values used by the detection operation of TE.

    See Also
    --------
    TEContour : The class used to define contour criteria
    TEOutputCommand : The class used to define additional outputs

    References
    ----------
    `TempestExtremes Documentation
    <https://climate.ucdavis.edu/tempestextremes.php#DetectNodes>`__ and the
    `DetectNodes Source
    <https://github.com/ClimateGlobalChange/tempestextremes/blob/master/src/nodes/DetectNodes.cpp>`_
    """

    in_data: list[str]
    """List of strings of NetCDF input files."""

    out_header: bool = False
    """Include header at the top of the output file?"""

    output_dir: str = ""  # Not using None to keep type-checking simple
    """
    File path of the directory to output intermediate files. If left empty, it will
    use a temporary directory that will last only for the lifetime of the
    :class:`TETracker` instance.
    """

    output_file: str = "nodes.txt"
    """Name of output nodefile to write to in the :attr:`output_dir` directory."""

    search_by_min: str | None = None
    """
    Input variable in NetCDF files for selecting candidate points (defined as local
    minima). If ``None``, then uses ``"PSL"`` in Tempest Extremes.
    """

    search_by_max: str | None = None
    """
    Input variable in NetCDF files for selecting candidate points (defined as local
    maxima).
    """

    closed_contours: list[TEContour] | None = None
    """
    Criteria for candidates to be eliminated if they do not have a closed contour
    as a list of separate :class:`TEContour` criteria.
    """

    merge_dist: float = 0.0
    """
    DetectNodes merges candidate points with a distance (in degrees) shorter than
    the specified value. Among two candidates within the merge distance, only the
    candidate with the lowest value of the search_by_min field or highest value of
    the search_by_max field are retained.
    """

    time_filter: str | None = None
    """
    Filter for the input data frequency. Options are: `"3hr"`, `"6hr"`, `"daily"`.
    Alternatively, can be a regex for the datetime using format
    `"YYYY-MM-DD HH:MM:SS"`.
    """

    lat_name: str = "lat"
    """String for the latitude dimension in the NetCDF files."""

    lon_name: str = "lon"
    """String for the longitude dimension in the NetCDF files."""

    min_lat: float = 0.0
    """Minimum latitude for candidate points."""

    max_lat: float = 0.0
    """
    Maximum latitude for candidate points. If ``max_lat`` and ``min_lat`` are equal
    then these arguments are ignored.
    """

    min_lon: float = 0.0
    """Minimum longitude for candidate points."""

    max_lon: float = 0.0
    """
    Maximum longitude for candidate points. If ``max_lon`` and ``min_lon`` are equal
    then these arguments are ignored.
    """

    regional: bool = False
    """
    Whether the lat-lon grid is regional. If ``True`` the ``--regional`` flag is
    passed to DetectNodes, which tells it not to treat the grid as periodic in
    longitude. If ``False`` (the default) the flag is omitted.
    """

    output_commands: list[TEOutputCommand] | None = None
    """
    Criteria for any additional columns to be added to the output. Criteria are
    provided as a list of separate :class:`TEOutputCommand` criteria.
    """
@dataclass(repr=False)
class TEStitchParameters(TCTrackerParameters):
    """Dataclass containing values used by the stitch operation of TE.

    The ``out_file_format`` and ``caltype`` values are validated when an instance
    is created; unsupported values raise :class:`ValueError`.

    References
    ----------
    `TempestExtremes Documentation
    <https://climate.ucdavis.edu/tempestextremes.php#StitchNodes>`__ and the
    `StitchNodes Source
    <https://github.com/ClimateGlobalChange/tempestextremes/blob/master/src/nodes/StitchNodes.cpp>`_
    """

    output_dir: str = ""  # Not using None to keep type-checking simple
    """
    File path of the directory to output intermediate files. If left empty, it will
    use the :attr:`TEDetectParameters.output_dir` value if one is provided.
    Otherwise, it will use a temporary directory that will last only for the
    lifetime of the :class:`TETracker` instance.
    """

    output_file: str = "trajectories.txt"
    """
    The output filename to write the trajectories to in the :attr:`output_dir`
    directory.
    """

    in_file: str | None = None
    """
    Filepath of the DetectNodes output file. If this and ``in_list`` are ``None``,
    it will be determined from :attr:`TEDetectParameters.output_dir` and
    :attr:`TEDetectParameters.output_file`. Called "in" in TempestExtremes.
    """

    in_list: str | None = None
    """
    File containing a list of input files to be processed together. This is
    unadvised to use at present as it is likely to be changed.
    """

    in_fmt: list[str] | None = None
    """
    List of the variables in the order they appear in the input file. If ``None``,
    it will be ``["lon", "lat", ...]``, ending in variables defined in
    :attr:`TEDetectParameters.output_commands`.
    """

    allow_repeated_times: bool = False
    """
    If ``False``, an error is thrown if there are multiple sections in the input
    nodefile with the same time.
    """

    caltype: str = "standard"
    """
    The type of calendar to use. Options are ``"standard"`` (365 days with leap
    years), ``"noleap"``, ``"360_day"``.
    """

    time_begin: str | None = None
    """Starting date/time for stitching trajectories. Earlier times will be ignored."""

    time_end: str | None = None
    """Ending date/time for stitching trajectories. Later times will be ignored."""

    max_sep: float = 5
    """
    The maximum distance allowed between candidates (degrees). Called "range" in
    TempestExtremes.
    """

    max_gap: int | str = 0
    """
    The number of missing points allowed between candidates, as an integer. Or as a
    string for the maximum time (inclusive) between points, e.g. ``"24hr"``.
    """

    min_time: int | str = 1
    """
    The minimum required length of a path. Either as an integer for the number of
    candidates, or a string for total duration, e.g. ``"24h"``.
    """

    min_endpoint_dist: float = 0
    """The minimum required distance between the first and last candidates (degrees)."""

    min_path_dist: float = 0
    """The minimum required accumulated distance along the path (degrees)."""

    threshold_filters: list[TEThreshold] | None = None
    """
    Filters for paths based on the number of nodes that satisfy a threshold. Uses a
    list of :class:`TEThreshold` objects. Called "thresholdcmd" in TempestExtremes.
    """

    prioritize: str | None = None
    """
    The variable to use to determine the precedence (lowest to highest) of nodes for
    matching to the next position.
    """

    add_velocity: bool = False
    """
    Whether to include the velocity components (m/s) of the movement of the TC to
    the output file.
    """

    out_file_format: str = "gfdl"
    """
    Format of the output file. ``"gfdl"``, ``"csv"``, or ``"csvnohead"``. See
    :meth:`TETracker.stitch` for details.
    """

    out_seconds: bool = False
    """
    For GFDL output file types, determines whether to report the sub-daily time in
    seconds (``True``) or hours (``False``).
    """

    def __post_init__(self):
        """Reject unsupported ``out_file_format`` and ``caltype`` values.

        Raises
        ------
        ValueError
            If ``out_file_format`` or ``caltype`` is not one of the allowed values.
            The output format is checked first.
        """
        supported_formats = ("gfdl", "csv", "csvnohead")
        if self.out_file_format not in supported_formats:
            raise ValueError(
                f"Invalid out_file_format ({self.out_file_format}). "
                "Allowed values are 'gfdl', 'csv', or 'csvnohead'"
            )

        supported_calendars = ("standard", "noleap", "360_day")
        if self.caltype not in supported_calendars:
            raise ValueError(
                f"Invalid caltype ({self.caltype}). "
                "Allowed values are 'standard', 'noleap', or '360_day'"
            )
class TETracker(TCTracker):
    """Class containing bindings to the Tempest Extremes code.

    Attributes
    ----------
    detect_parameters : TEDetectParameters
        Class containing the parameters for the detection step
    stitch_parameters : TEStitchParameters
        Class containing the parameters for the stitching step
    """

    # Private attributes
    _tempdir: tempfile.TemporaryDirectory

    def __init__(
        self,
        detect_parameters: TEDetectParameters,
        stitch_parameters: TEStitchParameters | None = None,
    ):
        """
        Construct the TempestExtremes class.

        Unset values on the supplied parameter objects are filled in here, so the
        objects passed in are modified in place: empty ``output_dir`` values are
        replaced, and the StitchNodes ``in_file`` / ``in_fmt`` are derived from the
        DetectNodes parameters when they were not provided.

        Parameters
        ----------
        detect_parameters : TEDetectParameters
            Class containing the parameters for the detection step
        stitch_parameters : TEStitchParameters | None
            Class containing the parameters for the stitching step
            Defaults to the default values in TEStitchParameters Class
        """
        self.detect_parameters: TEDetectParameters = detect_parameters
        if stitch_parameters is not None:
            self.stitch_parameters: TEStitchParameters = stitch_parameters
        else:
            self.stitch_parameters = TEStitchParameters()

        dn_params = self.detect_parameters
        sn_params = self.stitch_parameters

        # Define the default output directories
        if dn_params.output_dir == "":
            # The temporary directory is deleted when the object goes out of scope.
            # It is stored in an attribute to persist for the lifetime of the tracker.
            self._tempdir = tempfile.TemporaryDirectory()
            dn_params.output_dir = self._tempdir.name
        if sn_params.output_dir == "":
            sn_params.output_dir = dn_params.output_dir

        # Set StitchNodes input arguments according to DetectNodes parameters,
        # if not provided
        if sn_params.in_file is None and sn_params.in_list is None:
            sn_params.in_file = str(Path(dn_params.output_dir) / dn_params.output_file)
        if sn_params.in_fmt is None and dn_params.output_commands is not None:
            variables = [output["var"] for output in dn_params.output_commands]
            sn_params.in_fmt = ["lon", "lat", *variables]

    @property
    def _parameters(self) -> list[TCTrackerParameters]:
        """A list of the parameter objects that is accessible from the base class."""
        return [self.detect_parameters, self.stitch_parameters]

    def _make_detect_nodes_call(self) -> list[str]:  # noqa: PLR0912 - all branches same logic
        """
        Construct a DetectNodes call based on options set in parameters.

        Optional parameters that are ``None`` (and boolean flags that are
        ``False``) are left out of the call.

        Returns
        -------
        list[str]
            list of strings that can be combined to form a DetectNodes command
            based on the parameters set in self.detect_parameters
        """
        dn_params = self.detect_parameters
        dn_argslist = ["DetectNodes"]

        if dn_params.in_data is not None:
            # Multiple input files are passed as one ";"-separated argument.
            dn_argslist.extend(["--in_data", ";".join(dn_params.in_data)])

        out_file = str(Path(dn_params.output_dir) / dn_params.output_file)
        dn_argslist.extend(["--out", out_file])

        if dn_params.out_header:
            dn_argslist.extend(["--out_header"])
        if dn_params.search_by_min is not None:
            dn_argslist.extend(["--searchbymin", dn_params.search_by_min])
        if dn_params.search_by_max is not None:
            dn_argslist.extend(["--searchbymax", dn_params.search_by_max])
        if dn_params.closed_contours is not None:
            dn_argslist.extend(
                ["--closedcontourcmd", lod_to_te(dn_params.closed_contours)]
            )

        dn_argslist.extend(["--mergedist", str(dn_params.merge_dist)])

        if dn_params.time_filter is not None:
            dn_argslist.extend(["--timefilter", dn_params.time_filter])
        if dn_params.lat_name is not None:
            dn_argslist.extend(["--latname", dn_params.lat_name])
        if dn_params.lon_name is not None:
            dn_argslist.extend(["--lonname", dn_params.lon_name])
        if dn_params.min_lat is not None:
            dn_argslist.extend(["--minlat", str(dn_params.min_lat)])
        if dn_params.max_lat is not None:
            dn_argslist.extend(["--maxlat", str(dn_params.max_lat)])
        if dn_params.min_lon is not None:
            dn_argslist.extend(["--minlon", str(dn_params.min_lon)])
        if dn_params.max_lon is not None:
            dn_argslist.extend(["--maxlon", str(dn_params.max_lon)])
        if dn_params.regional:
            dn_argslist.extend(["--regional"])
        if dn_params.output_commands is not None:
            dn_argslist.extend(["--outputcmd", lod_to_te(dn_params.output_commands)])

        return dn_argslist

    def detect(self):
        """
        Call the DetectNodes utility of Tempest Extremes.

        This will make a system call out to the DetectNodes method from Tempest
        Extremes (provided it has been installed as an external dependency).
        DetectNodes will be run according to the parameters in the
        :attr:`detect_parameters` attribute that were set when the
        :class:`TETracker` instance was created.

        The output file is a plain text file containing each of the TC candidates at
        each time from the input files. If :attr:`~TEDetectParameters.output_dir`
        was left empty this will be a temporary file lasting the lifetime of the
        :class:`TETracker` instance. The output directory is created if it does not
        exist.

        If :attr:`~TEDetectParameters.out_header` is ``True`` the first two lines of
        the file will be a header describing the structure of the data. After this
        each time is listed in the format:

        .. code-block:: text

            <year> <month> <day> <count> <hour>
                <i> <j> <lon> <lat> <var1> <var2> ...
                ...
                <i> <j> <lon> <lat> <var1> <var2> ...

        - ``count`` is the number of nodes at that time.
        - ``i``, ``j`` are the grid indices of the node.
        - ``var1``, ``var2``, etc., are scalar variables as defined by
          :attr:`~TEDetectParameters.output_commands` (typically, psl, orog).

        Returns
        -------
        dict
            dict of subprocess output corresponding to stdout, stderr, and
            returncode.

        Raises
        ------
        FileNotFoundError
            If the DetectNodes executable from TempestExtremes cannot be found.
        RuntimeError
            If Tempest Extremes DetectNodes returns a non-zero exit code.

        References
        ----------
        `TempestExtremes Documentation
        <https://climate.ucdavis.edu/tempestextremes.php#DetectNodes>`__ and the
        `DetectNodes Source
        <https://github.com/ClimateGlobalChange/tempestextremes/blob/master/src/nodes/DetectNodes.cpp>`_

        Examples
        --------
        To set the parameters, instantiate a :class:`TETracker` instance and run
        DetectNodes:

        >>> my_params = TEDetectParameters(...)
        >>> my_tracker = TETracker(detect_parameters=my_params)
        >>> result = my_tracker.detect()
        """
        Path(self.detect_parameters.output_dir).mkdir(parents=True, exist_ok=True)
        dn_call_list = self._make_detect_nodes_call()
        return self.run_tracker_subprocess("DetectNodes", dn_call_list)

    def _make_stitch_nodes_call(self) -> list[str]:
        """
        Construct a StitchNodes call based on options set in parameters.

        Optional parameters that are ``None`` (and boolean flags that are
        ``False``) are left out of the call.

        Returns
        -------
        list[str]
            list of strings that can be combined to form a StitchNodes command
            based on the parameters set in self.stitch_parameters
        """
        sn_argslist = ["StitchNodes"]
        sn_params = self.stitch_parameters

        out_file = str(Path(sn_params.output_dir) / sn_params.output_file)
        sn_argslist.extend(["--out", out_file])

        if sn_params.in_file is not None:
            sn_argslist.extend(["--in", sn_params.in_file])
        if sn_params.in_list is not None:
            sn_argslist.extend(["--in_list", sn_params.in_list])
        if sn_params.in_fmt is not None:
            sn_argslist.extend(["--in_fmt", ",".join(sn_params.in_fmt)])
        if sn_params.allow_repeated_times:
            sn_argslist.extend(["--allow_repeated_times"])

        sn_argslist.extend(["--caltype", str(sn_params.caltype)])

        if sn_params.time_begin is not None:
            sn_argslist.extend(["--time_begin", str(sn_params.time_begin)])
        if sn_params.time_end is not None:
            sn_argslist.extend(["--time_end", str(sn_params.time_end)])

        sn_argslist.extend(["--range", str(sn_params.max_sep)])
        sn_argslist.extend(["--maxgap", str(sn_params.max_gap)])
        sn_argslist.extend(["--mintime", str(sn_params.min_time)])
        sn_argslist.extend(["--min_endpoint_dist", str(sn_params.min_endpoint_dist)])
        sn_argslist.extend(["--min_path_dist", str(sn_params.min_path_dist)])

        if sn_params.threshold_filters is not None:
            sn_argslist.extend(["--threshold", lod_to_te(sn_params.threshold_filters)])
        if sn_params.prioritize is not None:
            sn_argslist.extend(["--prioritize", str(sn_params.prioritize)])
        if sn_params.add_velocity:
            sn_argslist.extend(["--add_velocity"])

        sn_argslist.extend(["--out_file_format", sn_params.out_file_format])

        if sn_params.out_seconds:
            sn_argslist.extend(["--out_seconds"])

        return sn_argslist

    def stitch(self):
        """Call the StitchNodes utility in Tempest Extremes.

        This will make a system call out to the StitchNodes method from Tempest
        Extremes (provided it has been installed as an external dependency).
        StitchNodes will be run according to the parameters in the
        :attr:`stitch_parameters` attribute that were set when the
        :class:`TETracker` instance was created.

        The output is a file containing the data for each node of each trajectory.
        If neither :attr:`~TEStitchParameters.output_dir` nor
        :attr:`~TEDetectParameters.output_dir` was set this will be a temporary file
        lasting the lifetime of the :class:`TETracker` instance. The output
        directory is created if it does not exist.

        The format of the file depends on the
        :attr:`~TEStitchParameters.out_file_format` parameter. The default
        ``"gfdl"`` output is a plain-text "nodefile" format which contains a number
        of track trajectories, each of which in the form.

        .. code-block:: text

            start <N> <year> <month> <day> <hour>
                <i> <j> <var1> <var2> ... <year> <month> <day> <hour>
                ...
                <i> <j> <var1> <var2> ... <year> <month> <day> <hour>

        - ``N`` is number of nodes in the trajectory (and number of lines below
          header).
        - ``i``, ``j`` are grid indices.
        - ``var1``, ``var2``, etc., are scalar variables as defined by
          :attr:`~TEStitchParameters.in_fmt` (typically, lon, lat, psl, orog).
        - ``hour`` may instead be seconds if
          :attr:`~TEStitchParameters.out_seconds` is ``True``.

        Returns
        -------
        dict
            dict of subprocess output corresponding to stdout, stderr, and
            returncode.

        Raises
        ------
        FileNotFoundError
            If the StitchNodes executable from TempestExtremes cannot be found.
        RuntimeError
            If Tempest Extremes StitchNodes returns a non-zero exit code.

        References
        ----------
        `TempestExtremes Documentation
        <https://climate.ucdavis.edu/tempestextremes.php#StitchNodes>`__ and the
        `StitchNodes Source
        <https://github.com/ClimateGlobalChange/tempestextremes/blob/master/src/nodes/StitchNodes.cpp>`_

        Examples
        --------
        To set the parameters, instantiate a :class:`TETracker` instance and perform
        stitching:

        >>> detect_params = TEDetectParameters(...)
        >>> my_params = TEStitchParameters(...)
        >>> my_tracker = TETracker(detect_params, stitch_parameters=my_params)
        >>> result = my_tracker.stitch()
        """
        Path(self.stitch_parameters.output_dir).mkdir(parents=True, exist_ok=True)
        sn_call_list = self._make_stitch_nodes_call()
        return self.run_tracker_subprocess("StitchNodes", sn_call_list)

    def read_trajectories(self) -> list[Trajectory]:
        """
        Parse outputs from Tempest Extremes to list of :class:`tctrack.core.Trajectory`.

        The file to be read and its properties are based on the values in the
        :attr:`stitch_parameters` attribute.

        Returns
        -------
        list[Trajectory]
            A list of :class:`tctrack.core.Trajectory` objects.

        Raises
        ------
        ValueError
            If :attr:`~TEStitchParameters.out_file_format` is not one of ``"gfdl"``,
            ``"csv"``, or ``"csvnohead"``.
        """
        out_file = str(
            Path(self.stitch_parameters.output_dir) / self.stitch_parameters.output_file
        )
        out_file_format = self.stitch_parameters.out_file_format

        if out_file_format == "gfdl":
            trajectories = self._parse_trajectories_gfdl(out_file)
        elif out_file_format == "csv":
            trajectories = self._parse_trajectories_csv(out_file, has_header=True)
        elif out_file_format == "csvnohead":
            trajectories = self._parse_trajectories_csv(out_file, has_header=False)
        else:
            # The format is validated when the parameters are created, but the
            # attribute can be reassigned afterwards. Fail with a clear message
            # instead of an UnboundLocalError on the return below.
            msg = (
                f"Invalid out_file_format ({out_file_format}). "
                "Allowed values are 'gfdl', 'csv', or 'csvnohead'"
            )
            raise ValueError(msg)

        return trajectories

    @staticmethod
    def _parse_gfdl_line_to_point(
        line: list[str], variable_names: list[str] | None = None
    ) -> tuple[list[int], dict[str, int | float]]:
        """
        Parse line from StitchNodes gfdl output into a trajectory data point.

        Data point format is that expected by a :class:`tctrack.core.Trajectory`.
        The first two items are the grid indices, the last four are the time, and
        everything in between is a data column.

        Parameters
        ----------
        line : list[str]
            A list of strings representing the line split into parts.
        variable_names : list[str] | None
            List of variable names for the data columns. Defaults to None, in which
            case the columns are named ``var_1``, ``var_2``, etc. If the number of
            names and data columns differ, the extra names or columns are dropped.

        Returns
        -------
        tuple
            A tuple containing the time as an integer list of
            [year, month, day, hour] and a dict of variables.
        """
        return_vars: dict[str, int | float] = {}
        return_vars.update({"grid_i": int(line[0]), "grid_j": int(line[1])})

        if variable_names:
            return_vars.update(
                {
                    name: float(value)
                    for name, value in zip(variable_names, line[2:-4], strict=False)
                }
            )
        else:
            return_vars.update(
                {
                    f"var_{i}": float(value)
                    for i, value in enumerate(line[2:-4], start=1)
                }
            )

        time = list(map(int, line[-4:]))
        return time, return_vars

    def _parse_trajectories_gfdl(self, file_path):
        """
        Parse track trajectories from a gfdl file.

        Trajectories are numbered from 1 in the order their ``start`` lines appear
        in the file. Blank lines are ignored.

        Parameters
        ----------
        file_path : str
            Path to the input file.

        Returns
        -------
        list[Trajectory]
            A list of :class:`tctrack.core.Trajectory` objects.
        """
        trajectories = {}
        current_trajectory_id = 0  # Initialize trajectory ID

        # Get variable names from in_fmt
        var_names = self.stitch_parameters.in_fmt or []

        with open(file_path, "r") as file:
            for line in file:
                items = line.split()
                if not items:
                    # Blank line (e.g. a trailing newline): nothing to parse.
                    continue

                if items[0] == "start":
                    # Start of new trajectory.
                    # Extract metadata and add Trajectory to dict
                    current_trajectory_id += 1
                    time = list(map(int, items[2:6]))
                    trajectories[current_trajectory_id] = Trajectory(
                        current_trajectory_id,
                        time,
                        calendar=self.stitch_parameters.caltype,
                    )
                # Continue processing ongoing trajectory
                else:
                    trajectories[current_trajectory_id].add_point(
                        *self._parse_gfdl_line_to_point(items, var_names)
                    )

        return list(trajectories.values())

    def _parse_trajectories_csv(self, file_path, has_header=False):
        """
        Generalized function to parse trajectories from csv file with/without header.

        With a header, the variable names are taken from the column names. Without
        one, the columns are assumed to be ``id, year, month, day, hour, i, j``
        followed by the variables, which are named from
        :attr:`~TEStitchParameters.in_fmt` if set, or ``var_1``, ``var_2``, etc.
        otherwise. Empty rows are ignored.

        Parameters
        ----------
        file_path : str
            Path to the input file.
        has_header : bool, optional
            Whether the file has a header. Defaults to False.

        Returns
        -------
        list[Trajectory]
            A list of :class:`tctrack.core.Trajectory` objects.
        """
        trajectories = {}

        # Header columns that are handled explicitly and so are not data variables.
        non_variable_keys = {"track_id", "year", "month", "day", "hour", "i", "j"}

        with open(file_path, "r") as file:
            reader = (
                csv.DictReader(file, skipinitialspace=True)
                if has_header
                else csv.reader(file)
            )
            for row in reader:
                if not row:
                    # csv.reader yields an empty list for a blank line.
                    continue

                if has_header:
                    # Read from dict extracting variable names from keys/header
                    trajectory_id = int(row["track_id"])
                    time = [int(row[k]) for k in ("year", "month", "day", "hour")]
                    variables_dict = {"grid_i": int(row["i"]), "grid_j": int(row["j"])}
                    variables_dict.update(
                        {
                            key: float(value)
                            for key, value in row.items()
                            if key not in non_variable_keys
                        }
                    )
                else:
                    # Read from csv assuming: id, y, m, d, h, i, j, var1, ..., varn
                    trajectory_id = int(row[0])
                    time = list(map(int, row[1:5]))
                    variables_dict = {"grid_i": int(row[5]), "grid_j": int(row[6])}
                    # Get variable names from in_fmt
                    var_names = self.stitch_parameters.in_fmt or [
                        f"var_{i + 1}" for i in range(len(row[7:]))
                    ]
                    variables_dict.update(
                        {
                            var_name: float(row[7 + i])
                            for i, var_name in enumerate(var_names)
                        }
                    )

                if trajectory_id not in trajectories:
                    trajectories[trajectory_id] = Trajectory(
                        trajectory_id=trajectory_id,
                        time=time,
                        calendar=self.stitch_parameters.caltype,
                    )
                trajectories[trajectory_id].add_point(time, variables_dict)

        return list(trajectories.values())

    def _set_metadata(self) -> None:
        """Set the time and variable metadata attributes by reading from input files.

        The time metadata is read from the time coordinate of the search variable
        (:attr:`~TEDetectParameters.search_by_min`, else
        :attr:`~TEDetectParameters.search_by_max`, else ``"PSL"``) and stored in
        the :attr:`_time_metadata` attribute.

        Reads metadata for each variable listed in
        :attr:`detect_parameters.output_commands` from the input NetCDF files
        defined in :attr:`detect_parameters.in_data` (matching the NetCDF variable
        name). These will be stored in the :attr:`variable_metadata` attribute as a
        dictionary of :class:`TCTrackerMetadata` objects, alongside entries for the
        ``grid_i`` and ``grid_j`` indices.

        This will be called from the :meth:`set_metadata` method.

        Raises
        ------
        ValueError
            If a variable is not found in the input files.

        Examples
        --------
        To read in the metadata for ``psl`` from ``inputs.nc``:

        >>> detect_params = TEDetectParameters(
        >>>     in_data=["inputs.nc"],
        >>>     output_commands=[TEOutputCommand(var="psl", operator="min", dist=0)],
        >>> )
        >>> tracker = TETracker(detect_params)
        >>> tracker.set_metadata()
        >>> tracker.variable_metadata
        {
            "psl": TCTrackerMetadata(
                properties={
                    "standard_name": "air_pressure_at_sea_level",
                    "long_name": "Sea Level Pressure",
                    "units": "Pa",
                },
                constructs=[<CF CellMethod: area: point>],
            ),
        }
        """
        input_files = self.detect_parameters.in_data

        # set time metadata
        variable_name = (
            self.detect_parameters.search_by_min
            or self.detect_parameters.search_by_max
            or "PSL"
        )
        fields = cf.read(
            input_files,
            select=f"ncvar%{variable_name}",  # type: ignore[operator]
            netcdf_backend="netCDF4",
        )
        if not fields:
            msg = f"Variable '{variable_name}' not found in input files."
            raise ValueError(msg)

        _, time_coord = fields[0].construct_item("time")
        time_arr = time_coord.datetime_array
        self._time_metadata = {
            "calendar": time_coord.get_property("calendar"),
            "units": time_coord.get_property("units"),
            "start_time": time_arr[0],
            "end_time": time_arr[-1],
        }

        # set variable metadata
        self._variable_metadata = {}

        # Set the variable metadata for the grid indices generated by Tempest Extremes
        self._variable_metadata["grid_i"] = TCTrackerMetadata(
            {"long_name": "longitudinal grid index"}
        )
        self._variable_metadata["grid_j"] = TCTrackerMetadata(
            {"long_name": "latitudinal grid index"}
        )

        # Set the variable metadata for the output variables
        var_outputs = self.detect_parameters.output_commands
        if var_outputs is None or input_files is None:
            return

        # Output-command operators that have a matching cell method name. Other
        # operators (e.g. "maxdist") get no cell method.
        methods = {
            "max": "maximum",
            "min": "minimum",
            "avg": "mean",
        }

        for var_output in var_outputs:
            var_name = var_output["var"]

            # Get the variable field from the netcdf file
            fields = cf.read(
                input_files,
                select=f"ncvar%{var_name}",  # type: ignore[operator]
                netcdf_backend="netCDF4",
            )
            if not fields:
                msg = f"Variable '{var_name}' not found in input files."
                raise ValueError(msg)
            field = fields[0]

            # Read and store the relevant metadata
            self._variable_metadata[var_name] = TCTrackerMetadata(
                {
                    "standard_name": field.get_property("standard_name", var_name),
                    "long_name": field.get_property("long_name", var_name),
                    "units": field.get_property("units", "unknown"),
                }
            )

            # Add information about how the value is determined using `output_commands`
            method = methods.get(var_output["operator"])
            if method is not None:
                dist = var_output["dist"]
                if dist == 0:
                    # A zero radius means the value is taken at the candidate itself.
                    cell_method = cf.CellMethod("area", "point")
                else:
                    qualifier = {"comment": f"lesser circle of radius {dist} degrees"}
                    cell_method = cf.CellMethod("area", method, qualifiers=qualifier)
                self._variable_metadata[var_name].constructs = [cell_method]

    def run_tracker(self, output_file: str):
        """Run TempestExtremes tracker to obtain tropical cyclone track trajectories.

        This first runs :meth:`detect` to get TC candidates at each time. Then these
        are combined into trajectories using :meth:`stitch`. The output is then
        saved as a CF-compliant NetCDF trajectory file.

        Parameters
        ----------
        output_file : str
            Filename to which the tropical cyclone trajectories are saved.

        Raises
        ------
        FileNotFoundError
            - If the TempestExtremes executables cannot be found.
            - If the stitch output file does not exist.
        RuntimeError
            If the TempestExtremes commands return a non-zero exit code.

        Examples
        --------
        To create the tracker instance, then use run_tracker to perform the
        detection, stitching, and generate output.

        >>> detect_params = TEDetectParameters(...)
        >>> stitch_params = TEStitchParameters(...)
        >>> my_tracker = TETracker(detect_params, stitch_params)
        >>> my_tracker.run_tracker("trajectories.nc")
        """
        self.detect()
        self.stitch()
        self.to_netcdf(output_file)