Source code for eodatasets3.names

import re
from datetime import datetime
from pathlib import Path
from typing import Dict, Mapping, Optional, Sequence, Set, Union
from urllib.parse import quote, unquote, urlparse

import datacube.utils.uris as dc_uris

from eodatasets3 import utils
from eodatasets3.model import DEA_URI_PREFIX, Location
from eodatasets3.properties import Eo3Dict, Eo3Interface

# Needed when packaging zip or tar files.
# This runs once, at import time, and registers the "zip" and "tar" URI schemes
# with datacube's URI utilities (presumably so that the "zip:"/"tar:" locations
# produced by resolve_location() can be resolved like other URLs -- TODO confirm).
dc_uris.register_scheme("zip", "tar")


class LazyProductName:
    """
    Descriptor that generates a product name when the attribute is read.

    An explicit ``product_name`` in the metadata always wins. Otherwise the
    name is the underscore-joined, non-empty values of:

    - the producer abbreviation
    - the platform abbreviation, immediately followed by the instrument
      abbreviation (if ``include_instrument``)
    - the product family
    - the product maturity, unless it is ``"stable"``
    - the displayed collection number (if ``include_collection`` and one is known)
    """

    def __init__(self, include_instrument=True, include_collection=False) -> None:
        super().__init__()
        self.include_instrument = include_instrument
        self.include_collection = include_collection

    def __get__(self, c: "NamingConventions", owner) -> str:
        explicit_name = c.metadata.product_name
        if explicit_name:
            return explicit_name

        # The instrument abbreviation is only looked up when it is wanted.
        instrument = c.instrument_abbreviated if self.include_instrument else ""

        name_parts = [
            c.producer_abbreviated,
            f"{c.platform_abbreviated or ''}{instrument or ''}",
            c.metadata.product_family,
        ]

        maturity = c.metadata.product_maturity
        if maturity != "stable":
            name_parts.append(maturity)

        if self.include_collection and c.displayed_collection_number:
            name_parts.append(f"{c.displayed_collection_number}")

        # Empty/None parts are simply left out of the name.
        return "_".join(part for part in name_parts if part)


def _strip_major_version(version: str) -> str:
    """
    Drop the first (major) component of a dot-separated version string.

    A version without any dot has nothing after the major part, so the
    result is an empty string.

    >>> _strip_major_version('1.2.3')
    '2.3'
    >>> _strip_major_version('01.02.03')
    '02.03'
    >>> _strip_major_version('30.40')
    '40'
    >>> _strip_major_version('40')
    ''
    """
    _major, _separator, remainder = version.partition(".")
    return remainder


class LazyLabel:
    """
    Descriptor that generates the dataset label when the attribute is read.

    The label is the underscore-joined, non-empty values of: the product name
    (optionally suffixed with ``-<version>``, dots replaced by dashes), the
    region code, the dataset date (``YYYY-MM-DD``) and the
    ``dea:dataset_maturity`` property.

    :param include_version: append the dataset version to the product name.
    :param strip_major_version: drop the major component of the version first.
    """

    def __init__(self, include_version=True, strip_major_version=False) -> None:
        super().__init__()
        self.include_version = include_version
        self.strip_major_version = strip_major_version

    def __get__(self, c: "NamingConventions", owner) -> str:
        metadata = c.metadata
        prefix = c.product_name

        version = metadata.dataset_version
        if version and self.include_version:
            if self.strip_major_version:
                version = _strip_major_version(version)
            dashed_version = version.replace(".", "-")
            prefix = f"{c.product_name}-{dashed_version}"

        maturity: str = metadata.properties.get("dea:dataset_maturity")
        label_fields = (
            prefix,
            metadata.region_code,
            f"{metadata.datetime:%Y-%m-%d}",
            maturity,
        )
        # filter(None, ...) drops the empty/None fields.
        return "_".join(filter(None, label_fields))


class LazyPlatformAbbreviation:
    """
    Descriptor that generates the abbreviated platform name when read.

    :param known_abbreviations: platform name -> abbreviation
        (defaults to ``KNOWN_PLATFORM_ABBREVIATIONS``).
    :param grouped_abbreviations: group name -> compiled pattern that every
        abbreviation must match (defaults to ``KNOWN_PLATFORM_GROUPINGS``).
    :param show_specific_platform: if there is exactly one platform, return its
        own abbreviation rather than trying a group name.
    :param allow_unknown_abbreviations: if False, raise for platforms that have
        no known abbreviation or no known grouping instead of improvising.
    """

    # The abbreviations mentioned in DEA naming conventions doc.
    KNOWN_PLATFORM_ABBREVIATIONS = {
        "landsat-5": "ls5",
        "landsat-7": "ls7",
        "landsat-8": "ls8",
        "landsat-9": "ls9",
        "sentinel-1a": "s1a",
        "sentinel-1b": "s1b",
        "sentinel-2a": "s2a",
        "sentinel-2b": "s2b",
        "aqua": "aqu",
        "terra": "ter",
    }

    # If all platform (abbreviations) match a pattern, return this group name instead.
    KNOWN_PLATFORM_GROUPINGS = {
        "ls": re.compile(r"ls\d+"),
        "s1": re.compile(r"s1[a-z]+"),
        "s2": re.compile(r"s2[a-z]+"),
    }

    def __init__(
        self,
        *,
        known_abbreviations: Dict = None,
        grouped_abbreviations: Dict = None,
        show_specific_platform=True,
        allow_unknown_abbreviations=True,
    ) -> None:
        self.known_abbreviations = (
            known_abbreviations or self.KNOWN_PLATFORM_ABBREVIATIONS
        )
        self.grouped_abbreviations = (
            grouped_abbreviations or self.KNOWN_PLATFORM_GROUPINGS
        )
        self.show_specific_platform = show_specific_platform

        self.allow_unknown_abbreviations = allow_unknown_abbreviations

    def __get__(self, c: "NamingConventions", owner) -> Optional[str]:
        """Abbreviated form of a satellite, as used in dea product names. eg. 'ls7'."""
        platforms = c.metadata.platforms
        if not platforms:
            return None

        strict = not self.allow_unknown_abbreviations

        if strict:
            unknowns = platforms.difference(self.known_abbreviations)
            if unknowns:
                raise ValueError(
                    f"We don't know the DEA abbreviation for platforms {unknowns!r}. "
                    f"We'd love to add more! Raise an issue on Github: "
                    f"https://github.com/GeoscienceAustralia/eo-datasets/issues/new' "
                )

        # Unknown platforms fall back to their own name with the dashes removed.
        abbreviations = sorted(
            self.known_abbreviations.get(name, name.replace("-", ""))
            for name in platforms
        )

        if self.show_specific_platform and len(abbreviations) == 1:
            (only_abbreviation,) = abbreviations
            return only_abbreviation

        # If all abbreviations are in a group, name it using that group.
        # (eg. "ls" instead of "ls5-ls7-ls8")
        for group_name, pattern in self.grouped_abbreviations.items():
            if all(pattern.match(abbreviation) for abbreviation in abbreviations):
                return group_name

        # Otherwise, there's a mix of platforms: is there a common constellation?
        constellation = c.metadata.properties.get("constellation")
        if constellation:
            return constellation

        if strict:
            raise NotImplementedError(
                f"Satellite constellation abbreviation is not known for platforms {platforms}. "
                f"(for DEA derivative naming conventions.)"
                f"    Is this a mistake? We'd love to add more! Raise an issue on Github: "
                f"https://github.com/GeoscienceAustralia/eo-datasets/issues/new' "
            )

        # Don't bother to include platform in name for un-groupable mixes of them.
        return None


class LazyInstrumentAbbreviation:
    """
    Descriptor that generates the abbreviated instrument name when read.

    Returns None when there is no instrument, or when the dataset does not
    have exactly one platform. For Sentinel-1/2 platforms the abbreviation is
    the lower-cased first letter of the instrument name; for Landsat it is the
    lower-cased second character of the USGS product/scene id.
    """

    def __get__(self, c: "NamingConventions", owner) -> Optional[str]:
        """Abbreviated form of an instrument name, as used in dea product names. eg. 'c'."""
        metadata = c.metadata
        if not metadata.instrument:
            return None

        platforms = metadata.platforms
        if not platforms or len(platforms) > 1:
            return None

        (platform,) = platforms

        if platform.startswith(("sentinel-1", "sentinel-2")):
            return metadata.instrument[0].lower()

        if platform.startswith("landsat"):
            # Candidate id properties, in order of preference:
            #  - usgs standard:
            #      landsat:landsat_product_id: LC08_L1TP_091075_20161213_20170316_01_T2
            #      landsat:landsat_scene_id: LC80910752016348LGN01
            #  - USGS STAC (label is LC08_L2SP_178079_20210417_20210424_02_T1_SR):
            #      landsat:scene_id: LC81780792021107LGN00
            landsat_id = None
            for property_name in (
                "landsat:landsat_product_id",
                "landsat:landsat_scene_id",
                "landsat:scene_id",
            ):
                landsat_id = metadata.properties.get(property_name)
                # Only a missing (None) value falls through to the next candidate.
                if landsat_id is not None:
                    break

            if not landsat_id:
                raise NotImplementedError(
                    "No landsat scene or product id: cannot abbreviate Landsat instrument."
                )

            return landsat_id[1].lower()

        # Otherwise, it's unknown.
        raise NotImplementedError(
            f"Instrument abbreviations aren't supported for platform {platform!r}. "
            f"We'd love to add more support! Raise an issue on Github: "
            f"https://github.com/GeoscienceAustralia/eo-datasets/issues/new' "
        )


class LazyProducerAbbreviation:
    """
    Descriptor that generates the abbreviated producer name when read.

    :param known_abbreviations: producer domain name -> abbreviation
        (defaults to ``KNOWN_PRODUCER_ABBREVIATIONS``).
    """

    KNOWN_PRODUCER_ABBREVIATIONS = {
        "ga.gov.au": "ga",
        "usgs.gov": "usgs",
        "sinergise.com": "sinergise",
        "digitalearthafrica.org": "deafrica",
        "esa.int": "esa",
        # Is there another organisation you want to use? Pull requests very welcome!
    }

    def __init__(self, *, known_abbreviations: Dict = None) -> None:
        self.known_abbreviations = (
            known_abbreviations or self.KNOWN_PRODUCER_ABBREVIATIONS
        )

    def __get__(self, c: "NamingConventions", owner) -> Optional[str]:
        """
        Abbreviated form of a producer, as used in dea product names. eg. 'ga', 'usgs'.

        Returns None if the metadata has no producer.

        :raises NotImplementedError: if the producer has no known abbreviation.
        """
        producer = c.metadata.producer
        if not producer:
            return None

        try:
            return self.known_abbreviations[producer]
        except KeyError:
            # The KeyError is an implementation detail of the lookup: suppress it
            # so the user only sees the friendly explanation in their traceback.
            raise NotImplementedError(
                f"We don't know how to abbreviate organisation domain name {producer!r}. "
                f"We'd love to add more orgs! Raise an issue on Github: "
                f"https://github.com/GeoscienceAustralia/eo-datasets/issues/new' "
            ) from None


class LazyRegionOffset:
    """
    Descriptor that generates the region part of the dataset folder when read.

    Returns None when the metadata has no region code.
    """

    def __get__(self, c: "NamingConventions", owner) -> Optional[str]:
        region_code = c.metadata.region_code
        if not region_code:
            return None

        # Cut the region code in subfolders
        subfolders = utils.subfolderise(region_code)
        return "/".join(subfolders)


class LazyTimeOffset:
    """
    Descriptor that generates the time part of the dataset folder when read.

    :param date_folders_format: ``strftime`` pattern applied to the dataset's
        ``datetime``.
    """

    def __init__(self, date_folders_format="%Y/%m/%d") -> None:
        self.date_folders_format = date_folders_format

    def __get__(self, c: "NamingConventions", owner) -> Optional[str]:
        dataset_time = c.metadata.datetime
        return dataset_time.strftime(self.date_folders_format)


class LazyDestinationFolder:
    """
    Descriptor that generates the dataset's folder offset when read.

    :param include_version: add a folder for the dataset version
        (dots replaced with dashes) directly after the product name.
    :param include_non_final_maturity: if the ``dea:dataset_maturity`` property
        is set and is not ``"final"``, append ``_<maturity>`` to the last folder.
    """

    def __init__(
        self,
        include_version=False,
        include_non_final_maturity=True,
    ) -> None:
        super().__init__()
        self.include_version = include_version
        self.include_non_final_maturity = include_non_final_maturity

    def __get__(self, c: "NamingConventions", owner) -> str:
        """The folder hierarchy the datasets files go into.

        This is returned as a relative path.

        (forward slashes, but no starting or ending slash)

        Example: ``"ga_ls8c_ard_3/092/084/2016/06/28"``
        """
        d = c.metadata
        parts = [c.product_name]

        if self.include_version:
            parts.append(d.dataset_version.replace(".", "-"))

        region_folder = c.region_folder
        if region_folder is not None:
            parts.append(region_folder)
        time_folder = c.time_folder
        if time_folder is not None:
            parts.append(time_folder)

        if self.include_non_final_maturity:
            # If it's not a final product, append the maturity to the folder.
            maturity: str = d.properties.get("dea:dataset_maturity")
            if maturity and maturity != "final":
                parts[-1] = f"{parts[-1]}_{maturity}"

        if c.dataset_separator_field is not None:
            val = d.properties[c.dataset_separator_field]
            # TODO: choosable formatter?
            if isinstance(val, datetime):
                val = f"{val:%Y%m%dT%H%M%S}"
            # Property values are not always strings (eg. an integer orbit number),
            # but every path component has to be one.
            parts.append(str(val))
        return Path(*parts).as_posix()


class LazyDatasetLocation:
    """
    The location of the dataset as indexed into ODC.

    Generated on read as ``<collection_prefix>/<dataset_folder>/``. It always
    ends in a slash, so that files can be resolved relative to it.
    """

    def __get__(self, c: "NamingConventions", owner) -> str:
        """
        :raises ValueError: if no collection prefix has been set, or if the
                            dataset folder is an absolute path.
        """
        if not c.collection_prefix:
            raise ValueError(
                "collection_prefix is required if you're not setting a "
                "dataset_location or metadata_path!"
            )

        offset = c.dataset_folder
        if Path(offset).is_absolute():
            raise ValueError("Dataset offset is expected to be relative to collection")

        # Prefixes may legitimately end in a slash already (resolve_location()
        # adds one to directories and to zip/tar roots). Drop exactly one, so
        # that we don't emit a "//", which is a different key on object stores.
        prefix = str(c.collection_prefix)
        if prefix.endswith("/"):
            prefix = prefix[:-1]
        return f"{prefix}/{offset}/"


class MissingRequiredFields(ValueError):
    """Properties needed to fulfill the naming conventions have not been set."""


class RequiredPropertyDict(Eo3Dict):
    """A wrapper for Eo3 Dict that throws a loud error if a required field is accessed
    but not yet set.

    - It gives a friendly error of what fields are required, rather than the user
      seeing obtuse "None" errors sprinkled throughout their code.
    - It will _only_ complain about the given required fields. Other fields behave
      like a normal dictionary.
    - A required field counts as "not set" if it is absent *or* has an empty
      value (None, "", ...).

    """

    # Displayed to user for friendlier errors.
    _REQUIRED_PROPERTY_HINTS = {
        "odc:product_family": 'eg. "wofs" or "level1"',
        "odc:processing_datetime": "Time of processing, perhaps datetime.utcnow()?",
        "odc:producer": "Creator of data, eg 'usgs.gov' or 'ga.gov.au'",
        "odc:dataset_version": "eg. 1.0.0",
    }

    def __init__(
        self,
        required_fields: Set[str],
        properties=None,
    ) -> None:
        # Assigned before the parent initialiser runs, so the field set is
        # already available to any item access made during initialisation.
        self.required_fields = required_fields
        super().__init__(properties)

    def __getitem__(self, item):
        """
        Get a property value.

        :raises MissingRequiredFields: if the item is a required field that is not set.
        :raises KeyError: if the item is absent and is not a required field.
        """
        try:
            val = super().__getitem__(item)
        except KeyError:
            if item in self.required_fields:
                self._raise_all_missing_requirements()
            raise

        if (not val) and (item in self.required_fields):
            self._raise_all_missing_requirements()
        return val

    def _is_unset(self, field: str) -> bool:
        """Is the given field absent, or present with an empty value?"""
        return field not in self._props or not self._props[field]

    def _raise_all_missing_requirements(self):
        """
        Do we have enough properties to generate file or product names?

        All unset required fields are reported in one error, so the user can fix
        them in one go.

        :raises MissingRequiredFields: if any required field is not set.
        """
        # Uses the same "unset" test that triggers this check in __getitem__:
        # otherwise an empty required value would pass through silently.
        missing_props = sorted(f for f in self.required_fields if self._is_unset(f))
        if not missing_props:
            return

        examples = []
        for p in missing_props:
            hint = self._REQUIRED_PROPERTY_HINTS.get(p, "")
            if hint:
                hint = f" ({hint})"
            examples.append(f"\n- {p!r}{hint}")

        raise MissingRequiredFields(
            f"Need more properties to fulfill naming conventions."
            f"{''.join(examples)}"
        )


class EnforceRequirementProperties(Eo3Interface):
    """
    EO3 field access backed by a :class:`RequiredPropertyDict`.

    Reading a field listed in ``required_fields`` before it has been set
    raises :class:`MissingRequiredFields`.
    """

    def __init__(self, properties: Mapping, required_fields: Set[str]) -> None:
        # Note the argument order: the dict takes the required fields first.
        self._props = RequiredPropertyDict(required_fields, properties)

    @property
    def properties(self) -> Eo3Dict:
        return self._props


class LazyFileName:
    """
    Descriptor that generates a file name when read, by calling the namer's
    ``filename()`` with a fixed file id and suffix.
    """

    def __init__(self, file_id: str, suffix: str) -> None:
        self.file_id = file_id
        self.suffix = suffix

    def __get__(self, c: "NamingConventions", owner) -> str:
        make_filename = c.filename
        return make_filename(file_id=self.file_id, suffix=self.suffix)


class LazyProductURI:
    """
    Descriptor that generates the product's identifier URI when read:
    ``<base_product_uri>/product/<url-quoted product name>``.

    Returns None if the namer has no base product URI.
    """

    def __get__(self, n: "NamingConventions", owner) -> Optional[str]:
        base_uri = n.base_product_uri
        if not base_uri:
            return None

        quoted_name = quote(n.product_name)
        return f"{base_uri}/product/{quoted_name}"


def resolve_location(path: Location) -> str:
    """
    Make sure a dataset location is a URL, suitable to be
    the dataset_location in datacube indexing.

    Strings must already be a URL or VSI path, and are returned unchanged.
    Anything else (users may specify a pathlib.Path()) is normalised and
    converted: to a ``tar:``/``zip:`` location if it has a ``.tar``/``.zip``
    suffix, otherwise to a URI.

    :raises ValueError: if given a string that is neither a URL nor a VSI path.
    """
    if isinstance(path, str):
        if dc_uris.is_url(path) or dc_uris.is_vsipath(path):
            return path
        raise ValueError(
            "A string location is expected to be a URL or VSI path. "
            "Perhaps you want to give it as a local pathlib.Path()?"
        )

    local_path = dc_uris.normalise_path(path)

    if ".tar" in local_path.suffixes:
        return f"tar:{local_path}!/"
    if ".zip" in local_path.suffixes:
        return f"zip:{local_path}!/"

    uri = local_path.as_uri()
    # Base paths specified as directories must end in a slash,
    # so they will be url joined as subfolders. (pathlib strips them)
    return f"{uri}/" if local_path.is_dir() else uri


def _as_path(url: str) -> Path:
    """
    Try to convert the given URL to a local Path

    Percent-escapes in the URL are decoded, so that this is the inverse of
    ``Path.as_uri()`` (which, for example, encodes a space as ``%20``).

    :raises ValueError: if the URL is not a ``file://`` URL.
    """
    parts = urlparse(url)
    if parts.scheme != "file":
        raise ValueError(f"Expected a filesystem path, got a URL! {url!r}")

    return Path(unquote(parts.path))


class NamingConventions:
    """
    A generator of names for products, data labels, file paths, urls, etc.

    These are generated based on a given set of naming conventions, but a user
    can manually override any properties to avoid generation.

    Create an instance by calling :meth:`eodatasets3.namer`:

    .. testcode ::

        from eodatasets3 import namer

        properties = {
            'eo:platform': 'sentinel-2a',
            'eo:instrument': 'MSI',
            'odc:product_family': 'level1',
        }
        n = namer(properties, conventions='default')
        print(n.product_name)

    .. testoutput ::

        s2am_level1

    .. note ::

       You may want to use an :class:`eodatasets3.DatasetDoc` instance rather than a dict
       for properties, to get convenience methods such as ``.platform = 'sentinel-2a'``,
       `.properties`, automatic property normalisation etc.

       See :ref:`the naming section<names_n_paths>` for an example.

    Fields are lazily generated when accessed using the underlying metadata properties,
    but you can manually set any field to avoid generation:

    .. doctest ::

        >>> from eodatasets3 import DatasetDoc
        >>> p = DatasetDoc()
        >>> p.platform = 'landsat-7'
        >>> p.product_family = 'nbar'
        >>>
        >>> n = namer(conventions='default', properties=p)
        >>> n.product_name
        'ls7_nbar'
        >>> # Manually override the abbreviation:
        >>> n.platform_abbreviated = 'ls'
        >>> n.product_name
        'ls_nbar'
        >>> # Or manually set the entire product name to avoid generation:
        >>> n.product_name = 'custom_nbar_albers'

    In order to calculate paths, give it a collection prefix. This can be
    a :class:`Path object <pathlib.Path>` for local files, or a URL str for remote.

    .. doctest ::

        >>> p.datetime = datetime(2014, 4, 5)
        >>> collection = "s3://dea-public-data-dev/collections"
        >>> n = namer(conventions='default', properties=p, collection_prefix=collection)
        >>> n.dataset_location
        's3://dea-public-data-dev/collections/ls7_nbar/2014/04/05/'
        >>> n.metadata_file
        'ls7_nbar_2014-04-05.odc-metadata.yaml'

    All fields named ``*_file`` are filenames inside (relative to) the
    ``self.dataset_location``.

        >>> n.resolve_file('thumbnail.jpg')
        's3://dea-public-data-dev/collections/ls7_nbar/2014/04/05/thumbnail.jpg'
    """

    _ABSOLUTE_MINIMAL_PROPERTIES = {
        "odc:product_family",
        # Required by Stac regardless.
        "datetime",
    }

    # Placed here for public usage, as people can extend the defaults.
    KNOWN_PRODUCER_ABBREVIATIONS = LazyProducerAbbreviation.KNOWN_PRODUCER_ABBREVIATIONS
    KNOWN_PLATFORM_ABBREVIATIONS = LazyPlatformAbbreviation.KNOWN_PLATFORM_ABBREVIATIONS
    KNOWN_PLATFORM_GROUPINGS = LazyPlatformAbbreviation.KNOWN_PLATFORM_GROUPINGS

    # These are lazily computed on read if not overridden by the user.
    # ie. User can set names.product_name = 'blah'

    #: Product name for ODC
    #:
    product_name: str = LazyProductName(include_collection=True)

    #: Identifier URL for the product
    #: (This is seen as a global id for the product, unlike the plain product
    #: name. It doesn't have to resolve to a real path)
    #:
    #: Eg. ``https://collections.dea.ga.gov.au/product/ga_ls8c_ard_3``
    #:
    product_uri: str = LazyProductURI()

    #: Abbreviated form of the platform, used in most other
    #: paths and names here.
    #:
    #: For example, ``landsat-7`` is usually abbreviated to ``ls7``
    platform_abbreviated: str = LazyPlatformAbbreviation()

    #: Abbreviated form of the instrument, used in most other
    #: paths and names here.
    #:
    #: For example, ``ETM+`` is usually abbreviated to ``e``
    instrument_abbreviated: str = LazyInstrumentAbbreviation()

    #: Abbreviated form of the producer of the dataset
    #: (the producing organisation)
    #:
    #: For example, "ga.gov.au" is abbreviated to "ga"
    producer_abbreviated: str = LazyProducerAbbreviation()

    #: The Label for the dataset. This is a human-readable alternative
    #: to showing the UUID in most parts of ODC. It's used by default in filenames
    #:
    # No major version by default, as the product name contains it (the collection version).
    dataset_label: str = LazyLabel(strip_major_version=True)

    #: The pattern for generating file names.
    #:
    #: The pattern is in python's ``str.format()`` syntax,
    #: with fields ``{file_id}`` and ``{suffix}``
    #:
    #: The namer instance is readable from ``{n}``.
    filename_pattern: str = "{n.dataset_label}{file_id}.{suffix}"

    #: The prefix where all files are stored, as a URI.
    #:
    #: Eg. ``'file:///my/dataset/collections'``
    #:
    #: (used if dataset_location is generated)
    collection_prefix: Optional[str] = None

    #: The region portion of dataset_folder
    #:
    #: By default, it will split the region code in half
    #:
    #: Eg. ``'012/094'``
    #:
    region_folder = LazyRegionOffset()

    #: The time portion of dataset_folder
    #:
    #: By default, it will be ``"%Y/%m/%d"`` of the dataset's ``'datetime'`` property.
    #:
    #: Eg. ``'2019/03/12'``
    #:
    time_folder = LazyTimeOffset()

    #: The full folder offset from the collection_prefix.
    #:
    #: Example: ``'ga_ls8c_ones_3/090/084/2016/01/21'``
    #:
    #: (used if dataset_location is generated)
    dataset_folder: str = LazyDestinationFolder()

    #: The full uri of the dataset as indexed into ODC.
    #:
    #: **All inner document paths are relative to this.**
    #:
    #: Eg. ``s3://dea-public-data/ga_ls_fc_3/2-5-0/091/086/2020/04/04/ga_ls_fc_091086_2020-04-04.odc-metadata.yaml``
    #:
    #: (Defaults to the metadata path inside the dataset_folder)
    dataset_location: str = LazyDatasetLocation()

    #: The path or URL to the ODC metadata file.
    #:
    #: (if relative, it's relative to self.dataset_location ... but could be absolute too)
    #:
    #: Example: ``'ga_ls8c_ones_3-0-0_090084_2016-01-21_final.odc-metadata.yaml'``
    metadata_file: str = LazyFileName("", "odc-metadata.yaml")

    #: The name of a checksum file
    checksum_file: str = LazyFileName("", "sha1")

    def __init__(
        self,
        properties: Mapping,
        base_product_uri: str = None,
        required_fields: Sequence[str] = (),
        dataset_separator_field: Optional[str] = None,
        allow_unknown_abbreviations: bool = True,
    ) -> None:
        #: The default base URI used in product URI generation
        #:
        #: Example: ``https://collections.dea.ga.gov.au/``
        self.base_product_uri = base_product_uri

        # union() creates a new set, so the class-level minimum is never mutated.
        self.required_fields = self._ABSOLUTE_MINIMAL_PROPERTIES.union(required_fields)

        # An extra folder to put each dataset inside, using the value of the given property name.
        self.dataset_separator_field = dataset_separator_field

        if self.dataset_separator_field is not None:
            self.required_fields.add(dataset_separator_field)

        self.allow_unknown_abbreviations = allow_unknown_abbreviations

        #: The underlying dataset properties used for generation.
        self.metadata: Eo3Interface = EnforceRequirementProperties(
            properties, self.required_fields
        )

    @property
    def displayed_collection_number(self) -> Optional[int]:
        """
        The collection number to show in names, or None if it cannot be determined.
        """
        # An explicit collection number trumps all.
        if self.metadata.collection_number:
            return int(self.metadata.collection_number)

        # Otherwise it's the first digit of the dataset version.
        if not self.metadata.dataset_version:
            return None
        return int(self.metadata.dataset_version.split(".")[0])

    def metadata_filename(self, kind: str = "", suffix: str = "yaml") -> str:
        """Make the file name of a metadata document of the given kind."""
        return self.filename(kind, suffix)

    def measurement_filename(
        self, measurement_name: str, suffix: str = "tif", file_id: str = None
    ) -> str:
        """
        Generate the path to a measurement for the current naming conventions.

        ::

            >> p.names.measurement_filename('blue', 'tif')
            'ga_ls8c_ones_3-0-0_090084_2016-01-21_final_blue.tif'

        This is the filename inside the self.dataset_folder
        """
        name = measurement_name.replace(":", "_")

        return self.filename(
            # We use 'band01'/etc in the filename if provided, rather than 'red'
            file_id or name,
            suffix,
        )

    def filename(self, file_id: str, suffix: str) -> str:
        """
        Make a file name according to the current naming conventions' file pattern.

        All filenames have a file_id (eg. "odc-metadata" or "") and a suffix (eg. "yaml")

        Returned file paths are expected to be relative to the ``self.dataset_location``
        """
        # A non-empty id is prefixed with "_" and has its own underscores replaced
        # by dashes.
        file_id = "_" + file_id.replace("_", "-") if file_id else ""
        return self.filename_pattern.format(file_id=file_id, suffix=suffix, n=self)

    def thumbnail_filename(self, kind: str = None, suffix: str = "jpg") -> str:
        """
        Get a thumbnail file path (optionally with the given kind and/or suffix.)
        """
        if kind:
            name = f"{kind}_thumbnail"
        else:
            name = "thumbnail"
        return self.filename(name, suffix)

    def resolve_file(self, path: Location) -> str:
        """
        Convert the given file offset to a fully qualified URL within the dataset location.

        An absolute :class:`pathlib.Path` is returned as its own URI, without
        reference to the dataset location.
        """
        if isinstance(path, Path):
            if path.is_absolute():
                return path.as_uri()
            path = path.as_posix()
        location = self.dataset_location
        resolved = dc_uris.uri_resolve(location, path)
        return resolved

    def resolve_path(self, path: Location) -> Path:
        """
        Convert the given file offset (inside the dataset location) to a Path on the
        local filesystem (if possible).

        :raises ValueError: if the current dataset is not in a file:// location.
        """
        return _as_path(self.resolve_file(path))

    @property
    def dataset_path(self) -> Optional[Path]:
        """
        Get the dataset location as a Path on the local filesystem, if possible.

        :raises ValueError: if the current dataset is not in a file:// location.
        """
        return _as_path(self.dataset_location)

    @property
    def collection_path(self) -> Optional[Path]:
        """
        Get the collection prefix as a Path on the local filesystem, if possible.

        Returns None if there is no prefix, or it is not a file:// location.
        """
        if not self.collection_prefix:
            return None
        try:
            return _as_path(self.collection_prefix)
        except ValueError:
            return None

    def __repr__(self) -> str:
        shown = {"collection_prefix": self.collection_prefix}
        for field_name in ("dataset_location", "metadata_file"):
            try:
                shown[field_name] = getattr(self, field_name)
            except (ValueError, NotImplementedError):
                # The field can't be generated (yet): properties are missing, or
                # an abbreviation is unknown. A repr must never fail, so the
                # field is just left out.
                ...

        fields = ", ".join(f"{k}={v!r}" for k, v in shown.items())
        return f"{self.__class__.__name__}({fields})"
class DEANamingConventions(NamingConventions):
    """
    Naming conventions for DEA products.

    Example file structure (note version number in file):

        ga_ls8c_ones_3/090/084/2016/01/21/ga_ls8c_ones_3-0-0_090084_2016-01-21_final.odc-metadata.yaml
    """

    product_name: str = LazyProductName(include_collection=True)

    # Stricter: only allow pre-approved abbreviations.
    platform_abbreviated: str = LazyPlatformAbbreviation(
        allow_unknown_abbreviations=False
    )

    def __init__(
        self,
        properties: Mapping,
        required_fields=(
            "eo:platform",
            "eo:instrument",
            "odc:processing_datetime",
            "odc:producer",
            "odc:product_family",
            "odc:region_code",
            "odc:dataset_version",
        ),
        dataset_separator_field: Optional[str] = None,
    ) -> None:
        super().__init__(
            properties,
            base_product_uri=DEA_URI_PREFIX,
            required_fields=required_fields,
            dataset_separator_field=dataset_separator_field,
            # DEA wants consistency via the naming-conventions doc.
            allow_unknown_abbreviations=False,
        )


class DEAS2NamingConventions(DEANamingConventions):
    """
    DEA naming conventions, but with an extra subfolder for each unique datatake.

    It will figure out the datatake if you set a sentinel_tile_id or datastrip_id.
    """

    def __init__(
        self,
        properties: Mapping,
        required_fields=(
            "eo:instrument",
            "eo:platform",
            "odc:dataset_version",
            "odc:processing_datetime",
            "odc:producer",
            "odc:product_family",
            "odc:region_code",
        ),
        dataset_separator_field="sentinel:datatake_start_datetime",
    ) -> None:
        super().__init__(
            properties,
            required_fields=required_fields,
            dataset_separator_field=dataset_separator_field,
        )


class DEADerivativesNamingConventions(DEANamingConventions):
    """
    Common derived products.

    Unlike plain 'DEA', they use an explicit collection number (odc:collection_number)
    in the product name which may differ from the software's dataset version
    (odc:dataset_version)

    Example file structure (note version number in folder):

        ga_ls_wo_3/1-6-0/090/081/1998/07/30/ga_ls_wo_3_090081_1998-07-30_interim.odc-metadata.yaml

    Derivatives have a slightly different folder structure.

    And they only show constellations (eg. "ls_" or "s2_") rather than the specific
    satellites in their names (eg. "ls8_").

    They have a version-number folder instead of putting it in each filename.

    And version numbers may not match the collection number (`odc:collection_number` is
    mandatory).
    """

    # Derivatives put the version in the folder instead.
    dataset_label = LazyLabel(include_version=False)

    product_name = LazyProductName(include_instrument=False, include_collection=True)

    dataset_folder = LazyDestinationFolder(
        include_version=True,
        include_non_final_maturity=False,
    )

    platform_abbreviated = LazyPlatformAbbreviation(
        show_specific_platform=False,
        allow_unknown_abbreviations=False,
    )

    def __init__(
        self,
        properties: Mapping,
        required_fields: Sequence[str] = (
            "eo:platform",
            "odc:dataset_version",
            "odc:collection_number",
            "odc:processing_datetime",
            "odc:producer",
            "odc:product_family",
            "odc:region_code",
            "dea:dataset_maturity",
        ),
        dataset_separator_field: Optional[str] = None,
    ) -> None:
        super().__init__(
            properties,
            required_fields=required_fields,
            dataset_separator_field=dataset_separator_field,
        )


class DEAS2DerivativesNamingConventions(DEADerivativesNamingConventions):
    """
    Sentinel-2-based DEA derivative naming conventions.

    Unlike regular derivatives, there's an extra subfolder for each unique datatake.

    It will figure out the datatake if you set a sentinel_tile_id or datastrip_id.
    """

    def __init__(self, properties: Mapping) -> None:
        super().__init__(
            properties,
            dataset_separator_field="sentinel:datatake_start_datetime",
        )


class AfricaProductName:
    """
    Descriptor that generates a ``"{family}_{platform}"`` product name when read,
    unless the metadata has an explicit product name.
    """

    def __get__(self, c: "NamingConventions", owner) -> str:
        explicit_name = c.metadata.product_name
        if explicit_name:
            return explicit_name
        return f"{c.metadata.product_family}_{c.platform_abbreviated}"


class DEAfricaNamingConventions(NamingConventions):
    """
    DEAfrica avoids org names and uses simpler "{family}_{platform}" product names.

    Eg. "wo_ls" (water observations of landsat)
    """

    product_name = AfricaProductName()

    dataset_label = LazyLabel(include_version=False)

    dataset_folder = LazyDestinationFolder(
        include_version=True,
        include_non_final_maturity=False,
    )

    platform_abbreviated = LazyPlatformAbbreviation(
        show_specific_platform=False,
        allow_unknown_abbreviations=False,
    )

    def __init__(
        self,
        properties: Mapping,
    ) -> None:
        super().__init__(
            properties,
            base_product_uri="https://digitalearthafrica.org",
            required_fields=(
                "eo:platform",
                "odc:producer",
                "odc:region_code",
                "odc:product_family",
                "odc:dataset_version",
            ),
        )


# The convention names accepted by namer(), and the class each one creates.
KNOWN_CONVENTIONS = {
    "default": NamingConventions,
    "dea": DEANamingConventions,
    "dea_s2": DEAS2NamingConventions,
    "dea_s2_derivative": DEAS2DerivativesNamingConventions,
    "dea_c3": DEADerivativesNamingConventions,
    "deafrica": DEAfricaNamingConventions,
}
def namer(
    properties: Union[Eo3Dict, Eo3Interface, dict] = None,
    *,
    collection_prefix: Location = None,
    conventions: str = "default",
) -> "NamingConventions":
    """
    Create a naming instance of the given conventions.

    Conventions: 'default', 'dea', 'deafrica', ...

    You usually give it existing properties, but you can use the return value's
    :attr:`.metadata <eodatasets3.NamingConventions.metadata>` field to set properties afterwards.

    :raises ValueError: if the conventions name is not one of the known conventions.
    """
    convention_class = KNOWN_CONVENTIONS.get(conventions)
    if convention_class is None:
        available = ", ".join(KNOWN_CONVENTIONS)
        raise ValueError(
            f"Unknown naming conventions: {conventions}. Possibilities: {available}"
        )

    # Unwrap the underlying properties of things like a DatasetDoc.
    if isinstance(properties, Eo3Interface):
        properties = properties.properties
    if properties is None:
        properties = Eo3Dict()

    names = convention_class(properties)
    if collection_prefix:
        names.collection_prefix = resolve_location(collection_prefix)
    return names