diff --git a/.gitignore b/.gitignore index 7a1170b..8d23b71 100644 --- a/.gitignore +++ b/.gitignore @@ -175,3 +175,4 @@ cython_debug/ # custom dirs data/ +tools/ \ No newline at end of file diff --git a/README.md b/README.md index 7f05305..38ca72d 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,6 @@ $ docker run \ --env STAC_API_URL=value \ --env STAC_INGEST_USER=value \ --env STAC_INGEST_PASS=value \ - ghcr.io/eopf-sample-service/eopf-stac:0.6.0 s3://path/to/eopf.zarr + ghcr.io/eopf-sample-service/eopf-stac:0.10.0 s3://path/to/eopf.zarr ``` diff --git a/pyproject.toml b/pyproject.toml index 5b83e3d..8d8288b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "eopf-stac" -version = "0.9.1" +version = "0.10.0" authors = [ { name="Mario Winkler", email="mario.winkler@dlr.de" } ] @@ -24,7 +24,9 @@ dependencies = [ "stactools-sentinel3 >= 0.4.0", "stactools-sentinel2 >= 0.6.5", "geojson", - "xarray" + "xarray", + "netCDF4 == 1.7.2", + "footprint-facility" ] [project.optional-dependencies] dev = [ diff --git a/src/eopf_stac/common/constants.py b/src/eopf_stac/common/constants.py index 6e27467..a87d634 100644 --- a/src/eopf_stac/common/constants.py +++ b/src/eopf_stac/common/constants.py @@ -1,18 +1,25 @@ +import re from copy import deepcopy from typing import Final import pystac -from pystac.collection import ItemAssetDefinition +from pystac.item_assets import ItemAssetDefinition from pystac.link import Link from pystac.provider import ProviderRole from eopf_stac.common.eopf_xarray import EopfXarrayBackendConfig, OpMode +CDSE_STAC_API_URL = "https://stac.dataspace.copernicus.eu/v1" + SUPPORTED_PRODUCT_TYPES_S1 = [ "S01SIWGRH", "S01SSMGRH", "S01SEWGRH", + "S01SIWGRD", + "S01SSMGRD", + "S01SEWGRD", "S01SIWSLC", + "S01SIVSLC", # CPM workaround "S01SWVSLC", "S01SSMSLC", "S01SEWSLC", @@ -27,38 +34,38 @@ SUPPORTED_S3_OLCI_L2_PRODUCT_TYPES = ["S03OLCLFR", "S03OLCLRR"] SUPPORTED_S3_SLSTR_L1_PRODUCT_TYPES = 
["S03SLSRBT"] SUPPORTED_S3_SLSTR_L2_LST_PRODUCT_TYPE = ["S03SLSLST"] -# SUPPORTED_S3_SLSTR_L2_FRP_PRODUCT_TYPE = ["S03SLSFRP"] # conversion error +SUPPORTED_S3_SLSTR_L2_FRP_PRODUCT_TYPE = ["S03SLSFRP"] +SUPPORTED_S3_SYN_L2_PRODUCT_TYPES = [ + "S03SYNSDR", + "S03SYNVGP", + "S03SYNVG1", + "S03SYNV10", + "S03SYNAOD", +] SUPPORTED_PRODUCT_TYPES_S3 = ( SUPPORTED_S3_OLCI_L1_PRODUCT_TYPES + SUPPORTED_S3_OLCI_L2_PRODUCT_TYPES + SUPPORTED_S3_SLSTR_L1_PRODUCT_TYPES + SUPPORTED_S3_SLSTR_L2_LST_PRODUCT_TYPE + + SUPPORTED_S3_SLSTR_L2_FRP_PRODUCT_TYPE + + SUPPORTED_S3_SYN_L2_PRODUCT_TYPES ) -# Other Sentinen-3 product types to support -SUPPORTED_S3_SRAL_L1_PRODUCT_TYPES = ["S03AHRL1B"] # sentinel-3-sra-l1b -SUPPORTED_S3_SRAL_L2_PRODUCT_TYPES = ["S03AHRL2H"] # sentinel-3-sra-l2-lan-hy -SUPPORTED_S3_SYN_L2_PRODUCT_TYPES = [ - "S03SYNAOD", - "S03SYNSDR", - "S03SYNV10", - "S03SYNVG1", - "S03SYNVGK", - "S03SYNVGP", -] # sentinel-3-syn-l2-aod, sentinel-3-syn-l2, sentinel-3-syn-l2-v10, sentinel-3-syn-l2-vg1, ?, sentinel-3-syn-l2-vgp - - -# other SRAL listed in [1] -# - S03AHRL1A (SR_1_SRA_A_), S03ALRL1A (SR_1_SRA_A_), S03ALRL1B (SR_1_SRA_BS), S03ALRL2H (SR_2_LAN_HY) -# other SYN listed in [1]: S03SYNMIS +# SUPPORTED_S3_SRAL_L1_PRODUCT_TYPES = ["S03AHRL1B"] # sentinel-3-sra-l1b +# SUPPORTED_S3_SRAL_L2_PRODUCT_TYPES = ["S03AHRL2H"] # sentinel-3-sra-l2-lan-hy +# other SRAL listed: S03AHRL1A (SR_1_SRA_A_), S03ALRL1A (SR_1_SRA_A_), S03ALRL1B (SR_1_SRA_BS), S03ALRL2H (SR_2_LAN_HY) # [1] https://cpm.pages.eopf.copernicus.eu/eopf-cpm/main/PSFD/3-product-types-naming-rules.html PRODUCT_TYPE_TO_COLLECTION: Final[dict] = { "S01SIWGRH": "sentinel-1-l1-grd", "S01SSMGRH": "sentinel-1-l1-grd", "S01SEWGRH": "sentinel-1-l1-grd", + "S01SEWGRD": "sentinel-1-l1-grd", + "S01SIWGRD": "sentinel-1-l1-grd", + "S01SSMGRD": "sentinel-1-l1-grd", "S01SIWSLC": "sentinel-1-l1-slc", + "S01SIVSLC": "sentinel-1-l1-slc", # CPM workaround "S01SWVSLC": "sentinel-1-l1-slc", "S01SSMSLC": "sentinel-1-l1-slc", "S01SEWSLC": 
"sentinel-1-l1-slc", @@ -75,6 +82,11 @@ "S03SLSRBT": "sentinel-3-slstr-l1-rbt", "S03SLSFRP": "sentinel-3-slstr-l2-frp", "S03SLSLST": "sentinel-3-slstr-l2-lst", + "S03SYNSDR": "sentinel-3-syn-l2", + "S03SYNVGP": "sentinel-3-syn-l2-vgp", + "S03SYNVG1": "sentinel-3-syn-l2-vg1", + "S03SYNV10": "sentinel-3-syn-l2-v10", + "S03SYNAOD": "sentinel-3-syn-l2-aod", } MEDIA_TYPE_ZARR = "application/vnd+zarr" @@ -152,3 +164,9 @@ def get_item_asset_product(): PRODUCT_EXTENSION_SCHEMA_URI = "https://stac-extensions.github.io/product/v0.1.0/schema.json" PROCESSING_EXTENSION_SCHEMA_URI = "https://stac-extensions.github.io/processing/v1.2.0/schema.json" EOPF_EXTENSION_SCHEMA_URI = "https://cs-si.github.io/eopf-stac-extension/v1.2.0/schema.json" +VERSION_EXTENSION_SCHEMA_URI = "https://stac-extensions.github.io/version/v1.2.0/schema.json" +RASTER_EXTENSION_SCHEMA_URI = "https://stac-extensions.github.io/raster/v2.0.0/schema.json" + +S2_MGRS_PATTERN: Final[re.Pattern[str]] = re.compile( + r"_T(\d{1,2})([CDEFGHJKLMNPQRSTUVWX])([ABCDEFGHJKLMNPQRSTUVWXYZ][ABCDEFGHJKLMNPQRSTUV])" +) diff --git a/src/eopf_stac/common/stac.py b/src/eopf_stac/common/stac.py index bfaf88d..f99d1fb 100644 --- a/src/eopf_stac/common/stac.py +++ b/src/eopf_stac/common/stac.py @@ -1,17 +1,30 @@ +import json +import logging import os +import re +import geojson import pystac +import shapely +from footprint_facility import rework_to_polygon_geometry +from pystac import Link from pystac.extensions.eo import EOExtension +from pystac.extensions.grid import GridExtension from pystac.extensions.sat import OrbitState, SatExtension from pystac.extensions.timestamps import TimestampsExtension from pystac.utils import now_in_utc, str_to_datetime +from stactools.sentinel2.mgrs import MgrsExtension from eopf_stac.common.constants import ( EOPF_EXTENSION_SCHEMA_URI, PROCESSING_EXTENSION_SCHEMA_URI, PRODUCT_EXTENSION_SCHEMA_URI, + S2_MGRS_PATTERN, + VERSION_EXTENSION_SCHEMA_URI, ) +logger = logging.getLogger(__name__) + def 
validate_metadata(metadata: dict) -> dict: stac_discovery = metadata.get("metadata", {}).get(".zattrs", {}).get("stac_discovery") @@ -33,6 +46,18 @@ def rearrange_bbox(bbox): return corrected_bbox +def get_identifier_from_href(product_href: str): + if product_href.endswith("/"): + item_id = os.path.basename(product_href[:-1]) + else: + item_id = os.path.basename(product_href) + + if item_id.lower().endswith(".safe") or item_id.lower().endswith(".sen3") or item_id.lower().endswith(".zarr"): + item_id = os.path.splitext(item_id)[0] + + return item_id + + def get_identifier(stac_discovery: dict): item_id = stac_discovery.get("id") # CPM workaround for https://gitlab.eopf.copernicus.eu/cpm/eopf-cpm/-/issues/690 @@ -45,33 +70,70 @@ def get_datetimes(properties: dict): datetime = None start_datetime = None end_datetime = None + + start_datetime_str = properties.get("start_datetime") + if start_datetime_str is not None: + start_datetime = str_to_datetime(start_datetime_str) + + end_datetime_str = properties.get("end_datetime") + if end_datetime_str is not None: + end_datetime = str_to_datetime(end_datetime_str) + datetime_str = properties.get("datetime") if datetime_str is not None: # CPM workaround for https://gitlab.eopf.copernicus.eu/cpm/eopf-cpm/-/issues/643 if datetime_str == "null": - datetime = None + datetime = start_datetime else: datetime = str_to_datetime(datetime_str) - - if datetime is None: - # start_datetime and end_datetime must be supplied - start_datetime_str = properties.get("start_datetime") - if start_datetime_str is not None: - start_datetime = str_to_datetime(start_datetime_str) - datetime = start_datetime - end_datetime_str = properties.get("end_datetime") - if end_datetime_str is not None: - end_datetime = str_to_datetime(end_datetime_str) + else: + datetime = start_datetime return (datetime, start_datetime, end_datetime) +def get_cpm_version(path: str) -> str | None: + # matches "cpm_v256" + p = re.compile("cpm_v[0-9]+") + m = p.search(path) + if 
m is not None: + g = m.group() + cpm_version = f"{g[5]}.{g[6]}.{g[7]}" + return cpm_version + + # matches "cpm-2.5.9" + p = re.compile("(cpm-([0-9]\.)*[0-9])") + m = p.search(path) + if m is not None: + g = m.group() + cpm_version = f"{g[4]}.{g[6]}.{g[8]}" + return cpm_version + + return None + + +def fix_geometry(item: pystac.Item) -> None: + coordinates = geojson.Polygon.clean_coordinates(coords=item.geometry["coordinates"], precision=15) + first_coord = coordinates[0][0] + last_coord = coordinates[0][-1] + if first_coord != last_coord: + # CPM workaround for https://gitlab.eopf.copernicus.eu/cpm/eopf-cpm/-/issues/708 + logger.info("Fixing coordinates to end linear ring where it started") + coordinates[0].append(first_coord) + item.geometry["coordinates"] = coordinates + + geometry = shapely.from_geojson(json.dumps(item.geometry)) + reworked = rework_to_polygon_geometry(geometry) + item.geometry = json.loads(shapely.to_geojson(reworked)) + + def fill_timestamp_properties(item: pystac.Item, properties: dict) -> None: - created_datetime = properties.get("created") - if created_datetime is None: - created_datetime = now_in_utc() - else: - created_datetime = str_to_datetime(created_datetime) + # created_datetime_str = properties.get("created") + # created_datetime = None + # if created_datetime_str is None: + created_datetime = now_in_utc() + # else: + # created_datetime = str_to_datetime(created_datetime_str) item.common_metadata.created = created_datetime item.common_metadata.updated = created_datetime @@ -112,46 +174,59 @@ def fill_eo_properties(item: pystac.Item, properties: dict) -> None: eo.snow_cover = snow_cover -def fill_processing_properties(item: pystac.Item, properties: dict) -> None: - # CPM workaround: following invalid values are ignored: - # "processing:expression": "systematic", - # "processing:facility": "OPE,OPE,OPE", - # "processing:version": "", +def fill_processing_properties( + item: pystac.Item, properties: dict, cpm_version: str = None, 
baseline_processing_version: str = None +) -> None: + # CPM workarounds: + # Some invalid values are ignored: + # - "processing:expression": "systematic", + # - "processing:facility": "OPE,OPE,OPE" or ["OPE","OPE","OPE"] + # Baseline processing version is added + # CPM version is added proc_expression = properties.get("processing:expression") proc_lineage = properties.get("processing:lineage") proc_level = properties.get("processing:level") proc_facility = properties.get("processing:facility") + if type(proc_facility) is list and len(proc_facility) > 0: + proc_facility = proc_facility[0] proc_datetime = properties.get("processing:datetime") - proc_version = properties.get("processing:version") proc_software = properties.get("processing:software") - if any_not_none( - [proc_expression, proc_facility, proc_level, proc_lineage, proc_software, proc_datetime, proc_version] - ): + if any_not_none([proc_expression, proc_facility, proc_level, proc_lineage, proc_software, proc_datetime]): item.stac_extensions.append(PROCESSING_EXTENSION_SCHEMA_URI) if proc_expression is not None and proc_expression != "systematic": item.properties["processing:expression"] = proc_expression if proc_software is not None: - item.properties["processing:software"] = proc_software + # CPM workaround + if proc_software.get("name") is None and proc_software.get("version") is None: + item.properties["processing:software"] = proc_software + else: + item.properties["processing:software"] = {} if proc_datetime is not None: item.properties["processing:datetime"] = proc_datetime - if is_valid_string(proc_facility) and proc_facility != "OPE,OPE,OPE": + if is_valid_string(proc_facility): item.properties["processing:facility"] = proc_facility if is_valid_string(proc_level): item.properties["processing:level"] = proc_level if is_valid_string(proc_lineage): item.properties["processing:lineage"] = proc_lineage - if is_valid_string(proc_version): - # CPM workaround - if proc_version != "TODO": - 
item.properties["processing:version"] = proc_version + + # Add CPM to processing:software + if cpm_version is not None: + if proc_software is None: + item.properties["processing:software"] = {} + item.properties["processing:software"]["EOPF-CPM"] = cpm_version + + # Add baseline processing version + if baseline_processing_version is not None: + item.properties["processing:version"] = baseline_processing_version def fill_product_properties(item: pystac.Item, product_type: str, properties: dict) -> None: product_timeliness = properties.get("product:timeliness") product_timeliness_category = properties.get("product:timeliness_category") product_acquisition_type = properties.get("product:acquisition_type") - if any([product_type, product_acquisition_type, all([product_timeliness, product_timeliness_category])]): + if any_not_none([product_type, product_acquisition_type, all([product_timeliness, product_timeliness_category])]): item.stac_extensions.append(PRODUCT_EXTENSION_SCHEMA_URI) if is_valid_string(product_type): item.properties["product:type"] = product_type @@ -187,20 +262,41 @@ def fill_eopf_properties(item: pystac.Item, properties: dict) -> None: ] ): item.stac_extensions.append(EOPF_EXTENSION_SCHEMA_URI) - if is_valid_string(datatake_id): + if datatake_id is not None: item.properties["eopf:datatake_id"] = datatake_id - if is_valid_string(instrument_mode): + if instrument_mode is not None: # CPM workaround if instrument_mode != "Earth Observation": item.properties["eopf:instrument_mode"] = instrument_mode if origin_datetime: item.properties["eopf:origin_datetime"] = origin_datetime - if is_valid_string(datastrip_id): + if datastrip_id is not None: item.properties["eopf:datastrip_id"] = datastrip_id if instrument_configuration_id is not None: item.properties["eopf:instrument_configuration_id"] = instrument_configuration_id +def fill_mgrs_grid_properties(item: pystac.Item, identifier: str) -> bool: + success = False + if identifier is not None: + mgrs_match = 
S2_MGRS_PATTERN.search(identifier) + success = mgrs_match and len(mgrs_groups := mgrs_match.groups()) + if success: + mgrs = MgrsExtension.ext(item, add_if_missing=True) + mgrs.utm_zone = int(mgrs_groups[0]) + mgrs.latitude_band = mgrs_groups[1] + mgrs.grid_square = mgrs_groups[2] + grid = GridExtension.ext(item, add_if_missing=True) + grid.code = f"MGRS-{mgrs.utm_zone}{mgrs.latitude_band}{mgrs.grid_square}" + return success + + +def fill_version_properties(item: pystac.Item) -> None: + if item is not None: + item.properties["deprecated"] = False + item.stac_extensions.append(VERSION_EXTENSION_SCHEMA_URI) + + def is_valid_string(value: str) -> bool: return value is not None and len(value) > 0 @@ -209,3 +305,12 @@ def any_not_none(values: list) -> bool: for v in values: if v is not None: return True + + +def create_cdse_link(cdse_scene_href: str) -> Link: + return Link( + rel="alternate", + title="CDSE STAC item", + target=cdse_scene_href, + media_type="application/geo+json", + ) diff --git a/src/eopf_stac/io.py b/src/eopf_stac/io.py index 28b443a..66f7b06 100644 --- a/src/eopf_stac/io.py +++ b/src/eopf_stac/io.py @@ -10,13 +10,14 @@ from pystac.utils import now_in_utc from eopf_stac.common.constants import ( + CDSE_STAC_API_URL, PRODUCT_METADATA_PATH, PRODUCT_TYPE_TO_COLLECTION, SUPPORTED_PRODUCT_TYPES_S1, SUPPORTED_PRODUCT_TYPES_S2, SUPPORTED_PRODUCT_TYPES_S3, ) -from eopf_stac.common.stac import validate_metadata +from eopf_stac.common.stac import get_cpm_version, validate_metadata from eopf_stac.sentinel1.stac import create_item as create_item_s1 from eopf_stac.sentinel2.stac import create_item as create_item_s2 from eopf_stac.sentinel3.stac import create_item as create_item_s3 @@ -49,24 +50,65 @@ def read_metadata(eopf_href: str) -> dict: return validate_metadata(zmetadata) -def create_item(metadata: dict, eopf_href: str) -> pystac.Item: +def create_item(metadata: dict, eopf_href: str, source_uri: str | None) -> pystac.Item: + # Determine product type 
product_type = metadata[".zattrs"]["stac_discovery"].get("properties", {}).get("product:type") # workaround eopf-cpm 2.4.x if product_type is None: product_type = metadata[".zattrs"]["stac_discovery"].get("properties", {}).get("eopf:type") - if product_type is None: raise ValueError("No product type in stac_discovery metadata") - logger.info(f"Product type is {product_type}") + # Extract CPM version from eopf_href + cpm_version = get_cpm_version(eopf_href) + logger.info(f"CPM version is {cpm_version}") + + # CDSE scene id and href + logger.info(f"Source URI is {source_uri}") + cdse_scene_id = None + if source_uri is not None and len(source_uri) > 0: + cdse_scene_id = get_source_identifier(source_uri) + logger.info(f"CDSE scene ID is {cdse_scene_id}") + else: + logger.warning("No value for --source-uri provided. Some STAC properties might not be available!") + + cdse_scene_href = None + if cdse_scene_id is not None: + cdse_scene_href = get_source_stac_item_url(cdse_scene_id) + logger.info(f"CDSE STAC item URL of source scene is {cdse_scene_href}") + + if cdse_scene_href is None: + logger.warning("Unable to determine link to the original scene at CSDE STAC API!") + item = None if product_type in SUPPORTED_PRODUCT_TYPES_S1: - item = create_item_s1(metadata=metadata, product_type=product_type, asset_href_prefix=eopf_href) + item = create_item_s1( + metadata=metadata, + product_type=product_type, + asset_href_prefix=eopf_href, + cpm_version=cpm_version, + cdse_scene_id=cdse_scene_id, + cdse_scene_href=cdse_scene_href, + ) elif product_type in SUPPORTED_PRODUCT_TYPES_S2: - item = create_item_s2(metadata=metadata, product_type=product_type, asset_href_prefix=eopf_href) + item = create_item_s2( + metadata=metadata, + product_type=product_type, + asset_href_prefix=eopf_href, + cpm_version=cpm_version, + cdse_scene_id=cdse_scene_id, + cdse_scene_href=cdse_scene_href, + ) elif product_type in SUPPORTED_PRODUCT_TYPES_S3: - item = create_item_s3(metadata=metadata, 
product_type=product_type, asset_href_prefix=eopf_href) + item = create_item_s3( + metadata=metadata, + product_type=product_type, + asset_href_prefix=eopf_href, + cpm_version=cpm_version, + cdse_scene_id=cdse_scene_id, + cdse_scene_href=cdse_scene_href, + ) else: raise ValueError(f"The product type '{product_type}' is not supported") @@ -103,3 +145,48 @@ def register_item(item: pystac.Item, stac_api_url: str) -> pystac.Item: logger.info(f"Successfully {api_action} STAC item {item.id} in collection {item.collection_id}") return item + + +def get_source_identifier(source_uri: str | None) -> str: + source_identifier = None + if source_uri is not None: + if source_uri.endswith("/"): + source_uri = source_uri[:-1] + source_identifier = source_uri.split("/")[-1] + if source_identifier.lower().endswith(".safe") or source_identifier.lower().endswith(".sen3"): + source_identifier = os.path.splitext(source_identifier)[0] + return source_identifier + + +def get_source_stac_item_url(source_scene_id: str) -> str | None: + source_stac_item_url = None + try: + source_stac_item_url = get_cdse_stac_item_url(source_scene_id) + except Exception as e: + logger.warning(str(e)) + + return source_stac_item_url + + +def get_cdse_stac_item_url(scene_id: str) -> str: + # https://stac.dataspace.copernicus.eu/v1/search?ids= + # https://stac.dataspace.copernicus.eu/v1/search?ids=S2B_MSIL1C_20240428T102559_N0510_R108_T32UPC_20240428T123125 + params = {"ids": scene_id} + repsonse = requests.get(url=f"{CDSE_STAC_API_URL}/search", params=params) + repsonse.raise_for_status() + + item_url = None + item_collection_dict = repsonse.json() + if len(item_collection_dict["features"]) > 0: + item_dict = item_collection_dict["features"][0] + for link in item_dict["links"]: + rel = link.get("rel") + if rel is not None and rel == "self": + href = link.get("href") + if href is not None and len(href) > 0: + item_url = href + + if item_url is None: + raise ValueError(f"Failed to find STAC item for scene id 
{scene_id} at CDSE") + + return item_url diff --git a/src/eopf_stac/main.py b/src/eopf_stac/main.py index 008c356..e659c76 100644 --- a/src/eopf_stac/main.py +++ b/src/eopf_stac/main.py @@ -53,7 +53,12 @@ def exit_on_error(exit_code: int = 1): def main(): parser = argparse.ArgumentParser("eopf-stac.py") - parser.add_argument("URL", help="Local file path or S3 URL to the EOPF product.", type=str) + parser.add_argument("URL", help="Local file path or URL to the EOPF product", type=str) + parser.add_argument( + "--source-uri", + help="Reference to the original product which was input for the EOPF conversion", + action="store", + ) parser.add_argument( "--dry-run", help="Create STAC item without trying to insert it into the catalog", action="store_true" ) @@ -72,7 +77,7 @@ def main(): metadata = read_metadata(args.URL) logger.info(f"Creating STAC item for {args.URL} ...") - item = create_item(metadata=metadata, eopf_href=args.URL) + item = create_item(metadata=metadata, eopf_href=args.URL, source_uri=args.source_uri) logger.debug(json.dumps(item.to_dict(), indent=4)) if not args.dry_run: diff --git a/src/eopf_stac/sentinel1/assets.py b/src/eopf_stac/sentinel1/assets.py index 9e5a6e0..f4e180e 100644 --- a/src/eopf_stac/sentinel1/assets.py +++ b/src/eopf_stac/sentinel1/assets.py @@ -17,7 +17,7 @@ ) -def create_grd_assets(asset_href_prefix: str, components: list[str]) -> dict[str, pystac.Asset]: +def create_grd_assets(asset_href_prefix: str, components: dict[str:str]) -> dict[str, pystac.Asset]: assets = {} # Create assets for current polarisation @@ -37,10 +37,10 @@ def create_grd_assets(asset_href_prefix: str, components: list[str]) -> dict[str return assets -def create_slc_assets(asset_href_prefix: str, components: list[str]) -> dict[str, pystac.Asset]: +def create_slc_assets(asset_href_prefix: str, components: dict[str:str]) -> dict[str, pystac.Asset]: assets = {} - # Create assets for current swath and polarisation + # TBD: Create assets for current swath and 
polarisation # Create product and metadata assets assets[PRODUCT_ASSET_KEY] = get_item_asset_product().create_asset(asset_href_prefix) @@ -50,7 +50,9 @@ def create_slc_assets(asset_href_prefix: str, components: list[str]) -> dict[str return assets -def create_ocn_assets(asset_href_prefix: str, components: list[str], instrument_mode: str) -> dict[str, pystac.Asset]: +def create_ocn_assets( + asset_href_prefix: str, components: dict[str:str], instrument_mode: str +) -> dict[str, pystac.Asset]: assets = {} # For WV mode the measurements data set are one per vignette. Not creating assets for each burst at the moment. diff --git a/src/eopf_stac/sentinel1/constants.py b/src/eopf_stac/sentinel1/constants.py index 205efd2..3377df0 100644 --- a/src/eopf_stac/sentinel1/constants.py +++ b/src/eopf_stac/sentinel1/constants.py @@ -180,9 +180,13 @@ S1_PRODUCT_TYPE_MAPPING: Final[dict[str, str]] = { "S01SIWGRH": "IW_GRDH_1S", + "S01SIWGRD": "IW_GRDH_1S", "S01SSMGRH": "SM_GRDH_1S", + "S01SSMGRD": "SM_GRDH_1S", "S01SEWGRH": "EW_GRDH_1S", + "S01SEWGRD": "EW_GRDH_1S", "S01SIWSLC": "IW_SLC__1S", + "S01SIVSLC": "IW_SLC__1S", # CPM workaround "S01SWVSLC": "WV_SLC__1S", "S01SSMSLC": "SM_SLC__1S", "S01SEWSLC": "EW_SLC__1S", @@ -192,6 +196,6 @@ "S01SWVOCN": "WV_OCN__2S", } -S1_GRD_PRODUCT_TYPES: Final[list[str]] = ["S01SIWGRH", "S01SSMGRH", "S01SEWGRH"] -S1_SLC_PRODUCT_TYPES: Final[list[str]] = ["S01SIWSLC", "S01SWVSLC", "S01SEWSLC", "S01SSMSLC"] +S1_GRD_PRODUCT_TYPES: Final[list[str]] = ["S01SIWGRH", "S01SSMGRH", "S01SEWGRH", "S01SIWGRD", "S01SSMGRD", "S01SEWGRD"] +S1_SLC_PRODUCT_TYPES: Final[list[str]] = ["S01SIWSLC", "S01SWVSLC", "S01SEWSLC", "S01SSMSLC", "S01SIVSLC"] S1_OCN_PRODUCT_TYPES: Final[list[str]] = ["S01SIWOCN", "S01SEWOCN", "S01SSMOCN", "S01SWVOCN"] diff --git a/src/eopf_stac/sentinel1/stac.py b/src/eopf_stac/sentinel1/stac.py index f2ae6a9..5a0ec28 100644 --- a/src/eopf_stac/sentinel1/stac.py +++ b/src/eopf_stac/sentinel1/stac.py @@ -1,11 +1,9 @@ import logging import os 
-import geojson import pystac -from pystac.extensions.sar import FrequencyBand, Polarization, SarExtension +from pystac.extensions.sar import FrequencyBand, Polarization from pystac.extensions.view import ViewExtension -from pystac.utils import datetime_to_str from eopf_stac.common.constants import ( EOPF_PROVIDER, @@ -14,13 +12,16 @@ SENTINEL_PROVIDER, ) from eopf_stac.common.stac import ( + create_cdse_link, fill_eopf_properties, fill_processing_properties, fill_product_properties, fill_sat_properties, fill_timestamp_properties, + fill_version_properties, + fix_geometry, get_datetimes, - get_identifier, + get_identifier_from_href, rearrange_bbox, ) from eopf_stac.sentinel1.assets import create_grd_assets, create_ocn_assets, create_slc_assets @@ -34,7 +35,14 @@ logger = logging.getLogger(__name__) -def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> pystac.Item: +def create_item( + metadata: dict, + product_type: str, + asset_href_prefix: str, + cpm_version: str = None, + cdse_scene_id: str | None = None, + cdse_scene_href: str | None = None, +) -> pystac.Item: stac_discovery = metadata[".zattrs"]["stac_discovery"] other_metadata = metadata[".zattrs"]["other_metadata"] properties = stac_discovery["properties"] @@ -46,7 +54,7 @@ def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> py end_datetime = datetimes[2] item = pystac.Item( - id=get_identifier(stac_discovery), + id=get_identifier_from_href(asset_href_prefix), bbox=rearrange_bbox(stac_discovery.get("bbox")), geometry=stac_discovery.get("geometry"), properties={}, @@ -55,6 +63,9 @@ def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> py end_datetime=end_datetime, ) + # -- Geometry (fix antimeridian, unclosed ring, etc) + fix_geometry(item) + # -- Common metadata item.common_metadata.mission = "Sentinel-1" @@ -99,7 +110,10 @@ def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> py view.off_nadir = off_nadir # 
Processing Extension - fill_processing_properties(item, properties) + baseline_version = None + if properties.get("processing:software") is not None: + baseline_version = properties.get("processing:software").get("Sentinel-1 IPF") + fill_processing_properties(item, properties, cpm_version, baseline_version) # Product Extension fill_product_properties(item, product_type, properties) @@ -138,53 +152,55 @@ def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> py sar_instrument_mode, ] ): - sar = SarExtension.ext(item, add_if_missing=True) + item.stac_extensions.append("https://stac-extensions.github.io/sar/v1.3.0/schema.json") + # sar = SarExtension.ext(item, add_if_missing=True) if polarizations: - sar.polarizations = polarizations + item.properties["sar:polarizations"] = polarizations if frequency_band: - sar.frequency_band = frequency_band - if instrument_mode: - sar.instrument_mode = instrument_mode + item.properties["sar:frequency_band"] = frequency_band if center_frequency: - sar.center_frequency = center_frequency + item.properties["sar:center_frequency"] = center_frequency else: - sar.center_frequency = 5.405 + item.properties["sar:center_frequency"] = 5.405 if resolution_range: - sar.resolution_range = resolution_range + item.properties["sar:resolution_range"] = resolution_range if resolution_azimuth: - sar.resolution_azimuth = resolution_azimuth + item.properties["sar:resolution_azimuth"] = resolution_azimuth if pixel_spacing_range: - sar.pixel_spacing_range = pixel_spacing_range + item.properties["sar:pixel_spacing_range"] = pixel_spacing_range if observation_direction: - sar.observation_direction = observation_direction + item.properties["sar:observation_direction"] = observation_direction if pixel_spacing_azimuth: - sar.pixel_spacing_azimuth = pixel_spacing_azimuth + item.properties["sar:pixel_spacing_azimuth"] = pixel_spacing_azimuth if sar_product_type: - sar.product_type = sar_product_type + item.properties["sar:product_type"] = 
sar_product_type if sar_instrument_mode: - sar.instrument_mode = sar_instrument_mode + item.properties["sar:instrument_mode"] = sar_instrument_mode # EOPF Extension fill_eopf_properties(item, properties) + # Version Extension + fill_version_properties(item) + logger.debug("Getting product components...") product_components = get_product_components(metadata=metadata, product_type=product_type) # Reconstruct original identifier of SAFE product # CPM workaround for https://gitlab.eopf.copernicus.eu/cpm/eopf-cpm/-/issues/70 - component_name = None - for _, name in product_components.items(): - component_name = name - break # we need only one component_name - item.id = construct_identifier_s1( - product_type=product_type, - polarization=polarizations_value, - startTime=datetime_to_str(start_datetime), - endTime=datetime_to_str(end_datetime), - platform=platform, - orbit=properties.get("sat:absolute_orbit"), - component=component_name, - ) + # component_name = None + # for _, name in product_components.items(): + # component_name = name + # break # we need only one component_name + # item.id = construct_identifier_s1( + # product_type=product_type, + # polarization=polarizations_value, + # startTime=datetime_to_str(start_datetime), + # endTime=datetime_to_str(end_datetime), + # platform=platform, + # orbit=properties.get("sat:absolute_orbit"), + # component=component_name, + # ) # -- Assets assets = {} @@ -207,34 +223,18 @@ def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> py item.add_asset(key, asset) # -- Links - item.links.append(SENTINEL_LICENSE) - - # CPM workaround for https://gitlab.eopf.copernicus.eu/cpm/eopf-cpm/-/issues/708 - fix_geometry(item=item) + if cdse_scene_href is not None: + item.links.append(create_cdse_link(cdse_scene_href)) return item -def fix_geometry(item: pystac.Item): - coordinates = geojson.Polygon.clean_coordinates(coords=item.geometry["coordinates"], precision=15) - first_coord = coordinates[0][0] - - # Append 
first coordinate to polygon - coordinates[0].append(first_coord) - - # Validate new coordinates - polygon = geojson.Polygon(coordinates=coordinates, validate=True) - - # Upate item geomatry - item.geometry = polygon - - def get_product_components(metadata: dict, product_type: str) -> dict[str:str]: components = {} - stac_discovery_links = metadata[".zattrs"]["stac_discovery"]["links"] - if stac_discovery_links: - for component_name in stac_discovery_links: + component_refs = metadata[".zattrs"]["stac_discovery"].get("assets") + if component_refs: + for component_name, _ in component_refs.items(): if isinstance(component_name, str): if product_type in S1_GRD_PRODUCT_TYPES: key = component_name.split("_")[6] @@ -249,12 +249,15 @@ def get_product_components(metadata: dict, product_type: str) -> dict[str:str]: elif product_type in S1_OCN_PRODUCT_TYPES: key = component_name for sub_component in ( - metadata.get(f"{component_name.lower()}/.zattrs", {}).get("stac_discovery", {}).get("links", {}) + metadata.get(f"{component_name.lower()}/.zattrs", {}) + .get("stac_discovery", {}) + .get("assets", {}) ): if isinstance(sub_component, str): components[component_name] = sub_component else: - raise ValueError("Links section in metadata is missing") + # raise ValueError("No references to product components found") + logger.warning("Cannot detect all product parts. 
Some assets might not be available!") return components diff --git a/src/eopf_stac/sentinel2/assets.py b/src/eopf_stac/sentinel2/assets.py index 6d1d795..3acc38f 100644 --- a/src/eopf_stac/sentinel2/assets.py +++ b/src/eopf_stac/sentinel2/assets.py @@ -4,7 +4,6 @@ import numpy as np import pystac from pystac.extensions.eo import Band -from pystac.extensions.raster import RasterExtension from stactools.sentinel2.constants import ( BANDS_TO_ASSET_NAME, SENTINEL_BANDS, @@ -16,6 +15,7 @@ PRODUCT_ASSET_KEY, PRODUCT_METADATA_ASSET_KEY, PRODUCT_METADATA_PATH, + RASTER_EXTENSION_SCHEMA_URI, ROLE_DATA, ROLE_DATASET, get_item_asset_metadata, @@ -23,11 +23,9 @@ ) from eopf_stac.sentinel2.constants import ( ASSET_TO_DESCRIPTION, - BAND_ASSET_EXTRA_FIELDS, DATASET_PATHS_TO_ASSET, L2A_AOT_WVP_ASSETS_TO_PATH, L2A_SCL_ASSETS_TO_PATH, - OTHER_ASSET_EXTRA_FIELDS, ROLE_REFLECTANCE, ) @@ -40,7 +38,6 @@ def get_band_item_assets(band_asset_defs: dict) -> dict[str, pystac.ItemAssetDef asset_key=key, roles=[ROLE_DATA, ROLE_REFLECTANCE], band_keys=[band_key], - extra_fields=deepcopy(BAND_ASSET_EXTRA_FIELDS), ) return item_assets @@ -71,7 +68,6 @@ def get_aot_wvp_item_assets() -> dict[str, pystac.ItemAssetDefinition]: asset_key=key, roles=[ROLE_DATA], band_keys=[], - extra_fields=deepcopy(OTHER_ASSET_EXTRA_FIELDS), title_with_resolution=False, ) item_assets[key] = item_asset @@ -116,7 +112,6 @@ def get_scl_item_assets() -> dict[str, pystac.ItemAssetDefinition]: asset_key=key, roles=[ROLE_DATA], band_keys=[], - extra_fields=deepcopy(OTHER_ASSET_EXTRA_FIELDS), title_with_resolution=False, ) return item_assets @@ -152,7 +147,6 @@ def get_tci_item_assets(tci_asset_defs: dict) -> dict[str, pystac.ItemAssetDefin asset_key=key, roles=[ROLE_DATA], band_keys=["B04", "B03", "B02"], - extra_fields=deepcopy(OTHER_ASSET_EXTRA_FIELDS), title_with_resolution=False, ) return item_assets @@ -244,14 +238,6 @@ def create_item_asset( bands = get_bands_for_band_keys(band_keys) extra_fields["bands"] = bands - if 
"alternate" in extra_fields: - if "xarray" in extra_fields["alternate"]: - if "xarray:open_dataset_kwargs" in extra_fields["alternate"]["xarray"]: - open_dataset_kwargs = extra_fields["alternate"]["xarray"]["xarray:open_dataset_kwargs"] - if len(band_keys) > 0 and band_key != "TCI": - open_dataset_kwargs["bands"] = band_keys - open_dataset_kwargs["spatial_res"] = int(gsd) - title = ASSET_TO_DESCRIPTION[band_key] if title_with_resolution: title = f"{title} - {gsd}m" @@ -287,6 +273,8 @@ def unsuffixed_band_resolution(asset_key: str) -> str: def update_extra_fields_from_metadata(asset: pystac.Asset, attrs: dict, item: pystac.Item): + attrs = attrs.get("_eopf_attrs") + if attrs.get("long_name"): asset.description = attrs.get("long_name") @@ -305,11 +293,12 @@ def update_extra_fields_from_metadata(asset: pystac.Asset, attrs: dict, item: py scale = attrs.get("scale_factor") offset = attrs.get("add_offset") if any([scale, offset]): - RasterExtension.add_to(item) - if scale: - asset.extra_fields["raster:scale"] = attrs.get("scale_factor") - if offset: - asset.extra_fields["raster:offset"] = attrs.get("add_offset") + if RASTER_EXTENSION_SCHEMA_URI not in item.stac_extensions: + item.stac_extensions.append(RASTER_EXTENSION_SCHEMA_URI) + if scale is not None: + asset.extra_fields["raster:scale"] = scale + if offset is not None: + asset.extra_fields["raster:offset"] = offset if attrs.get("fill_value") is not None: asset.extra_fields["nodata"] = attrs.get("fill_value") diff --git a/src/eopf_stac/sentinel2/constants.py b/src/eopf_stac/sentinel2/constants.py index 84d4a23..1d6d503 100644 --- a/src/eopf_stac/sentinel2/constants.py +++ b/src/eopf_stac/sentinel2/constants.py @@ -1,10 +1,33 @@ +import datetime +import os import re from copy import deepcopy from re import Pattern from typing import Final -from eopf_stac.common.constants import DATASET_ASSET_EXTRA_FIELDS -from eopf_stac.common.eopf_xarray import EopfXarrayBackendConfig, OpMode +from pystac import ItemAssetDefinition, 
MediaType, Provider +from pystac.collection import ( + Extent, + SpatialExtent, + TemporalExtent, +) +from pystac.extensions.sat import OrbitState +from stactools.sentinel2.constants import ( + SENTINEL_BANDS, +) + +from eopf_stac.common.constants import ( + DATASET_ASSET_EXTRA_FIELDS, + EOPF_PROVIDER, + LICENSE_PROVIDER, + PRODUCT_ASSET_KEY, + PRODUCT_METADATA_ASSET_KEY, + ROLE_DATA, + ROLE_DATASET, + SENTINEL_PROVIDER, + get_item_asset_metadata, + get_item_asset_product, +) ROLE_REFLECTANCE = "reflectance" @@ -88,14 +111,6 @@ "SR_60m": "measurements/reflectance/r60m", } -OTHER_ASSET_EXTRA_FIELDS: dict[str:dict] = { - "alternate": {"xarray": deepcopy(DATASET_ASSET_EXTRA_FIELDS)}, -} - -BAND_ASSET_EXTRA_FIELDS: dict[str:dict] = { - "alternate": {"xarray": {"xarray:open_dataset_kwargs": EopfXarrayBackendConfig(mode=OpMode.ANALYSIS).to_dict()}}, -} - L1C_BAND_ASSETS_TO_PATH: Final[dict[str, str]] = { "B01_60m": "measurements/reflectance/r60m/b01", "B02_10m": "measurements/reflectance/r10m/b02", @@ -113,3 +128,88 @@ } L1C_TCI_ASSETS_TO_PATH: Final[dict[str, str]] = {"TCI_10m": "quality/l1c_quicklook/r10m/tci"} + + +def get_msi_band_item_assets() -> dict[str:ItemAssetDefinition]: + item_assets = {} + for band_key, band in SENTINEL_BANDS.items(): + item_asset = ItemAssetDefinition.create( + title=f"TOA radiance for OLCI acquisition band {band_key}", + media_type=MediaType.ZARR, + description=None, + roles=[ROLE_DATA], + extra_fields={"bands": [band.to_dict()]}, + ) + item_assets[f"{band_key}_radianceData"] = item_asset + + return item_assets + + +S2_MSI_L1C_ASSETS: dict[str, ItemAssetDefinition] = { + "SR_10m": ItemAssetDefinition.create( + title="Surface Reflectance - 10m", + media_type=MediaType.ZARR, + description=None, + roles=[ROLE_DATA, ROLE_DATASET], + extra_fields={ + **deepcopy(DATASET_ASSET_EXTRA_FIELDS), + "gsd": 10, + "bands": list( + map( + lambda b: b.to_dict(), + [SENTINEL_BANDS["blue"], SENTINEL_BANDS["green"], SENTINEL_BANDS["red"], 
SENTINEL_BANDS["nir"]], + ) + ), + }, + ), + **get_msi_band_item_assets(), + PRODUCT_ASSET_KEY: get_item_asset_product(), + PRODUCT_METADATA_ASSET_KEY: get_item_asset_metadata(), +} + +# -- Collections + + +SENTINEL2_METADATA = { + "extent": Extent( + SpatialExtent([-180.0, -90.0, 180.0, 90.0]), + TemporalExtent([datetime.datetime(2024, 4, 1, 0, 0, 0), None]), + ), + "keywords": ["Copernicus", "Sentinel", "EU", "ESA", "Satellite", "Global", "Earth", "Reflectance"], + "providers": [ + LICENSE_PROVIDER, + Provider( + name=SENTINEL_PROVIDER.name, + roles=SENTINEL_PROVIDER.roles, + url=os.path.join(SENTINEL_PROVIDER.url, "sentinel-2"), + ), + EOPF_PROVIDER, + ], + "constellation": "sentinel-2", + "platforms": ["Sentinel-2A", "Sentinel-2B", "Sentinel-2C"], + "sat": { + "orbit_state": [OrbitState.ASCENDING, OrbitState.DESCENDING], + "platform_international_designator": ["2015-028A", "2017-013A", "2024-157A"], + }, +} + +# summaries.add("sci:doi", ["10.5270/S2_-znk9xsj"]) +# summaries.add("bands", bands) + +S2_MSI_L1C = { + "id": "sentinel-2-l1c", + "title": "Sentinel-2 Level-1C", + "description": ( + "The Sentinel-2 Level-1C product is composed of 110x110 km2 tiles (ortho-images in UTM/WGS84 projection). " + "Earth is subdivided on a predefined set of tiles, defined in UTM/WGS84 projection and using a 100 km step. " + "However, each tile has a surface of 110x110 km² in order to provide large overlap with the neighbouring. " + "The Level-1C product results from using a Digital Elevation Model (DEM) to project the image in cartographic " + "geometry. Per-pixel radiometric measurements are provided in Top Of Atmosphere (TOA) reflectances along with " + "the parameters to transform them into radiances." 
+ ), + "product_type": "S02MSIL1C", + "processing_level": "L1", + "instruments": ["msi"], + "gsd": [10, 20, 60], + "item_assets": {**S2_MSI_L1C_ASSETS}, +} diff --git a/src/eopf_stac/sentinel2/stac.py b/src/eopf_stac/sentinel2/stac.py index 6f8b6fd..46c11d8 100644 --- a/src/eopf_stac/sentinel2/stac.py +++ b/src/eopf_stac/sentinel2/stac.py @@ -1,10 +1,10 @@ import logging import os +import re from itertools import chain import antimeridian import pystac -from pystac.extensions.grid import GridExtension from pystac.extensions.projection import ProjectionExtension from pystac.extensions.scientific import ItemScientificExtension from pystac.extensions.view import ViewExtension @@ -12,7 +12,6 @@ SENTINEL_CONSTELLATION, SENTINEL_INSTRUMENTS, ) -from stactools.sentinel2.mgrs import MgrsExtension from eopf_stac.common.constants import ( EOPF_PROVIDER, @@ -21,14 +20,18 @@ SENTINEL_PROVIDER, ) from eopf_stac.common.stac import ( + create_cdse_link, fill_eo_properties, fill_eopf_properties, + fill_mgrs_grid_properties, fill_processing_properties, fill_product_properties, fill_sat_properties, fill_timestamp_properties, + fill_version_properties, + fix_geometry, get_datetimes, - get_identifier, + get_identifier_from_href, rearrange_bbox, ) from eopf_stac.sentinel2.assets import ( @@ -47,13 +50,19 @@ L2A_BAND_ASSETS_TO_PATH, L2A_SCL_ASSETS_TO_PATH, L2A_TCI_ASSETS_TO_PATH, - MGRS_PATTERN, ) logger = logging.getLogger(__name__) -def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> pystac.Item: +def create_item( + metadata: dict, + product_type: str, + asset_href_prefix: str, + cpm_version: str = None, + cdse_scene_id: str | None = None, + cdse_scene_href: str | None = None, +) -> pystac.Item: stac_discovery = metadata[".zattrs"]["stac_discovery"] other_metadata = metadata[".zattrs"]["other_metadata"] properties = stac_discovery["properties"] @@ -66,8 +75,10 @@ def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> py bbox = 
rearrange_bbox(stac_discovery.get("bbox")) + identifier = get_identifier_from_href(asset_href_prefix) + item = pystac.Item( - id=get_identifier(stac_discovery), + id=identifier, bbox=bbox, geometry=stac_discovery.get("geometry"), properties={}, @@ -76,6 +87,9 @@ def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> py end_datetime=end_datetime, ) + # -- Geometry (fix antimeridian, unclosed ring, etc) + fix_geometry(item) + # -- Common metadata mission = properties.get("mission") @@ -126,16 +140,15 @@ def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> py projection.centroid = {"lat": round(centroid.y, 5), "lon": round(centroid.x, 5)} # MGRS and Grid Extension - mgrs_match = MGRS_PATTERN.search(stac_discovery.get("id")) - if mgrs_match and len(mgrs_groups := mgrs_match.groups()) == 3: - mgrs = MgrsExtension.ext(item, add_if_missing=True) - mgrs.utm_zone = int(mgrs_groups[0]) - mgrs.latitude_band = mgrs_groups[1] - mgrs.grid_square = mgrs_groups[2] - grid = GridExtension.ext(item, add_if_missing=True) - grid.code = f"MGRS-{mgrs.utm_zone}{mgrs.latitude_band}{mgrs.grid_square}" - else: - logger.warning(f"Error populating MGRS and Grid Extensions fields from ID: {stac_discovery.get('id')}") + # First try to extract mgrs fields from identifier + mgrs_grid = fill_mgrs_grid_properties(item=item, identifier=identifier) + if not mgrs_grid: + logger.warning(f"Unable to populate MGRS and Grid Extensions fields from: {identifier}") + if cdse_scene_id is not None: + # Retry with cdse scene id + mgrs_grid = fill_mgrs_grid_properties(item=item, identifier=cdse_scene_id) + if not mgrs_grid: + logger.warning(f"Unable to populate MGRS and Grid Extensions fields from: {cdse_scene_id}") # View Extension sun_azimuth = other_metadata.get("mean_sun_azimuth_angle_in_deg_for_all_bands_all_detectors") @@ -149,7 +162,15 @@ def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> py # TODO view.azimuth view.incidence_angle 
# Processing Extension - fill_processing_properties(item, properties) + baseline_version = get_baseline_processing_version(identifier) + if baseline_version is None: + logger.warning(f"Unable to populate processing:version field from: {identifier}") + if cdse_scene_id is not None: + baseline_version = get_baseline_processing_version(cdse_scene_id) + if baseline_version is None: + logger.warning(f"Unable to populate processing:version field from: {cdse_scene_id}") + + fill_processing_properties(item, properties, cpm_version, baseline_version) # Product Extension fill_product_properties(item, product_type, properties) @@ -162,6 +183,9 @@ def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> py # EOPF Extension fill_eopf_properties(item, properties) + # Version Extension + fill_version_properties(item) + # -- Assets logger.debug("Creating assets ...") @@ -194,7 +218,21 @@ def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> py item.add_asset(key, asset) # -- Links - item.links.append(SENTINEL_LICENSE) + if cdse_scene_href is not None: + item.links.append(create_cdse_link(cdse_scene_href)) return item + + +def get_baseline_processing_version(identifier: str) -> str | None: + # S2B_MSIL1C_20240428T102559_N0510_R108_T32UPC_20240428T123125 + # S2A_MSIL2A_20250109T100401_N0511_R122_T34UCE_20250109T122750 + proc_version = None + if identifier is not None: + proc_version_pattern = re.compile(r"_N(\d{2})(\d{2})") + proc_version_match = proc_version_pattern.search(identifier) + if proc_version_match and len(proc_version_groups := proc_version_match.groups()) == 2: + proc_version = f"{proc_version_groups[0]}.{proc_version_groups[1]}" + + return proc_version diff --git a/src/eopf_stac/sentinel3/constants.py b/src/eopf_stac/sentinel3/constants.py index 9973d59..f235b1c 100644 --- a/src/eopf_stac/sentinel3/constants.py +++ b/src/eopf_stac/sentinel3/constants.py @@ -314,15 +314,64 @@ def get_slstr_bands(band_keys: list[str] | None = 
None) -> list[dict]: } SLSTR_L2_FRP_ASSETS: dict[str, ItemAssetDefinition] = { + "FRP_an": ItemAssetDefinition.create( + title="FRP_an measurements", + media_type=pystac.MediaType.ZARR, + description=None, + roles=[ROLE_DATA, ROLE_DATASET], + extra_fields={"bands": get_slstr_bands(["S05", "S06", "S07", "S10"])}, + ), + "FRP_bn": ItemAssetDefinition.create( + title="FRP_bn measurements", + media_type=pystac.MediaType.ZARR, + description=None, + roles=[ROLE_DATA, ROLE_DATASET], + extra_fields={"bands": get_slstr_bands(["S05", "S06", "S07", "S10"])}, + ), + "FRP_in": ItemAssetDefinition.create( + title="FRP_in measurements", + media_type=pystac.MediaType.ZARR, + description=None, + roles=[ROLE_DATA, ROLE_DATASET], + extra_fields={"bands": get_slstr_bands(["S05", "S06", "S07", "S10"])}, + ), PRODUCT_ASSET_KEY: get_item_asset_product(), PRODUCT_METADATA_ASSET_KEY: get_item_asset_metadata(), } SLSTR_L2_FRP_ASSETS_KEY_TO_PATH: dict[str:str] = { + "FRP_an": "measurements/anadir", + "FRP_bn": "measurements/bnadir", + "FRP_in": "measurements/inadir", PRODUCT_ASSET_KEY: "", PRODUCT_METADATA_ASSET_KEY: PRODUCT_METADATA_PATH, } +SYN_L2_AOD_ASSETS: dict[str, ItemAssetDefinition] = { + PRODUCT_ASSET_KEY: get_item_asset_product(), + PRODUCT_METADATA_ASSET_KEY: get_item_asset_metadata(), +} + +SYN_L2_VGP_ASSETS: dict[str, ItemAssetDefinition] = { + PRODUCT_ASSET_KEY: get_item_asset_product(), + PRODUCT_METADATA_ASSET_KEY: get_item_asset_metadata(), +} + +SYN_L2_VG1_ASSETS: dict[str, ItemAssetDefinition] = { + PRODUCT_ASSET_KEY: get_item_asset_product(), + PRODUCT_METADATA_ASSET_KEY: get_item_asset_metadata(), +} + +SYN_L2_V10_ASSETS: dict[str, ItemAssetDefinition] = { + PRODUCT_ASSET_KEY: get_item_asset_product(), + PRODUCT_METADATA_ASSET_KEY: get_item_asset_metadata(), +} + +SYN_L2_SYN_ASSETS: dict[str, ItemAssetDefinition] = { + PRODUCT_ASSET_KEY: get_item_asset_product(), + PRODUCT_METADATA_ASSET_KEY: get_item_asset_metadata(), +} + # -- Collection metadata S3_OLCI_L1_EFR = { @@ 
-335,7 +384,7 @@ def get_slstr_bands(band_keys: list[str] | None = None) -> list[dict]: ), "product_type": "S03OLCEFR", "processing_level": "L1", - "instrument": "olci", + "instruments": ["olci"], "gsd": [300], "item_assets": {**OLCI_L1_ASSETS}, } @@ -350,7 +399,7 @@ def get_slstr_bands(band_keys: list[str] | None = None) -> list[dict]: ), "product_type": "S03OLCERR", "processing_level": "L1", - "instrument": "olci", + "instruments": ["olci"], "gsd": [1200], "item_assets": {**OLCI_L1_ASSETS}, } @@ -363,7 +412,7 @@ def get_slstr_bands(band_keys: list[str] | None = None) -> list[dict]: ), "product_type": "S03OLCLFR", "processing_level": "L2", - "instrument": "olci", + "instruments": ["olci"], "gsd": [300], "item_assets": {**OLCI_L2_ASSETS}, } @@ -376,7 +425,7 @@ def get_slstr_bands(band_keys: list[str] | None = None) -> list[dict]: ), "product_type": "S03OLCLRR", "processing_level": "L2", - "instrument": "olci", + "instruments": ["olci"], "gsd": [1200], "item_assets": {**OLCI_L2_ASSETS}, } @@ -391,7 +440,7 @@ def get_slstr_bands(band_keys: list[str] | None = None) -> list[dict]: ), "product_type": "S03SLSRBT", "processing_level": "L1", - "instrument": "slstr", + "instruments": ["slstr"], "gsd": [500, 1000], "item_assets": {**SLSTR_L1_ASSETS}, } @@ -402,7 +451,7 @@ def get_slstr_bands(band_keys: list[str] | None = None) -> list[dict]: "description": "The Sentinel-3 SLSTR Level-2 LST product provides land surface temperature.", "product_type": "S03SLSLST", "processing_level": "L2", - "instrument": "slstr", + "instruments": ["slstr"], "gsd": [500, 1000], "item_assets": {**SLSTR_L2_LST_ASSETS}, } @@ -415,12 +464,80 @@ def get_slstr_bands(band_keys: list[str] | None = None) -> list[dict]: ), "product_type": "S03SLSFRP", "processing_level": "L2", - "instrument": "slstr", + "instruments": ["slstr"], "gsd": [500, 1000], "item_assets": {**SLSTR_L2_FRP_ASSETS}, } -# TBD: SRAL, SYN +S3_SYN_L2_AOD = { + "id": "sentinel-3-syn-l2-aod", + "title": "Sentinel-3 SYN Level-2 AOD", + 
"description": ( + "The Sentinel-3 Level-2 AOD product is a global product over land and sea providing aerosol " + "optical thickness, surface reflectance and several aerosol characteristics on a wider resolution (4.5 km)." + ), + "product_type": "S03SYNAOD", + "processing_level": "L2", + "instruments": ["olci", "slstr"], + "gsd": [4500], + "item_assets": {**SYN_L2_AOD_ASSETS}, +} + +S3_SYN_L2_VGP = { + "id": "sentinel-3-syn-l2-vgp", + "title": "Sentinel-3 SYN Level-2 VGP", + "description": ( + "The Sentinel-3 Level-2 SYN VGP is a 1 km VEGETATION-Like product (~VGT-P) providing TOA reflectance." + ), + "product_type": "S03SYNVGP", + "processing_level": "L2", + "instruments": ["olci", "slstr"], + "gsd": [1000], + "item_assets": {**SYN_L2_VGP_ASSETS}, +} + +S3_SYN_L2_VG1 = { + "id": "sentinel-3-syn-l2-vg1", + "title": "Sentinel-3 SYN Level-2 VG1", + "description": ( + "The Sentinel-3 Level-2 SYN VG1 is a 1 km VEGETATION-Like product (~VGT-S1) " + "providing maximum NDVI value composite received during 1 day." + ), + "product_type": "S03SYNVG1", + "processing_level": "L2", + "instruments": ["olci", "slstr"], + "gsd": [1000], + "item_assets": {**SYN_L2_VG1_ASSETS}, +} + +S3_SYN_L2_V10 = { + "id": "sentinel-3-syn-l2-v10", + "title": "Sentinel-3 SYN Level-2 V10", + "description": ( + "The Sentinel-3 Level-2 SYN V10 is a 1 km VEGETATION-Like product (~VGT-S10) " + "providing maximum NDVI value composite received during 10 days." + ), + "product_type": "S03SYNV10", + "processing_level": "L2", + "instruments": ["olci", "slstr"], + "gsd": [1000], + "item_assets": {**SYN_L2_V10_ASSETS}, +} + +S3_SYN_L2_SYN = { + "id": "sentinel-3-syn-l2", + "title": "Sentinel-3 SYN Level-2", + "description": ( + "The Sentinel-3 Level-2 SYN products provide the surface reflectance and aerosol parameters over Land." 
+ ), + "product_type": "S03SYNSDR", + "processing_level": "L2", + "instruments": ["olci", "slstr"], + "gsd": [300], + "item_assets": {**SYN_L2_SYN_ASSETS}, +} + +# TBD: SRAL # Conversion not supported by CPM; no mapping @@ -432,7 +549,7 @@ def get_slstr_bands(band_keys: list[str] | None = None) -> list[dict]: ), "product_type": "S03OLCWFR", "processing_level": "L2", - "instrument": "olci", + "instruments": ["olci"], "gsd": 300, "item_assets": { PRODUCT_ASSET_KEY: get_item_asset_product(), diff --git a/src/eopf_stac/sentinel3/stac.py b/src/eopf_stac/sentinel3/stac.py index b66ea78..2d79911 100644 --- a/src/eopf_stac/sentinel3/stac.py +++ b/src/eopf_stac/sentinel3/stac.py @@ -12,18 +12,22 @@ SUPPORTED_S3_OLCI_L1_PRODUCT_TYPES, SUPPORTED_S3_OLCI_L2_PRODUCT_TYPES, SUPPORTED_S3_SLSTR_L1_PRODUCT_TYPES, + SUPPORTED_S3_SLSTR_L2_FRP_PRODUCT_TYPE, SUPPORTED_S3_SLSTR_L2_LST_PRODUCT_TYPE, THUMBNAIL_ASSET, ) from eopf_stac.common.stac import ( + create_cdse_link, fill_eo_properties, fill_eopf_properties, fill_processing_properties, fill_product_properties, fill_sat_properties, fill_timestamp_properties, + fill_version_properties, + fix_geometry, get_datetimes, - get_identifier, + get_identifier_from_href, is_valid_string, rearrange_bbox, ) @@ -35,6 +39,8 @@ SENTINEL3_METADATA, SLSTR_L1_ASSETS, SLSTR_L1_ASSETS_KEY_TO_PATH, + SLSTR_L2_FRP_ASSETS, + SLSTR_L2_FRP_ASSETS_KEY_TO_PATH, SLSTR_L2_LST_ASSETS, SLSTR_L2_LST_ASSETS_KEY_TO_PATH, ) @@ -47,7 +53,7 @@ def create_collection(collection_metadata: dict, thumbnail_href: str) -> pystac. 
summary_dict = { "constellation": [mission_metadata.get("constellation")], "platform": mission_metadata.get("platforms"), - "instruments": [collection_metadata.get("instrument")], + "instruments": collection_metadata.get("instruments"), "gsd": collection_metadata.get("gsd"), "processing:level": [collection_metadata.get("processing_level")], "product:type": [collection_metadata.get("product_type")], @@ -86,7 +92,14 @@ def create_collection(collection_metadata: dict, thumbnail_href: str) -> pystac. return collection -def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> pystac.Item: +def create_item( + metadata: dict, + product_type: str, + asset_href_prefix: str, + cpm_version: str = None, + cdse_scene_id: str | None = None, + cdse_scene_href: str | None = None, +) -> pystac.Item: stac_discovery = metadata[".zattrs"]["stac_discovery"] # other_metadata = metadata[".zattrs"]["other_metadata"] properties = stac_discovery["properties"] @@ -98,7 +111,7 @@ def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> py end_datetime = datetimes[2] item = pystac.Item( - id=get_identifier(stac_discovery), + id=get_identifier_from_href(asset_href_prefix), bbox=rearrange_bbox(stac_discovery.get("bbox")), geometry=stac_discovery.get("geometry"), properties={}, @@ -107,15 +120,18 @@ def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> py end_datetime=end_datetime, ) + # -- Geometry (fix antimeridian, unclosed ring, etc) + fix_geometry(item) + # -- Common metadata item.common_metadata.mission = SENTINEL3_METADATA["constellation"].capitalize() item.common_metadata.providers = SENTINEL3_METADATA["providers"] item.common_metadata.constellation = SENTINEL3_METADATA["constellation"] - # CPM workaround: instrument property which is not an array - if properties.get("instrument"): - item.common_metadata.instruments = [properties.get("instrument")] + # CPM workaround: instruments property which is not an array + if 
properties.get("instruments"): + item.common_metadata.instruments = [properties.get("instruments")] if properties.get("platform"): item.common_metadata.platform = properties.get("platform") @@ -139,7 +155,10 @@ def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> py fill_eo_properties(item, properties) # Processing Extension - fill_processing_properties(item, properties) + baseline_version = None + if properties.get("processing:software") is not None: + baseline_version = properties.get("processing:software").get("PUG") + fill_processing_properties(item, properties, cpm_version, baseline_version) # Product Extension fill_product_properties(item, product_type, properties) @@ -147,6 +166,9 @@ def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> py # EOPF Extension fill_eopf_properties(item, properties) + # Version Extension + fill_version_properties(item) + # -- Assets logger.debug("Creating assets ...") @@ -164,6 +186,9 @@ def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> py elif product_type in SUPPORTED_S3_SLSTR_L2_LST_PRODUCT_TYPE: asset_defintions = SLSTR_L2_LST_ASSETS asset_path_lookups = SLSTR_L2_LST_ASSETS_KEY_TO_PATH + elif product_type in SUPPORTED_S3_SLSTR_L2_FRP_PRODUCT_TYPE: + asset_defintions = SLSTR_L2_FRP_ASSETS + asset_path_lookups = SLSTR_L2_FRP_ASSETS_KEY_TO_PATH else: raise ValueError(f"Unsupported Sentinel-3 product type '{product_type}'") @@ -180,7 +205,8 @@ def create_item(metadata: dict, product_type: str, asset_href_prefix: str) -> py item.add_asset(key, asset) # -- Links - item.links.append(SENTINEL_LICENSE) + if cdse_scene_href is not None: + item.links.append(create_cdse_link(cdse_scene_href)) return item diff --git a/tests/common/test_stac.py b/tests/common/test_stac.py index 9eb604c..abde8b3 100644 --- a/tests/common/test_stac.py +++ b/tests/common/test_stac.py @@ -3,20 +3,33 @@ import pytest from dateutil.tz import tzutc -from eopf_stac.common.stac import 
get_datetimes, get_identifier, rearrange_bbox, validate_metadata +from eopf_stac.common.stac import ( + get_datetimes, + get_identifier_from_href, + rearrange_bbox, + validate_metadata, +) class TestSTAC: - def test_get_identifier(self): - tests = { - "S1A_S2_GRDH_1SDH_20250408T213530_20250408T213554_058671_0743C6_742E.SAFE": "S1A_S2_GRDH_1SDH_20250408T213530_20250408T213554_058671_0743C6_742E", - "S3A_OL_2_WFR____20250416T102344_20250416T102644_20250416T121904_0179_125_008_2340_MAR_O_NR_003.SEN3": "S3A_OL_2_WFR____20250416T102344_20250416T102644_20250416T121904_0179_125_008_2340_MAR_O_NR_003", - "S1A_IW_GRDH_1SDV_20250319T002519_20250319T002544_058366_07377B_ABA5.SAFE": "S1A_IW_GRDH_1SDV_20250319T002519_20250319T002544_058366_07377B_ABA5", - } + def test_get_identifier_from_href(self): + tests = [ + "S1A_S2_GRDH_1SDH_20250408T213530_20250408T213554_058671_0743C6_742E.SAFE", + "S3A_OL_2_WFR____20250416T102344_20250416T102644_20250416T121904_0179_125_008_2340_MAR_O_NR_003.SEN3/", + "S02MSIL2A_20250109T100401_0000_A122_T323.zarr", + "s3://eopf-data/cpm_v261/S01SSMGRD_20250408T213530_0024_A149__8D6.zarr", + "/path/to/data/converted/cpm-2.6.1/S03SLSFRP_20250512T184151_0180_A384_SBFC.zarr", + "https://objects.eodc.eu/e05ab01a9d56408d82ac32d69a5aae2a:202507-s02msil2a/23/products/cpm_v256/S2A_MSIL2A_20250723T033201_N0511_R018_T48QUF_20250723T082115.zarr", + ] - for actual, expected in tests.items(): - metadata = {"id": actual} - assert expected == get_identifier(metadata) + for href in tests: + identifier = get_identifier_from_href(href) + assert identifier is not None + assert len(identifier) > 0 + assert not identifier.endswith("/") + assert not identifier.endswith(".zarr") + assert not identifier.endswith(".SAFE") + assert not identifier.endswith(".SEN3") def test_validate_metadata(self): zattrs = {".zattrs": {"stac_discovery": {}, "other_metadata": {}}} diff --git a/tests/data-files/S01SIWGRH_20250319T002519_0024_a019_T651.json 
b/tests/data-files/S01SIWGRH_20250319T002519_0024_a019_T651.json deleted file mode 100644 index f07b0e4..0000000 --- a/tests/data-files/S01SIWGRH_20250319T002519_0024_a019_T651.json +++ /dev/null @@ -1,7091 +0,0 @@ -{ - "metadata": { - ".zattrs": { - "other_metadata": { - "azimuth_steering_rate": 0.0, - "eopf_category": "eocontainer", - "history": [ - { - "output": "Downlinked Stream", - "processingTime": "2025-03-19T01:41:14.154927", - "processor": null, - "type": "Raw Data" - }, - { - "inputs": { - "Applicable Document1": "S-1 Core PDGS S-1 Level-0 Product Format Specifications S1PD.SP.00110.ASTR", - "Applicable Document2": "Sentinel-1 SAR Space Packet Protocol Data Unit S1-IF-ASD-PL-0007", - "Raw Data": "Downlinked Stream" - }, - "output": "Unknown", - "processingTime": "2025-03-19T01:47:00.872865", - "processor": null, - "type": "Raw Data" - }, - { - "output": "Downlinked Stream", - "processingTime": "2025-03-19T01:40:37.150098", - "processor": null, - "type": "Raw Data" - }, - { - "inputs": { - "Applicable Document1": "S-1 Core PDGS S-1 Level-0 Product Format Specifications S1PD.SP.00110.ASTR", - "Applicable Document2": "Sentinel-1 SAR Space Packet Protocol Data Unit S1-IF-ASD-PL-0007", - "Raw Data": "Downlinked Stream" - }, - "output": "Unknown", - "processingTime": "2025-03-19T01:47:25.182922", - "processor": null, - "type": "Raw Data" - }, - { - "inputs": { - "Applicable Document1": "S-1 Core PDGS S-1 Level-0 Product Format Specifications S1PD.SP.00110.ASTR", - "Applicable Document2": "Sentinel-1 SAR Space Packet Protocol Data Unit S1-IF-ASD-PL-0007", - "Raw Data1": "Unknown", - "Raw Data2": "Unknown" - }, - "output": "/data/CDP/production//2428364/S1A_IW_RAW__0SDV_20250319T002515_20250319T002547_058366_07377B_C548.SAFE", - "processingCentre": "S1A-PS, ESA", - "processingTime": "2025-03-19T01:47:37.310749", - "processor": "", - "type": "Level-0 Product" - }, - { - "inputs": { - "AUX_CAL": 
"/data/CDP/production//2428364/S1A_AUX_CAL_V20190228T092500_G20240327T102320.SAFE", - "AUX_INS": "/data/CDP/production//2428364/S1A_AUX_INS_V20231201T000000_G20250116T092837.SAFE", - "AUX_PP1": "/data/CDP/production//2428364/S1A_AUX_PP1_V20190228T092500_G20241125T134138.SAFE", - "AUX_RES": "/data/CDP/production//2428364/S1A_OPER_AUX_RESORB_OPOD_20250319T014931_V20250318T215916_20250319T011646.EOF", - "Level-0 Annotation Product": "/data/CDP/production//2428364/S1A_IW_RAW__0ADV_20250319T002450_20250319T002609_058366_07377B_FE81.SAFE", - "Level-0 Calibration Product": "/data/CDP/production//2428364/S1A_IW_RAW__0CDV_20250319T002450_20250319T002609_058366_07377B_4CF2.SAFE", - "Level-0 Noise Product": "/data/CDP/production//2428364/S1A_IW_RAW__0NDV_20250319T002450_20250319T002609_058366_07377B_EDBC.SAFE", - "Level-0 Product": "/data/CDP/production//2428364/S1A_IW_RAW__0SDV_20250319T002515_20250319T002547_058366_07377B_C548.SAFE", - "product definition": "Sentinel-1 Product Definition (S1-RS-MDA-52-7440) release 2/7", - "product specification": "Sentinel-1 Product Specification (S1-RS-MDA-52-7441) release 3/14" - }, - "output": "S1A_IW_SL1__1_DV_20250319T002516_20250319T002548_058366_07377B_9A55.SAFE", - "processingCentre": "S1A-PS, ESA", - "processingTime": "2025-03-19T02:01:43.000000", - "processor": "Sentinel-1 IPF", - "type": "Level-1 Intermediate SLC Product", - "version": "003.91" - }, - { - "inputs": { - "Level-1 Intermediate SLC Product": "S1A_IW_SL1__1_DV_20250319T002516_20250319T002548_058366_07377B_9A55.SAFE" - }, - "output": "", - "processingCentre": "S1A-PS, ESA", - "processingTime": "2025-03-19T02:03:03.000000", - "processor": "Sentinel-1 IPF", - "type": "L1", - "version": "003.91" - } - ], - "platform_heading": 0.0, - "projection": "MISSING", - "pulse_repetition_frequency": 0.0, - "range_sampling_rate": 0.0, - "timeliness_category": "MISSING", - "title": "MISSING" - }, - "stac_discovery": { - "assets": {}, - "bbox": [ - 81.802361, - 6.702116, - 79.248627, 
- 8.663423 - ], - "geometry": { - "coordinates": [ - [ - [ - 81.478508, - 6.702116 - ], - [ - 79.248627, - 7.157641 - ], - [ - 79.563393, - 8.663423 - ], - [ - 81.802361, - 8.211793 - ] - ] - ], - "type": "Polygon" - }, - "id": "SAR Standard L1 Product", - "links": [ - { - "href": "./.zattrs.json", - "rel": "self", - "type": "application/json" - }, - "S01SIWGRD_20250319T002519_0025_A334_ABA5_07377B_VV", - "S01SIWGRD_20250319T002519_0025_A334_ABA5_07377B_VH" - ], - "properties": { - "constellation": "sentinel-1", - "created": "2025-03-19T02:03:03.000000Z", - "datetime": "null", - "end_datetime": "2025-03-19T00:25:44.165074Z", - "eopf:datatake_id": "472955", - "eopf:instrument_mode": "IW", - "instrument": "sar", - "platform": "sentinel-1a", - "processing:expression": "systematic", - "processing:software": { - "Sentinel-1 IPF": "003.91" - }, - "product:timeliness": "MISSING", - "product:timeliness_category": "NRT", - "product:type": "S01SIWGRH", - "provider": [ - { - "name": "S1A-PS", - "roles": [ - "processor" - ] - }, - { - "name": "ESA", - "roles": [ - "producer" - ] - } - ], - "sar:center_frequency": 0, - "sar:frequency_band": "MISSING", - "sar:instrument_mode": "IW", - "sar:looks_equivalent_number": 0, - "sar:pixel_spacing_range": 0, - "sar:polarizations": [ - "VV", - "VH" - ], - "sar:product_type": "GRD", - "sar:resolution_azimuth": 0, - "sar:resolution_range": 0, - "sat:absolute_orbit": 58366, - "sat:anx_datetime": "2025-03-18T23:38:05.865305", - "sat:orbit_state": "descending", - "sat:relative_orbit": 19, - "start_datetime": "2025-03-19T00:25:19.166197Z", - "view:azimuth": 0, - "view:incidence_angle": 0, - "view:off_nadir": 0 - }, - "stac_extensions": [ - "https://stac-extensions.github.io/eopf/v1.0.0/schema.json", - "https://stac-extensions.github.io/product/v0.1.0/schema.json", - "https://stac-extensions.github.io/eo/v1.1.0/schema.json", - "https://stac-extensions.github.io/sat/v1.0.0/schema.json", - 
"https://stac-extensions.github.io/view/v1.0.0/schema.json", - "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", - "https://stac-extensions.github.io/processing/v1.2.0/schema.json", - "https://stac-extensions.github.io/sar/v1.0.0/schema.json" - ], - "stac_version": "1.0.0", - "type": "Feature" - } - }, - ".zgroup": { - "zarr_format": 2 - }, - "S01SIWGRD_20250319T002519_0025_A334_ABA5_07377B_VH/.zattrs": { - "other_metadata": { - "azimuth_steering_rate": 0.0, - "downlink_information": { - "azimuth_time": "2025-03-19T00:25:14.969260", - "baq_block_length": 256, - "decimation_filter_bandwidth": 56590000.0, - "ecc_number": 8, - "filter_length": 36, - "first_line_sensing_time": "2025-03-19T00:24:51.256767", - "instrument_config_id": 7, - "last_line_sensing_time": "2025-03-19T00:25:21.806570", - "mean_bit_rate": 2.333923816058796e-05, - "num_err_azimuth_beam_address": 0, - "num_err_baq_block_length": 0, - "num_err_baq_mode": 0, - "num_err_cal_mode": 0, - "num_err_cal_type": 0, - "num_err_calibration_beam_address": 0, - "num_err_data_take_id": 0, - "num_err_ecc_number": 0, - "num_err_elevation_beam_address": 0, - "num_err_instrument_config_id": 0, - "num_err_number_of_quads": 0, - "num_err_packet_count": 0, - "num_err_polarisation": 0, - "num_err_pri": 0, - "num_err_pri_count": 0, - "num_err_range_decimation": 0, - "num_err_rank": 0, - "num_err_rx_channel_id": 0, - "num_err_rx_gain": 0, - "num_err_sas_test_mode": 0, - "num_err_signal_type": 0, - "num_err_ssb_error_flag": 0, - "num_err_swap_flag": 0, - "num_err_swath_number": 0, - "num_err_swl": 0, - "num_err_swst": 0, - "num_err_sync_marker": 0, - "num_err_temp_comp": 8454, - "num_err_test_mode": 0, - "num_err_tx_pulse_number": 0, - "num_err_tx_pulse_start_frequency": 0, - "num_err_tx_ramp_rate": 0, - "num_isp_header_errors": 8454, - "pointing_status": { - "azimuth_time": "2025-03-19T00:24:50.492768" - }, - "prf": 1717.128973878037, - "pri": 0.0005823674372819869, - "rank": 9, - "rx_channel_id": 1, - 
"rx_gain": -4.0, - "sampling_frequency_after_decimation": 64345238.12571429, - "swath_number": 10, - "swl_azimuth_time": "2025-03-19T00:25:16.066513", - "swl_value": 0.0003719755779921818, - "swst_azimuth_time": "2025-03-19T00:25:16.066513", - "swst_value": 8.967696572995874e-05, - "tx_pulse_length": 5.240481033595628e-05, - "tx_pulse_ramp_rate": 1078230321255.894, - "tx_pulse_start_frequency": -28251534.19637256 - }, - "eopf_category": "eoproduct", - "history": [ - { - "output": "Downlinked Stream", - "processingTime": "2025-03-19T01:41:14.154927", - "processor": null, - "type": "Raw Data" - }, - { - "inputs": { - "Applicable Document1": "S-1 Core PDGS S-1 Level-0 Product Format Specifications S1PD.SP.00110.ASTR", - "Applicable Document2": "Sentinel-1 SAR Space Packet Protocol Data Unit S1-IF-ASD-PL-0007", - "Raw Data": "Downlinked Stream" - }, - "output": "Unknown", - "processingTime": "2025-03-19T01:47:00.872865", - "processor": null, - "type": "Raw Data" - }, - { - "output": "Downlinked Stream", - "processingTime": "2025-03-19T01:40:37.150098", - "processor": null, - "type": "Raw Data" - }, - { - "inputs": { - "Applicable Document1": "S-1 Core PDGS S-1 Level-0 Product Format Specifications S1PD.SP.00110.ASTR", - "Applicable Document2": "Sentinel-1 SAR Space Packet Protocol Data Unit S1-IF-ASD-PL-0007", - "Raw Data": "Downlinked Stream" - }, - "output": "Unknown", - "processingTime": "2025-03-19T01:47:25.182922", - "processor": null, - "type": "Raw Data" - }, - { - "inputs": { - "Applicable Document1": "S-1 Core PDGS S-1 Level-0 Product Format Specifications S1PD.SP.00110.ASTR", - "Applicable Document2": "Sentinel-1 SAR Space Packet Protocol Data Unit S1-IF-ASD-PL-0007", - "Raw Data1": "Unknown", - "Raw Data2": "Unknown" - }, - "output": "/data/CDP/production//2428364/S1A_IW_RAW__0SDV_20250319T002515_20250319T002547_058366_07377B_C548.SAFE", - "processingCentre": "S1A-PS, ESA", - "processingTime": "2025-03-19T01:47:37.310749", - "processor": "", - "type": 
"Level-0 Product" - }, - { - "inputs": { - "AUX_CAL": "/data/CDP/production//2428364/S1A_AUX_CAL_V20190228T092500_G20240327T102320.SAFE", - "AUX_INS": "/data/CDP/production//2428364/S1A_AUX_INS_V20231201T000000_G20250116T092837.SAFE", - "AUX_PP1": "/data/CDP/production//2428364/S1A_AUX_PP1_V20190228T092500_G20241125T134138.SAFE", - "AUX_RES": "/data/CDP/production//2428364/S1A_OPER_AUX_RESORB_OPOD_20250319T014931_V20250318T215916_20250319T011646.EOF", - "Level-0 Annotation Product": "/data/CDP/production//2428364/S1A_IW_RAW__0ADV_20250319T002450_20250319T002609_058366_07377B_FE81.SAFE", - "Level-0 Calibration Product": "/data/CDP/production//2428364/S1A_IW_RAW__0CDV_20250319T002450_20250319T002609_058366_07377B_4CF2.SAFE", - "Level-0 Noise Product": "/data/CDP/production//2428364/S1A_IW_RAW__0NDV_20250319T002450_20250319T002609_058366_07377B_EDBC.SAFE", - "Level-0 Product": "/data/CDP/production//2428364/S1A_IW_RAW__0SDV_20250319T002515_20250319T002547_058366_07377B_C548.SAFE", - "product definition": "Sentinel-1 Product Definition (S1-RS-MDA-52-7440) release 2/7", - "product specification": "Sentinel-1 Product Specification (S1-RS-MDA-52-7441) release 3/14" - }, - "output": "S1A_IW_SL1__1_DV_20250319T002516_20250319T002548_058366_07377B_9A55.SAFE", - "processingCentre": "S1A-PS, ESA", - "processingTime": "2025-03-19T02:01:43.000000", - "processor": "Sentinel-1 IPF", - "type": "Level-1 Intermediate SLC Product", - "version": "003.91" - }, - { - "inputs": { - "Level-1 Intermediate SLC Product": "S1A_IW_SL1__1_DV_20250319T002516_20250319T002548_058366_07377B_9A55.SAFE" - }, - "output": "", - "processingCentre": "S1A-PS, ESA", - "processingTime": "2025-03-19T02:03:03.000000", - "processor": "Sentinel-1 IPF", - "type": "L1", - "version": "003.91" - } - ], - "image_information": {}, - "platform_heading": 0.0, - "processing_information": { - "azimuth_processing": { - "look_bandwidth": 327.0, - "look_overlap": 0.0, - "number_of_looks": 1, - "processing_bandwidth": 327.0, 
- "total_bandwidth": 327.0, - "window_coefficient": 0.7 - }, - "input_dimensions": { - "azimuth_time": "2025-03-19T00:25:16.081259", - "number_of_input_lines": 16368, - "number_of_input_samples": 23846 - }, - "processor_scaling_factor": 880278200000.0, - "range_processing": { - "look_bandwidth": 14100000.0, - "look_overlap": 3500552.958257202, - "number_of_looks": 5, - "processing_bandwidth": 56500000.0, - "total_bandwidth": 56504455.48389234, - "window_coefficient": 0.7 - } - }, - "product_quality": { - "azimuth_time": "2025-03-19T00:25:19.166197", - "doppler_centroid_quality": { - "dc_method": "Data Analysis", - "doppler_centroid_uncertain_flag": true - }, - "downlink_quality": { - "chirp_source_used": "Nominal", - "downlink_gaps_in_input_data_significant_flag": true, - "downlink_missing_lines_significant_flag": true, - "i_input_data_mean": 0.150081604719162, - "i_input_data_std_dev": 4.104006767272949, - "input_data_mean_outside_nominal_range_flag": true, - "input_data_st_dev_outside_nominal_range_flag": true, - "instrument_gaps_in_input_data_significant_flag": true, - "instrument_missing_lines_significant_flag": true, - "invalid_downlink_params_flag": true, - "mean_pg_product_amplitude": 0.6961150765419006, - "mean_pg_product_phase": -1.407727122306824, - "num_downlink_input_data_gaps": 0, - "num_downlink_input_missing_lines": 0, - "num_instrument_input_data_gaps": 0, - "num_instrument_input_missing_lines": 0, - "num_ssb_error_input_data_gaps": 0, - "num_ssb_error_input_missing_lines": 0, - "pg_product_derivation_failed_flag": true, - "pg_source_used": "Extracted", - "q_input_data_mean": 0.3690513968467712, - "q_input_data_std_dev": 4.052077770233154, - "replica_reconstruction_failed_flag": true, - "rrf_spectrum_used": "Extended Tapered", - "ssb_error_gaps_in_input_data_significant_flag": true, - "ssb_error_missing_lines_significant_flag": true, - "std_dev_pg_product_amplitude": 0.0661095355187528, - "std_dev_pg_product_phase": 0.1972076973040096 - }, - 
"image_quality": { - "output_data_mean_outside_nominal_range_flag": true, - "output_data_st_dev_outside_nominal_range_flag": true - }, - "raw_data_analysis_quality": { - "i_bias": 0.150081604719162, - "i_bias_significance_flag": true, - "iq_gain_imbalance": 1.012814998626709, - "iq_gain_significance_flag": true, - "iq_quadrature_departure": 0.02684994973242283, - "iq_quadrature_departure_significance_flag": true, - "q_bias": 0.3690513968467712, - "q_bias_significance_flag": true - } - }, - "projection": "MISSING", - "pulse_repetition_frequency": 0.0, - "range_sampling_rate": 0.0, - "raw_data_analysis": { - "azimuth_time": "2025-03-19T00:25:14.969260", - "i_bias": 0.150081604719162, - "i_bias_lower_bound": -0.004527978133410215, - "i_bias_upper_bound": 0.004527978133410215, - "i_bias_used_for_correction": 0.2673436999320984, - "iq_gain_imbalance": 1.012814998626709, - "iq_gain_imbalance_used_for_correction": 0.9776973128318787, - "iq_gain_lower_bound": 0.9988967180252075, - "iq_gain_upper_bound": 1.001103043556213, - "iq_quadrature_departure": 0.02684994973242283, - "iq_quadrature_departure_lower_bound": -0.8577287197113037, - "iq_quadrature_departure_upper_bound": 0.9114223122596741, - "iq_quadrature_departure_used_for_correction": 0.1142786964774132, - "q_bias": 0.3690513968467712, - "q_bias_lower_bound": -0.004470684099942446, - "q_bias_upper_bound": 0.004470684099942446, - "q_bias_used_for_correction": 0.2957572937011719 - }, - "reference_replica": { - "azimuth_time": "2025-03-19T00:25:16.066497", - "chirp_source": "Nominal", - "pg_source": "Extracted", - "time_delay": 4.364419e-07 - }, - "rfi": { - "radio_frequency_interference": { - "rfi_burst_report": { - "azimuth_time": [ - "2025-03-19T00:25:14.969260", - "2025-03-19T00:25:17.727817", - "2025-03-19T00:25:20.488429", - "2025-03-19T00:25:23.249041", - "2025-03-19T00:25:26.007597", - "2025-03-19T00:25:28.764098", - "2025-03-19T00:25:31.524710", - "2025-03-19T00:25:34.281211", - "2025-03-19T00:25:37.039768", - 
"2025-03-19T00:25:39.798325", - "2025-03-19T00:25:42.556881", - "2025-03-19T00:25:15.918927", - "2025-03-19T00:25:18.675428", - "2025-03-19T00:25:21.433985", - "2025-03-19T00:25:24.192541", - "2025-03-19T00:25:26.949042", - "2025-03-19T00:25:29.707599", - "2025-03-19T00:25:32.466155", - "2025-03-19T00:25:35.224712", - "2025-03-19T00:25:37.983268", - "2025-03-19T00:25:40.741825", - "2025-03-19T00:25:43.498326", - "2025-03-19T00:25:16.872705", - "2025-03-19T00:25:19.629206", - "2025-03-19T00:25:22.387763", - "2025-03-19T00:25:25.146319", - "2025-03-19T00:25:27.908987", - "2025-03-19T00:25:30.665488", - "2025-03-19T00:25:33.426100", - "2025-03-19T00:25:36.188768", - "2025-03-19T00:25:38.943213", - "2025-03-19T00:25:41.695603", - "2025-03-19T00:25:44.456215" - ], - "frequency_domain": {}, - "in_band_out_band_power_ratio": [ - 2.649377, - 3.545179, - 4.095889, - 4.735836, - 4.996821, - 5.096524, - 5.264029, - 5.375037, - 5.370361, - 5.452934, - 5.270215, - 5.45241, - 5.392192, - 5.482689, - 5.671585, - 5.608797, - 5.708266, - 5.828252, - 5.872991, - 5.786286, - 5.723807, - 5.804662, - 4.61628, - 4.675713, - 4.889071, - 5.047597, - 4.736124, - 4.305416, - 4.160142, - 3.4282, - 2.692275, - 2.303042, - 2.259758 - ], - "time_domain": {} - }, - "rfi_detection_from_noise_report": { - "max_fisher_z": [ - 3.352104, - 3.404781, - 3.48189, - 2.91028, - 3.960063, - 3.349052, - 3.412021, - 4.390975, - 3.645367, - 3.592229, - 3.608615, - 3.29825, - 3.492459, - 3.947762, - 3.425074, - 3.8069, - 4.269464, - 3.796329, - 4.773983, - 3.810817, - 4.02652, - 3.813924, - 3.865636, - 3.868048, - 5.071112, - 3.745871, - 3.505671, - 3.310105, - 4.647959, - 3.956942, - 2.919282, - 3.767288, - 3.129921, - 2.969668, - 3.398469, - 3.327206, - 3.969509, - 3.535412 - ], - "max_kl_divergence": [ - 3.964864, - 3.992482, - 2.909149, - 2.549565, - 3.628023, - 2.742988, - 2.729814, - 6.869903, - 3.186701, - 3.130119, - 3.081134, - 2.745461, - 2.907596, - 3.724871, - 2.874417, - 3.315729, - 4.222542, - 
3.375285, - 5.191179, - 3.41997, - 3.786592, - 3.787151, - 3.428886, - 4.024864, - 5.774247, - 3.238413, - 2.861144, - 2.680607, - 5.021134, - 3.637823, - 2.160239, - 3.353565, - 2.29807, - 2.164253, - 2.722573, - 2.665523, - 3.604549, - 2.983123 - ], - "max_rfi_psd": [ - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0 - ], - "noise_sensing_time": [ - "2025-03-19T00:25:16.081275", - "2025-03-19T00:25:18.839530", - "2025-03-19T00:25:21.597816", - "2025-03-19T00:25:24.356101", - "2025-03-19T00:25:27.114372", - "2025-03-19T00:25:29.872642", - "2025-03-19T00:25:32.630928", - "2025-03-19T00:25:35.389198", - "2025-03-19T00:25:38.147483", - "2025-03-19T00:25:40.905754", - "2025-03-19T00:25:43.664024", - "2025-03-19T00:25:46.422309", - "2025-03-19T00:25:14.155189", - "2025-03-19T00:25:16.913459", - "2025-03-19T00:25:19.671745", - "2025-03-19T00:25:22.430015", - "2025-03-19T00:25:25.188301", - "2025-03-19T00:25:27.946571", - "2025-03-19T00:25:30.704841", - "2025-03-19T00:25:33.463127", - "2025-03-19T00:25:36.221412", - "2025-03-19T00:25:38.979682", - "2025-03-19T00:25:41.737953", - "2025-03-19T00:25:44.496238", - "2025-03-19T00:25:47.254524", - "2025-03-19T00:25:15.233222", - "2025-03-19T00:25:17.991493", - "2025-03-19T00:25:20.749778", - "2025-03-19T00:25:23.508064", - "2025-03-19T00:25:26.266334", - "2025-03-19T00:25:29.024620", - "2025-03-19T00:25:31.782890", - "2025-03-19T00:25:34.541160", - "2025-03-19T00:25:37.299446", - "2025-03-19T00:25:40.057731", - "2025-03-19T00:25:42.815986", - "2025-03-19T00:25:45.574272", - "2025-03-19T00:25:48.332557" - ], - "rfi_detected": [ - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - 
"false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false", - "false" - ] - } - } - }, - "swath_merging": { - "swath_bounds_azimuth_time": "2025-03-19T00:25:19.166197", - "swath_bounds_first_azimuth_line": 0, - "swath_bounds_first_range_sample": 0, - "swath_bounds_last_azimuth_line": 16794 - }, - "timeliness_category": "MISSING", - "title": "MISSING" - }, - "stac_discovery": { - "assets": {}, - "bbox": [ - 81.802361, - 6.702116, - 79.248627, - 8.663423 - ], - "geometry": { - "coordinates": [ - [] - ], - "type": "Polygon" - }, - "id": "SAR Standard L1 Product", - "links": [ - { - "href": "./.zattrs.json", - "rel": "self", - "type": "application/json" - } - ], - "properties": { - "constellation": "sentinel-1", - "created": "metadataSection/metadataObject[@ID='processing']/metadataWrap/xmlData/safe:processing/@stopZ", - "datetime": "null", - "end_datetime": "metadataSection/metadataObject[@ID='acquisitionPeriod']/metadataWrap/xmlData/safe:acquisitionPeriod/safe:stopTimeZ", - "instrument": "sar", - "platform": "concat(metadatasection/metadataobject[@id='platform']/metadatawrap/xmldata/safe:platform/safe:familyname, metadatasection/metadataobject[@id='platform']/metadatawrap/xmldata/safe:platform/safe:number)", - "processing:expression": "systematic", - "processing:software": { - "": "" - }, - "processing:version": "004.00", - "product:timeliness": "MISSING", - "product:timeliness_category": "NRT", - "product:type": "S01SIWGRH", - "provider": [ - { - "name": "No data about provider", - "roles": [ - "processor" - ] - }, - { - "name": "No data about provider", - "roles": [ - "producer" - ] - } - ], - "sar:center_frequency": 0, - "sar:frequency_band": "MISSING", - "sar:looks_equivalent_number": 0, - "sar:pixel_spacing_range": 0, - "sar:resolution_azimuth": 0, - "sar:resolution_range": 0, - "sat:orbit_state": [], - 
"start_datetime": "metadataSection/metadataObject[@ID='acquisitionPeriod']/metadataWrap/xmlData/safe:acquisitionPeriod/safe:startTimeZ", - "view:azimuth": 0, - "view:incidence_angle": 0, - "view:off_nadir": 0 - }, - "stac_extensions": [ - "https://stac-extensions.github.io/eopf/v1.0.0/schema.json", - "https://stac-extensions.github.io/product/v0.1.0/schema.json", - "https://stac-extensions.github.io/eo/v1.1.0/schema.json", - "https://stac-extensions.github.io/sat/v1.0.0/schema.json", - "https://stac-extensions.github.io/view/v1.0.0/schema.json", - "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", - "https://stac-extensions.github.io/processing/v1.2.0/schema.json", - "https://stac-extensions.github.io/sar/v1.0.0/schema.json" - ], - "stac_version": "1.0.0", - "type": "Feature" - } - }, - "S01SIWGRD_20250319T002519_0025_A334_ABA5_07377B_VH/.zgroup": { - "zarr_format": 2 - }, - "S01SIWGRD_20250319T002519_0025_A334_ABA5_07377B_VH/conditions/.zattrs": {}, - "S01SIWGRD_20250319T002519_0025_A334_ABA5_07377B_VH/conditions/.zgroup": { - "zarr_format": 2 - }, - "S01SIWGRD_20250319T002519_0025_A334_ABA5_07377B_VH/conditions/antenna_pattern/.zattrs": {}, - "S01SIWGRD_20250319T002519_0025_A334_ABA5_07377B_VH/conditions/antenna_pattern/.zgroup": { - "zarr_format": 2 - }, - "S01SIWGRD_20250319T002519_0025_A334_ABA5_07377B_VH/conditions/antenna_pattern/azimuth_time/.zarray": { - "chunks": [ - 27 - ], - "compressor": { - "blocksize": 0, - "clevel": 3, - "cname": "zstd", - "id": "blosc", - "shuffle": 2 - }, - "dtype": " 10%, set to 2 if inside 10km-dilated ice edge area", - "short_name": "owi_mask", - "valid_max": 8, - "valid_min": 0 - }, - "coordinates": "latitude longitude", - "flag_meanings": "valid_data land ice no_data", - "flag_values": "[0, 1, 2, 4]", - "valid_max": 8, - "valid_min": 0 - }, - "owi/S01SIWOCN_20250321T063156_0025_A334_2479_0738CE_VV/conditions/nesz/.zarray": { - "chunks": [ - 166, - 265, - 2 - ], - "compressor": { - "blocksize": 0, - "clevel": 
3, - "cname": "zstd", - "id": "blosc", - "shuffle": 2 - }, - "dtype": " dict: @@ -21,15 +26,103 @@ def get_metadata(file: str) -> dict: return metadata +def get_eopf_product_info(path: str): + metadata_file = f"{path}/.zmetadata" + + fs = fsspec.filesystem("file") + f = fs.open(metadata_file, "rb") + zmetadata = json.load(f) + metadata = validate_metadata(zmetadata) + stac_discovery = metadata[".zattrs"]["stac_discovery"] + stac_item_id = get_identifier(stac_discovery) + cpm_version = get_cpm_version(path) + baseline_version = get_baseline_processing_version(stac_item_id) + + eopf_id = os.path.splitext(os.path.basename(path))[0] + eopf_product = { + "stac_item_id": stac_item_id, + "stac_item_file_path": os.path.join("tests", f"{eopf_id}.json"), + "eopf_id": eopf_id, + "metadata_file": metadata_file, + "url": f"s3://eopf-data/cpm-{cpm_version}/{eopf_id}.zarr", + "cpm_version": cpm_version, + "baseline_version": baseline_version, + } + return eopf_product + + +def create_stac_item_s1(test_product: dict): + path = test_product.get("path") + cpm = test_product.get("cpm") + + eopf_id = os.path.splitext(os.path.basename(path))[0] + url = f"s3://eopf-data/cpm-{cpm}/{eopf_id}.zarr" + metadata_file = f"{path}/.zmetadata" + + metadata = get_metadata(metadata_file) + product_type = get_product_type(metadata) + + return create_item_s1( + metadata=metadata, + product_type=product_type, + asset_href_prefix=url, + cpm_version=cpm, + ) + + +def create_stac_item_s2(test_product: dict): + path = test_product.get("path") + cpm = test_product.get("cpm") + source_uri = test_product.get("source_uri") + + eopf_id = os.path.splitext(os.path.basename(path))[0] + url = f"s3://eopf-data/cpm-{cpm}/{eopf_id}.zarr" + metadata_file = f"{path}/.zmetadata" + + metadata = get_metadata(metadata_file) + product_type = get_product_type(metadata) + + return create_item_s2( + metadata=metadata, product_type=product_type, asset_href_prefix=url, cpm_version=cpm, cdse_scene_id=source_uri + ) + + +def 
create_stac_item_s3(test_product: dict): + path = test_product.get("path") + cpm = test_product.get("cpm") + + eopf_id = os.path.splitext(os.path.basename(path))[0] + url = f"s3://eopf-data/cpm-{cpm}/{eopf_id}.zarr" + metadata_file = f"{path}/.zmetadata" + + metadata = get_metadata(metadata_file) + product_type = get_product_type(metadata) + + return create_item_s3( + metadata=metadata, + product_type=product_type, + asset_href_prefix=url, + cpm_version=cpm, + ) + + +def create_test_product_spec(product_spec: dict): + path = product_spec.get("path") + cpm = product_spec.get("cpm") + eopf_id = os.path.splitext(os.path.basename(path))[0] + product_spec["url"] = f"s3://eopf-data/cpm-{cpm}/{eopf_id}.zarr" + return product_spec + + def get_product_type(metadata: dict) -> str: return metadata[".zattrs"]["stac_discovery"]["properties"]["product:type"] def check_common_metadata(item: pystac.Item): - assert item.datetime is not None assert item.geometry is not None assert item.bbox is not None assert item.common_metadata.platform is not None + assert item.datetime is not None assert item.common_metadata.start_datetime is not None assert item.common_metadata.end_datetime is not None assert item.common_metadata.created is not None