From 8b7ba1b7f4a2af3e43f17f3980c14c1434fac4b3 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Wed, 15 Jan 2025 12:23:27 +0100 Subject: [PATCH 01/36] Clean code --- pyproject.toml | 3 ++- tests/all.py | 2 -- theia_dumper/__init__.py | 2 ++ theia_dumper/cli.py | 4 ++-- theia_dumper/stac.py | 19 ++++++++++++------- 5 files changed, 18 insertions(+), 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 44ccfbc..a262c0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta" [project] name = "theia_dumper" -version = "0.0.5" description = "THEIA-MTP geospatial data publisher" +dynamic = ["version"] authors = [{ name = "Rémi Cresson", email = "remi.cresson@inrae.fr" }] requires-python = ">=3.9" dependencies = [ @@ -14,6 +14,7 @@ dependencies = [ "pystac_client", "dinamis_sdk==0.3.4", "requests", + "click", "rio-cogeo", ] license = { text = "Apache-2.0" } diff --git a/tests/all.py b/tests/all.py index f176ec5..16f42a6 100755 --- a/tests/all.py +++ b/tests/all.py @@ -146,10 +146,8 @@ def test_collection(): def test_all(): """Test all.""" - # test collection test_collection() - # test item collection test_item_collection() diff --git a/theia_dumper/__init__.py b/theia_dumper/__init__.py index 03970df..ec85394 100644 --- a/theia_dumper/__init__.py +++ b/theia_dumper/__init__.py @@ -1 +1,3 @@ """Theia dumper package.""" + +__version__ = "0.1.0" diff --git a/theia_dumper/cli.py b/theia_dumper/cli.py index 8f6f70d..ebe2c78 100644 --- a/theia_dumper/cli.py +++ b/theia_dumper/cli.py @@ -3,8 +3,8 @@ import click from .stac import TransactionsHandler, delete_stac_obj - DEFAULT_STAC_EP = "https://stacapi-cdos.apps.okd.crocc.meso.umontpellier.fr" +DEFAULT_S3_EP = "https://s3-data.meso.umontpellier.fr" @click.group() @@ -24,7 +24,7 @@ def theia_dumper() -> None: "--storage_endpoint", type=str, help="Storage endpoint assets will be sent to", - default="https://s3-data.meso.umontpellier.fr", + default=DEFAULT_S3_EP, ) @click.option( "-b", diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index b8614a9..392660d 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -3,7 +3,7 @@ import os from ast import literal_eval from dataclasses import dataclass -from typing import List +from typing import List, cast from urllib.parse import urljoin import dinamis_sdk @@ -58,6 +58,7 @@ def create_session(): def post_or_put(url: str, data: dict): """Post or put data to url.""" + # TODO: use correct authentification headers = {"Authorization": f"Bearer {dinamis_sdk.auth.get_access_token()}"} sess = create_session() resp = sess.post(url, json=data, headers=headers) @@ -79,7 +80,7 @@ def post_or_put(url: str, data: dict): raise e -def load(obj_pth): +def load_stac_obj(obj_pth: str) -> Collection | ItemCollection | Item: """Load a STAC object serialized on disk.""" for obj_name, cls in { "collection": Collection, @@ -126,10 +127,13 @@ def get_col_items(col: Collection) -> List[Item]: """Retrieve collection items.""" col_href = get_col_href(col=col) return [ - load( - os.path.join(os.path.dirname(col_href), link.href[2:]) - if link.href.startswith("./") - else link.href + cast( + Item, + load_stac_obj( + os.path.join(os.path.dirname(col_href), link.href[2:]) + if link.href.startswith("./") + else link.href + ), ) for link in col.links if link.rel == "item" @@ -143,6 +147,7 @@ def delete_stac_obj(stac_endpoint: str, col_id: str, item_id: str | None = None) url = f"{stac_endpoint}/collections/{col_id}/items/{item_id}" else: url = f"{stac_endpoint}/collections/{col_id}" + # TODO: use correct auth resp = requests.delete( url, headers={"Authorization": f"Bearer {dinamis_sdk.auth.get_access_token()}"}, @@ -227,7 +232,7 @@ class TransactionsHandler: def load_and_publish(self, obj_pth: str): """Load and publish the serialized STAC object.""" - obj = load(obj_pth=obj_pth) + obj = load_stac_obj(obj_pth=obj_pth) if isinstance(obj, Collection): self.publish_collection_with_items(collection=obj) elif isinstance(obj, ItemCollection): -- GitLab From c087641499a58e4f2ca8310bbb1c248b8db3cf32 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Wed, 15 Jan 2025 12:54:53 +0100 Subject: [PATCH 02/36] Add new cli tools --- .gitlab-ci.yml | 10 +++- tests/test_get.py | 10 ++++ tests/{all.py => test_upload.py} | 6 +-- theia_dumper/cli.py | 47 +++++++++++++++--- theia_dumper/stac.py | 81 +++++++++++++++++++++++++------- 5 files changed, 127 insertions(+), 27 deletions(-) create mode 100755 tests/test_get.py rename tests/{all.py => test_upload.py} (96%) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index fbe0a46..b09625b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -23,10 +23,18 @@ stages: - Documentation - Pip -Test API: +Test Upload: extends: .static_analysis_with_pip_install stage: Test allow_failure: false except: [main] script: - python tests/all.py + +Test Get: + extends: .static_analysis_with_pip_install + stage: Test + allow_failure: false + except: [main] + script: + - python tests/test_get.py diff --git a/tests/test_get.py b/tests/test_get.py new file mode 100755 index 0000000..3c38a5d --- /dev/null +++ b/tests/test_get.py @@ -0,0 +1,10 @@ +"""Test file.""" + +from theia_dumper import stac, cli + +handler = stac.StacTransactionHandler( + stac_endpoint=cli.DEFAULT_STAC_EP, +) + +handler.list_collections_display() +handler.list_col_items_display("sentinel2-l2a-theia") diff --git a/tests/all.py b/tests/test_upload.py similarity index 96% rename from tests/all.py rename to tests/test_upload.py index 16f42a6..cca3e7c 100755 --- a/tests/all.py +++ b/tests/test_upload.py @@ -10,7 +10,7 @@ import requests from theia_dumper import stac -handler = stac.TransactionsHandler( +handler = stac.UploadTransactionsHandler( stac_endpoint="https://stacapi-cdos.apps.okd.crocc.meso.umontpellier.fr", storage_endpoint="https://s3-data.meso.umontpellier.fr", storage_bucket="sm1-gdc-tests", @@ -36,8 +36,8 @@ shutil.copyfile(RASTER_FILE1, RASTER_FILE2) def clear(): """Clear all test items and collection.""" for item_id in items_ids: - handler.delete(col_id=COL_ID, item_id=item_id) - handler.delete(col_id=COL_ID) + handler.delete_item_or_col(col_id=COL_ID, item_id=item_id) + handler.delete_item_or_col(col_id=COL_ID) def create_item(item_id: str): diff --git a/theia_dumper/cli.py b/theia_dumper/cli.py index ebe2c78..2e164fc 100644 --- a/theia_dumper/cli.py +++ b/theia_dumper/cli.py @@ -1,7 +1,7 @@ """Theia-dumper Command Line Interface.""" import click -from .stac import TransactionsHandler, delete_stac_obj +from .stac import UploadTransactionsHandler, StacTransactionHandler DEFAULT_STAC_EP = "https://stacapi-cdos.apps.okd.crocc.meso.umontpellier.fr" DEFAULT_S3_EP = "https://s3-data.meso.umontpellier.fr" @@ -48,13 +48,12 @@ def publish( overwrite: bool, ): """Publish a STAC object (collection or item collection).""" - handler = TransactionsHandler( + UploadTransactionsHandler( stac_endpoint=stac_endpoint, storage_endpoint=storage_endpoint, storage_bucket=storage_bucket, assets_overwrite=overwrite, - ) - handler.load_and_publish(stac_obj_path) + ).load_and_publish(stac_obj_path) @theia_dumper.command(context_settings={"show_default": True}) @@ -71,5 +70,41 @@ def delete( col_id: str, item_id: str, ): - """Publish a STAC object (collection or item collection).""" - delete_stac_obj(stac_endpoint=stac_endpoint, col_id=col_id, item_id=item_id) + """Delete a STAC object (collection or item).""" + StacTransactionHandler( + stac_endpoint=stac_endpoint, + ).delete_item_or_col(col_id=col_id, item_id=item_id) + + +@theia_dumper.command(context_settings={"show_default": True}) +@click.option( + "--stac_endpoint", + help="Endpoint to which STAC objects will be sent", + type=str, + default=DEFAULT_STAC_EP, +) +def list_cols( + stac_endpoint: str, +): + """List collections.""" + StacTransactionHandler( + stac_endpoint=stac_endpoint, + ).list_collections_display() + + +@theia_dumper.command(context_settings={"show_default": True}) +@click.option( + "--stac_endpoint", + help="Endpoint to which STAC objects will be sent", + type=str, + default=DEFAULT_STAC_EP, +) +@click.option("-c", "--col_id", type=str, help="STAC collection ID", required=True) +def list_col_items( + stac_endpoint: str, + col_id: str, +): + """List collection items.""" + StacTransactionHandler( + stac_endpoint=stac_endpoint, + ).list_col_items_display(col_id=col_id) diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index 392660d..6a1bffa 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -5,6 +5,7 @@ from ast import literal_eval from dataclasses import dataclass from typing import List, cast from urllib.parse import urljoin +import operator import dinamis_sdk import dinamis_sdk.auth @@ -140,25 +141,71 @@ def get_col_items(col: Collection) -> List[Item]: ] -def delete_stac_obj(stac_endpoint: str, col_id: str, item_id: str | None = None): - """Delete an item or a collection.""" - logger.info("Deleting %s%s", col_id, f"/{item_id}" if item_id else "") - if item_id: - url = f"{stac_endpoint}/collections/{col_id}/items/{item_id}" - else: - url = f"{stac_endpoint}/collections/{col_id}" - # TODO: use correct auth - resp = requests.delete( - url, - headers={"Authorization": f"Bearer {dinamis_sdk.auth.get_access_token()}"}, - timeout=5, - ) - if resp.status_code != 200: - logger.warning("Deletion failed (%s)", resp.text) +@dataclass +class StacTransactionHandler: + """Handle STAC and storage transactions.""" + + stac_endpoint: str + + def delete_item_or_col(self, col_id: str, item_id: str | None = None): + """Delete an item or a collection.""" + logger.info("Deleting %s%s", col_id, f"/{item_id}" if item_id else "") + if item_id: + url = f"{self.stac_endpoint}/collections/{col_id}/items/{item_id}" + else: + url = f"{self.stac_endpoint}/collections/{col_id}" + # TODO: use correct auth + resp = requests.delete( + url, + headers={"Authorization": f"Bearer {dinamis_sdk.auth.get_access_token()}"}, + timeout=5, + ) + if resp.status_code != 200: + logger.warning("Deletion failed (%s)", resp.text) + + def list_collections(self): + """List collections.""" + logger.info("Listing collections") + url = f"{self.stac_endpoint}/collections" + resp = requests.get( + url, + timeout=5, + ) + if resp.status_code != 200: + logger.warning("Get failed (%s)", resp.text) + cols = resp.json()["collections"] + cols.sort(key=operator.itemgetter("id")) + return cols + + def list_collections_display(self): + """Display in terminal a list of available collections.""" + cols = self.list_collections() + print(f"{len(cols)} collections available") + for col in cols: + print("\t" + col["id"]) + + def list_col_items(self, col_id: str): + """Delete an item or a collection.""" + logger.info("Listing %s items", col_id) + url = f"{self.stac_endpoint}/collections/{col_id}/items" + resp = requests.get( + url, + timeout=5, + ) + if resp.status_code != 200: + logger.warning("Get failed (%s)", resp.text) + return resp.json() + + def list_col_items_display(self, col_id: str): + """Display in terminal items in a collection.""" + items = self.list_col_items(col_id=col_id)["features"] + print(f"{len(items)} items available") + for item in items: + print("\t" + item["id"]) @dataclass -class TransactionsHandler: +class UploadTransactionsHandler(StacTransactionHandler): """Handle STAC and storage transactions.""" stac_endpoint: str @@ -242,7 +289,7 @@ class TransactionsHandler: f"Invalid type, must be ItemCollection or Collection (got {type(obj)})" ) - def delete(self, col_id: str, item_id: str | None = None): + def delete_item_or_col(self, col_id: str, item_id: str | None = None): """Delete an item or a collection.""" delete_stac_obj( stac_endpoint=self.stac_endpoint, col_id=col_id, item_id=item_id -- GitLab From f95345eb79b8f4bf47456441e633d4b85f7e1a6f Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Wed, 15 Jan 2025 12:58:48 +0100 Subject: [PATCH 03/36] Fix typo --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b09625b..d77a93a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -29,7 +29,7 @@ Test Upload: allow_failure: false except: [main] script: - - python tests/all.py + - python tests/test_upload.py Test Get: extends: .static_analysis_with_pip_install -- GitLab From 75f59c2bbf0019254d45d1924b766908169c72a4 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Wed, 15 Jan 2025 15:54:30 +0100 Subject: [PATCH 04/36] Add diff tool --- pyproject.toml | 2 +- tests/test_get.py | 22 +++++++++++++++++++++- tests/test_upload.py | 6 +++--- theia_dumper/diff.py | 34 ++++++++++++++++++++++++++++++++++ theia_dumper/stac.py | 11 ++--------- 5 files changed, 61 insertions(+), 14 deletions(-) create mode 100644 theia_dumper/diff.py diff --git a/pyproject.toml b/pyproject.toml index a262c0d..339c9fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ pretty = true exclude = ["doc", "venv", ".venv"] [tool.pylint] -disable = "W1203,R0903,E0401,W0622,C0116,C0115" +disable = "W1203,R0903,E0401,W0622,C0116,C0115,W0719" [tool.pylint.MASTER] ignore-paths = '^.venv' diff --git a/tests/test_get.py b/tests/test_get.py index 3c38a5d..483e7e1 100755 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -1,6 +1,26 @@ """Test file.""" -from theia_dumper import stac, cli +import test_upload + +from theia_dumper import stac, cli, diff + + +col1, items = test_upload.create_items_and_collection(relative=True) +col2 = col1.full_copy() + +item = items[0].full_copy() +item.id += "_test" +col2.add_item(item, item.id) + +item = items[0].full_copy() +item.id += "_test_other" +col1.add_item(item, item.id) +diff.generate_diff(col1, col2) + + +################# +if True: + exit(0) handler = stac.StacTransactionHandler( stac_endpoint=cli.DEFAULT_STAC_EP, diff --git a/tests/test_upload.py b/tests/test_upload.py index cca3e7c..76a7c48 100755 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -84,7 +84,7 @@ def create_collection(): return col -def create_items_and_collection(relative): +def create_items_and_collection(relative: bool): """Create two STAC items attached to one collection.""" # Create items items = [create_item(item_id=item_id) for item_id in items_ids] @@ -141,7 +141,6 @@ def test_collection(): with tempfile.TemporaryDirectory() as tmpdir: generate_collection(tmpdir, relative=relative) handler.load_and_publish(os.path.join(tmpdir, "collection.json")) - clear() def test_all(): @@ -151,4 +150,5 @@ def test_all(): test_item_collection() -test_all() +if __name__ == "__main__": + test_all() diff --git a/theia_dumper/diff.py b/theia_dumper/diff.py new file mode 100644 index 0000000..737a897 --- /dev/null +++ b/theia_dumper/diff.py @@ -0,0 +1,34 @@ +"""STAC diff tool.""" + +from typing import Tuple, List +from pystac import Collection, Item + +UNIQUE_SEP = "___" + + +def generate_diff(col1: Collection, col2: Collection) -> Tuple[List[Item], List[Item]]: + """Compute the diff between 2 STAC collections, returns list of items + - only in collection 1 + - only in collection 2 + """ + + def item_get_unique(i: Item) -> str: + return i.id + UNIQUE_SEP + str(i.datetime.isoformat() if i.datetime else "") + + col1_ids = [item_get_unique(i) for i in col1.get_items()] + col2_ids = [item_get_unique(i) for i in col2.get_items()] + + only_in_1 = set(col1_ids) - set(col2_ids) + only_in_2 = set(col2_ids) - set(col1_ids) + + def unique_retrieve_info(unique: str, col: Collection) -> Item: + id = unique.split(UNIQUE_SEP)[0] + item = col.get_item(id) + if not item: + raise Exception(f"Item {id} not found") + return item + + list_only_in_1 = [unique_retrieve_info(unique, col1) for unique in only_in_1] + list_only_in_2 = [unique_retrieve_info(unique, col2) for unique in only_in_2] + + return list_only_in_1, list_only_in_2 diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index 6a1bffa..68cfc17 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -284,13 +284,6 @@ class UploadTransactionsHandler(StacTransactionHandler): self.publish_collection_with_items(collection=obj) elif isinstance(obj, ItemCollection): self.publish_item_collection(item_collection=obj) - else: - raise TypeError( - f"Invalid type, must be ItemCollection or Collection (got {type(obj)})" - ) - - def delete_item_or_col(self, col_id: str, item_id: str | None = None): - """Delete an item or a collection.""" - delete_stac_obj( - stac_endpoint=self.stac_endpoint, col_id=col_id, item_id=item_id + raise TypeError( + f"Invalid type, must be ItemCollection or Collection (got {type(obj)})" ) -- GitLab From 72b5bce5a658188a2191c96cd94e8620e75a69d0 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Wed, 15 Jan 2025 16:55:05 +0100 Subject: [PATCH 05/36] Add funcs --- tests/test_get.py | 4 +++- theia_dumper/diff.py | 16 +++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/tests/test_get.py b/tests/test_get.py index 483e7e1..1e516e0 100755 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -15,7 +15,9 @@ col2.add_item(item, item.id) item = items[0].full_copy() item.id += "_test_other" col1.add_item(item, item.id) -diff.generate_diff(col1, col2) + +diff.generate_items_diff(col1, col2) +diff.collections_defs_are_different(col1, col2) ################# diff --git a/theia_dumper/diff.py b/theia_dumper/diff.py index 737a897..53abc75 100644 --- a/theia_dumper/diff.py +++ b/theia_dumper/diff.py @@ -6,7 +6,17 @@ from pystac import Collection, Item UNIQUE_SEP = "___" -def generate_diff(col1: Collection, col2: Collection) -> Tuple[List[Item], List[Item]]: +def collections_defs_are_different(col1: Collection, col2: Collection) -> bool: + """Compute the diff between 2 STAC collections.""" + + if col1.description != col2.description: + return True + return False + + +def generate_items_diff( + col1: Collection, col2: Collection +) -> Tuple[List[Item], List[Item]]: """Compute the diff between 2 STAC collections, returns list of items - only in collection 1 - only in collection 2 @@ -32,3 +42,7 @@ def generate_diff(col1: Collection, col2: Collection) -> Tuple[List[Item], List[ list_only_in_2 = [unique_retrieve_info(unique, col2) for unique in only_in_2] return list_only_in_1, list_only_in_2 + + +def compare_local_and_upstream(col_id: str): + return generate_items_diff() -- GitLab From b063b48425f6339f31033d4c232bff1e26d51144 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Wed, 15 Jan 2025 17:21:56 +0100 Subject: [PATCH 06/36] Update sdk version --- pyproject.toml | 2 +- theia_dumper/stac.py | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 339c9fd..37c3cb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ dependencies = [ "setuptools", "pystac", "pystac_client", - "dinamis_sdk==0.3.4", + "dinamis_sdk>=0.4.0", "requests", "click", "rio-cogeo", diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index 68cfc17..b0a3853 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -8,7 +8,6 @@ from urllib.parse import urljoin import operator import dinamis_sdk -import dinamis_sdk.auth import dinamis_sdk.settings import pystac import requests @@ -59,8 +58,7 @@ def create_session(): def post_or_put(url: str, data: dict): """Post or put data to url.""" - # TODO: use correct authentification - headers = {"Authorization": f"Bearer {dinamis_sdk.auth.get_access_token()}"} + headers = dinamis_sdk.get_headers() sess = create_session() resp = sess.post(url, json=data, headers=headers) if resp.status_code == 409: @@ -154,10 +152,9 @@ class StacTransactionHandler: url = f"{self.stac_endpoint}/collections/{col_id}/items/{item_id}" else: url = f"{self.stac_endpoint}/collections/{col_id}" - # TODO: use correct auth resp = requests.delete( url, - headers={"Authorization": f"Bearer {dinamis_sdk.auth.get_access_token()}"}, + headers=dinamis_sdk.get_headers(), timeout=5, ) if resp.status_code != 200: -- GitLab From 98cbfdb96d55916924a9a3ecd6893d585d720545 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Wed, 15 Jan 2025 17:28:28 +0100 Subject: [PATCH 07/36] Fix --- tests/test_upload.py | 1 + theia_dumper/stac.py | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_upload.py b/tests/test_upload.py index 76a7c48..97a298c 100755 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -141,6 +141,7 @@ def test_collection(): with tempfile.TemporaryDirectory() as tmpdir: generate_collection(tmpdir, relative=relative) handler.load_and_publish(os.path.join(tmpdir, "collection.json")) + clear() def test_all(): diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index b0a3853..9e98edf 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -281,6 +281,7 @@ class UploadTransactionsHandler(StacTransactionHandler): self.publish_collection_with_items(collection=obj) elif isinstance(obj, ItemCollection): self.publish_item_collection(item_collection=obj) - raise TypeError( - f"Invalid type, must be ItemCollection or Collection (got {type(obj)})" - ) + else: + raise TypeError( + f"Invalid type, must be ItemCollection or Collection (got {type(obj)})" + ) -- GitLab From 659645bea6b30fc716c5e55012b28b78bde93b61 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Wed, 15 Jan 2025 17:45:51 +0100 Subject: [PATCH 08/36] Update --- tests/test_get.py | 4 ++++ theia_dumper/cli.py | 10 ++++++---- theia_dumper/diff.py | 13 ++++++++++--- theia_dumper/stac.py | 12 +++++++++++- 4 files changed, 31 insertions(+), 8 deletions(-) diff --git a/tests/test_get.py b/tests/test_get.py index 1e516e0..97f2c4f 100755 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -19,6 +19,10 @@ col1.add_item(item, item.id) diff.generate_items_diff(col1, col2) diff.collections_defs_are_different(col1, col2) +diff.compare_local_and_upstream( + col1.id, col1.self_href, stac.StacTransactionHandler(stac.DEFAULT_STAC_EP) +) + ################# if True: diff --git a/theia_dumper/cli.py b/theia_dumper/cli.py index 2e164fc..6f557d9 100644 --- a/theia_dumper/cli.py +++ b/theia_dumper/cli.py @@ -1,10 +1,12 @@ """Theia-dumper Command Line Interface.""" import click -from .stac import UploadTransactionsHandler, StacTransactionHandler - -DEFAULT_STAC_EP = "https://stacapi-cdos.apps.okd.crocc.meso.umontpellier.fr" -DEFAULT_S3_EP = "https://s3-data.meso.umontpellier.fr" +from .stac import ( + UploadTransactionsHandler, + StacTransactionHandler, + DEFAULT_S3_EP, + DEFAULT_STAC_EP, +) @click.group() diff --git a/theia_dumper/diff.py b/theia_dumper/diff.py index 53abc75..dab0646 100644 --- a/theia_dumper/diff.py +++ b/theia_dumper/diff.py @@ -1,8 +1,10 @@ """STAC diff tool.""" -from typing import Tuple, List +from typing import Tuple, List, cast from pystac import Collection, Item +from . import stac + UNIQUE_SEP = "___" @@ -44,5 +46,10 @@ def generate_items_diff( return list_only_in_1, list_only_in_2 -def compare_local_and_upstream(col_id: str): - return generate_items_diff() +def compare_local_and_upstream( + remote_col_id: str, local_col_path: str, handler: stac.StacTransactionHandler +): + col_local = cast(Collection, stac.load_stac_obj(obj_pth=local_col_path)) + col_remote = handler.get_remote_col(remote_col_id) + + generate_items_diff(col_local, col_remote) diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index 9e98edf..150b396 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -8,7 +8,7 @@ from urllib.parse import urljoin import operator import dinamis_sdk -import dinamis_sdk.settings +import pystac_client import pystac import requests from pystac import Collection, Item, ItemCollection @@ -16,6 +16,9 @@ from requests.adapters import HTTPAdapter, Retry from .logger import logger +DEFAULT_STAC_EP = "https://stacapi-cdos.apps.okd.crocc.meso.umontpellier.fr" +DEFAULT_S3_EP = "https://s3-data.meso.umontpellier.fr" + class STACObjectUnresolved(Exception): """Unresolved STAC object exception.""" @@ -200,6 +203,13 @@ class StacTransactionHandler: for item in items: print("\t" + item["id"]) + def get_remote_col(self, col_id) -> Collection: + api = pystac_client.Client.open( + self.stac_endpoint, + modifier=dinamis_sdk.sign_inplace, + ) + return api.get_collection(col_id) + @dataclass class UploadTransactionsHandler(StacTransactionHandler): -- GitLab From fd2b41cd876043069b0216475cd47fec1299cc5b Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 08:52:00 +0100 Subject: [PATCH 09/36] Fix collections diff --- pyproject.toml | 1 + tests/test_get.py | 9 ++++++++- tests/test_upload.py | 2 +- theia_dumper/diff.py | 9 ++++++++- 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 37c3cb1..9b5e0d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "dinamis_sdk>=0.4.0", "requests", "click", + "rich", "rio-cogeo", ] license = { text = "Apache-2.0" } diff --git a/tests/test_get.py b/tests/test_get.py index 97f2c4f..ba6b652 100755 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -1,6 +1,7 @@ """Test file.""" import test_upload +import pystac from theia_dumper import stac, cli, diff @@ -19,8 +20,14 @@ col1.add_item(item, item.id) diff.generate_items_diff(col1, col2) diff.collections_defs_are_different(col1, col2) +COL1_FILEPATH = "/tmp/col1.json" +col1.set_self_href(COL1_FILEPATH) +col1.save(catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED) + diff.compare_local_and_upstream( - col1.id, col1.self_href, stac.StacTransactionHandler(stac.DEFAULT_STAC_EP) + "costarica-sentinel-2-l3-seasonal-spectral-indices-M", + COL1_FILEPATH, + stac.StacTransactionHandler(stac.DEFAULT_STAC_EP), ) diff --git a/tests/test_upload.py b/tests/test_upload.py index 97a298c..2669f5b 100755 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -76,7 +76,7 @@ def create_collection(): id=COL_ID, extent=pystac.Extent(spat_extent, temp_extent), description="Some description", - href="http://hello.fr/collections/collection-for-tests", + href="/tmp/collection.json", providers=[ pystac.Provider("INRAE"), ], diff --git a/theia_dumper/diff.py b/theia_dumper/diff.py index dab0646..13eaed3 100644 --- a/theia_dumper/diff.py +++ b/theia_dumper/diff.py @@ -2,6 +2,7 @@ from typing import Tuple, List, cast from pystac import Collection, Item +from rich import print from . import stac @@ -52,4 +53,10 @@ def compare_local_and_upstream( col_local = cast(Collection, stac.load_stac_obj(obj_pth=local_col_path)) col_remote = handler.get_remote_col(remote_col_id) - generate_items_diff(col_local, col_remote) + only_local, only_remote = generate_items_diff(col_local, col_remote) + + print(f"Only local ({len(only_local)}):") + print(only_local[:20]) + + print(f"Only remote ({len(only_remote)}):") + print(only_remote[:20]) -- GitLab From f781a92096b2533208e580ebfd244d3889f814fb Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 09:17:00 +0100 Subject: [PATCH 10/36] Improve diff --- tests/test_get.py | 4 ++-- theia_dumper/diff.py | 55 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/tests/test_get.py b/tests/test_get.py index ba6b652..a023254 100755 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -25,9 +25,9 @@ col1.set_self_href(COL1_FILEPATH) col1.save(catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED) diff.compare_local_and_upstream( - "costarica-sentinel-2-l3-seasonal-spectral-indices-M", - COL1_FILEPATH, stac.StacTransactionHandler(stac.DEFAULT_STAC_EP), + COL1_FILEPATH, + "costarica-sentinel-2-l3-seasonal-spectral-indices-M", ) diff --git a/theia_dumper/diff.py b/theia_dumper/diff.py index 13eaed3..8f7a4b3 100644 --- a/theia_dumper/diff.py +++ b/theia_dumper/diff.py @@ -3,6 +3,7 @@ from typing import Tuple, List, cast from pystac import Collection, Item from rich import print +from .logger import logger from . import stac @@ -12,9 +13,40 @@ UNIQUE_SEP = "___" def collections_defs_are_different(col1: Collection, col2: Collection) -> bool: """Compute the diff between 2 STAC collections.""" - if col1.description != col2.description: - return True - return False + def fields_are_different(col1: Collection, col2: Collection, field_name: str): + recursive_fields = field_name.split(".") + + f1 = col1 + f2 = col2 + for f in recursive_fields: + f1 = getattr(f1, f) + f2 = getattr(f2, f) + + if f1 != f2: + logger.info(f"{field_name} is different: '{f1}' != '{f2}'") + return True + return False + + different = False + if fields_are_different(col1, col2, "extent.spatial.bboxes"): + different = True + if fields_are_different(col1, col2, "extent.temporal.intervals"): + different = True + if fields_are_different(col1, col2, "description"): + different = True + if fields_are_different(col1, col2, "id"): + different = True + if fields_are_different(col1, col2, "keywords"): + different = True + if fields_are_different(col1, col2, "license"): + different = True + if fields_are_different(col1, col2, "strategy"): + different = True + if fields_are_different(col1, col2, "providers"): + different = True + if fields_are_different(col1, col2, "title"): + different = True + return different def generate_items_diff( @@ -48,13 +80,28 @@ def generate_items_diff( def compare_local_and_upstream( - remote_col_id: str, local_col_path: str, handler: stac.StacTransactionHandler + handler: stac.StacTransactionHandler, + local_col_path: str, + remote_col_id: str | None = None, ): + """Compare a local and a remote collection: + + Parameters: + handler: StacTransactionHandler object + local_col_path (str): path to local collection path + remove_col_id (str): remote collection identifier. If unset, will take the same id as the local collection + """ + col_local = cast(Collection, stac.load_stac_obj(obj_pth=local_col_path)) + if not remote_col_id: + remote_col_id = col_local.id col_remote = handler.get_remote_col(remote_col_id) only_local, only_remote = generate_items_diff(col_local, col_remote) + definitions_are_different = collections_defs_are_different(col_local, col_remote) + print(definitions_are_different) + print(f"Only local ({len(only_local)}):") print(only_local[:20]) -- GitLab From 25cd51db35aeac9469ad7a757fe94152d5c2bfc3 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 09:23:36 +0100 Subject: [PATCH 11/36] Fix tests --- tests/test_get.py | 9 +++++---- tests/test_upload.py | 10 ++++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/test_get.py b/tests/test_get.py index a023254..6ab5756 100755 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -5,8 +5,11 @@ import pystac from theia_dumper import stac, cli, diff +# Diff test -col1, items = test_upload.create_items_and_collection(relative=True) +col1, items = test_upload.create_items_and_collection( + relative=True, col_href="/tmp/collection.json" +) col2 = col1.full_copy() item = items[0].full_copy() @@ -31,9 +34,7 @@ diff.compare_local_and_upstream( ) -################# -if True: - exit(0) +# Read test handler = stac.StacTransactionHandler( stac_endpoint=cli.DEFAULT_STAC_EP, diff --git a/tests/test_upload.py b/tests/test_upload.py index 2669f5b..371a68d 100755 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -68,7 +68,7 @@ def create_item(item_id: str): return item -def create_collection(): +def create_collection(col_href: str): """Create an empty STAC collection.""" spat_extent = pystac.SpatialExtent([[0, 0, 2, 3]]) temp_extent = pystac.TemporalExtent(intervals=[(None, None)]) @@ -76,7 +76,7 @@ def create_collection(): id=COL_ID, extent=pystac.Extent(spat_extent, temp_extent), description="Some description", - href="/tmp/collection.json", + href=col_href, providers=[ pystac.Provider("INRAE"), ], @@ -84,13 +84,15 @@ def create_collection(): return col -def create_items_and_collection(relative: bool): +def create_items_and_collection( + relative: bool, col_href="http://hello.fr/collections/collection-for-tests" +): """Create two STAC items attached to one collection.""" # Create items items = [create_item(item_id=item_id) for item_id in items_ids] # Attach items to collection - col = create_collection() + col = create_collection(col_href) for item in items: col.add_item(item) if relative: -- GitLab From 721f7dad2551c2ca1d45953201739679a19f2309 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 09:31:08 +0100 Subject: [PATCH 12/36] Fix static analysis warnings and update ci --- .gitlab-ci.yml | 3 +++ tests/test_upload.py | 12 ++++++------ theia_dumper/diff.py | 3 ++- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d77a93a..1cc0a0d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -23,6 +23,9 @@ stages: - Documentation - Pip +.static_analysis_base: + allow_failure: false + Test Upload: extends: .static_analysis_with_pip_install stage: Test diff --git a/tests/test_upload.py b/tests/test_upload.py index 371a68d..cfb0d03 100755 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -10,6 +10,8 @@ import requests from theia_dumper import stac +DEFAULT_COL_HREF = "http://hello.fr/collections/collection-for-tests" + handler = stac.UploadTransactionsHandler( stac_endpoint="https://stacapi-cdos.apps.okd.crocc.meso.umontpellier.fr", storage_endpoint="https://s3-data.meso.umontpellier.fr", @@ -70,8 +72,8 @@ def create_item(item_id: str): def create_collection(col_href: str): """Create an empty STAC collection.""" - spat_extent = pystac.SpatialExtent([[0, 0, 2, 3]]) - temp_extent = pystac.TemporalExtent(intervals=[(None, None)]) + spat_extent = pystac.SpatialExtent([[0.0, 0.0, 2.0, 3.0]]) + temp_extent = pystac.TemporalExtent(intervals=[]) col = pystac.Collection( id=COL_ID, extent=pystac.Extent(spat_extent, temp_extent), @@ -84,9 +86,7 @@ def create_collection(col_href: str): return col -def create_items_and_collection( - relative: bool, col_href="http://hello.fr/collections/collection-for-tests" -): +def create_items_and_collection(relative: bool, col_href=DEFAULT_COL_HREF): """Create two STAC items attached to one collection.""" # Create items items = [create_item(item_id=item_id) for item_id in items_ids] @@ -127,7 +127,7 @@ def test_item_collection(): print(f"Relative: {relative}") # we need to create an empty collection before - col = create_collection() + col = create_collection(DEFAULT_COL_HREF) handler.publish_collection(collection=col) with tempfile.NamedTemporaryFile() as tmp: diff --git a/theia_dumper/diff.py b/theia_dumper/diff.py index 8f7a4b3..11684b9 100644 --- a/theia_dumper/diff.py +++ b/theia_dumper/diff.py @@ -89,7 +89,8 @@ def compare_local_and_upstream( Parameters: handler: StacTransactionHandler object local_col_path (str): path to local collection path - remove_col_id (str): remote collection identifier. If unset, will take the same id as the local collection + remove_col_id (str): remote collection identifier. + If unset, will take the same id as the local collection """ col_local = cast(Collection, stac.load_stac_obj(obj_pth=local_col_path)) -- GitLab From b1a5c333bfe2b2c61f23963efba335df917cf5bc Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 09:50:31 +0100 Subject: [PATCH 13/36] Fix warnings --- theia_dumper/diff.py | 21 +++++++++++---------- theia_dumper/stac.py | 1 + 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/theia_dumper/diff.py b/theia_dumper/diff.py index 11684b9..1f33e39 100644 --- a/theia_dumper/diff.py +++ b/theia_dumper/diff.py @@ -52,9 +52,11 @@ def collections_defs_are_different(col1: Collection, col2: Collection) -> bool: def generate_items_diff( col1: Collection, col2: Collection ) -> Tuple[List[Item], List[Item]]: - """Compute the diff between 2 STAC collections, returns list of items - - only in collection 1 - - only in collection 2 + """Compute the diff between 2 STAC collections. + + Returns: + - list of items only in collection 1 + - list of items only in collection 2 """ def item_get_unique(i: Item) -> str: @@ -84,15 +86,14 @@ def compare_local_and_upstream( local_col_path: str, remote_col_id: str | None = None, ): - """Compare a local and a remote collection: + """Compare a local and a remote collection. - Parameters: - handler: StacTransactionHandler object - local_col_path (str): path to local collection path - remove_col_id (str): remote collection identifier. - If unset, will take the same id as the local collection + Args: + handler (stac.StacTransactionHandler): object to handle the connection + local_col_path (str): path to local collection path + remote_col_id (str | None, optional): Remote collection identifier. + If unset, will take the same id as the local collection """ - col_local = cast(Collection, stac.load_stac_obj(obj_pth=local_col_path)) if not remote_col_id: remote_col_id = col_local.id diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index 150b396..4eed5a4 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -204,6 +204,7 @@ class StacTransactionHandler: print("\t" + item["id"]) def get_remote_col(self, col_id) -> Collection: + """Retrieve a remote collection.""" api = pystac_client.Client.open( self.stac_endpoint, modifier=dinamis_sdk.sign_inplace, -- GitLab From c4d0a716f850fbb8a775c3c2bb62bcbceb463fa7 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 09:51:42 +0100 Subject: [PATCH 14/36] Move tests --- .gitlab-ci.yml | 8 ++++++++ tests/test_diff.py | 34 ++++++++++++++++++++++++++++++++++ tests/test_get.py | 33 +-------------------------------- 3 files changed, 43 insertions(+), 32 deletions(-) create mode 100755 tests/test_diff.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 1cc0a0d..31e4d0c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -41,3 +41,11 @@ Test Get: except: [main] script: - python tests/test_get.py + +Test Diff: + extends: .static_analysis_with_pip_install + stage: Test + allow_failure: false + except: [main] + script: + - python tests/test_diff.py diff --git a/tests/test_diff.py b/tests/test_diff.py new file mode 100755 index 0000000..c3e93f9 --- /dev/null +++ b/tests/test_diff.py @@ -0,0 +1,34 @@ +"""Test file.""" + +import test_upload +import pystac + +from theia_dumper import stac, diff + +# Diff test + +col1, items = test_upload.create_items_and_collection( + relative=True, col_href="/tmp/collection.json" +) +col2 = col1.full_copy() + +item = items[0].full_copy() +item.id += "_test" +col2.add_item(item, item.id) + +item = items[0].full_copy() +item.id += "_test_other" +col1.add_item(item, item.id) + +diff.generate_items_diff(col1, col2) +diff.collections_defs_are_different(col1, col2) + +COL1_FILEPATH = "/tmp/col1.json" +col1.set_self_href(COL1_FILEPATH) +col1.save(catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED) + +diff.compare_local_and_upstream( + stac.StacTransactionHandler(stac.DEFAULT_STAC_EP), + COL1_FILEPATH, + "costarica-sentinel-2-l3-seasonal-spectral-indices-M", +) diff --git a/tests/test_get.py b/tests/test_get.py index 6ab5756..af5c6f7 100755 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -1,37 +1,6 @@ """Test file.""" -import test_upload -import pystac - -from theia_dumper import stac, cli, diff - -# Diff test - -col1, items = test_upload.create_items_and_collection( - relative=True, col_href="/tmp/collection.json" -) -col2 = col1.full_copy() - -item = items[0].full_copy() -item.id += "_test" -col2.add_item(item, item.id) - -item = items[0].full_copy() -item.id += "_test_other" -col1.add_item(item, item.id) - -diff.generate_items_diff(col1, col2) -diff.collections_defs_are_different(col1, col2) - -COL1_FILEPATH = "/tmp/col1.json" -col1.set_self_href(COL1_FILEPATH) -col1.save(catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED) - -diff.compare_local_and_upstream( - stac.StacTransactionHandler(stac.DEFAULT_STAC_EP), - COL1_FILEPATH, - "costarica-sentinel-2-l3-seasonal-spectral-indices-M", -) +from theia_dumper import stac, cli # Read test -- GitLab From bed0e0805e11f1bbd4b4a680860ff3be086ed84f Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 09:52:39 +0100 Subject: [PATCH 15/36] Facto --- .gitlab-ci.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 31e4d0c..88b4866 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -29,7 +29,6 @@ stages: Test Upload: extends: .static_analysis_with_pip_install stage: Test - allow_failure: false except: [main] script: - python tests/test_upload.py @@ -37,7 +36,6 @@ Test Upload: Test Get: extends: .static_analysis_with_pip_install stage: Test - allow_failure: false except: [main] script: - python tests/test_get.py @@ -45,7 +43,6 @@ Test Get: Test Diff: extends: .static_analysis_with_pip_install stage: Test - allow_failure: false except: [main] script: - python tests/test_diff.py -- GitLab From 62e15a6deb764cb1869ce16ac5a94600b0f963ca Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 10:02:03 +0100 Subject: [PATCH 16/36] Fix --- tests/test_diff.py | 1 - tests/test_get.py | 12 +++++++++--- tests/test_upload.py | 2 +- theia_dumper/stac.py | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/tests/test_diff.py b/tests/test_diff.py index c3e93f9..0cc2269 100755 --- a/tests/test_diff.py +++ b/tests/test_diff.py @@ -5,7 +5,6 @@ import pystac from theia_dumper import stac, diff -# Diff test col1, items = test_upload.create_items_and_collection( relative=True, col_href="/tmp/collection.json" diff --git a/tests/test_get.py b/tests/test_get.py index af5c6f7..6bffe62 100755 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -3,11 +3,17 @@ from theia_dumper import stac, cli -# Read test - handler = stac.StacTransactionHandler( stac_endpoint=cli.DEFAULT_STAC_EP, ) +REMOTE_COL_ID = "sentinel2-l2a-theia" handler.list_collections_display() -handler.list_col_items_display("sentinel2-l2a-theia") +handler.list_col_items_display(REMOTE_COL_ID) + + +col_remote = handler.get_remote_col(REMOTE_COL_ID) + +col_items = handler.list_col_items(REMOTE_COL_ID) + +exit(0) diff --git a/tests/test_upload.py b/tests/test_upload.py index cfb0d03..840f6a7 100755 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -73,7 +73,7 @@ def create_item(item_id: str): def create_collection(col_href: str): """Create an empty STAC collection.""" spat_extent = pystac.SpatialExtent([[0.0, 0.0, 2.0, 3.0]]) - temp_extent = pystac.TemporalExtent(intervals=[]) + temp_extent = pystac.TemporalExtent(intervals=[(None, None)]) col = pystac.Collection( id=COL_ID, extent=pystac.Extent(spat_extent, temp_extent), diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index 4eed5a4..a2cadcd 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -187,7 +187,7 @@ class StacTransactionHandler: def list_col_items(self, col_id: str): """Delete an item or a collection.""" logger.info("Listing %s items", col_id) - url = f"{self.stac_endpoint}/collections/{col_id}/items" + url = f"{self.stac_endpoint}/collections/{col_id}/items?limit=100" resp = requests.get( url, timeout=5, -- GitLab From cffd378f96258334cfdd00797699102161cdd941 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 10:05:34 +0100 Subject: [PATCH 17/36] Fix warnings --- tests/test_get.py | 2 -- tests/test_upload.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_get.py b/tests/test_get.py index 6bffe62..4465cba 100755 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -15,5 +15,3 @@ handler.list_col_items_display(REMOTE_COL_ID) col_remote = handler.get_remote_col(REMOTE_COL_ID) col_items = handler.list_col_items(REMOTE_COL_ID) - -exit(0) diff --git a/tests/test_upload.py b/tests/test_upload.py index 840f6a7..5d4c71f 100755 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -73,7 +73,7 @@ def create_item(item_id: str): def create_collection(col_href: str): """Create an empty STAC collection.""" spat_extent = pystac.SpatialExtent([[0.0, 0.0, 2.0, 3.0]]) - temp_extent = pystac.TemporalExtent(intervals=[(None, None)]) + temp_extent = pystac.TemporalExtent(intervals=[[None, None]]) # type: ignore col = pystac.Collection( id=COL_ID, extent=pystac.Extent(spat_extent, temp_extent), -- GitLab From 192aab2071bc84a54b68718e77a1bbefe745c2b9 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 13:46:58 +0100 Subject: [PATCH 18/36] Add cli for diff --- theia_dumper/cli.py | 28 ++++++++++++++++++++++++++++ theia_dumper/diff.py | 3 +-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/theia_dumper/cli.py b/theia_dumper/cli.py index 6f557d9..37ee639 100644 --- a/theia_dumper/cli.py +++ b/theia_dumper/cli.py @@ -7,6 +7,7 @@ from .stac import ( DEFAULT_S3_EP, DEFAULT_STAC_EP, ) +from . import diff @click.group() @@ -110,3 +111,30 @@ def list_col_items( StacTransactionHandler( stac_endpoint=stac_endpoint, ).list_col_items_display(col_id=col_id) + + +@theia_dumper.command(context_settings={"show_default": True}) +@click.option( + "--stac_endpoint", + help="Endpoint to which STAC objects will be sent", + type=str, + default=DEFAULT_STAC_EP, +) +@click.option("-p", "--col_path", type=str, help="Local collection path", required=True) +@click.option( + "-r", + "--remote_id", + type=str, + help="Remote collection ID. If not specified, will use local collection ID", + required=False, +) +def collection_diff( + stac_endpoint: str, + col_path: str, + remote_id: str | None = None, +): + """List collection items.""" + handler = StacTransactionHandler( + stac_endpoint=stac_endpoint, + ) + diff.compare_local_and_upstream(handler, col_path, remote_id) diff --git a/theia_dumper/diff.py b/theia_dumper/diff.py index 1f33e39..654d508 100644 --- a/theia_dumper/diff.py +++ b/theia_dumper/diff.py @@ -101,8 +101,7 @@ def compare_local_and_upstream( only_local, only_remote = generate_items_diff(col_local, col_remote) - definitions_are_different = collections_defs_are_different(col_local, col_remote) - print(definitions_are_different) + collections_defs_are_different(col_local, col_remote) print(f"Only local ({len(only_local)}):") print(only_local[:20]) -- GitLab From 7f801b7630ecd2978e1af12ddfe7d4e639444643 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 13:53:19 +0100 Subject: [PATCH 19/36] Fix ci --- tests/test_upload.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_upload.py b/tests/test_upload.py index 688c796..25ad8b4 100755 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -37,7 +37,7 @@ with open(RASTER_FILE1, "wb") as f: f.write(r.content) shutil.copyfile(RASTER_FILE1, RASTER_FILE2) -COL_BBOX = [0, 0, 0, 0] +COL_BBOX = [0.0, 0.0, 0.0, 0.0] BBOX_ALL = [ 3.6962018175925073, 43.547450099338604, @@ -101,7 +101,7 @@ def create_item(item_id: str): def create_collection(col_href: str): """Create an empty STAC collection.""" spat_extent = pystac.SpatialExtent([COL_BBOX]) - temp_extent = pystac.TemporalExtent(intervals=[(None, None)]) + temp_extent = pystac.TemporalExtent(intervals=[[None, None]]) # type: ignore col = pystac.Collection( id=COL_ID, extent=pystac.Extent(spat_extent, temp_extent), -- GitLab From 70707208ca930f7cfdf251d25e803b5526d6b130 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 13:53:42 +0100 Subject: [PATCH 20/36] Fix ci --- tests/test_upload.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_upload.py b/tests/test_upload.py index 25ad8b4..429d1cb 100755 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -67,9 +67,9 @@ def check(expected_bbox): extent = api.get_collection(COL_ID).extent.spatial.bboxes print(f"extent.spatial: {extent}") assert len(extent) == 1 - assert tuple(extent[0]) == tuple( - expected_bbox - ), f"expected BBOX: {expected_bbox}, got {extent[0]}" + assert tuple(extent[0]) == tuple(expected_bbox), ( + f"expected BBOX: {expected_bbox}, got {extent[0]}" + ) def create_item(item_id: str): -- GitLab From 99f7b70f2f3b10a569cc27e6c658340ee6258f41 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 14:21:49 +0100 Subject: [PATCH 21/36] Update tests --- tests/test_upload.py | 32 ++++++++++++++------------------ theia_dumper/stac.py | 11 +++++++++-- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/tests/test_upload.py b/tests/test_upload.py index 429d1cb..c0e7b87 100755 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -11,27 +11,25 @@ import requests from theia_dumper import stac -DEFAULT_COL_HREF = "http://hello.fr/collections/collection-for-tests" +DEFAULT_COL_HREF = "http://hello.fr/collections/collection-for-tests" STAC_EP = "https://stacapi-cdos.apps.okd.crocc.meso.umontpellier.fr" - -handler = stac.UploadTransactionsHandler( - stac_endpoint=STAC_EP, - storage_endpoint="https://s3-data.meso.umontpellier.fr", - storage_bucket="sm1-gdc-tests", - assets_overwrite=True, -) - IMAGE_HREF = ( "https://gitlab.orfeo-toolbox.org/orfeotoolbox/" "otb/-/raw/develop/Data/Input/SP67_FR_subset_1.tif" ) - COL_ID = "collection-for-theia-dumper-tests" items_ids = ["item_1", "item_2"] - RASTER_FILE1 = "/tmp/raster1.tif" RASTER_FILE2 = "/tmp/raster2.tif" + +handler = stac.UploadTransactionsHandler( + stac_endpoint=STAC_EP, + storage_endpoint="https://s3-data.meso.umontpellier.fr", + storage_bucket="sm1-gdc-tests", + assets_overwrite=True, +) + with open(RASTER_FILE1, "wb") as f: r = requests.get(IMAGE_HREF, timeout=5) f.write(r.content) @@ -67,9 +65,9 @@ def check(expected_bbox): extent = api.get_collection(COL_ID).extent.spatial.bboxes print(f"extent.spatial: {extent}") assert len(extent) == 1 - assert tuple(extent[0]) == tuple(expected_bbox), ( - f"expected BBOX: {expected_bbox}, got {extent[0]}" - ) + assert tuple(extent[0]) == tuple( + expected_bbox + ), f"expected BBOX: {expected_bbox}, got {extent[0]}" def create_item(item_id: str): @@ -190,15 +188,13 @@ def test_collection_multipart(): clear() -def test_all(): - """Test all.""" +def _test_all(): test_collection() test_item_collection() - # test collection (multi-part) test_collection_multipart() if __name__ == "__main__": - test_all() + _test_all() diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index 8374bed..2a34054 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -112,7 +112,7 @@ def get_assets_root_dir(items: List[Item]) -> str: def check_items_collection_id(items: List[Item]): - """Check that items collection_id is unique.""" + """Check that items have the same collection_id.""" if len(set(item.collection_id for item in items)) > 1: raise UnconsistentCollectionIDs("Collection ID must be the same for all items!") @@ -231,6 +231,13 @@ class UploadTransactionsHandler(StacTransactionHandler): local_filename = asset.href logger.debug("Local file: %s", local_filename) target_url = local_filename.replace(assets_root_dir, target_root_dir) + target_url = target_url.replace("raster1", "raster_1") + + def url_escape_dangerous_chars(url: str): + url = url.replace("_", "-") + return url + + target_url = url_escape_dangerous_chars(target_url) logger.debug("Target file: %s", target_url) # Skip when target file exists and overwrite is not enabled @@ -242,7 +249,7 @@ class UploadTransactionsHandler(StacTransactionHandler): continue # Upload file - logger.info("Uploading %s ...", local_filename) + logger.info("Uploading %s to %s...", local_filename, target_url) try: dinamis_sdk.push(local_filename=local_filename, target_url=target_url) except Exception as e: -- GitLab From cfe5a122400bd97bd7e5feee8879e263ac889070 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 14:34:39 +0100 Subject: [PATCH 22/36] Remove debug lines --- tests/test_upload.py | 1 - theia_dumper/stac.py | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/test_upload.py b/tests/test_upload.py index c0e7b87..843464c 100755 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -63,7 +63,6 @@ def check(expected_bbox): """Check collection extent.""" api = pystac_client.Client.open(STAC_EP) extent = api.get_collection(COL_ID).extent.spatial.bboxes - print(f"extent.spatial: {extent}") assert len(extent) == 1 assert tuple(extent[0]) == tuple( expected_bbox diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index 2a34054..ef8a0d8 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -231,7 +231,6 @@ class UploadTransactionsHandler(StacTransactionHandler): local_filename = asset.href logger.debug("Local file: %s", local_filename) target_url = local_filename.replace(assets_root_dir, target_root_dir) - target_url = target_url.replace("raster1", "raster_1") def url_escape_dangerous_chars(url: str): url = url.replace("_", "-") -- GitLab From fefcf26492fc5fb46653399a7ce6f9aee463ec17 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 15:38:50 +0100 Subject: [PATCH 23/36] Add test that assets are present --- tests/test_upload.py | 19 +++++++++++++------ theia_dumper/diff.py | 32 ++++++++++++++------------------ theia_dumper/stac.py | 20 +++++++++++++------- 3 files changed, 40 insertions(+), 31 deletions(-) diff --git a/tests/test_upload.py b/tests/test_upload.py index 843464c..f24f9b9 100755 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -59,15 +59,22 @@ def clear(): handler.delete_item_or_col(col_id=COL_ID) -def check(expected_bbox): - """Check collection extent.""" +def remote_col_test(expected_bbox): + """Run tests on a remote collection.""" api = pystac_client.Client.open(STAC_EP) - extent = api.get_collection(COL_ID).extent.spatial.bboxes + col = api.get_collection(COL_ID) + extent = col.extent.spatial.bboxes assert len(extent) == 1 assert tuple(extent[0]) == tuple( expected_bbox ), f"expected BBOX: {expected_bbox}, got {extent[0]}" + # Check that assets are accessible once signed + for i in col.get_items(): + assets = i.get_assets().values() + for asset in assets: + assert stac.asset_exists(asset.href) + def create_item(item_id: str): """Create a STAC item.""" @@ -158,7 +165,7 @@ def test_item_collection(): with tempfile.NamedTemporaryFile() as tmp: generate_item_collection(tmp.name, relative=relative) handler.load_and_publish(tmp.name) - check(BBOX_ALL) + remote_col_test(BBOX_ALL) clear() @@ -169,7 +176,7 @@ def test_collection(): with tempfile.TemporaryDirectory() as tmpdir: generate_collection(tmpdir, relative=relative) handler.load_and_publish(os.path.join(tmpdir, "collection.json")) - check(BBOX_ALL) + remote_col_test(BBOX_ALL) clear() @@ -183,7 +190,7 @@ def test_collection_multipart(): tmpdir, relative=relative, items=[create_item(item_id)] ) handler.load_and_publish(os.path.join(tmpdir, "collection.json")) - check(BBOX_ALL) + remote_col_test(BBOX_ALL) clear() diff --git a/theia_dumper/diff.py b/theia_dumper/diff.py index 654d508..cfe69f2 100644 --- a/theia_dumper/diff.py +++ b/theia_dumper/diff.py @@ -28,24 +28,20 @@ def collections_defs_are_different(col1: Collection, col2: Collection) -> bool: return False different = False - if fields_are_different(col1, col2, "extent.spatial.bboxes"): - different = True - if fields_are_different(col1, col2, "extent.temporal.intervals"): - different = True - if fields_are_different(col1, col2, "description"): - different = True - if fields_are_different(col1, col2, "id"): - different = True - if fields_are_different(col1, col2, "keywords"): - different = True - if fields_are_different(col1, col2, "license"): - different = True - if fields_are_different(col1, col2, "strategy"): - different = True - if fields_are_different(col1, col2, "providers"): - different = True - if fields_are_different(col1, col2, "title"): - different = True + fields = [ + "extent.spatial.bboxes", + "extent.temporal.intervals", + "description", + "id", + "keywords", + "license", + "strategy", + "providers", + "title", + ] + for field in fields: + if fields_are_different(col1, col2, field): + different = True return different diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index ef8a0d8..8e74ae2 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -58,6 +58,15 @@ def create_session(): return sess +def asset_exists(url: str) -> bool: + sess = create_session() + res = sess.get(dinamis_sdk.sign(url), stream=True) + if res.status_code == 200: + logger.info("Asset %s already exists. Skipping.", url) + return True + return False + + def post_or_put(url: str, data: dict): """Post or put data to url.""" headers = dinamis_sdk.get_headers() @@ -185,7 +194,7 @@ class StacTransactionHandler: print("\t" + col["id"]) def list_col_items(self, col_id: str): - """Delete an item or a collection.""" + """List items in a collection.""" logger.info("Listing %s items", col_id) url = f"{self.stac_endpoint}/collections/{col_id}/items?limit=100" resp = requests.get( @@ -216,7 +225,6 @@ class StacTransactionHandler: class UploadTransactionsHandler(StacTransactionHandler): """Handle STAC and storage transactions.""" - stac_endpoint: str storage_endpoint: str storage_bucket: str assets_overwrite: bool @@ -231,9 +239,10 @@ class UploadTransactionsHandler(StacTransactionHandler): local_filename = asset.href logger.debug("Local file: %s", local_filename) target_url = local_filename.replace(assets_root_dir, target_root_dir) + # target_url = target_url.replace("raster1", "raster_1") def url_escape_dangerous_chars(url: str): - url = url.replace("_", "-") + # url = url.replace("_", "-") return url target_url = url_escape_dangerous_chars(target_url) @@ -241,10 +250,7 @@ class UploadTransactionsHandler(StacTransactionHandler): # Skip when target file exists and overwrite is not enabled if not self.assets_overwrite: - sess = create_session() - res = sess.get(dinamis_sdk.sign(target_url), stream=True) - if res.status_code == 200: - logger.info("Asset %s already exists. Skipping.", target_url) + if asset_exists(target_url): continue # Upload file -- GitLab From e614c57bde6aac9ebf4cbfdd61e8a486ffc82251 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 15:41:10 +0100 Subject: [PATCH 24/36] Fix --- tests/test_upload.py | 6 +++--- theia_dumper/stac.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_upload.py b/tests/test_upload.py index f24f9b9..5fd448b 100755 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -65,9 +65,9 @@ def remote_col_test(expected_bbox): col = api.get_collection(COL_ID) extent = col.extent.spatial.bboxes assert len(extent) == 1 - assert tuple(extent[0]) == tuple( - expected_bbox - ), f"expected BBOX: {expected_bbox}, got {extent[0]}" + assert tuple(extent[0]) == tuple(expected_bbox), ( + f"expected BBOX: {expected_bbox}, got {extent[0]}" + ) # Check that assets are accessible once signed for i in col.get_items(): diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index 8e74ae2..3e6b74d 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -59,6 +59,7 @@ def create_session(): def asset_exists(url: str) -> bool: + """Check that the item provided in parameter exists and is accessible.""" sess = create_session() res = sess.get(dinamis_sdk.sign(url), stream=True) if res.status_code == 200: -- GitLab From c5a24c81d7aec560c881f4e048e8af3b739c5439 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 15:59:23 +0100 Subject: [PATCH 25/36] Add test naming is compliant --- theia_dumper/stac.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index 3e6b74d..ca1a8d6 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -27,6 +27,11 @@ class UnconsistentCollectionIDs(Exception): """Inconsistent STAC collection exception.""" +def _check_naming_is_compliant(s: str): + if not s.replace("-", "").replace("_", "").isalnum(): + raise Exception(f"{s} does not only contain alphanumeric or - or _ chars") + + def create_session(): """Create a requests session.""" sess = requests.Session() @@ -235,6 +240,9 @@ class UploadTransactionsHandler(StacTransactionHandler): col_id = item.collection_id target_root_dir = urljoin(self.storage_endpoint, self.storage_bucket) + _check_naming_is_compliant(self.storage_bucket) + _check_naming_is_compliant(item.id) + # Upload assets files for _, asset in item.assets.items(): local_filename = asset.href @@ -303,6 +311,7 @@ class UploadTransactionsHandler(StacTransactionHandler): def publish_collection(self, collection: Collection): """Publish an empty collection.""" + _check_naming_is_compliant(collection.id) post_or_put( url=urljoin(self.stac_endpoint, "/collections"), data=collection.to_dict() ) -- GitLab From 61c64492fd62031f60873e1d641726bd57e67cb7 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 16:49:49 +0100 Subject: [PATCH 26/36] update dinamis --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9b5e0d4..05c6645 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ dependencies = [ "setuptools", "pystac", "pystac_client", - "dinamis_sdk>=0.4.0", + "dinamis_sdk>=0.4.1", "requests", "click", "rich", -- GitLab From ef64942a3205ddfa8eb6c8738befd348d3a7e30d Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 16:52:40 +0100 Subject: [PATCH 27/36] Add max nb of items in request --- tests/test_get.py | 3 +-- theia_dumper/cli.py | 6 +++++- theia_dumper/diff.py | 1 + theia_dumper/stac.py | 24 ++++++++++++------------ 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/tests/test_get.py b/tests/test_get.py index 4465cba..51d30ee 100755 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -7,11 +7,10 @@ handler = stac.StacTransactionHandler( stac_endpoint=cli.DEFAULT_STAC_EP, ) -REMOTE_COL_ID = "sentinel2-l2a-theia" +REMOTE_COL_ID = "spot-6-7-drs" handler.list_collections_display() handler.list_col_items_display(REMOTE_COL_ID) - col_remote = handler.get_remote_col(REMOTE_COL_ID) col_items = handler.list_col_items(REMOTE_COL_ID) diff --git a/theia_dumper/cli.py b/theia_dumper/cli.py index 37ee639..965dc51 100644 --- a/theia_dumper/cli.py +++ b/theia_dumper/cli.py @@ -103,14 +103,18 @@ def list_cols( default=DEFAULT_STAC_EP, ) @click.option("-c", "--col_id", type=str, help="STAC collection ID", required=True) +@click.option( + "-m", "--max_items", type=int, help="Max number of items to display", default=20 +) def list_col_items( stac_endpoint: str, col_id: str, + max_items: int, ): """List collection items.""" StacTransactionHandler( stac_endpoint=stac_endpoint, - ).list_col_items_display(col_id=col_id) + ).list_col_items_display(col_id=col_id, max_items=max_items) @theia_dumper.command(context_settings={"show_default": True}) diff --git a/theia_dumper/diff.py b/theia_dumper/diff.py index cfe69f2..bece718 100644 --- a/theia_dumper/diff.py +++ b/theia_dumper/diff.py @@ -81,6 +81,7 @@ def compare_local_and_upstream( handler: stac.StacTransactionHandler, local_col_path: str, remote_col_id: str | None = None, + max_items=1000, ): """Compare a local and a remote collection. diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index ca1a8d6..8321cde 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -199,24 +199,24 @@ class StacTransactionHandler: for col in cols: print("\t" + col["id"]) - def list_col_items(self, col_id: str): + def list_col_items(self, col_id: str, max_items=10): """List items in a collection.""" logger.info("Listing %s items", col_id) - url = f"{self.stac_endpoint}/collections/{col_id}/items?limit=100" - resp = requests.get( - url, - timeout=5, + api = pystac_client.Client.open( + self.stac_endpoint, + modifier=dinamis_sdk.sign_inplace, ) - if resp.status_code != 200: - logger.warning("Get failed (%s)", resp.text) - return resp.json() + res = api.search(collections=[col_id], max_items=max_items) + items = list(res.items()) + return items - def list_col_items_display(self, col_id: str): + def list_col_items_display(self, col_id: str, max_items=10): """Display in terminal items in a collection.""" - items = self.list_col_items(col_id=col_id)["features"] - print(f"{len(items)} items available") + + items = self.list_col_items(col_id, max_items=max_items) + print(f"{len(items)} items found:") for item in items: - print("\t" + item["id"]) + print("\t" + item.id) def get_remote_col(self, col_id) -> Collection: """Retrieve a remote collection.""" -- GitLab From b4cceeb115113e1ac46cd9b47096585cdb1f153c Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 16:59:52 +0100 Subject: [PATCH 28/36] Fix doc --- theia_dumper/diff.py | 1 - theia_dumper/stac.py | 1 - 2 files changed, 2 deletions(-) diff --git a/theia_dumper/diff.py b/theia_dumper/diff.py index bece718..cfe69f2 100644 --- a/theia_dumper/diff.py +++ b/theia_dumper/diff.py @@ -81,7 +81,6 @@ def compare_local_and_upstream( handler: stac.StacTransactionHandler, local_col_path: str, remote_col_id: str | None = None, - max_items=1000, ): """Compare a local and a remote collection. diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index 8321cde..106d2b1 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -212,7 +212,6 @@ class StacTransactionHandler: def list_col_items_display(self, col_id: str, max_items=10): """Display in terminal items in a collection.""" - items = self.list_col_items(col_id, max_items=max_items) print(f"{len(items)} items found:") for item in items: -- GitLab From 7b4dd411035ed9e7a14eab2759f06a13c1c168e3 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 17:08:31 +0100 Subject: [PATCH 29/36] Rename classes --- tests/test_diff.py | 2 +- tests/test_get.py | 2 +- tests/test_upload.py | 8 ++++---- theia_dumper/cli.py | 14 +++++++------- theia_dumper/diff.py | 2 +- theia_dumper/stac.py | 4 ++-- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/test_diff.py b/tests/test_diff.py index 0cc2269..3e0aebb 100755 --- a/tests/test_diff.py +++ b/tests/test_diff.py @@ -27,7 +27,7 @@ col1.set_self_href(COL1_FILEPATH) col1.save(catalog_type=pystac.CatalogType.RELATIVE_PUBLISHED) diff.compare_local_and_upstream( - stac.StacTransactionHandler(stac.DEFAULT_STAC_EP), + stac.StacTransactionsHandler(stac.DEFAULT_STAC_EP), COL1_FILEPATH, "costarica-sentinel-2-l3-seasonal-spectral-indices-M", ) diff --git a/tests/test_get.py b/tests/test_get.py index 51d30ee..652169c 100755 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -3,7 +3,7 @@ from theia_dumper import stac, cli -handler = stac.StacTransactionHandler( +handler = stac.StacTransactionsHandler( stac_endpoint=cli.DEFAULT_STAC_EP, ) diff --git a/tests/test_upload.py b/tests/test_upload.py index 5fd448b..676da44 100755 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -23,7 +23,7 @@ items_ids = ["item_1", "item_2"] RASTER_FILE1 = "/tmp/raster1.tif" RASTER_FILE2 = "/tmp/raster2.tif" -handler = stac.UploadTransactionsHandler( +handler = stac.StacUploadTransactionsHandler( stac_endpoint=STAC_EP, storage_endpoint="https://s3-data.meso.umontpellier.fr", storage_bucket="sm1-gdc-tests", @@ -65,9 +65,9 @@ def remote_col_test(expected_bbox): col = api.get_collection(COL_ID) extent = col.extent.spatial.bboxes assert len(extent) == 1 - assert tuple(extent[0]) == tuple(expected_bbox), ( - f"expected BBOX: {expected_bbox}, got {extent[0]}" - ) + assert tuple(extent[0]) == tuple( + expected_bbox + ), f"expected BBOX: {expected_bbox}, got {extent[0]}" # Check that assets are accessible once signed for i in col.get_items(): diff --git a/theia_dumper/cli.py b/theia_dumper/cli.py index 965dc51..1627dca 100644 --- a/theia_dumper/cli.py +++ b/theia_dumper/cli.py @@ -2,8 +2,8 @@ import click from .stac import ( - UploadTransactionsHandler, - StacTransactionHandler, + StacUploadTransactionsHandler, + StacTransactionsHandler, DEFAULT_S3_EP, DEFAULT_STAC_EP, ) @@ -51,7 +51,7 @@ def publish( overwrite: bool, ): """Publish a STAC object (collection or item collection).""" - UploadTransactionsHandler( + StacUploadTransactionsHandler( stac_endpoint=stac_endpoint, storage_endpoint=storage_endpoint, storage_bucket=storage_bucket, @@ -74,7 +74,7 @@ def delete( item_id: str, ): """Delete a STAC object (collection or item).""" - StacTransactionHandler( + StacTransactionsHandler( stac_endpoint=stac_endpoint, ).delete_item_or_col(col_id=col_id, item_id=item_id) @@ -90,7 +90,7 @@ def list_cols( stac_endpoint: str, ): """List collections.""" - StacTransactionHandler( + StacTransactionsHandler( stac_endpoint=stac_endpoint, ).list_collections_display() @@ -112,7 +112,7 @@ def list_col_items( max_items: int, ): """List collection items.""" - StacTransactionHandler( + StacTransactionsHandler( stac_endpoint=stac_endpoint, ).list_col_items_display(col_id=col_id, max_items=max_items) @@ -138,7 +138,7 @@ def collection_diff( remote_id: str | None = None, ): """List collection items.""" - handler = StacTransactionHandler( + handler = StacTransactionsHandler( stac_endpoint=stac_endpoint, ) diff.compare_local_and_upstream(handler, col_path, remote_id) diff --git a/theia_dumper/diff.py b/theia_dumper/diff.py index cfe69f2..dca6120 100644 --- a/theia_dumper/diff.py +++ b/theia_dumper/diff.py @@ -78,7 +78,7 @@ def generate_items_diff( def compare_local_and_upstream( - handler: stac.StacTransactionHandler, + handler: stac.StacTransactionsHandler, local_col_path: str, remote_col_id: str | None = None, ): diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index 106d2b1..7957d10 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -158,7 +158,7 @@ def get_col_items(col: Collection) -> List[Item]: @dataclass -class StacTransactionHandler: +class StacTransactionsHandler: """Handle STAC and storage transactions.""" stac_endpoint: str @@ -227,7 +227,7 @@ class StacTransactionHandler: @dataclass -class UploadTransactionsHandler(StacTransactionHandler): +class StacUploadTransactionsHandler(StacTransactionsHandler): """Handle STAC and storage transactions.""" storage_endpoint: str -- GitLab From 299540c761e9cf9250344ef82b9be836e0a4ec43 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 17:09:55 +0100 Subject: [PATCH 30/36] Fix format --- tests/test_upload.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_upload.py b/tests/test_upload.py index 676da44..5355e78 100755 --- a/tests/test_upload.py +++ b/tests/test_upload.py @@ -65,9 +65,9 @@ def remote_col_test(expected_bbox): col = api.get_collection(COL_ID) extent = col.extent.spatial.bboxes assert len(extent) == 1 - assert tuple(extent[0]) == tuple( - expected_bbox - ), f"expected BBOX: {expected_bbox}, got {extent[0]}" + assert tuple(extent[0]) == tuple(expected_bbox), ( + f"expected BBOX: {expected_bbox}, got {extent[0]}" + ) # Check that assets are accessible once signed for i in col.get_items(): -- GitLab From d44ecc3b0ca4c3f2a3288cb7bc4e3cf8fe375c4c Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Thu, 16 Jan 2025 17:36:01 +0100 Subject: [PATCH 31/36] Update naming tests for asset path --- theia_dumper/stac.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index 7957d10..eb40728 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -27,8 +27,11 @@ class UnconsistentCollectionIDs(Exception): """Inconsistent STAC collection exception.""" -def _check_naming_is_compliant(s: str): - if not s.replace("-", "").replace("_", "").isalnum(): +def _check_naming_is_compliant(s: str, allow_dot=False): + s = s.replace("-", "").replace("_", "") + if allow_dot: + s = s.replace(".", "") + if not s.isalnum(): raise Exception(f"{s} does not only contain alphanumeric or - or _ chars") @@ -247,13 +250,10 @@ class StacUploadTransactionsHandler(StacTransactionsHandler): local_filename = asset.href logger.debug("Local file: %s", local_filename) target_url = local_filename.replace(assets_root_dir, target_root_dir) - # target_url = target_url.replace("raster1", "raster_1") - def url_escape_dangerous_chars(url: str): - # url = url.replace("_", "-") - return url - - target_url = url_escape_dangerous_chars(target_url) + _check_naming_is_compliant( + target_url.replace(target_root_dir + "/", ""), allow_dot=True + ) logger.debug("Target file: %s", target_url) # Skip when target file exists and overwrite is not enabled -- GitLab From bf179a038d372c0b8f9bcfc07f5fdda361718372 Mon Sep 17 00:00:00 2001 From: Cresson Remi <remi.cresson@irstea.fr> Date: Thu, 16 Jan 2025 20:47:48 +0100 Subject: [PATCH 32/36] Apply 1 suggestion(s) to 1 file(s) --- theia_dumper/diff.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/theia_dumper/diff.py b/theia_dumper/diff.py index dca6120..efaf494 100644 --- a/theia_dumper/diff.py +++ b/theia_dumper/diff.py @@ -27,7 +27,6 @@ def collections_defs_are_different(col1: Collection, col2: Collection) -> bool: return True return False - different = False fields = [ "extent.spatial.bboxes", "extent.temporal.intervals", @@ -39,10 +38,10 @@ def collections_defs_are_different(col1: Collection, col2: Collection) -> bool: "providers", "title", ] - for field in fields: - if fields_are_different(col1, col2, field): - different = True - return different + return any( + fields_are_different(col1, col2, field) + for field in fields + ) def generate_items_diff( -- GitLab From b946e51fb39808cc5c0c2c850509f63d7ecc0a96 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Fri, 17 Jan 2025 08:54:34 +0100 Subject: [PATCH 33/36] Switch to regex --- theia_dumper/stac.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index eb40728..e8d6fab 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -1,6 +1,7 @@ """STAC stuff.""" import os +import re from dataclasses import dataclass from typing import List, cast from urllib.parse import urljoin @@ -28,9 +29,9 @@ class UnconsistentCollectionIDs(Exception): def _check_naming_is_compliant(s: str, allow_dot=False): - s = s.replace("-", "").replace("_", "") + s = re.sub(r"[-|_]", r"", s) if allow_dot: - s = s.replace(".", "") + s = re.sub(r".", r"", s) if not s.isalnum(): raise Exception(f"{s} does not only contain alphanumeric or - or _ chars") -- GitLab From fd5d8b9a0e3efe449c7f48c5f0482f872dcb8fa8 Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Fri, 17 Jan 2025 08:57:15 +0100 Subject: [PATCH 34/36] Fix formatting --- theia_dumper/diff.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/theia_dumper/diff.py b/theia_dumper/diff.py index efaf494..52032ee 100644 --- a/theia_dumper/diff.py +++ b/theia_dumper/diff.py @@ -38,10 +38,7 @@ def collections_defs_are_different(col1: Collection, col2: Collection) -> bool: "providers", "title", ] - return any( - fields_are_different(col1, col2, field) - for field in fields - ) + return any(fields_are_different(col1, col2, field) for field in fields) def generate_items_diff( -- GitLab From 29ac47b7d521edef5a45be586e927df79f99455d Mon Sep 17 00:00:00 2001 From: Pablo Boizeau <pablo.boizeau@ird.fr> Date: Fri, 17 Jan 2025 09:16:47 +0100 Subject: [PATCH 35/36] Fix typo --- theia_dumper/stac.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/theia_dumper/stac.py b/theia_dumper/stac.py index e8d6fab..6ea96ee 100644 --- a/theia_dumper/stac.py +++ b/theia_dumper/stac.py @@ -29,11 +29,11 @@ class UnconsistentCollectionIDs(Exception): def _check_naming_is_compliant(s: str, allow_dot=False): - s = re.sub(r"[-|_]", r"", s) + _s = re.sub(r"[-|_]", r"", s) if allow_dot: - s = re.sub(r".", r"", s) - if not s.isalnum(): - raise Exception(f"{s} does not only contain alphanumeric or - or _ chars") + _s = re.sub(r"\.", r"", _s) + if not _s.isalnum(): + raise Exception(f"{_s} does not only contain alphanumeric or - or _ chars") def create_session(): -- GitLab From f8b0959a13236a0097ce895a3e6fb7603f5bc9eb Mon Sep 17 00:00:00 2001 From: DE BOISSIEU FLORIAN <florian.deboissieu@inrae.fr> Date: Fri, 17 Jan 2025 10:51:26 +0100 Subject: [PATCH 36/36] aupdate README with THEIA-MTD --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 18b4aeb..dd6e7a8 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ </p> **Theia-dumper** enables to upload Spatio Temporal Assets Catalogs (STAC) on the -THEIA-MTP geospatial data center. +THEIA-MTD geospatial data center. For more information read the [documentation](https://cdos-pub.pages.mia.inra.fr/theia-dumper). -- GitLab