
Commit 73c0938

feat: implementing ship detection model
1 parent 6e50c55 commit 73c0938


7 files changed: +301 -3 lines changed


.gitattributes

Lines changed: 4 additions & 1 deletion
@@ -1,2 +1,5 @@
-assets/model_weights.pth filter=lfs diff=lfs merge=lfs -text
+assets/aircraft_model_weights.pth filter=lfs diff=lfs merge=lfs -text
+assets/ship_model_weights.pth filter=lfs diff=lfs merge=lfs -text
 assets/images/2_planes.tiff filter=lfs diff=lfs merge=lfs -text
+assets/*.pth filter=lfs diff=lfs merge=lfs -text
+assets/images/*.tiff filter=lfs diff=lfs merge=lfs -text

assets/aircraft_model_weights.pth

Lines changed: 3 additions & 0 deletions
version https://git-lfs.github.com/spec/v1
oid sha256:e878fef8cb125159a7b2054ff1c02fce274659700261aec2e2a7e0c1b0c37e22
size 351017331

assets/ship_model_weights.pth

Lines changed: 3 additions & 0 deletions
version https://git-lfs.github.com/spec/v1
oid sha256:e878fef8cb125159a7b2054ff1c02fce274659700261aec2e2a7e0c1b0c37e22
size 351017331
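
Both .pth entries are committed as Git LFS pointer files (the version header, oid, and size shown above), not the network weights themselves. A minimal startup-guard sketch that catches an un-fetched pointer before the predictor tries to load it, assuming the container-default asset path used elsewhere in this commit; the helper name is illustrative:

from pathlib import Path

LFS_POINTER_PREFIX = b"version https://git-lfs.github.com/spec/v1"


def is_unfetched_lfs_pointer(weights_path: str) -> bool:
    """Return True if the file still holds a Git LFS pointer instead of real weights."""
    path = Path(weights_path)
    # A pointer file is ~130 bytes; the real weights are ~351 MB per the size field above.
    if not path.is_file() or path.stat().st_size > 1024:
        return False
    return path.read_bytes().startswith(LFS_POINTER_PREFIX)


if is_unfetched_lfs_pointer("/home/osml-models/assets/ship_model_weights.pth"):
    raise RuntimeError("Weights are still an LFS pointer; run `git lfs pull` before serving.")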

src/aws/osml/models/aircraft/app.py

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
-# Copyright 2023-2024 Amazon.com, Inc. or its affiliates.
+# Copyright 2023-2025 Amazon.com, Inc. or its affiliates.
 
 import json
 import os
@@ -50,7 +50,7 @@ def build_predictor() -> DefaultPredictor:
     cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
     # Path to the model weights
     cfg.MODEL.WEIGHTS = os.getenv(
-        os.path.join("MODEL_WEIGHTS"), os.path.join("/home/osml-models/assets/", "model_weights.pth")
+        os.path.join("MODEL_WEIGHTS"), os.path.join("/home/osml-models/assets/", "aircraft_model_weights.pth")
     )
 
     # Build the detectron2 default predictor
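
The functional change for the aircraft model is only the default weights filename. A small sketch of how that path resolves at runtime, using the env-var name and default directory from the diff (everything else is illustrative):

import os

# Default used when the MODEL_WEIGHTS environment variable is not set
DEFAULT_WEIGHTS = os.path.join("/home/osml-models/assets/", "aircraft_model_weights.pth")

# An explicit MODEL_WEIGHTS value overrides the baked-in asset path
weights_path = os.getenv("MODEL_WEIGHTS", DEFAULT_WEIGHTS)
print(f"Loading aircraft model weights from: {weights_path}")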
Lines changed: 1 addition & 0 deletions
# Copyright 2023-2025 Amazon.com, Inc. or its affiliates.

src/aws/osml/models/ship/app.py

Lines changed: 202 additions & 0 deletions
# Copyright 2023-2025 Amazon.com, Inc. or its affiliates.

import json
import os
import uuid
import warnings
from typing import Dict, Optional, Union

import numpy as np
import torch
from detectron2.engine import DefaultPredictor
from detectron2.structures.instances import Instances
from flask import Request, Response, request
from osgeo import gdal

from aws.osml.models import build_flask_app, build_logger, setup_server
from aws.osml.models.ship.config import build_config

ENABLE_SEGMENTATION = os.environ.get("ENABLE_SEGMENTATION", "False").lower() == "true"
ENABLE_FAULT_DETECTION = os.environ.get("ENABLE_FAULT_DETECTION", "False").lower() == "true"

# Enable exceptions for GDAL
gdal.UseExceptions()

# Create logger instance
logger = build_logger()

# Create our default flask app
app = build_flask_app(logger)


def build_predictor() -> DefaultPredictor:
    """
    Create a single detection predictor to detect ships.

    :return: DefaultPredictor
    """
    # Load the ship detection config built for Detectron2
    # (build_config sets the backbone, anchors, and the single "ship" class)
    cfg = build_config()
    # If we can't find a GPU, fall back to CPU so inference can still run
    if not torch.cuda.is_available():
        cfg.MODEL.DEVICE = "cpu"
        app.logger.warning("GPU not found, running in CPU mode!")

    # Build the detectron2 default predictor
    return DefaultPredictor(cfg)


def instances_to_feature_collection(
    instances: Instances, image_id: Optional[str] = str(uuid.uuid4())
) -> Dict[str, Union[str, list]]:
    """
    Convert Detectron2 detection instances into a GeoJSON FeatureCollection.
    Each detection is a feature in the collection, including image coordinates,
    score, and type identifier as feature properties.

    :param instances: Detectron2 result instances
    :param image_id: Identifier for the processed image (optional)
    :return: FeatureCollection object containing detections
    """
    geojson_feature_collection_dict = {"type": "FeatureCollection", "features": []}
    if instances:
        # Get the bounding boxes for this image
        bboxes = instances.pred_boxes.tensor.cpu().numpy().tolist()

        # Get the scores for this image; this model does not support segmentation
        scores = instances.scores.cpu().numpy().tolist()

        for i in range(0, len(bboxes)):
            feature = {
                "type": "Feature",
                "geometry": {"type": "Point", "coordinates": [0.0, 0.0]},
                "id": str(uuid.uuid4()),
                "properties": {
                    "bounds_imcoords": bboxes[i],
                    "detection_score": float(scores[i]),
                    "feature_types": {"ship": float(scores[i])},
                    "image_id": image_id,
                },
            }
            app.logger.debug(feature)
            geojson_feature_collection_dict["features"].append(feature)
    else:
        app.logger.debug("No features found!")

    return geojson_feature_collection_dict


def request_to_instances(req: Request) -> Union[Instances, None]:
    """
    Use GDAL to open the image. The binary payload from the HTTP request is used to
    create an in-memory VFS for GDAL which is then opened to decode the image into
    a dataset which will give us access to a NumPy array for the pixels. Then
    use that image to create detectron2 detection instances.

    :param req: Request: the flask request object passed into the SM endpoint
    :return: Either a set of detectron2 detection instances or nothing
    """
    # Set up default variables
    temp_ds_name = "/vsimem/" + str(uuid.uuid4())
    gdal_dataset = None
    instances = None
    try:
        # Load the binary memory buffer sent to the model
        gdal.FileFromMemBuffer(temp_ds_name, req.get_data())
        gdal_dataset = gdal.Open(temp_ds_name)

        # Read GDAL dataset and convert to a numpy array
        image_array = gdal_dataset.ReadAsArray()

        # Check if all pixels are zero and raise an exception if so
        if ENABLE_FAULT_DETECTION:
            app.logger.debug(f"Image array min: {image_array.min()}, max: {image_array.max()}")
            if np.all(np.isclose(image_array, 0)):
                err = "All pixels in the image tile are set to 0."
                app.logger.error(err)
                raise Exception(err)

        # Handling of different image shapes
        if image_array.ndim == 2:  # For grayscale images without a channel dimension
            # Reshape to add a channel dimension and replicate across 3 channels for RGB
            image_array = np.stack([image_array] * 3, axis=0)
        elif image_array.shape[0] == 1:  # For grayscale images with a channel dimension
            # Replicate the single channel across 3 channels for RGB
            image_array = np.repeat(image_array, 3, axis=0)
        elif image_array.shape[0] == 4:  # For images with an alpha channel
            # Remove the alpha channel
            image_array = image_array[:3, :, :]

        # Conversion to uint8 (ensure this is done after ensuring 3 channels)
        image_array = (image_array * 255).astype(np.uint8)

        # Transpose the array from (channels, height, width) to (height, width, channels)
        image = np.transpose(image_array, (1, 2, 0))
        app.logger.debug(f"Running D2 on image array: {image}")

        # PyTorch can often give warnings about upcoming changes
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            instances = ship_detector(image)["instances"]
    except Exception as err:
        app.logger.error(f"Unable to load tile from request: {err}")
        raise err
    finally:
        try:
            if gdal_dataset is not None:
                if temp_ds_name is not None:
                    gdal.Unlink(temp_ds_name)
                del gdal_dataset
        except Exception as err:
            app.logger.warning(f"Unable to cleanup gdal dataset: {err}")

    return instances


# Build our ship predictor
ship_detector = build_predictor()


@app.route("/ping", methods=["GET"])
def healthcheck() -> Response:
    """
    This is a health check that will always pass as long as the model server is responding.

    :return: Successful status code (200) indicates all is well
    """
    app.logger.debug("Responding to health check")
    return Response(response="\n", status=200)


@app.route("/invocations", methods=["POST"])
def predict() -> Response:
    """
    This is the model invocation endpoint for the model container's REST
    API. The binary payload, in this case an image, is taken from the request,
    decoded, and run through the ship detector; the resulting detections are
    returned as a GeoJSON FeatureCollection.

    :return: Response: Contains the GeoJSON results or an error status
    """
    app.logger.debug("Invoking model endpoint using the Detectron2 Ship Model!")
    try:
        # Load the image into memory and get detection instances
        app.logger.debug("Loading image request.")
        instances = request_to_instances(request)

        # Generate a geojson feature collection that we can return
        geojson_detects = instances_to_feature_collection(instances)
        app.logger.debug(f"Sending geojson to requester: {json.dumps(geojson_detects)}")

        # Send back the detections
        return Response(response=json.dumps(geojson_detects), status=200)
    except Exception as err:
        app.logger.debug(err)
        return Response(response="Unable to process request!", status=500)


if __name__ == "__main__":  # pragma: no cover
    setup_server(app)
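
A minimal client sketch for exercising this endpoint, assuming the container is listening locally on port 8080 (the SageMaker serving convention) and that a sample_tile.tiff image exists; both assumptions are illustrative, not part of this commit:

import json

import requests

with open("sample_tile.tiff", "rb") as f:
    payload = f.read()

resp = requests.post("http://localhost:8080/invocations", data=payload)
resp.raise_for_status()

# The response body is the GeoJSON FeatureCollection built by instances_to_feature_collection
feature_collection = json.loads(resp.text)
for feature in feature_collection["features"]:
    props = feature["properties"]
    # bounds_imcoords holds the detection box in image coordinates; detection_score its confidence
    print(props["bounds_imcoords"], props["detection_score"])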

src/aws/osml/models/ship/config.py

Lines changed: 86 additions & 0 deletions
# Copyright 2025 Amazon.com, Inc. or its affiliates.

"""Detectron2 configuration module for ship detection on high-resolution imagery.

This configuration uses the R_101_DC5_3x backbone for improved receptive field and spatial detail,
with performance optimizations for AWS p3.2xlarge or similar environments.
"""

import os

from detectron2 import model_zoo
from detectron2.config import get_cfg


def build_config():
    """Set up Detectron2 config optimized for 2048×2048 tile inputs using R_101_DC5 backbone.

    Returns:
        Configured Detectron2 config object
    """
    # -----------------------------
    # Config: Faster R-CNN R101-DC5 (better for small objects than FPN R50)
    # -----------------------------
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml"))

    # Path to the model weights
    cfg.MODEL.WEIGHTS = os.getenv(
        os.path.join("MODEL_WEIGHTS"), os.path.join("/home/osml-models/assets/", "ship_model_weights.pth")
    )

    # Datasets
    cfg.DATASETS.TRAIN = ("ship_train",)
    cfg.DATASETS.TEST = ("ship_test",)

    # One class
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

    # Anchors: add tiny + elongated ratios for hulls
    cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[72, 96, 160, 256, 384, 512, 704]]
    cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.18, 0.38, 0.71, 1.5, 2.56, 4.3, 6.9]]

    # RPN proposal budget (don’t prune tiny ships too early)
    cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 6000
    cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 4000
    cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 2000
    cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 1000
    cfg.MODEL.RPN.NMS_THRESH = 0.5

    # ROI settings
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 1024
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.45
    cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 7
    cfg.MODEL.ROI_ALIGN_USE_PRECISE_ROI_POOLER = True

    # Input format & sizes (allow light jitter; we’re not hard-locking to 2024 anymore)
    cfg.INPUT.FORMAT = "BGR"
    cfg.INPUT.MIN_SIZE_TRAIN = (1536, 1792, 2024, 2240)
    cfg.INPUT.MAX_SIZE_TRAIN = 2560
    cfg.INPUT.MIN_SIZE_TEST = 2024
    cfg.INPUT.MAX_SIZE_TEST = 2560

    # Sampler: repeat rare positives if dataset is sparse
    cfg.DATALOADER.SAMPLER_TRAIN = "RepeatFactorTrainingSampler"
    cfg.DATALOADER.REPEAT_THRESHOLD = 0.001

    # Mixed precision + grad clip
    cfg.SOLVER.IMS_PER_BATCH = 2  # tune to VRAM
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.WARMUP_ITERS = 1000
    cfg.SOLVER.WARMUP_FACTOR = 0.001
    cfg.SOLVER.WARMUP_METHOD = "linear"
    cfg.SOLVER.MAX_ITER = 500000
    cfg.SOLVER.STEPS = [30000, 45000]
    cfg.SOLVER.AMP.ENABLED = True
    cfg.SOLVER.CLIP_GRADIENTS.ENABLED = True
    cfg.SOLVER.CLIP_GRADIENTS.CLIP_VALUE = 1.0
    cfg.SOLVER.CLIP_GRADIENTS.CLIP_TYPE = "value"

    # Evaluation & checkpoints
    cfg.TEST.AUG.ENABLED = True
    cfg.TEST.DETECTIONS_PER_IMAGE = 500
    cfg.TEST.EVAL_PERIOD = 1000
    cfg.SOLVER.CHECKPOINT_PERIOD = 1000

    return cfg
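
A standalone usage sketch for this config, mirroring how ship/app.py builds its predictor; it assumes detectron2 and OpenCV are installed and that ship_tile.png is a local BGR test image (the path and the score-threshold override are illustrative):

import cv2
import torch
from detectron2.engine import DefaultPredictor

from aws.osml.models.ship.config import build_config

cfg = build_config()
if not torch.cuda.is_available():
    cfg.MODEL.DEVICE = "cpu"  # fall back to CPU when no GPU is present
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.6  # example override of the 0.45 default above

predictor = DefaultPredictor(cfg)
image = cv2.imread("ship_tile.png")  # cv2 returns BGR, matching cfg.INPUT.FORMAT
outputs = predictor(image)["instances"]
print(outputs.pred_boxes.tensor.cpu().numpy(), outputs.scores.cpu().numpy())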
