diff --git a/.gitattributes b/.gitattributes
index 20a9a78..2132354 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +1,5 @@
-assets/model_weights.pth filter=lfs diff=lfs merge=lfs -text
+assets/aircraft_model_weights.pth filter=lfs diff=lfs merge=lfs -text
+assets/ship_model_weights.pth filter=lfs diff=lfs merge=lfs -text
 assets/images/2_planes.tiff filter=lfs diff=lfs merge=lfs -text
+assets/*.pth filter=lfs diff=lfs merge=lfs -text
+assets/images/*.tiff filter=lfs diff=lfs merge=lfs -text
diff --git a/assets/model_weights.pth b/assets/aircraft_model_weights.pth
similarity index 100%
rename from assets/model_weights.pth
rename to assets/aircraft_model_weights.pth
diff --git a/assets/ship_model_weights.pth b/assets/ship_model_weights.pth
new file mode 100644
index 0000000..4ffa7c0
--- /dev/null
+++ b/assets/ship_model_weights.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79818b2ccc31e7b4a91c0e286b2dcfcd1255f06ada952dd919713c7b1977ad17
+size 330024867
diff --git a/conda/environment-py310.yml b/conda/environment-py310.yml
index 67b1d38..e9bc771 100644
--- a/conda/environment-py310.yml
+++ b/conda/environment-py310.yml
@@ -1,4 +1,4 @@
-# Copyright 2023-2024 Amazon.com, Inc. or its affiliates.
+# Copyright 2023-2025 Amazon.com, Inc. or its affiliates.
 
 name: osml_models
 channels:
@@ -7,12 +7,4 @@ dependencies:
   - conda-forge::python=3.10.12
   - conda-forge::gdal=3.7.2
   - conda-forge::proj=9.3.0
-  - pip:
-      - json-logging==1.3.0
-      - boto3==1.34.104
-      - setuptools==68.0.0
-      - argparse==1.4.0
-      - flask==2.3.3
-      - waitress==2.1.2
-      - shapely==2.0.1
-      - matplotlib==3.7.2
+  - conda-forge::numpy=1.26.4
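Note: `ship_model_weights.pth` lands in the repo as a Git LFS pointer; the `oid`/`size` pair above is all that LFS stores in-tree. A quick way to confirm the real weights (and not the three-line pointer file) made it into an image or deployment is to compare the on-disk size and SHA-256 against those values. A minimal sketch; the helper name and its inline constants are illustrative, not part of this change:

```python
import hashlib
import os


def verify_lfs_asset(path: str, expected_sha256: str, expected_size: int) -> bool:
    """Return True if the file matches a Git LFS pointer's oid/size."""
    if os.path.getsize(path) != expected_size:
        return False  # Still the pointer file, or a partial download
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_sha256


# Values copied from the pointer file in this diff
assert verify_lfs_asset(
    "assets/ship_model_weights.pth",
    "79818b2ccc31e7b4a91c0e286b2dcfcd1255f06ada952dd919713c7b1977ad17",
    330024867,
)
```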
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 7223b26..f5a346d 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,62 +1,118 @@
 # Copyright 2023-2025 Amazon.com, Inc. or its affiliates.
 
-# Use NVIDIA's CUDA base image
-FROM nvidia/cuda:11.6.2-cudnn8-devel-ubuntu18.04 AS build-env
+# =============================================================================
+# Base image: Ubuntu 18.04 + CUDA 11.6.2 (devel)
+# =============================================================================
+FROM nvidia/cuda:11.6.2-cudnn8-devel-ubuntu18.04 AS osml_model
 
-# Set AWS to the maintainer
+# Set maintainer label
 LABEL maintainer="Amazon Web Services"
 
-# Enable sudo access for the build session
+# Advertise SageMaker multi-container capability
+LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
+
+# Use root for setup
 USER root
 
-# Update and install core build dependencies
+# =============================================================================
+# Install core build dependencies (incl. TIFF); clean apt lists in same layer
+# =============================================================================
 RUN apt-get update -y \
     && apt-get upgrade -y \
-    && DEBIAN_FRONTEND=noninteractive apt-get install -y --fix-missing --no-install-recommends \
+    && DEBIAN_FRONTEND=noninteractive \
+       apt-get install -y --fix-missing --no-install-recommends \
        software-properties-common build-essential ca-certificates \
        git make cmake wget unzip libtool automake \
        zlib1g-dev libsqlite3-dev pkg-config sqlite3 libcurl4-gnutls-dev \
-       libtiff5-dev
+       libtiff5-dev \
+    && rm -rf /var/lib/apt/lists/*
 
-# Install Miniconda
+# =============================================================================
+# Miniconda
+# =============================================================================
 ARG MINICONDA_VERSION=Miniconda3-latest-Linux-x86_64
 ARG MINICONDA_URL=https://repo.anaconda.com/miniconda/${MINICONDA_VERSION}.sh
-ENV CONDA_TARGET_ENV=osml_model
-RUN wget -c ${MINICONDA_URL} \
-    && chmod +x ${MINICONDA_VERSION}.sh \
-    && ./${MINICONDA_VERSION}.sh -b -f -p /opt/conda \
-    && rm ${MINICONDA_VERSION}.sh \
-    && ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh
-
-# Set our new conda target lib dirs
-ENV PATH=$PATH:/opt/conda/bin
-ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/conda/lib/
-ENV PROJ_LIB=$PROJ_LIB:/opt/conda/share/proj
+ENV CONDA_DIR=/opt/conda
 
-# Copy the conda environment file and create the environment
-COPY conda/environment-py310.yml environment.yml
-
-# Accept Conda TOS before creating the environment
+RUN wget -c ${MINICONDA_URL} \
+    && chmod +x ${MINICONDA_VERSION}.sh \
+    && ./${MINICONDA_VERSION}.sh -b -f -p ${CONDA_DIR} \
+    && rm ${MINICONDA_VERSION}.sh \
+    && ln -s ${CONDA_DIR}/etc/profile.d/conda.sh /etc/profile.d/conda.sh
+
+# Configure environment variables used by common geospatial stacks
+ENV CONDA_TARGET_ENV=osml_models
+ENV PATH=/opt/conda/envs/${CONDA_TARGET_ENV}/bin:/opt/conda/bin:$PATH
+ENV LD_LIBRARY_PATH=/opt/conda/envs/${CONDA_TARGET_ENV}/lib:/opt/conda/envs/${CONDA_TARGET_ENV}/lib/gdal:${LD_LIBRARY_PATH}
+ENV PROJ_LIB=/opt/conda/envs/${CONDA_TARGET_ENV}/share/proj
+
+# =============================================================================
+# Conda environment (py310 + GDAL/PROJ + Detectron2 deps)
+# =============================================================================
+COPY conda/environment-py310.yml /tmp/environment.yml
+
+# Create env and minimize image size
 RUN conda config --set always_yes true && \
     conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main && \
-    conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
-
-RUN conda env create -n ${CONDA_TARGET_ENV} --file environment.yml && \
-    conda clean -afy && \
+    conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r && \
+    conda env create -f /tmp/environment.yml && \
     find /opt/conda/ -follow -type f -name '*.a' -delete && \
     find /opt/conda/ -follow -type f -name '*.pyc' -delete && \
    find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
-    rm -rf /opt/conda/pkgs
+    rm -rf /opt/conda/pkgs && \
+    conda clean -afy
+
+# =============================================================================
+# Entry shim
+#  - Ensure conda env is active for RUN/CMD/ENTRYPOINT
+# =============================================================================
+RUN cat >/entry.sh <<'BASH'
+#!/usr/bin/env bash
+set -eo pipefail
+
+# Activate conda env if available
+if [ -f /opt/conda/etc/profile.d/conda.sh ]; then
+    . /opt/conda/etc/profile.d/conda.sh
+    conda activate "${CONDA_TARGET_ENV:-base}" >/dev/null 2>&1 || true
+fi
+
+# If a command was passed, exec it; otherwise start bash
+if [ "$#" -gt 0 ]; then
+    exec "$@"
+else
+    exec /bin/bash
+fi
+BASH
+RUN chmod +x /entry.sh
+
+# Make subsequent RUN use the activated env
+SHELL ["/entry.sh", "/bin/bash", "-c"]
+
+# Configure .bashrc to drop into a conda env and immediately activate our TARGET env
+# Note this makes python3 default to our conda-managed python version
+RUN conda init && echo 'conda activate "${CONDA_TARGET_ENV:-base}"' >> ~/.bashrc
+
+# =============================================================================
+# PyTorch 1.12.0 (CUDA 11.6 wheels)
+# =============================================================================
+RUN python3 -m pip install --no-cache-dir \
+    torch==1.12.0+cu116 \
+    torchvision==0.13.0+cu116 \
+    -f https://download.pytorch.org/whl/torch_stable.html
 
-# Activate the conda environment and install Python dependencies
-RUN . /opt/conda/etc/profile.d/conda.sh && conda activate ${CONDA_TARGET_ENV} && \
-    python3 -m pip install --no-cache-dir \
+# =============================================================================
+# Detectron2 (build against target Torch/CUDA for SageMaker Endpoints)
+#  - Set arch list for common AWS GPUs
+# =============================================================================
+ENV FORCE_CUDA=1
+ARG TORCH_CUDA_ARCH_LIST="Pascal;Volta;Turing"
+RUN python3 -m pip install --no-cache-dir \
     "fvcore>=0.1.5,<0.1.6" \
     iopath==0.1.8 \
     pycocotools \
     omegaconf==2.1.1 \
     hydra-core==1.1.1 \
-    black==21.4b2 \
     termcolor==1.1.0 \
     matplotlib==3.5.2 \
     yacs==0.1.8 \
@@ -65,69 +121,37 @@ RUN . /opt/conda/etc/profile.d/conda.sh && conda activate ${CONDA_TARGET_ENV} &&
     tqdm==4.62.3 \
     tensorboard==2.8.0 \
     opencv-contrib-python-headless==4.8.0.76 \
-    setuptools==69.5.1
+    setuptools==69.5.1 \
+    'git+https://github.com/facebookresearch/detectron2.git'
 
-# Install Torch with GPU support
-RUN . /opt/conda/etc/profile.d/conda.sh && conda activate ${CONDA_TARGET_ENV} && \
-    python3 -m pip install --no-cache-dir \
-    torch==1.12.0+cu116 \
-    torchvision==0.13.0+cu116 \
-    -f https://download.pytorch.org/whl/torch_stable.html
+# Final pip/conda cleanups
+RUN conda clean -afy && python -m pip cache purge
 
-# Install Detectron2
-ENV FORCE_CUDA="1"
-ARG TORCH_CUDA_ARCH_LIST="Pascal;Volta;Turing"
-ENV TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"
-RUN . /opt/conda/etc/profile.d/conda.sh && conda activate ${CONDA_TARGET_ENV} && \
-    python3 -m pip install --no-cache-dir --no-deps 'git+https://github.com/facebookresearch/detectron2.git'
-
-# Clean up unnecessary files
-RUN apt-get clean && \
-    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
-    conda clean -afy && \
-    python -m pip cache purge
-
-# Stage 2: Build the final image
-FROM nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu18.04 AS osml_model
-
-LABEL maintainer="Amazon Web Services"
-# Support multi-container SageMaker endpoints
-LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
-USER root
-
-# Copy only the necessary files from the build environment
-COPY --from=build-env /opt/conda /opt/conda
-
-# Set environment variables
-ENV CONDA_TARGET_ENV="osml_model"
-ENV PATH=$PATH:/opt/conda/bin
-ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/conda/lib/
-ENV PROJ_LIB=$PROJ_LIB:/opt/conda/share/proj
-ENV PYTHONUNBUFFERED=1
-
-# Set up the conda environment
-SHELL ["/opt/conda/bin/conda", "run", "--no-capture-output", "-n", "osml_model", "/bin/bash", "-c"]
-RUN echo 'conda activate "${CONDA_TARGET_ENV:-base}"' >> ~/.bashrc
-
-# Copy model source and install it
-RUN mkdir /home/osml-models
-COPY . /home/osml-models
-
-# Install the application dependencies
+# =============================================================================
+# Application code
+# =============================================================================
 WORKDIR /home/osml-models
-RUN chmod 777 --recursive .
-RUN python3 -m pip install --no-cache-dir .
+RUN mkdir -p /home/osml-models
+COPY . /home/osml-models
+RUN chmod -R 0777 . \
+    && python3 -m pip install --no-cache-dir .
 
-# Expose the necessary ports
+# =============================================================================
+# Runtime
+# =============================================================================
 EXPOSE 8080
 
-# Disable health check
+# Disable healthcheck (external orchestrator/SageMaker handles health)
 HEALTHCHECK NONE
 
-# Set up a user to run the container
-RUN adduser --system --no-create-home --group model
-RUN chown -R model:model ./
+# Drop privileges for runtime
+RUN adduser --system --no-create-home --group model \
+    && chown -R model:model /home/osml-models \
+    && mkdir -p /tmp/iopath_cache && chown model:model /tmp/iopath_cache
 USER model
 
-# Set the entry point
-ENTRYPOINT python3 src/aws/osml/models/$MODEL_SELECTION/app.py
+# Set iopath cache directory to avoid permission warnings
+ENV IOPATH_CACHE_DIR=/tmp/iopath_cache
+
+# Expand MODEL_SELECTION and run the selected app
+ENTRYPOINT /entry.sh python /home/osml-models/src/aws/osml/models/${MODEL_SELECTION}/app.py
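With the PyTorch wheels pinned to cu116 and Detectron2 now compiled inside the same image, a mismatch between the Torch build and the CUDA toolkit is the most common way this Dockerfile breaks silently. A hedged sanity check that could be run inside the built image (the file name and exact assertions are illustrative, not part of this change):

```python
# check_env.py: run as `docker run <image> python check_env.py` (hypothetical name)
import torch
import torchvision
import detectron2

assert torch.__version__.startswith("1.12.0"), torch.__version__
assert torchvision.__version__.startswith("0.13.0"), torchvision.__version__
assert torch.version.cuda == "11.6", torch.version.cuda  # wheels were built for cu116

# A GPU may legitimately be absent; the model apps fall back to CPU mode
print(f"detectron2 {detectron2.__version__}, CUDA available: {torch.cuda.is_available()}")
```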
diff --git a/setup.cfg b/setup.cfg
index e0c29e1..dd4b84e 100755
--- a/setup.cfg
+++ b/setup.cfg
@@ -27,38 +27,29 @@ project_urls =
     Source = https://github.com/awslabs/osml-models
     Tracker = https://github.com/awslabs/osml-models/issues
 classifiers =
-    Programming Language :: Python :: 3
+    Programming Language :: Python :: 3.10
     Operating System :: OS Independent
+    License :: OSI Approved :: MIT License
 
 [options]
 zip_safe = False
-package_dir=
+package_dir =
     =src
-packages=find_namespace:
-python_requires = >=3.9
+packages = find_namespace:
+python_requires = >=3.10
 include_package_data = True
 install_requires =
-    json-logging==1.3.0
-    boto3==1.34.104
-    setuptools==68.0.0
-    argparse==1.4.0
-    flask==2.3.3
-    waitress==2.1.2
-    shapely==2.0.1
-    matplotlib==3.7.2
+    json-logging==1.5.1
+    boto3==1.40.36
+    Flask==3.1.2
+    Waitress==3.0.2
+    Shapely==2.1.1
+    matplotlib==3.10.6
+    tabulate==0.9.0
+    yacs==0.1.8
 
 [options.packages.find]
 where = src
 exclude = test
-
-[options.package_data]
-package_data =
-    = ["py.typed"]
-
-[options.extras_require]
-gdal =
-    gdal>=3.7.0
-test =
-    tox
diff --git a/src/aws/osml/models/aircraft/app.py b/src/aws/osml/models/aircraft/app.py
index 4b22899..f503cd3 100644
--- a/src/aws/osml/models/aircraft/app.py
+++ b/src/aws/osml/models/aircraft/app.py
@@ -1,4 +1,4 @@
-# Copyright 2023-2024 Amazon.com, Inc. or its affiliates.
+# Copyright 2023-2025 Amazon.com, Inc. or its affiliates.
 
 import json
 import os
@@ -30,6 +30,9 @@
 # Create our default flask app
 app = build_flask_app(logger)
 
+# Log application startup
+app.logger.info("Starting aircraft model application...")
+
 
 def build_predictor() -> DefaultPredictor:
     """
@@ -41,7 +44,7 @@ def build_predictor() -> DefaultPredictor:
     # If we can't find a gpu
     if not torch.cuda.is_available():
         cfg.MODEL.DEVICE = "cpu"
-        app.logger.warning("GPU not found, running in CPU mode!")
+        app.logger.info("GPU not found, running in CPU mode!")
     # Set to only expect one class (aircraft)
     cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
     # Set the detection threshold to 90%
@@ -50,11 +53,32 @@
     cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
     # Path to the model weights
     cfg.MODEL.WEIGHTS = os.getenv(
-        os.path.join("MODEL_WEIGHTS"), os.path.join("/home/osml-models/assets/", "model_weights.pth")
+        "MODEL_WEIGHTS", os.path.join("/home/osml-models/assets/", "aircraft_model_weights.pth")
     )
 
-    # Build the detectron2 default predictor
-    return DefaultPredictor(cfg)
+    # Build the detectron2 default predictor with error handling for CPU mode
+    try:
+        # Suppress checkpoint loading warnings for expected shape mismatches
+        import logging
+
+        checkpoint_logger = logging.getLogger("fvcore.common.checkpoint")
+        original_level = checkpoint_logger.level
+        checkpoint_logger.setLevel(logging.ERROR)
+
+        predictor = DefaultPredictor(cfg)
+
+        # Restore original logging level
+        checkpoint_logger.setLevel(original_level)
+
+        return predictor
+    except RuntimeError as e:
+        if "NVIDIA driver" in str(e) or "CUDA" in str(e):
+            app.logger.warning(f"CUDA error detected, forcing CPU mode: {e}")
+            # Force CPU mode and try again
+            cfg.MODEL.DEVICE = "cpu"
+            return DefaultPredictor(cfg)
+        else:
+            raise e
 
 
 def mask_to_polygon(mask: torch.Tensor) -> List[List[float]]:
@@ -202,6 +226,7 @@ def request_to_instances(req: Request) -> Union[Instances, None]:
 
 # Build our aircraft predictor
 aircraft_predictor = build_predictor()
+app.logger.info("Aircraft model predictor initialized successfully!")
 
 
 @app.route("/ping", methods=["GET"])
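The save/raise/restore bracketing around `fvcore.common.checkpoint` above is duplicated verbatim in the new ship app below, and it leaks the raised level if `DefaultPredictor` throws. If a third copy appears, a small context manager would make the pattern exception-safe. A sketch only; `quiet_logger` is a hypothetical helper, not part of this change:

```python
import logging
from contextlib import contextmanager


@contextmanager
def quiet_logger(name: str, level: int = logging.ERROR):
    """Temporarily raise a logger's threshold, restoring it even on error."""
    target = logging.getLogger(name)
    original_level = target.level
    target.setLevel(level)
    try:
        yield target
    finally:
        target.setLevel(original_level)


# Usage mirroring build_predictor():
#     with quiet_logger("fvcore.common.checkpoint"):
#         predictor = DefaultPredictor(cfg)
```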
diff --git a/src/aws/osml/models/server_utils.py b/src/aws/osml/models/server_utils.py
index 127a4a4..09f2b64 100644
--- a/src/aws/osml/models/server_utils.py
+++ b/src/aws/osml/models/server_utils.py
@@ -14,7 +14,7 @@
 gdal.UseExceptions()
 
 
-def build_logger(level: int = logging.WARN) -> logging.Logger:
+def build_logger(level: int = logging.INFO) -> logging.Logger:
     """
     Utility function to create and configure a logger that outputs logs in JSON format.
 
@@ -52,7 +52,7 @@ def setup_server(app: Flask):
     port = int(os.getenv("SAGEMAKER_BIND_TO_PORT", 8080))
 
     # Log all arguments in a single log message
-    app.logger.debug(f"Initializing OSML Model Flask server on port {port}!")
+    app.logger.info(f"Initializing OSML Model Flask server on port {port}!")
 
     # Start the simple web application server using Waitress.
     # Flask's app.run() is only intended to be used in development
@@ -84,7 +84,12 @@ def build_flask_app(logger: logging.Logger) -> Flask:
     app.logger.setLevel(logger.level)
 
     if json_logging._current_framework is None:
+        # Suppress debug messages from json_logging initialization
+        json_logger = logging.getLogger("json_logging")
+        original_level = json_logger.level
+        json_logger.setLevel(logging.WARNING)
         json_logging.init_flask(enable_json=True)
+        json_logger.setLevel(original_level)
 
     return app
diff --git a/src/aws/osml/models/ship/__init__.py b/src/aws/osml/models/ship/__init__.py
new file mode 100644
index 0000000..117962e
--- /dev/null
+++ b/src/aws/osml/models/ship/__init__.py
@@ -0,0 +1 @@
+# Copyright 2023-2025 Amazon.com, Inc. or its affiliates.
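For orientation, the three helpers touched here compose the same way in every model app, and the new ship app below follows exactly this shape; the `/ping` body in this sketch is a stand-in, not the PR's code:

```python
# Illustrative composition of the server utilities
from flask import Response

from aws.osml.models import build_flask_app, build_logger, setup_server

logger = build_logger()        # JSON logs, now defaulting to INFO
app = build_flask_app(logger)  # Flask app wired to json_logging


@app.route("/ping", methods=["GET"])
def healthcheck() -> Response:
    return Response(response="\n", status=200)


if __name__ == "__main__":
    setup_server(app)          # Waitress on SAGEMAKER_BIND_TO_PORT (default 8080)
```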
diff --git a/src/aws/osml/models/ship/app.py b/src/aws/osml/models/ship/app.py
new file mode 100644
index 0000000..771d2e9
--- /dev/null
+++ b/src/aws/osml/models/ship/app.py
@@ -0,0 +1,227 @@
+# Copyright 2023-2025 Amazon.com, Inc. or its affiliates.
+
+import json
+import os
+import uuid
+import warnings
+from typing import Dict, Optional, Union
+
+import numpy as np
+import torch
+from detectron2.engine import DefaultPredictor
+from detectron2.structures.instances import Instances
+from flask import Request, Response, request
+from osgeo import gdal
+
+from aws.osml.models import build_flask_app, build_logger, setup_server
+from aws.osml.models.ship.config import build_config
+
+ENABLE_SEGMENTATION = os.environ.get("ENABLE_SEGMENTATION", "False").lower() == "true"
+ENABLE_FAULT_DETECTION = os.environ.get("ENABLE_FAULT_DETECTION", "False").lower() == "true"
+
+# Enable exceptions for GDAL
+gdal.UseExceptions()
+
+# Create logger instance
+logger = build_logger()
+
+# Create our default flask app
+app = build_flask_app(logger)
+
+# Log application startup
+app.logger.info("Starting ship model application...")
+
+
+def build_predictor() -> DefaultPredictor:
+    """
+    Create a single detection predictor to detect ships
+
+    :return: DefaultPredictor
+    """
+    # Build the ship detector config (a single "ship" class); see config.py
+    cfg = build_config()
+
+    # If we can't find a gpu, set device to CPU after config is built
+    if not torch.cuda.is_available():
+        cfg.MODEL.DEVICE = "cpu"
+        app.logger.info("GPU not found, running in CPU mode!")
+
+    # Build the detectron2 default predictor with error handling for CPU mode
+    try:
+        # Suppress checkpoint loading warnings for expected shape mismatches
+        import logging
+
+        checkpoint_logger = logging.getLogger("fvcore.common.checkpoint")
+        original_level = checkpoint_logger.level
+        checkpoint_logger.setLevel(logging.ERROR)
+
+        predictor = DefaultPredictor(cfg)
+
+        # Restore original logging level
+        checkpoint_logger.setLevel(original_level)
+
+        return predictor
+    except RuntimeError as e:
+        if "NVIDIA driver" in str(e) or "CUDA" in str(e):
+            app.logger.warning(f"CUDA error detected, forcing CPU mode: {e}")
+            # Force CPU mode and try again
+            cfg.MODEL.DEVICE = "cpu"
+            return DefaultPredictor(cfg)
+        else:
+            raise e
+
+
+def instances_to_feature_collection(
+    instances: Instances, image_id: Optional[str] = None
+) -> Dict[str, Union[str, list]]:
+    """
+    Convert Detectron2 detection instances into a GeoJSON FeatureCollection.
+    Each detection is a feature in the collection, including image coordinates,
+    score, and type identifier as feature properties.
+
+    :param instances: Detectron2 result instances
+    :param image_id: Identifier for the processed image (optional)
+    :return: FeatureCollection object containing detections
+    """
+    # Default args are evaluated once at import; generate a fresh id per call
+    if image_id is None:
+        image_id = str(uuid.uuid4())
+    geojson_feature_collection_dict = {"type": "FeatureCollection", "features": []}
+    if instances:
+        # Get the bounding boxes for this image
+        bboxes = instances.pred_boxes.tensor.cpu().numpy().tolist()
+
+        # Get the scores for this image, this model does not support segmentation
+        scores = instances.scores.cpu().numpy().tolist()
+
+        for i in range(len(bboxes)):
+            feature = {
+                "type": "Feature",
+                "geometry": {"type": "Point", "coordinates": [0.0, 0.0]},
+                "id": str(uuid.uuid4()),
+                "properties": {
+                    "bounds_imcoords": bboxes[i],
+                    "detection_score": float(scores[i]),
+                    "feature_types": {"ship": float(scores[i])},
+                    "image_id": image_id,
+                },
+            }
+            app.logger.debug(feature)
+            geojson_feature_collection_dict["features"].append(feature)
+    else:
+        app.logger.debug("No features found!")
+
+    return geojson_feature_collection_dict
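For reference, one feature emitted by `instances_to_feature_collection` looks like the following. Values are illustrative; the `Point` geometry is a fixed placeholder, and consumers read the box from `bounds_imcoords`:

```json
{
  "type": "Feature",
  "geometry": {"type": "Point", "coordinates": [0.0, 0.0]},
  "id": "0b3a2cf0-0000-0000-0000-000000000000",
  "properties": {
    "bounds_imcoords": [102.5, 210.0, 158.5, 266.0],
    "detection_score": 0.87,
    "feature_types": {"ship": 0.87},
    "image_id": "d41d8cd9-0000-0000-0000-000000000000"
  }
}
```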
+
+
+def request_to_instances(req: Request) -> Union[Instances, None]:
+    """
+    Use GDAL to open the image. The binary payload from the HTTP request is used to
+    create an in-memory VFS for GDAL which is then opened to decode the image into
+    a dataset which will give us access to a NumPy array for the pixels. Then
+    use that image to create detectron2 detection instances.
+
+    :param req: Request: the flask request object passed into the SM endpoint
+    :return: Either a set of detectron2 detection instances or nothing
+    """
+    # Set up default variables
+    temp_ds_name = "/vsimem/" + str(uuid.uuid4())
+    gdal_dataset = None
+    instances = None
+    try:
+        # Load the binary memory buffer sent to the model
+        gdal.FileFromMemBuffer(temp_ds_name, req.get_data())
+        gdal_dataset = gdal.Open(temp_ds_name)
+
+        # Read GDAL dataset and convert to a numpy array
+        image_array = gdal_dataset.ReadAsArray()
+
+        # Check if all pixels are zero and raise an exception if so
+        if ENABLE_FAULT_DETECTION:
+            app.logger.debug(f"Image array min: {image_array.min()}, max: {image_array.max()}")
+            if np.all(np.isclose(image_array, 0)):
+                err = "All pixels in the image tile are set to 0."
+                app.logger.error(err)
+                raise Exception(err)
+
+        # Handling of different image shapes
+        if image_array.ndim == 2:  # For grayscale images without a channel dimension
+            # Reshape to add a channel dimension and replicate across 3 channels for RGB
+            image_array = np.stack([image_array] * 3, axis=0)
+        elif image_array.shape[0] == 1:  # For grayscale images with a channel dimension
+            # Replicate the single channel across 3 channels for RGB
+            image_array = np.repeat(image_array, 3, axis=0)
+        elif image_array.shape[0] == 4:  # For images with an alpha channel
+            # Remove the alpha channel
+            image_array = image_array[:3, :, :]
+
+        # Conversion to uint8 (assumes normalized pixels; done after ensuring 3 channels)
+        image_array = (image_array * 255).astype(np.uint8)
+
+        # Transpose the array from (channels, height, width) to (height, width, channels)
+        image = np.transpose(image_array, (1, 2, 0))
+        app.logger.debug(f"Running D2 on image array with shape: {image.shape}")
+
+        # PyTorch can often give warnings about upcoming changes
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            instances = ship_detector(image)["instances"]
+    except Exception as err:
+        app.logger.error(f"Unable to load tile from request: {err}")
+        raise err
+    finally:
+        try:
+            if gdal_dataset is not None:
+                del gdal_dataset
+            gdal.Unlink(temp_ds_name)
+        except Exception as err:
+            app.logger.warning(f"Unable to cleanup gdal dataset: {err}")
+
+    return instances
+
+
+# Build our ship predictor
+ship_detector = build_predictor()
+app.logger.info("Ship model predictor initialized successfully!")
+
+
+@app.route("/ping", methods=["GET"])
+def healthcheck() -> Response:
+    """
+    This is a health check that will always pass once the model is initialized.
+
+    :return: Successful status code (200) indicates all is well
+    """
+    app.logger.debug("Responding to health check")
+    return Response(response="\n", status=200)
+
+
+@app.route("/invocations", methods=["POST"])
+def predict() -> Response:
+    """
+    This is the model invocation endpoint for the model container's REST
+    API. The binary payload, in this case an image, is taken from the request,
+    parsed to ensure it is a valid image, and run through the ship detector
+    to produce a GeoJSON feature collection of detections.
+
+    :return: Response: Contains the GeoJSON results or an error status
+    """
+    app.logger.debug("Invoking model endpoint using the Detectron2 Ship Model!")
+    try:
+        # Load the image into memory and get detection instances
+        app.logger.debug("Loading image request.")
+        instances = request_to_instances(request)
+
+        # Generate a geojson feature collection that we can return
+        geojson_detects = instances_to_feature_collection(instances)
+        app.logger.debug(f"Sending geojson to requester: {json.dumps(geojson_detects)}")
+
+        # Send back the detections
+        return Response(response=json.dumps(geojson_detects), status=200)
+    except Exception as err:
+        app.logger.error(err)
+        return Response(response="Unable to process request!", status=500)
+
+
+# pragma: no cover
+if __name__ == "__main__":
+    setup_server(app)
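The shape handling in `request_to_instances` covers three input layouts plus the pass-through case; pulled out on its own it can be unit-tested cheaply. A sketch under the same assumptions (band-first arrays, as `ReadAsArray` returns); `to_rgb_chw` is a hypothetical name:

```python
import numpy as np


def to_rgb_chw(image_array: np.ndarray) -> np.ndarray:
    """Mirror of the tile normalization above: return a 3-band (C, H, W) array."""
    if image_array.ndim == 2:      # grayscale, no channel dimension
        return np.stack([image_array] * 3, axis=0)
    if image_array.shape[0] == 1:  # grayscale with a channel dimension
        return np.repeat(image_array, 3, axis=0)
    if image_array.shape[0] == 4:  # RGBA: drop the alpha band
        return image_array[:3, :, :]
    return image_array             # already 3-band


for shape in [(64, 64), (1, 64, 64), (4, 64, 64), (3, 64, 64)]:
    assert to_rgb_chw(np.zeros(shape)).shape == (3, 64, 64)
```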
diff --git a/src/aws/osml/models/ship/config.py b/src/aws/osml/models/ship/config.py
new file mode 100644
index 0000000..32c72db
--- /dev/null
+++ b/src/aws/osml/models/ship/config.py
@@ -0,0 +1,36 @@
+# Copyright 2025 Amazon.com, Inc. or its affiliates.
+
+"""Detectron2 configuration module for ship detection on SAR imagery.
+
+This configuration uses the Faster R-CNN ResNet-50 FPN backbone from the COCO model zoo,
+with performance optimizations for AWS p3.2xlarge or similar environments.
+"""
+
+from detectron2 import model_zoo
+from detectron2.config import get_cfg
+
+
+def build_config():
+    """Set up Detectron2 config optimized for 2048×2048 tile inputs using the R_50_FPN backbone.
+
+    Returns:
+        Configured Detectron2 config object
+    """
+    # -----------------------------
+    # Config: Faster R-CNN R50-FPN
+    # -----------------------------
+    cfg = get_cfg()
+    cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
+    cfg.DATASETS.TRAIN = ("ships_sar",)
+    cfg.DATASETS.TEST = ()
+    cfg.DATALOADER.NUM_WORKERS = 2
+    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
+    cfg.SOLVER.IMS_PER_BATCH = 10
+    cfg.SOLVER.BASE_LR = 0.00025
+    cfg.SOLVER.MAX_ITER = 1000
+    cfg.SOLVER.STEPS = []
+    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512  # default value; good enough for this dataset
+    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
+    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.75
+
+    return cfg
diff --git a/tox.ini b/tox.ini
index ba049e1..b85fec2 100755
--- a/tox.ini
+++ b/tox.ini
@@ -9,7 +9,7 @@
 [tox]
 envlist =
     # Basic configurations: Run the tests for each python version.
-    py{39, 310, 311}
+    py{310, 311}
     # Build and test the docs with sphinx.
     docs
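Finally, an end-to-end smoke test against a locally running container; the host, port, and reuse of the repo's LFS test image are assumptions for illustration:

```python
# Assumes a container started with something like:
#   docker run -p 8080:8080 -e MODEL_SELECTION=ship <image>
import json

import requests

with open("assets/images/2_planes.tiff", "rb") as f:
    payload = f.read()

# Health check first, then a single tile invocation
assert requests.get("http://localhost:8080/ping", timeout=10).status_code == 200

response = requests.post(
    "http://localhost:8080/invocations",
    data=payload,
    headers={"Content-Type": "application/octet-stream"},
    timeout=120,
)
response.raise_for_status()
print(json.dumps(response.json(), indent=2))  # GeoJSON FeatureCollection
```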