11# Copyright 2023-2025 Amazon.com, Inc. or its affiliates.
22
3- # Use NVIDIA's CUDA base image
4- FROM nvidia/cuda:11.6.2-cudnn8-devel-ubuntu18.04 AS build-env
3+ # =============================================================================
4+ # Base image: Ubuntu 18.04 + CUDA 11.6.2 + cuDNN 8 (devel)
5+ # =============================================================================
6+ FROM nvidia/cuda:11.6.2-cudnn8-devel-ubuntu18.04 AS osml_model
57
6- # Set AWS to the maintainer
8+ # Set maintainer label
79LABEL maintainer="Amazon Web Services"
810
9- # Enable sudo access for the build session
11+ # Advertise SageMaker multi-container capability
12+ LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
13+
14+ # Use root for setup
1015USER root
1116
12- # Update and install core build dependencies
17+ # =============================================================================
18+ # Install core build dependencies (incl. TIFF); clean apt lists in same layer
19+ # =============================================================================
1320RUN apt-get update -y \
1421 && apt-get upgrade -y \
15- && DEBIAN_FRONTEND=noninteractive apt-get install -y --fix-missing --no-install-recommends \
22+ && DEBIAN_FRONTEND=noninteractive \
23+ apt-get install -y --fix-missing --no-install-recommends \
1624 software-properties-common build-essential ca-certificates \
1725 git make cmake wget unzip libtool automake \
1826 zlib1g-dev libsqlite3-dev pkg-config sqlite3 libcurl4-gnutls-dev \
19- libtiff5-dev
27+ libtiff5-dev \
28+ && rm -rf /var/lib/apt/lists/*
2029
21- # Install Miniconda
30+ # =============================================================================
31+ # Miniconda
32+ # =============================================================================
2233ARG MINICONDA_VERSION=Miniconda3-latest-Linux-x86_64
2334ARG MINICONDA_URL=https://repo.anaconda.com/miniconda/${MINICONDA_VERSION}.sh
24- ENV CONDA_TARGET_ENV=osml_model
25- RUN wget -c ${MINICONDA_URL} \
26- && chmod +x ${MINICONDA_VERSION}.sh \
27- && ./${MINICONDA_VERSION}.sh -b -f -p /opt/conda \
28- && rm ${MINICONDA_VERSION}.sh \
29- && ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh
30-
31- # Set our new conda target lib dirs
32- ENV PATH=$PATH:/opt/conda/bin
33- ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/conda/lib/
34- ENV PROJ_LIB=$PROJ_LIB:/opt/conda/share/proj
35+ ENV CONDA_DIR=/opt/conda
3536
36- # Copy the conda environment file and create the environment
37- COPY conda/environment-py310.yml environment.yml
38-
39- # Accept Conda TOS before creating the environment
37+ RUN wget -c ${MINICONDA_URL} \
38+ && chmod +x ${MINICONDA_VERSION}.sh \
39+ && ./${MINICONDA_VERSION}.sh -b -f -p ${CONDA_DIR} \
40+ && rm ${MINICONDA_VERSION}.sh \
41+ && ln -s ${CONDA_DIR}/etc/profile.d/conda.sh /etc/profile.d/conda.sh
42+
43+ # Configure environment variables used by common geospatial stacks
44+ ENV CONDA_TARGET_ENV=osml_models
45+ ENV PATH=/opt/conda/envs/${CONDA_TARGET_ENV}/bin:/opt/conda/bin:$PATH
46+ ENV LD_LIBRARY_PATH=/opt/conda/envs/${CONDA_TARGET_ENV}/lib:/opt/conda/envs/${CONDA_TARGET_ENV}/lib/gdal:${LD_LIBRARY_PATH}
47+ ENV PROJ_LIB=/opt/conda/share/proj:$PROJ_LIB
48+
49+ # =============================================================================
50+ # Conda environment (py310 + GDAL/PROJ + D2 Deps)
51+ # =============================================================================
52+ COPY conda/environment-py310.yml /tmp/environment.yml
53+
54+ # Create the env under the name in CONDA_TARGET_ENV (so PATH/LD_LIBRARY_PATH above resolve) and minimize image size
4055RUN conda config --set always_yes true && \
4156 conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main && \
42- conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
43-
44- RUN conda env create -n ${CONDA_TARGET_ENV} --file environment.yml && \
45- conda clean -afy && \
57+ conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r && \
58+ conda env create -n "${CONDA_TARGET_ENV}" -f /tmp/environment.yml && \
4659 find /opt/conda/ -follow -type f -name '*.a' -delete && \
4760 find /opt/conda/ -follow -type f -name '*.pyc' -delete && \
4861 find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
49- rm -rf /opt/conda/pkgs
62+ rm -rf /opt/conda/pkgs && \
63+ conda clean -afy
64+
65+ # =============================================================================
66+ # Entry shim
67+ # - Ensure conda env is active for RUN/CMD/ENTRYPOINT
68+ # =============================================================================
69+ RUN cat >/entry.sh <<'BASH'
70+ #!/usr/bin/env bash
71+ set -eo pipefail
72+
73+ # Activate conda env if available; warn on stderr (but keep going) if activation fails
74+ if [ -f /opt/conda/etc/profile.d/conda.sh ]; then
75+ . /opt/conda/etc/profile.d/conda.sh
76+ conda activate "${CONDA_TARGET_ENV:-base}" >/dev/null 2>&1 || echo "entry.sh: failed to activate conda env '${CONDA_TARGET_ENV:-base}'" >&2
77+ fi
78+
79+ # If a command was passed, exec it; otherwise start bash
80+ if [ "$#" -gt 0 ]; then
81+ exec "$@"
82+ else
83+ exec /bin/bash
84+ fi
85+ BASH
86+ RUN chmod +x /entry.sh
87+
88+ # Make subsequent RUN use the activated env
89+ SHELL ["/entry.sh" , "/bin/bash" , "-c" ]
90+
91+
92+ # Configure .bashrc to drop into a conda env and immediately activate our TARGET env
93+ # Note this makes python3 default to our conda managed python version
94+ RUN conda init && echo 'conda activate "${CONDA_TARGET_ENV:-base}"' >> ~/.bashrc
95+
96+ # =============================================================================
97+ # PyTorch 1.12.0 (CUDA 11.6 wheels)
98+ # =============================================================================
99+ RUN python3 -m pip install --no-cache-dir \
100+ torch==1.12.0+cu116 \
101+ torchvision==0.13.0+cu116 \
102+ -f https://download.pytorch.org/whl/torch_stable.html
50103
51- # Activate the conda environment and install Python dependencies
52- RUN . /opt/conda/etc/profile.d/conda.sh && conda activate ${CONDA_TARGET_ENV} && \
53- python3 -m pip install --no-cache-dir \
104+ # =============================================================================
105+ # Detectron2 (build against target Torch/CUDA for SageMaker Endpoints)
106+ # - Set arch list for common AWS GPUs
107+ # =============================================================================
108+ ENV FORCE_CUDA=1
109+ ARG TORCH_CUDA_ARCH_LIST="Pascal;Volta;Turing"
110+ RUN python3 -m pip install --no-cache-dir \
54111 "fvcore>=0.1.5,<0.1.6" \
55112 iopath==0.1.8 \
56113 pycocotools \
57114 omegaconf==2.1.1 \
58115 hydra-core==1.1.1 \
59- black==21.4b2 \
60116 termcolor==1.1.0 \
61117 matplotlib==3.5.2 \
62118 yacs==0.1.8 \
@@ -65,69 +121,37 @@ RUN . /opt/conda/etc/profile.d/conda.sh && conda activate ${CONDA_TARGET_ENV} &&
65121 tqdm==4.62.3 \
66122 tensorboard==2.8.0 \
67123 opencv-contrib-python-headless==4.8.0.76 \
68- setuptools==69.5.1
124+ setuptools==69.5.1 \
125+ 'git+https://github.com/facebookresearch/detectron2.git'
69126
70- # Install Torch with GPU support
71- RUN . /opt/conda/etc/profile.d/conda.sh && conda activate ${CONDA_TARGET_ENV} && \
72- python3 -m pip install --no-cache-dir \
73- torch==1.12.0+cu116 \
74- torchvision==0.13.0+cu116 \
75- -f https://download.pytorch.org/whl/torch_stable.html
127+ # Final pip/conda cleanups
128+ RUN conda clean -afy && python -m pip cache purge
76129
77- # Install Detectron2
78- ENV FORCE_CUDA="1"
79- ARG TORCH_CUDA_ARCH_LIST="Pascal;Volta;Turing"
80- ENV TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"
81- RUN . /opt/conda/etc/profile.d/conda.sh && conda activate ${CONDA_TARGET_ENV} && \
82- python3 -m pip install --no-cache-dir --no-deps 'git+https://github.com/facebookresearch/detectron2.git'
83-
84- # Clean up unnecessary files
85- RUN apt-get clean && \
86- rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
87- conda clean -afy && \
88- python -m pip cache purge
89-
90- # Stage 2: Build the final image
91- FROM nvidia/cuda:11.6.2-cudnn8-runtime-ubuntu18.04 AS osml_model
92-
93- LABEL maintainer="Amazon Web Services"
94- # Support multi-container SageMaker endpoints
95- LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
96- USER root
97-
98- # Copy only the necessary files from the build environment
99- COPY --from=build-env /opt/conda /opt/conda
100-
101- # Set environment variables
102- ENV CONDA_TARGET_ENV="osml_model"
103- ENV PATH=$PATH:/opt/conda/bin
104- ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/conda/lib/
105- ENV PROJ_LIB=$PROJ_LIB:/opt/conda/share/proj
106- ENV PYTHONUNBUFFERED=1
107-
108- # Set up the conda environment
109- SHELL ["/opt/conda/bin/conda" , "run" , "--no-capture-output" , "-n" , "osml_model" , "/bin/bash" , "-c" ]
110- RUN echo 'conda activate "${CONDA_TARGET_ENV:-base}"' >> ~/.bashrc
111-
112- # Copy model source and install it
113- RUN mkdir /home/osml-models
114- COPY . /home/osml-models
115-
116- # Install the application dependencies
130+ # =============================================================================
131+ # Application code
132+ # =============================================================================
117133WORKDIR /home/osml-models
118- RUN chmod 777 --recursive .
119- RUN python3 -m pip install --no-cache-dir .
134+ RUN mkdir -p /home/osml-models
135+ COPY . /home/osml-models
136+ RUN chmod -R 0777 . \
137+ && python3 -m pip install --no-cache-dir .
120138
121- # Expose the necessary ports
139+ # =============================================================================
140+ # Runtime
141+ # =============================================================================
122142EXPOSE 8080
123143
124- # Disable health check
144+ # Disable healthcheck (external orchestrator/SageMaker handles health)
125145HEALTHCHECK NONE
126146
127- # Set up a user to run the container
128- RUN adduser --system --no-create-home --group model
129- RUN chown -R model:model ./
147+ # Drop privileges for runtime
148+ RUN adduser --system --no-create-home --group model \
149+ && chown -R model:model /home/osml-models \
150+ && mkdir -p /tmp/iopath_cache && chown model:model /tmp/iopath_cache
130151USER model
131152
132- # Set the entry point
133- ENTRYPOINT python3 src/aws/osml/models/$MODEL_SELECTION/app.py
153+ # Set iopath cache directory to avoid permission warnings
154+ ENV IOPATH_CACHE_DIR=/tmp/iopath_cache
155+
156+ # Exec-form entrypoint: /entry.sh activates conda once, bash expands MODEL_SELECTION at runtime and exec's python
157+ ENTRYPOINT ["/entry.sh", "/bin/bash", "-c", "exec python /home/osml-models/src/aws/osml/models/${MODEL_SELECTION}/app.py"]
0 commit comments