diff --git a/.github/workflows/build_and_publish.yml b/.github/workflows/build_and_publish.yml index 19d065c..e5533c9 100644 --- a/.github/workflows/build_and_publish.yml +++ b/.github/workflows/build_and_publish.yml @@ -6,21 +6,13 @@ on: pull_request: jobs: - build-and-push-docker-image: - name: Build Docker image and push to repositories + build-and-push-docker-images: + name: Build and push Docker images runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v4 - - name: Prepare tag - id: prep - run: | - DOCKER_IMAGE=nielsborie/machine-learning-environments - VERSION=`cat ./SNAPSHOT.txt` - TAGS="${DOCKER_IMAGE}:${VERSION}" - echo ::set-output name=tags::${TAGS} - - name: Set up QEMU uses: docker/setup-qemu-action@v3 with: @@ -35,18 +27,16 @@ jobs: with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Build and push - id: docker_build - uses: docker/build-push-action@v5 - with: - builder: ${{ steps.buildx.outputs.name }} - context: "{{defaultContext}}:advanced" - platforms: linux/amd64,linux/arm64 - push: ${{ github.event_name != 'pull_request' }} - tags: ${{ steps.prep.outputs.tags }} - cache-from: type=gha - cache-to: type=gha,mode=max + - name: Clean up unnecessary directories + run: | + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf "/usr/local/share/boost" + sudo rm -rf "$AGENT_TOOLSDIRECTORY" + - name: Build and push image + run: | + make build-all-images - name: Image digest - run: echo ${{ steps.docker_build.outputs.digest }} \ No newline at end of file + # The docker_build step was removed, so list the images built by make instead of echoing its digest output + run: docker images --digests diff --git a/Makefile b/Makefile index 0beab9d..e0ad540 100644 --- a/Makefile +++ b/Makefile @@ -1,47 +1,93 @@ SHELL := /bin/bash -BRANCH_NAME ?= $(shell git branch | grep \* | cut -d ' ' -f2) PROJECT_NAME ?= machine-learning-environments GIT_COMMIT ?= $(shell git rev-parse HEAD) - +PYTHON_INTERPRETER = python3 +PYTHON_VERSION ?= 3.11 +OS_NAME = $(shell uname)
REGISTRY_URL ?= nielsborie -DOCKER_TAG_NAME = $(BRANCH_NAME) -export REGISTRY_URL +LAYER ?= base +BUILDER ?= conda + +ifdef GITHUB_ACTIONS # in CI, keep everything after refs/<kind>/ and map '/' to '-' so the name is a valid image tag + BRANCH_NAME ?= $(shell echo "${GITHUB_REF}" | cut -d'/' -f3- | tr '/' '-') +else # locally, use the current git branch with the same '/' to '-' mapping + BRANCH_NAME ?= $(shell git branch | grep \* | cut -d ' ' -f2 | tr '/' '-') +endif + +ifeq ($(BRANCH_NAME),main) + IMAGE_VERSION := v$(shell cat ./VERSION.txt) +else ifeq ($(BRANCH_NAME),develop) + IMAGE_VERSION := v$(shell cat ./SNAPSHOT.txt) +else + IMAGE_VERSION := $(BRANCH_NAME) +endif + +SUPPORTED_PYTHON_VERSIONS := 3.9 3.10 3.11 3.12 +ALL_LAYERS := base advanced +ALL_BUILDERS := conda mamba .DEFAULT_GOAL:=help help: ## Display this help - @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n\nTargets:\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-10s\033[0m %s\n", $$1, $$2 }' $(MAKEFILE_LIST) + @echo "Usage:" + @echo " make " + @echo "" + @echo "Targets:" + @awk 'BEGIN {FS = ":.*##"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 }' $(MAKEFILE_LIST) ### Docker ### -docker-build: ## Build machine-learning-environments docker image - docker build --force-rm -t ${REGISTRY_URL}/${PROJECT_NAME}:${DOCKER_TAG_NAME} .
- docker tag ${REGISTRY_URL}/${PROJECT_NAME}:${DOCKER_TAG_NAME} ${REGISTRY_URL}/${PROJECT_NAME}:latest +.PHONY: build-all-images build-image docker-push + +build-all-images: ## Build all machine-learning-environments docker images (stops at the first failing build) + @for version in $(SUPPORTED_PYTHON_VERSIONS); do \ + for layer in $(ALL_LAYERS); do \ + $(MAKE) build-image PYTHON_VERSION=$$version LAYER=$$layer IMAGE_VERSION=$(IMAGE_VERSION) || exit 1; \ + done \ + done + +build-image: ## Build a single machine-learning-environments docker image (args : PYTHON_VERSION, LAYER, BUILDER, IMAGE_VERSION) + @echo "PYTHON_VERSION=$(PYTHON_VERSION) PYTHON_RELEASE_VERSION=$$(jq -r '.python."$(PYTHON_VERSION)".release' package.json)" + @real_python_version=$$(jq -er '.python."$(PYTHON_VERSION)".release' package.json) || { echo "Unsupported PYTHON_VERSION=$(PYTHON_VERSION): no release entry in package.json" >&2; exit 1; }; \ + docker build --progress=plain --no-cache --force-rm -t $(REGISTRY_URL)/$(LAYER)-$(BUILDER)-py$(PYTHON_VERSION):$(IMAGE_VERSION) --build-arg PYTHON_RELEASE_VERSION=$$real_python_version --build-arg PYTHON_VERSION=$(PYTHON_VERSION) --build-arg IMAGE_VERSION=$(IMAGE_VERSION) --build-arg BUILDER=$(BUILDER) -f layers/$(LAYER)/$(BUILDER).Dockerfile layers/$(LAYER)/ -docker-push: ## Push machine-learning-environments image to registry - docker push ${REGISTRY_URL}/${PROJECT_NAME}:${DOCKER_TAG_NAME} + +docker-push: ## Push machine-learning-environments image to registry, plus the latest tag on main (args : PYTHON_VERSION, LAYER, BUILDER, IMAGE_VERSION) + docker push $(REGISTRY_URL)/$(LAYER)-$(BUILDER)-py$(PYTHON_VERSION):$(IMAGE_VERSION) if [ "${BRANCH_NAME}" = "main" ]; then \ - docker push ${REGISTRY_URL}/${PROJECT_NAME}:latest; \ + docker tag $(REGISTRY_URL)/$(LAYER)-$(BUILDER)-py$(PYTHON_VERSION):$(IMAGE_VERSION) $(REGISTRY_URL)/$(LAYER)-$(BUILDER)-py$(PYTHON_VERSION):latest && docker push $(REGISTRY_URL)/$(LAYER)-$(BUILDER)-py$(PYTHON_VERSION):latest; \ fi; ### Running environments ### -docker-run: ## Run machine-learning-environments using docker image (args: version=[version]) - ifeq ($(version),) - VERSION=latest - else - VERSION=$(version) - endif - #@echo "▶️ Running ${VERSION} ..."
- docker run --name ML-env -d -p 8887:8888 ${REGISTRY_URL}/${PROJECT_NAME}:${VERSION} +docker-run: ## Run machine-learning-environments using docker image; the container is kept after stop so start/clean work (args : PYTHON_VERSION, LAYER, BUILDER, IMAGE_VERSION) + docker run -it -d --name ML-env $(REGISTRY_URL)/$(LAYER)-$(BUILDER)-py$(PYTHON_VERSION):$(IMAGE_VERSION) + +docker-interactive: ## Enter into the machine-learning-environments container + docker exec -it ML-env /bin/bash start: ## Start the machine-learning-environments container docker start ML-env stop: ## Stop the machine-learning-environments container - docker start ML-env + docker stop ML-env + +clean: ## Remove the machine-learning-environments container + docker rm ML-env + +docker-system-prune: ## Remove unused Docker data with docker system prune + docker system prune + +run-within-container: ## Execute a specified Python file within a pre-started container. (args : SCRIPT_FILE) + @echo "Executing the specified Python file within a pre-started container..." + docker cp "${PWD}"/scripts ML-env:/home + docker exec -it ML-env python /home/scripts/$(SCRIPT_FILE) +run-in-container: ## Execute a specified Python file within a container without requiring prior startup. (args : SCRIPT_FILE) + @echo "Executing the specified Python file within a container without requiring prior startup..."
+ docker run -it --rm -v "${PWD}"/scripts:/home/scripts -w /home $(REGISTRY_URL)/$(LAYER)-$(BUILDER)-py$(PYTHON_VERSION):$(IMAGE_VERSION) -c "python /home/scripts/$(SCRIPT_FILE)" ### RELEASE ### -generate-changelog: ## Generate/Update CHANGELOG.md file +## Generate/Update CHANGELOG.md file +generate-changelog: gitmoji-changelog ### GitHub action test ### diff --git a/SNAPSHOT.txt b/SNAPSHOT.txt index 9786f4f..1e93c1c 100644 --- a/SNAPSHOT.txt +++ b/SNAPSHOT.txt @@ -1 +1 @@ -1.0.1-SNAPSHOT \ No newline at end of file +1.0.1-snapshot \ No newline at end of file diff --git a/advanced/Dockerfile b/advanced/Dockerfile deleted file mode 100644 index cb3a134..0000000 --- a/advanced/Dockerfile +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (c) Niels Borie. - -FROM nielsborie/base-py39 - -LABEL maintainer="Niels BORIE" - -# --- Install h2o -RUN pip install -f http://h2o-release.s3.amazonaws.com/h2o/latest_stable_Py.html h2o - -# Create the environment: -COPY environment.yml . -RUN mamba env update --name py39 --file environment.yml --prune - -# clean up pip cache -RUN rm -rf /root/.cache/pip/* \ No newline at end of file diff --git a/advanced/environment.yml b/advanced/environment.yml deleted file mode 100644 index f73d399..0000000 --- a/advanced/environment.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: py39 -channels: - - conda-forge -dependencies: - - pip - - xgboost - - lightgbm - - catboost - - mlxtend - - gensim - - vowpalwabbit - - hyperopt - - deap - - update_checker - - tqdm - - stopit - - scikit-mdr - - skrebate - - tpot - - yellowbrick - - spacy - - gplearn - - shap - - lime - - gpxpy - - arrow - - haversine - - toolz - - cytoolz - - sacred - - plotly - - missingno - - pandas-profiling - - pip: - - git+https://github.com/nicta/dora.git - - tables - - sklearn-deap - - trueskill - - kmapper - - sexmachine - - heamy - - geohash - - langid - - s2sphere - - fitter - - vida - - delorean - - skope-rules \ No newline at end of file diff --git a/base/.dockerignore
b/base/.dockerignore deleted file mode 100644 index d22fec2..0000000 --- a/base/.dockerignore +++ /dev/null @@ -1,2 +0,0 @@ -# Documentation -README.md \ No newline at end of file diff --git a/base/Dockerfile b/base/Dockerfile deleted file mode 100644 index 9461877..0000000 --- a/base/Dockerfile +++ /dev/null @@ -1,10 +0,0 @@ -FROM continuumio/miniconda3 - -RUN conda install mamba -n base -c conda-forge - -RUN conda create -n py39 python=3.9 - -# Make RUN commands use the new environment: -SHELL ["conda", "run", "--no-capture-output", "-n", "py39", "/bin/bash", "-c"] - -ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "py39", "python"] \ No newline at end of file diff --git a/layers/advanced/conda.Dockerfile b/layers/advanced/conda.Dockerfile new file mode 100644 index 0000000..63fddbc --- /dev/null +++ b/layers/advanced/conda.Dockerfile @@ -0,0 +1,38 @@ +# Build arguments declared before FROM; BUILDER, PYTHON_VERSION and IMAGE_VERSION select the base image in the FROM line below +ARG PYTHON_VERSION +ARG PYTHON_RELEASE_VERSION +ARG IMAGE_VERSION +ARG BUILDER + +FROM nielsborie/base-${BUILDER}-py${PYTHON_VERSION}:${IMAGE_VERSION} as base + +ARG PYTHON_VERSION +ARG PYTHON_RELEASE_VERSION +ENV ENV_NAME=py${PYTHON_VERSION} + +COPY environment.yml .
+RUN sed -i "s/python=[0-9]\+\.[0-9]\+\.[0-9]\+/python=${PYTHON_RELEASE_VERSION}/" environment.yml +RUN /opt/conda/bin/conda env update --name ${ENV_NAME} --file environment.yml + +# Stage 2: Java installation +FROM openjdk:17-jdk-slim as java_backend + +# Set Java in PATH +ENV PATH="/usr/local/openjdk-17/bin:${PATH}" + +# Stage 3: Final stage +FROM base + +# Copy Java binaries from java_backend stage +COPY --from=java_backend /usr/local/openjdk-17 /opt/openjdk-17 + +# Set Java in PATH and LD_LIBRARY_PATH +ENV PATH="/opt/openjdk-17/bin:${PATH}" +ENV LD_LIBRARY_PATH="/opt/openjdk-17/lib/server" + +# Install h2o with pip inside the conda environment, then drop any leftover pip cache +RUN /opt/conda/bin/conda run -n ${ENV_NAME} pip install -f https://h2o-release.s3.amazonaws.com/h2o/latest_stable_Py.html h2o --no-cache-dir && \ + rm -rf /root/.cache/pip/* + +# Set the entry point +ENTRYPOINT ["/bin/bash"] diff --git a/layers/advanced/environment.yml b/layers/advanced/environment.yml new file mode 100644 index 0000000..350223d --- /dev/null +++ b/layers/advanced/environment.yml @@ -0,0 +1,37 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + # version + - python=9.9.9 + # vanilla + - xgboost + - lightgbm + - catboost + - mlxtend + - scikit-mdr + - vowpalwabbit + # interpretability + - shap + - lime + # nlp + - gensim + - spacy + # automl + - hyperopt + - optuna + - tpot + # genetic + - deap + - skrebate + - yellowbrick + - gplearn + # manipulation (dates, GPS, ...)
+ - gpxpy + - arrow + - haversine + - toolz + - cytoolz + - sacred + - missingno diff --git a/layers/advanced/mamba.Dockerfile b/layers/advanced/mamba.Dockerfile new file mode 100644 index 0000000..c6a3a32 --- /dev/null +++ b/layers/advanced/mamba.Dockerfile @@ -0,0 +1,38 @@ +# Define build arguments +ARG PYTHON_VERSION +ARG PYTHON_RELEASE_VERSION +ARG IMAGE_VERSION +ARG BUILDER + +FROM nielsborie/base-${BUILDER}-py${PYTHON_VERSION}:${IMAGE_VERSION} as base + +ARG PYTHON_VERSION +ARG PYTHON_RELEASE_VERSION +ENV ENV_NAME=py${PYTHON_VERSION} + +COPY --chown=$MAMBA_USER:$MAMBA_USER environment.yml . +RUN sed -i "s/python=[0-9]\+\.[0-9]\+\.[0-9]\+/python=${PYTHON_RELEASE_VERSION}/" environment.yml +RUN micromamba install -y -n ${ENV_NAME} -f environment.yml + +# Stage 2: Java installation +FROM openjdk:17-jdk-slim as java_backend + +# Set Java in PATH +ENV PATH="/usr/local/openjdk-17/bin:${PATH}" + +# Stage 3: Final stage +FROM base + +# Copy Java binaries from java_backend stage +COPY --from=java_backend /usr/local/openjdk-17 /opt/openjdk-17 + +# Set Java in PATH and LD_LIBRARY_PATH +ENV PATH="/opt/openjdk-17/bin:${PATH}" +ENV LD_LIBRARY_PATH="/opt/openjdk-17/lib/server" + +# Install h2o with pip inside the environment named by ENV_NAME (not a fixed Python version) +RUN micromamba run -n ${ENV_NAME} pip install -f https://h2o-release.s3.amazonaws.com/h2o/latest_stable_Py.html h2o --no-cache-dir + +USER root +RUN rm -rf /root/.cache/pip/* +USER $MAMBA_USER diff --git a/layers/base/conda.Dockerfile b/layers/base/conda.Dockerfile new file mode 100644 index 0000000..6729514 --- /dev/null +++ b/layers/base/conda.Dockerfile @@ -0,0 +1,18 @@ +FROM continuumio/miniconda3 + +ARG PYTHON_VERSION +ARG PYTHON_RELEASE_VERSION + +ENV ENV_NAME=py${PYTHON_VERSION} + +COPY environment.yml /opt/environment.yml + +RUN /opt/conda/bin/conda create -n py${PYTHON_VERSION} python=${PYTHON_RELEASE_VERSION} -y && \ + /opt/conda/bin/conda init bash && \ + /opt/conda/bin/conda env update -n py${PYTHON_VERSION}
-f /opt/environment.yml && \ + echo "conda activate py${PYTHON_VERSION}" >> ~/.bashrc && \ + /opt/conda/bin/conda clean --all --yes + +ENV PATH=/opt/conda/envs/py${PYTHON_VERSION}/bin:$PATH + +ENTRYPOINT ["/bin/bash"] diff --git a/layers/base/environment.yml b/layers/base/environment.yml new file mode 100644 index 0000000..4ae7995 --- /dev/null +++ b/layers/base/environment.yml @@ -0,0 +1,18 @@ +channels: + - conda-forge + - defaults +dependencies: + # data manipulation + - pandas + - numpy + - pyarrow + # vanilla + - scikit-learn + # plot + - plotly + - bokeh + - seaborn + # other + - ipykernel + - pip + - tqdm diff --git a/layers/base/mamba.Dockerfile b/layers/base/mamba.Dockerfile new file mode 100644 index 0000000..bec3356 --- /dev/null +++ b/layers/base/mamba.Dockerfile @@ -0,0 +1,21 @@ +FROM mambaorg/micromamba + +ARG PYTHON_VERSION +ARG PYTHON_RELEASE_VERSION + +ENV ENV_NAME=py${PYTHON_VERSION} + +COPY --chown=$MAMBA_USER:$MAMBA_USER environment.yml /opt/environment.yml + +RUN micromamba config set extract_threads 1 && \ + micromamba config append channels conda-forge && \ + micromamba create -n ${ENV_NAME} python=${PYTHON_RELEASE_VERSION} -y && \ + micromamba install -y -n ${ENV_NAME} -f /opt/environment.yml && \ + echo "micromamba activate ${ENV_NAME}" >> ~/.bashrc && \ + micromamba clean --all --yes + +ENV PATH=/opt/conda/envs/${ENV_NAME}/bin:$PATH + +WORKDIR /home/mambauser + +ENTRYPOINT ["/bin/bash"] diff --git a/package.json b/package.json index 46bfaf9..a5c1f69 100644 --- a/package.json +++ b/package.json @@ -14,5 +14,19 @@ "bugs": { "url": "https://github.com/nielsborie/machine-learning-environments/issues" }, - "homepage": "https://github.com/nielsborie/machine-learning-environments#readme" + "homepage": "https://github.com/nielsborie/machine-learning-environments#readme", + "python": { + "3.9": { + "release": "3.9.18" + }, + "3.10": { + "release": "3.10.13" + }, + 
"3.11": { + "release": "3.11.8" + }, + "3.12": { + "release": "3.12.2" + } + } } diff --git a/scripts/hello_world.py b/scripts/hello_world.py new file mode 100644 index 0000000..5fcedd2 --- /dev/null +++ b/scripts/hello_world.py @@ -0,0 +1,6 @@ +import pandas as pd + +data = {'foo': ['hello'], 'bar': ['world']} +df = pd.DataFrame(data) + +print(df) diff --git a/scripts/test_h2o.py b/scripts/test_h2o.py new file mode 100644 index 0000000..ce93a65 --- /dev/null +++ b/scripts/test_h2o.py @@ -0,0 +1,3 @@ +import h2o +h2o.init() +print("H2O version:", h2o.__version__)