diff --git a/.binder/postBuild b/.binder/postBuild
new file mode 100644
index 0000000..c33605a
--- /dev/null
+++ b/.binder/postBuild
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+set -e
+
+# This script is called in a Binder context. When it runs, we are inside a git
+# checkout of the scikit-learn/scikit-learn repo. It generates notebooks from
+# the scikit-learn Python examples.
+
+if [[ ! -f /.dockerenv ]]; then
+ echo "This script was written for repo2docker and is supposed to run inside a docker container."
+ echo "Exiting because this script can delete data if run outside of a docker container."
+ exit 1
+fi
+
+# Back up content we need from the scikit-learn repo
+TMP_CONTENT_DIR=/tmp/scikit-learn
+mkdir -p $TMP_CONTENT_DIR
+cp -r examples .binder $TMP_CONTENT_DIR
+# Delete everything in the current directory, including dot files and dot folders
+find . -delete
+
+# Generate notebooks and remove other files from examples folder
+GENERATED_NOTEBOOKS_DIR=.generated-notebooks
+cp -r $TMP_CONTENT_DIR/examples $GENERATED_NOTEBOOKS_DIR
+
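+# sphx_glr_python_to_jupyter.py is the .py-to-.ipynb converter shipped with
+# sphinx-gallery; the '{}' + form hands it the matched example files in batches.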
+find $GENERATED_NOTEBOOKS_DIR -name '*.py' -exec sphx_glr_python_to_jupyter.py '{}' +
+NON_NOTEBOOKS=$(find $GENERATED_NOTEBOOKS_DIR -type f | grep -v '\.ipynb')
+rm -f $NON_NOTEBOOKS
+
+# Put the .binder folder back (may be useful for debugging purposes)
+mv $TMP_CONTENT_DIR/.binder .
+# Final clean up
+rm -rf $TMP_CONTENT_DIR
+
+# This is for compatibility with binder sphinx-gallery integration: this makes
+# sure that the binder links generated by sphinx-gallery are correct even though
+# the repo we use for binder (scikit-learn/scikit-learn) is not the repo of the
+# generated doc (scikit-learn/scikit-learn.github.io)
+mkdir notebooks
+ln -s ../$GENERATED_NOTEBOOKS_DIR notebooks/auto_examples
diff --git a/.binder/requirements.txt b/.binder/requirements.txt
new file mode 100644
index 0000000..bd2b70f
--- /dev/null
+++ b/.binder/requirements.txt
@@ -0,0 +1,10 @@
+--find-links https://pypi.anaconda.org/scientific-python-nightly-wheels/simple/scikit-learn
+--pre
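+# The two option lines above let pip pick up pre-release scikit-learn wheels
+# from the scientific-python nightly index.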
+matplotlib
+scikit-image
+pandas
+seaborn
+Pillow
+sphinx-gallery
+scikit-learn
+polars
diff --git a/.binder/runtime.txt b/.binder/runtime.txt
new file mode 100644
index 0000000..8fdd907
--- /dev/null
+++ b/.binder/runtime.txt
@@ -0,0 +1 @@
+python-3.9
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 0000000..4c40b0c
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,30 @@
+{
+ "name": "gpu-internal",
+ "image": "us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.8_cuda_12.1",
+ "runArgs": [
+ "--gpus=all",
+ "--net=host",
+ "--shm-size=16G"
+ ],
+ "containerEnv": {
+ "BAZEL_REMOTE_CACHE": "1",
+ "SILO_NAME": "cache-silo-${localEnv:USER}-gpuvm"
+ },
+ "initializeCommand": "docker pull us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.8_cuda_12.1",
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "ms-vscode.cpptools-themes",
+ "BazelBuild.vscode-bazel",
+ "DevonDCarew.bazel-code",
+ "StackBuild.bazel-stack-vscode",
+ "StackBuild.bazel-stack-vscode-cc",
+ "xaver.clang-format",
+ "ryanluker.vscode-coverage-gutters",
+ "ms-azuretools.vscode-docker",
+ "ms-python.python"
+ ]
+ }
+ }
+ }
\ No newline at end of file
diff --git a/.devcontainer/gpu-internal/devcontainer.json b/.devcontainer/gpu-internal/devcontainer.json
new file mode 100644
index 0000000..4c40b0c
--- /dev/null
+++ b/.devcontainer/gpu-internal/devcontainer.json
@@ -0,0 +1,30 @@
+{
+ "name": "gpu-internal",
+ "image": "us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.8_cuda_12.1",
+ "runArgs": [
+ "--gpus=all",
+ "--net=host",
+ "--shm-size=16G"
+ ],
+ "containerEnv": {
+ "BAZEL_REMOTE_CACHE": "1",
+ "SILO_NAME": "cache-silo-${localEnv:USER}-gpuvm"
+ },
+ "initializeCommand": "docker pull us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/development:3.8_cuda_12.1",
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "llvm-vs-code-extensions.vscode-clangd",
+ "ms-vscode.cpptools-themes",
+ "BazelBuild.vscode-bazel",
+ "DevonDCarew.bazel-code",
+ "StackBuild.bazel-stack-vscode",
+ "StackBuild.bazel-stack-vscode-cc",
+ "xaver.clang-format",
+ "ryanluker.vscode-coverage-gutters",
+ "ms-azuretools.vscode-docker",
+ "ms-python.python"
+ ]
+ }
+ }
+ }
\ No newline at end of file
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..c296413
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,72 @@
+## Specify settings and behaviors for files
+
+## Set the default behavior, in case contributors don't have core.autocrlf set.
+## Line ending normalization
+## Windows systems use: "crlf" \r\n (carriage return + line feed)
+## Unix-based systems (like Linux and macOS) use: "lf" \n (line feed)
+## End-of-line (EOL) handling: each file is treated as text or binary based on its content
+* text=auto eol=lf
+*.md text eol=lf
+*.py text eol=lf
+*.sh text eol=lf
+
+## Explicitly declare text files
+*.py text
+*.rst text
+*.md text
+*.json text
+*.ipynb text
+*.cfg text
+
+## Baseline images are binary and should not be modified
+*.png binary
+*.jpg binary
+*.pdf binary
+
+## Compressed files are binary and should not be modified
+*.gz binary
+*.npz binary
+*.zip binary
+
+
+## Git LFS for large files
+## specify which files should be stored in Git LFS rather than directly in the Git repository
+# *.jpg filter=lfs diff=lfs merge=lfs -text
+
+## Ignoring changes
+## -diff: exclude a file from the standard Git diff output
+## (e.g., when using git diff); its changes are ignored when tracking changes
+# *.log -diff
+
+## export-ignore: tell Git not to include a file when generating an archive of the repository
+## (e.g., when using git archive); it is ignored during exports
+# secret.txt export-ignore
+
+## Filtering Files
+## filter: run commands that modify files during the checkout and commit processes.
+# *.docx filter=custom-filter
+
+## Custom Merge Drivers
+## merge: define how certain files should be merged during conflicts.
+# *.csv merge=csv-merge-driver
+# *.docx merge=word
+# *.xml merge=union
+# *.txt merge=union
+
+## Attributes for Export
+## export-subst: expand $Format:...$ placeholders with commit/version information on export
+# version: export-subst
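+## e.g. a committed "$Format:%H$" placeholder is replaced with the commit hash by git archive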
+
+## Git diff attributes
+## how files are displayed in git diff output
+# *.css diff=css
+
+## Language-Specific Settings
+## linguist-language: override how GitHub Linguist classifies a file's language
+# *.py linguist-language=Python
+# *.py indent=spaces=4
+# *.js linguist-language=JavaScript
+
+## Identifying file types
+# *.txt text
+# *.jpg binary
\ No newline at end of file
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
new file mode 100644
index 0000000..240e5b8
--- /dev/null
+++ b/.github/CONTRIBUTING.md
@@ -0,0 +1 @@
+Please refer to the [contributing guide](https://scikit-plots.github.io/devel/index.html).
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 0000000..95f46c4
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1,4 @@
+---
+# These are supported funding model platforms
+github: [scikit-plots, numfocus]
+custom: https://numfocus.org/donate-to-scikit-plots
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
new file mode 100644
index 0000000..a20d7d0
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,85 @@
+---
+name: Bug Report
+description: Report a bug or issue with scikit-plots.
+title: "[Bug]: "
+body:
+ - type: textarea
+ id: summary
+ attributes:
+ label: Bug summary
+ description: Describe the bug in 1-2 short sentences
+ validations:
+ required: true
+ - type: textarea
+ id: reproduction
+ attributes:
+ label: Code for reproduction
+ description: >-
+ If possible, please provide a minimum self-contained example.
+ placeholder: Paste your code here. This field is automatically formatted as Python code.
+ render: Python
+ validations:
+ required: true
+ - type: textarea
+ id: actual
+ attributes:
+ label: Actual outcome
+ description: >-
+ Paste the output produced by the code provided above, e.g.
+ console output, images/videos produced by the code, any relevant screenshots/screencasts, etc.
+ validations:
+ required: true
+ - type: textarea
+ id: expected
+ attributes:
+ label: Expected outcome
+ description: Describe (or provide a visual example of) the expected outcome from the code snippet.
+ validations:
+ required: true
+ - type: textarea
+ id: details
+ attributes:
+ label: Additional information
+ description: |
+ - What are the conditions under which this bug happens? input parameters, edge cases, etc?
+ - Has this worked in earlier versions?
+ - Do you know why this bug is happening?
+ - Do you maybe even know a fix?
+ - type: input
+ id: operating-system
+ attributes:
+ label: Operating system
+ description: Windows, OS/X, Arch, Debian, Ubuntu, etc.
+ - type: input
+ id: scikit-plots-version
+ attributes:
+ label: scikit-plots Version
+ description: "From Python prompt: `import scikitplot; print(scikitplot.__version__)`"
+ validations:
+ required: true
+ # - type: input
+ # id: scikit-plots-backend
+ # attributes:
+ # label: scikit-plots Backend
+ # description: "From Python prompt: `import scikitplot; print(scikitplot.get_backend())`"
+ - type: input
+ id: python-version
+ attributes:
+ label: Python version
+ description: "In console: `python --version`"
+ - type: input
+ id: jupyter-version
+ attributes:
+ label: Jupyter version
+ description: "In console: `jupyter notebook --version` or `jupyter lab --version`"
+ - type: dropdown
+ id: install
+ attributes:
+ label: Installation
+ description: How did you install scikit-plots?
+ options:
+ - pip
+ - conda
+ - git checkout
+ - from source (.tar.gz)
+ - Linux package manager
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000..e8cb07a
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,12 @@
+# Reference:
+# https://help.github.com/en/github/building-a-strong-community/configuring-issue-templates-for-your-repository#configuring-the-template-chooser
+---
+blank_issues_enabled: true # default
+contact_links:
+ - name: Question/Support/Other
+ url: https://scikit-plots.github.io/
+ about: If you have a usage question
+ - name: Chat with devs
+ # url: https://gitter.im/matplotlib/matplotlib
+ url: https://gitter.im
+ about: Ask short questions about contributing to scikit-plots
diff --git a/.github/ISSUE_TEMPLATE/documentation.yml b/.github/ISSUE_TEMPLATE/documentation.yml
new file mode 100644
index 0000000..ffdeca5
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/documentation.yml
@@ -0,0 +1,33 @@
+---
+name: Documentation
+description: Create a report to help us improve the documentation
+title: "[Doc]: "
+labels: [Documentation]
+body:
+ - type: input
+ id: link
+ attributes:
+ label: Documentation Link
+ description: >-
+ Link to any documentation or examples that you are referencing. Suggested improvements should be based
+ on [the development version of the docs](https://scikit-plots.github.io/devdocs/)
+ placeholder: https://scikit-plots.github.io/devdocs/...
+ - type: textarea
+ id: problem
+ attributes:
+ label: Problem
+ description: What is missing, unclear, or wrong in the documentation?
+ placeholder: |
+ * I found [...] to be unclear because [...]
+ * [...] made me think that [...] when really it should be [...]
+ * There is no example showing how to do [...]
+ validations:
+ required: true
+ - type: textarea
+ id: improvement
+ attributes:
+ label: Suggested improvement
+ placeholder: |
+        * This line should be changed to say [...]
+ * Include a paragraph explaining [...]
+ * Add a figure showing [...]
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
new file mode 100644
index 0000000..2d3682d
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,27 @@
+---
+name: Feature Request
+description: Suggest something to add to scikit-plots!
+title: "[ENH]: "
+labels: [New feature]
+body:
+ - type: markdown
+ attributes:
+ value: >-
+ Please search the [issues](https://github.com/scikit-plots/scikit-plots/issues) for relevant feature
+ requests before creating a new feature request.
+ - type: textarea
+ id: problem
+ attributes:
+ label: Problem
+ description: Briefly describe the problem this feature will solve. (2-4 sentences)
+ placeholder: |
+ * I'm always frustrated when [...] because [...]
+ * I would like it if [...] happened when I [...] because [...]
+ * Here is a sample image of what I am asking for [...]
+ validations:
+ required: true
+ - type: textarea
+ id: solution
+ attributes:
+ label: Proposed solution
+ description: Describe a way to accomplish the goals of this feature request.
diff --git a/.github/ISSUE_TEMPLATE/maintenance.yml b/.github/ISSUE_TEMPLATE/maintenance.yml
new file mode 100644
index 0000000..6ebb64c
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/maintenance.yml
@@ -0,0 +1,18 @@
+---
+name: Maintenance
+description: Help improve performance, usability and/or consistency.
+title: "[MNT]: "
+labels: [Maintenance]
+body:
+ - type: textarea
+ id: summary
+ attributes:
+ label: Summary
+      description: Please provide 1-2 short sentences that succinctly describe what could be improved.
+ validations:
+ required: true
+ - type: textarea
+ id: fix
+ attributes:
+ label: Proposed fix
+ description: Please describe how you think this could be improved.
diff --git a/.github/ISSUE_TEMPLATE/tag_proposal.yml b/.github/ISSUE_TEMPLATE/tag_proposal.yml
new file mode 100644
index 0000000..2bb856d
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/tag_proposal.yml
@@ -0,0 +1,28 @@
+---
+name: Tag Proposal
+description: Suggest a new tag or subcategory for the gallery of examples
+title: "[Tag]: "
+labels: ["Documentation: tags"]
+body:
+ - type: markdown
+ attributes:
+ value: >-
+ Please search the [tag glossary]() for relevant tags before creating a new tag proposal.
+ - type: textarea
+ id: need
+ attributes:
+ label: Need
+ description: Briefly describe the need this tag will fill. (1-4 sentences)
+ placeholder: |
+ * A tag is needed for examples that share [...]
+ * Existing tags do not work because [...]
+ * Current gallery examples that would use this tag include [...]
+ * Indicate which subcategory this tag falls under, or whether a new subcategory is proposed.
+ validations:
+ required: true
+ - type: textarea
+ id: solution
+ attributes:
+ label: Proposed solution
+ description: >-
+ What should the tag be? All tags are in the format `subcategory: tag`
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..385663e
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,30 @@
+
+
+## PR summary
+
+
+
+## PR checklist
+
+
+- [ ] "closes #0000" is in the body of the PR description to [link the related issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue)
+- [ ] new and changed code is [tested](https://scikit-plots.github.io/devdocs/devel/testing.html)
+- [ ] *Plotting related* features are demonstrated in an [example](https://scikit-plots.github.io/devdocs/devel/document.html#write-examples-and-tutorials)
+- [ ] *New Features* and *API Changes* are noted with a [directive and release note](https://scikit-plots.github.io/devdocs/devel/api_changes.html#announce-changes-deprecations-and-new-features)
+- [ ] Documentation complies with [general](https://scikit-plots.github.io/devdocs/devel/document.html#write-rest-pages) and [docstring](https://scikit-plots.github.io/devdocs/devel/document.html#write-docstrings) guidelines
+
+
diff --git a/.github/codecov.yml b/.github/codecov.yml
new file mode 100644
index 0000000..00e7612
--- /dev/null
+++ b/.github/codecov.yml
@@ -0,0 +1,33 @@
+# Codecov used to be able to find this file anywhere; now we have to tell it
+# manually where to look.
+---
+comment: false
+
+codecov:
+ notify:
+ require_ci_to_pass: false
+
+coverage:
+ status:
+ patch:
+ default:
+ target: 50%
+ if_no_uploads: error
+ if_not_found: success
+ if_ci_failed: error
+ project:
+ default: false
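+      # 'default: false' turns off the single combined project status; the named
+      # 'library' and 'tests' statuses below report source and test coverage separately.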
+ library:
+ target: 50%
+ if_no_uploads: error
+ if_not_found: success
+ if_ci_failed: error
+ paths:
+ - '!lib/.*/tests/.*'
+ tests:
+ target: auto
+ if_no_uploads: error
+ if_not_found: success
+ if_ci_failed: error
+ paths:
+ - 'lib/.*/tests/.*'
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..34902e5
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,11 @@
+---
+version: 2
+updates:
+ - package-ecosystem: "github-actions"
+ directory: "/"
+ schedule:
+ interval: "weekly"
+ groups:
+ actions:
+ patterns:
+ - "*"
diff --git a/.github/labeler.yml b/.github/labeler.yml
new file mode 100644
index 0000000..75adfed
--- /dev/null
+++ b/.github/labeler.yml
@@ -0,0 +1,282 @@
+---
+"CI: Run cibuildwheel":
+ - changed-files:
+ - any-glob-to-any-file: ['.github/workflows/cibuildwheel.yml']
+"CI: Run cygwin":
+ - changed-files:
+ - any-glob-to-any-file: ['.github/workflows/cygwin.yml']
+
+"backend: agg":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'extern/agg24-svn/'
+ - 'lib/matplotlib/backends/_backend_agg.pyi'
+ - 'lib/matplotlib/backends/backend_agg.py*'
+ - 'src/_backend_agg*'
+"backend: cairo":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/backends/backend_*cairo.py*'
+"backend: pdf":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/backends/_backend_pdf_ps.py'
+ - 'lib/matplotlib/backends/backend_pdf.py'
+"backend: pgf":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/backends/backend_pgf.py'
+"backend: ps":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/backends/_backend_pdf_ps.py'
+ - 'lib/matplotlib/backends/backend_ps.py'
+"backend: svg":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/backends/backend_svg.py'
+
+"GUI: gtk":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/backends/_backend_gtk.py*'
+ - 'lib/matplotlib/backends/backend_gtk*'
+"GUI: MacOSX":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/backends/*_macosx.py*'
+ - 'src/_macosx.m'
+"GUI: nbagg":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/backends/*_nbagg*.py*'
+ - 'lib/matplotlib/backends/web_backend/js/nbagg_mpl.js'
+"GUI: Qt":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/backends/backend_qt*'
+ - 'lib/matplotlib/backends/qt_compat.py'
+ - 'lib/matplotlib/backends/qt_editor/**'
+"GUI: tk":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/backends/*backend_tk*'
+ - 'lib/matplotlib/backends/_tkagg.pyi'
+ - 'src/_tkagg.cpp'
+ - 'src/_tkmini.h'
+"GUI: webagg":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/backends/*_webagg*.py*'
+ - 'lib/matplotlib/backends/web_backend/**'
+"GUI: wx":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/backends/backend_wx*.py*'
+
+"Documentation: API":
+ - all:
+ - changed-files:
+ - any-glob-to-any-file:
+ # Also files in lib/**, but we can't be sure those are only documentation.
+ - 'doc/api/**'
+ - all-globs-to-all-files:
+ - '!doc/api/next_api_changes/**'
+
+"Documentation: build":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'doc/conf.py'
+ - 'doc/Makefile'
+ - 'doc/make.bat'
+ - 'doc/sphinxext/**'
+"Documentation: devdocs":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'doc/devel/**'
+"Documentation: examples":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'galleries/examples/**'
+"Documentation: plot types":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'galleries/plot_types/**'
+"Documentation: tutorials":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'galleries/tutorials/**'
+"Documentation: user guide":
+ - all:
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'doc/users/**'
+ - 'galleries/users_explain/**'
+ - all-globs-to-all-files:
+ - '!doc/users/next_whats_new/**'
+
+"topic: animation":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/animation.py*'
+ - 'lib/matplotlib/_animation_data.py*'
+"topic: axes":
+ - changed-files:
+ - any-glob-to-any-file:
+ # Note, axes.py is not included here because it contains many plotting
+ # methods, for which changes would not be considered on topic.
+ - 'lib/matplotlib/axes/_base.py*'
+"topic: canvas and figure manager":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/backend_bases.py*'
+"topic: categorical":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/category.py*'
+"topic: collections and mappables":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/collections.py*'
+"topic: color/color & colormaps":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/colorbar.py*'
+ - 'lib/matplotlib/colors.py*'
+ - 'lib/matplotlib/_color_data.py*'
+ - 'lib/matplotlib/cm.py*'
+ - 'lib/matplotlib/_cm.py*'
+ - 'lib/matplotlib/_cm_listed.py*'
+"topic: contour":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/contour.py*'
+ - 'src/_qhull_wrapper.cpp'
+"topic: date handling":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/dates.py*'
+"topic: figures and subfigures":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/figure.py*'
+"topic: geometry manager":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/_constrained_layout.py*'
+ - 'lib/matplotlib/_layoutgrid.py*'
+ - 'lib/matplotlib/_tight_bbox.py*'
+ - 'lib/matplotlib/_tight_layout.py*'
+ - 'lib/matplotlib/gridspec.py*'
+ - 'lib/matplotlib/layout_engine.py*'
+"topic: hatch":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/hatch.py*'
+"topic: images":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/image.py*'
+ - 'lib/matplotlib/_image.pyi'
+ - 'src/_image_*'
+"topic: legend":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/legend.py*'
+ - 'lib/matplotlib/legend_handler.py*'
+"topic: markers":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/markers.py*'
+"topic: mpl_toolkit":
+ - all:
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/mpl_toolkits/**'
+ - all-globs-to-all-files:
+ - '!lib/mpl_toolkits/mplot3d/**'
+"topic: mplot3d":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/mpl_toolkits/mplot3d/**'
+"topic: path handling":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/path.py*'
+ - 'lib/matplotlib/patheffects.py*'
+ - 'lib/matplotlib/_path.pyi'
+ - 'src/*path*'
+"topic: polar":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/projections/polar.py*'
+"topic: pyplot API":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/pyplot.py'
+ - 'lib/matplotlib/_pylab_helpers.py*'
+"topic: rcparams":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/rcsetup.py*'
+"topic: sankey":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/sankey.py*'
+"topic: sphinx extension":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/sphinxext/**'
+"topic: styles":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/mpl-data/stylelib/**'
+ - 'lib/matplotlib/style/**'
+"topic: table":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/table.py*'
+"topic: text":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/text.py*'
+ - 'lib/matplotlib/textpath.py*'
+"topic: text/fonts":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'src/checkdep_freetype2.c'
+ - 'src/ft2font*'
+"topic: text/mathtext":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/mathtext.py*'
+ - 'lib/matplotlib/_mathtext.py*'
+ - 'lib/matplotlib/_mathtext_data.py*'
+"topic: ticks axis labels":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/axis.py*'
+ - 'lib/matplotlib/ticker.py*'
+"topic: toolbar":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/backend_managers.py*'
+ - 'lib/matplotlib/backend_tools.py*'
+"topic: transforms and scales":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/scale.py*'
+ - 'lib/matplotlib/transforms.py*'
+"topic: tri":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/tri/**'
+ - 'src/tri/**'
+"topic: units and array ducktypes":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/units.py*'
+"topic: widgets/UI":
+ - changed-files:
+ - any-glob-to-any-file:
+ - 'lib/matplotlib/widgets.py*'
diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml
new file mode 100644
index 0000000..0db8c53
--- /dev/null
+++ b/.github/workflows/cibuildwheel.yml
@@ -0,0 +1,239 @@
+---
+name: Build CI wheels
+
+on:
+ # Save CI by only running this on release branches or tags.
+ push:
+ branches:
+ - main
+ - v[0-9]+.[0-9]+.x
+ tags:
+ - v*
+ # Also allow running this action on PRs if requested by applying the
+ # "Run cibuildwheel" label.
+ pull_request:
+ types:
+ - opened
+ - synchronize
+ - reopened
+ - labeled
+
+permissions:
+ contents: read
+
+jobs:
+ build_sdist:
+ if: >-
+ github.event_name == 'push' ||
+ github.event_name == 'pull_request' && (
+ (
+ github.event.action == 'labeled' &&
+ github.event.label.name == 'CI: Run cibuildwheel'
+ ) ||
+ contains(github.event.pull_request.labels.*.name,
+ 'CI: Run cibuildwheel')
+ )
+ name: Build sdist
+ runs-on: ubuntu-20.04
+ outputs:
+ SDIST_NAME: ${{ steps.sdist.outputs.SDIST_NAME }}
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - uses: actions/setup-python@v5
+ name: Install Python
+ with:
+ python-version: '3.10'
+
+ # Something changed somewhere that prevents the downloaded-at-build-time
+ # licenses from being included in built wheels, so pre-download them so
+ # that they exist before the build and are included.
+ - name: Pre-download bundled licenses
+ run: >
+ curl -Lo LICENSE/LICENSE_QHULL
+ https://github.com/qhull/qhull/raw/2020.2/COPYING.txt
+
+ - name: Install dependencies
+ run: python -m pip install build twine
+
+ - name: Build sdist
+ id: sdist
+ run: |
+ python -m build --sdist
+ python ci/export_sdist_name.py
+
+ - name: Check README rendering for PyPI
+ run: twine check dist/*
+
+ - name: Upload sdist result
+ uses: actions/upload-artifact@v4
+ with:
+ name: cibw-sdist
+ path: dist/*.tar.gz
+ if-no-files-found: error
+
+ build_wheels:
+ if: >-
+ github.event_name == 'push' ||
+ github.event_name == 'pull_request' && (
+ (
+ github.event.action == 'labeled' &&
+ github.event.label.name == 'CI: Run cibuildwheel'
+ ) ||
+ contains(github.event.pull_request.labels.*.name,
+ 'CI: Run cibuildwheel')
+ )
+ needs: build_sdist
+ name: Build wheels on ${{ matrix.os }} for ${{ matrix.cibw_archs }}
+ runs-on: ${{ matrix.os }}
+ env:
+ CIBW_BEFORE_BUILD: >-
+ rm -rf {package}/build
+ CIBW_BEFORE_BUILD_WINDOWS: >-
+ pip install delvewheel &&
+ rm -rf {package}/build
+ CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: >-
+ delvewheel repair -w {dest_dir} {wheel}
+ CIBW_AFTER_BUILD: >-
+ twine check {wheel} &&
+ python {package}/ci/check_wheel_licenses.py {wheel}
+ # On Windows, we explicitly request MSVC compilers (as GitHub Action runners have
+ # MinGW on PATH that would be picked otherwise), switch to a static build for
+ # runtimes, but use dynamic linking for `VCRUNTIME140.dll`, `VCRUNTIME140_1.dll`,
+ # and the UCRT. This avoids requiring specific versions of `MSVCP140.dll`, while
+ # keeping shared state with the rest of the Python process/extensions.
+ CIBW_CONFIG_SETTINGS_WINDOWS: >-
+ setup-args="--vsenv"
+ setup-args="-Db_vscrt=mt"
+ setup-args="-Dcpp_link_args=['ucrt.lib','vcruntime.lib','/nodefaultlib:libucrt.lib','/nodefaultlib:libvcruntime.lib']"
+ CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014
+ CIBW_SKIP: "*-musllinux_aarch64"
+ CIBW_TEST_COMMAND: >-
+ python {package}/ci/check_version_number.py
+ MACOSX_DEPLOYMENT_TARGET: "10.12"
+ MPL_DISABLE_FH4: "yes"
+ strategy:
+ matrix:
+ include:
+ - os: ubuntu-20.04
+ cibw_archs: "x86_64"
+ - os: ubuntu-20.04
+ cibw_archs: "aarch64"
+ - os: windows-latest
+ cibw_archs: "auto64"
+ - os: macos-12
+ cibw_archs: "x86_64"
+ - os: macos-14
+ cibw_archs: "arm64"
+
+ steps:
+ - name: Set up QEMU
+ if: matrix.cibw_archs == 'aarch64'
+ uses: docker/setup-qemu-action@v3
+ with:
+ platforms: arm64
+
+ - name: Download sdist
+ uses: actions/download-artifact@v4
+ with:
+ name: cibw-sdist
+ path: dist/
+
+ - name: Build wheels for CPython 3.13
+ uses: pypa/cibuildwheel@bd033a44476646b606efccdd5eed92d5ea1d77ad # v2.20.0
+ with:
+ package-dir: dist/${{ needs.build_sdist.outputs.SDIST_NAME }}
+ env:
+ CIBW_BUILD: "cp313-* cp313t-*"
+ # No free-threading wheels for NumPy; musllinux skipped for main builds also.
+ CIBW_SKIP: "cp313t-win_amd64 *-musllinux_aarch64"
+ CIBW_BUILD_FRONTEND:
+ "pip; args: --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"
+ CIBW_FREE_THREADED_SUPPORT: true
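+      # CIBW_FREE_THREADED_SUPPORT above is what makes cibuildwheel produce the
+      # free-threaded (no-GIL) cp313t builds requested in CIBW_BUILD.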
+ # No free-threading wheels available for aarch64 on Pillow.
+ CIBW_TEST_SKIP: "cp313t-manylinux_aarch64"
+ # We need pre-releases to get the nightly wheels.
+ CIBW_BEFORE_TEST: >-
+ pip install --pre
+ --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple
+ contourpy numpy pillow
+ CIBW_ARCHS: ${{ matrix.cibw_archs }}
+
+ - name: Build wheels for CPython 3.12
+ uses: pypa/cibuildwheel@bd033a44476646b606efccdd5eed92d5ea1d77ad # v2.20.0
+ with:
+ package-dir: dist/${{ needs.build_sdist.outputs.SDIST_NAME }}
+ env:
+ CIBW_BUILD: "cp312-*"
+ CIBW_ARCHS: ${{ matrix.cibw_archs }}
+
+ - name: Build wheels for CPython 3.11
+ uses: pypa/cibuildwheel@bd033a44476646b606efccdd5eed92d5ea1d77ad # v2.20.0
+ with:
+ package-dir: dist/${{ needs.build_sdist.outputs.SDIST_NAME }}
+ env:
+ CIBW_BUILD: "cp311-*"
+ CIBW_ARCHS: ${{ matrix.cibw_archs }}
+
+ - name: Build wheels for CPython 3.10
+ uses: pypa/cibuildwheel@bd033a44476646b606efccdd5eed92d5ea1d77ad # v2.20.0
+ with:
+ package-dir: dist/${{ needs.build_sdist.outputs.SDIST_NAME }}
+ env:
+ CIBW_BUILD: "cp310-*"
+ CIBW_ARCHS: ${{ matrix.cibw_archs }}
+
+ - name: Build wheels for PyPy
+ uses: pypa/cibuildwheel@bd033a44476646b606efccdd5eed92d5ea1d77ad # v2.20.0
+ with:
+ package-dir: dist/${{ needs.build_sdist.outputs.SDIST_NAME }}
+ env:
+ CIBW_BUILD: "pp310-*"
+ CIBW_ARCHS: ${{ matrix.cibw_archs }}
+ # Work around for https://github.com/pypa/setuptools/issues/4571
+ # This can be removed once kiwisolver has wheels for PyPy 3.10
+ # https://github.com/nucleic/kiwi/pull/182
+ CIBW_BEFORE_TEST: >-
+ export PIP_CONSTRAINT=pypy-constraint.txt &&
+ echo "setuptools!=72.2.0" > $PIP_CONSTRAINT &&
+ pip install kiwisolver &&
+ unset PIP_CONSTRAINT
+ if: matrix.cibw_archs != 'aarch64' && matrix.os != 'windows-latest'
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: cibw-wheels-${{ runner.os }}-${{ matrix.cibw_archs }}
+ path: ./wheelhouse/*.whl
+ if-no-files-found: error
+
+ publish:
+ if: github.event_name == 'push' && github.ref_type == 'tag'
+ name: Upload release to PyPI
+ needs: [build_sdist, build_wheels]
+ runs-on: ubuntu-latest
+ environment: release
+ permissions:
+ id-token: write
+ attestations: write
+ contents: read
+ steps:
+ - name: Download packages
+ uses: actions/download-artifact@v4
+ with:
+ pattern: cibw-*
+ path: dist
+ merge-multiple: true
+
+ - name: Print out packages
+ run: ls dist
+
+ - name: Generate artifact attestation for sdist and wheel
+ uses: actions/attest-build-provenance@310b0a4a3b0b78ef57ecda988ee04b132db73ef8 # v1.4.1
+ with:
+ subject-path: dist/matplotlib-*
+
+ - name: Publish package distributions to PyPI
+ uses: pypa/gh-action-pypi-publish@ec4db0b4ddc65acdf4bff5fa45ac92d78b56bdf0 # v1.9.0
diff --git a/.github/workflows/circleci.yml b/.github/workflows/circleci.yml
new file mode 100644
index 0000000..a64b312
--- /dev/null
+++ b/.github/workflows/circleci.yml
@@ -0,0 +1,71 @@
+---
+name: "CircleCI artifact handling"
+on: [status]
+jobs:
+ circleci_artifacts_redirector_job:
+ if: "${{ github.event.context == 'ci/circleci: docs-python3' }}"
+ permissions:
+ statuses: write
+ runs-on: ubuntu-latest
+ name: Run CircleCI artifacts redirector
+ steps:
+ - name: GitHub Action step
+ uses:
+ scientific-python/circleci-artifacts-redirector-action@4e13a10d89177f4bfc8007a7064bdbeda848d8d1 # v1.0.0
+ with:
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
+ api-token: ${{ secrets.CIRCLECI_TOKEN }}
+ artifact-path: 0/doc/build/html/index.html
+ circleci-jobs: docs-python3
+ job-title: View the built docs
+
+ post_warnings_as_review:
+ if: "${{ github.event.context == 'ci/circleci: docs-python3' }}"
+ permissions:
+ contents: read
+ checks: write
+ pull-requests: write
+ runs-on: ubuntu-latest
+ name: Post warnings/errors as review
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Fetch result artifacts
+ id: fetch-artifacts
+ run: |
+ python .circleci/fetch_doc_logs.py "${{ github.event.target_url }}"
+
+ - name: Set up reviewdog
+ if: "${{ steps.fetch-artifacts.outputs.count != 0 }}"
+ uses: reviewdog/action-setup@v1
+ with:
+ reviewdog_version: latest
+
+ - name: Post review
+ if: "${{ steps.fetch-artifacts.outputs.count != 0 }}"
+ env:
+ REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ REVIEWDOG_SKIP_DOGHOUSE: "true"
+ CI_COMMIT: ${{ github.event.sha }}
+ CI_REPO_OWNER: ${{ github.event.repository.owner.login }}
+ CI_REPO_NAME: ${{ github.event.repository.name }}
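+        # The -efm patterns below are Vim-style errorformats: the leading letter of
+        # each severity word is captured by %t (e.g. '%tARNING' matches 'WARNING' as type W).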
+ run: |
+ # The 'status' event does not contain information in the way that
+ # reviewdog expects, so we unset those so it reads from the
+ # environment variables we set above.
+ unset GITHUB_ACTIONS GITHUB_EVENT_PATH
+ cat logs/sphinx-errors-warnings.log | \
+ reviewdog \
+ -efm '%f\:%l: %tEBUG: %m' \
+ -efm '%f\:%l: %tNFO: %m' \
+ -efm '%f\:%l: %tARNING: %m' \
+ -efm '%f\:%l: %tRROR: %m' \
+ -efm '%f\:%l: %tEVERE: %m' \
+ -efm '%f\:%s: %tARNING: %m' \
+ -efm '%f\:%s: %tRROR: %m' \
+ -name=sphinx -tee -fail-on-error=false \
+ -reporter=github-check -filter-mode=nofilter
+ cat logs/sphinx-deprecations.log | \
+ reviewdog \
+ -efm '%f\:%l: %m' \
+ -name=examples -tee -reporter=github-check -filter-mode=nofilter
diff --git a/.github/workflows/clean_pr.yml b/.github/workflows/clean_pr.yml
new file mode 100644
index 0000000..77e49f7
--- /dev/null
+++ b/.github/workflows/clean_pr.yml
@@ -0,0 +1,53 @@
+---
+name: PR cleanliness
+on: [pull_request]
+
+permissions:
+ contents: read
+
+jobs:
+ pr_clean:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: '0'
+ - name: Check for added-and-deleted files
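+        # 'uniq --repeated' keeps only paths that occur more than once in the
+        # added/deleted-filtered log, i.e. files both added and deleted in this PR.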
+ run: |
+ git fetch --quiet origin "$GITHUB_BASE_REF"
+ base="$(git merge-base "origin/$GITHUB_BASE_REF" 'HEAD^2')"
+ ad="$(git log "$base..HEAD^2" --pretty=tformat: --name-status --diff-filter=AD |
+ cut --fields 2 | sort | uniq --repeated)"
+ if [[ -n "$ad" ]]; then
+ printf 'The following files were both added and deleted in this PR:\n%s\n' "$ad"
+ exit 1
+ fi
+ - name: Check for added-and-modified images
+ run: |
+ git fetch --quiet origin "$GITHUB_BASE_REF"
+ base="$(git merge-base "origin/$GITHUB_BASE_REF" 'HEAD^2')"
+ am="$(git log "$base..HEAD^2" --pretty=tformat: --name-status --diff-filter=AM |
+ cut --fields 2 | sort | uniq --repeated |
+ grep -E '\.(png|pdf|ps|eps|svg)' || true)"
+ if [[ -n "$am" ]]; then
+ printf 'The following images were both added and modified in this PR:\n%s\n' "$am"
+ exit 1
+ fi
+ - name: Check for invalid backports to -doc branches
+ if: endsWith(github.base_ref, '-doc')
+ run: |
+ git fetch --quiet origin "$GITHUB_BASE_REF"
+ base="$(git merge-base "origin/$GITHUB_BASE_REF" 'HEAD^2')"
+ lib="$(git log "$base..HEAD^2" --pretty=tformat: --name-status -- lib src |
+ cut --fields 2 | sort || true)"
+ if [[ -n "$lib" ]]; then
+ printf 'Changes to the following files have no effect and should not be backported:\n%s\n' "$lib"
+ exit 1
+ fi
+ - name: Check for branches opened against main
+ if: github.ref_name == 'main'
+ run: |
+ echo 'PR branch should not be main.'
+ echo 'See https://matplotlib.org/devdocs/devel/development_workflow.html#make-a-new-feature-branch'
+ exit 1
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
new file mode 100644
index 0000000..203b0ee
--- /dev/null
+++ b/.github/workflows/codeql-analysis.yml
@@ -0,0 +1,43 @@
+---
+name: "CodeQL"
+
+on:
+ push:
+ branches: [main, v*.x]
+ pull_request:
+ # The branches below must be a subset of the branches above
+ branches: [main]
+ schedule:
+ - cron: '45 19 * * 1'
+
+jobs:
+ analyze:
+ name: Analyze
+ runs-on: ubuntu-latest
+ permissions:
+ actions: read
+ contents: read
+ security-events: write
+
+ strategy:
+ fail-fast: false
+ matrix:
+ language: ['c-cpp', 'javascript', 'python']
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Initialize CodeQL
+ uses: github/codeql-action/init@v3
+ with:
+ languages: ${{ matrix.language }}
+
+ - name: Build compiled code
+ if: matrix.language == 'c-cpp'
+ run: |
+ pip install --user --upgrade pip
+ pip install --user -v .
+
+ - name: Perform CodeQL Analysis
+ uses: github/codeql-action/analyze@v3
diff --git a/.github/workflows/conflictcheck.yml b/.github/workflows/conflictcheck.yml
new file mode 100644
index 0000000..3110839
--- /dev/null
+++ b/.github/workflows/conflictcheck.yml
@@ -0,0 +1,24 @@
+---
+name: "Maintenance"
+on:
+ # So that PRs touching the same files as the push are updated
+ push:
+  # So that the `dirtyLabel` is removed once conflicts are resolved.
+  # We recommend `pull_request_target` so that GitHub secrets are available;
+  # with `pull_request` we wouldn't be able to change labels on fork PRs.
+ pull_request_target:
+ types: [synchronize]
+
+permissions:
+ pull-requests: write
+
+jobs:
+ main:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check if PRs have merge conflicts
+ uses: eps1lon/actions-label-merge-conflict@1b1b1fcde06a9b3d089f3464c96417961dde1168 # v3.0.2
+ with:
+ dirtyLabel: "status: needs rebase"
+ repoToken: "${{ secrets.GITHUB_TOKEN }}"
+ retryMax: 10
diff --git a/.github/workflows/cygwin.yml b/.github/workflows/cygwin.yml
new file mode 100644
index 0000000..6f05bdc
--- /dev/null
+++ b/.github/workflows/cygwin.yml
@@ -0,0 +1,250 @@
+---
+name: Cygwin Tests
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.number }}-${{ github.event.ref }}
+ cancel-in-progress: true
+
+on:
+ push:
+ branches:
+ - main
+ - v[0-9]+.[0-9]+.[0-9x]+
+ tags:
+ - v*
+ paths:
+ - 'src/**'
+ - '.github/workflows/cygwin.yml'
+ pull_request:
+ types:
+ - opened
+ - synchronize
+ - reopened
+ - labeled
+ branches-ignore:
+ - v[0-9]+.[0-9]+.[0-9x]+-doc
+ paths:
+ - 'src/**'
+ - '.github/workflows/cygwin.yml'
+ schedule:
+ # 5:47 UTC on Saturdays
+ - cron: "47 5 * * 6"
+ workflow_dispatch:
+ workflow: "*"
+
+permissions:
+ contents: read
+
+env:
+ NO_AT_BRIDGE: 1 # Necessary for GTK3 interactive test.
+ OPENBLAS_NUM_THREADS: 1
+ PYTHONFAULTHANDLER: 1
+ SHELLOPTS: igncr
+ CYGWIN_NOWINPATH: 1
+ CHERE_INVOKING: 1
+ TMP: /tmp
+ TEMP: /tmp
+
+jobs:
+
+ test-cygwin:
+ runs-on: windows-latest
+ name: Python 3.${{ matrix.python-minor-version }} on Cygwin
+ # Enable these when Cygwin has Python 3.12.
+ if: >-
+ github.event_name == 'workflow_dispatch' ||
+ (false && github.event_name == 'schedule') ||
+ (
+ false &&
+ github.repository == 'scikit-plots/scikit-plots' &&
+ !contains(github.event.head_commit.message, '[ci skip]') &&
+ !contains(github.event.head_commit.message, '[skip ci]') &&
+ !contains(github.event.head_commit.message, '[skip github]') &&
+ !contains(github.event.head_commit.message, '[ci doc]') &&
+ (
+ github.event_name == 'push' ||
+ github.event_name == 'pull_request' &&
+ (
+ (
+ github.event.action == 'labeled' &&
+ github.event.label.name == 'CI: Run cygwin'
+ ) ||
+ contains(github.event.pull_request.labels.*.name, 'CI: Run cygwin')
+ )
+ )
+ )
+ strategy:
+ matrix:
+ python-minor-version: [12]
+
+ steps:
+ - name: Fix line endings
+ run: git config --global core.autocrlf input
+
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - uses: cygwin/cygwin-install-action@v4
+ with:
+ packages: >-
+ ccache gcc-g++ gdb git graphviz libcairo-devel libffi-devel
+ libgeos-devel libQt5Core-devel pkgconf libglib2.0-devel ninja
+ noto-cjk-fonts
+ python3${{ matrix.python-minor-version }}-devel
+ python3${{ matrix.python-minor-version }}-pip
+ python3${{ matrix.python-minor-version }}-wheel
+ python3${{ matrix.python-minor-version }}-setuptools
+ python3${{ matrix.python-minor-version }}-cycler
+ python3${{ matrix.python-minor-version }}-dateutil
+ python3${{ matrix.python-minor-version }}-fonttools
+ python3${{ matrix.python-minor-version }}-imaging
+ python3${{ matrix.python-minor-version }}-kiwisolver
+ python3${{ matrix.python-minor-version }}-numpy
+ python3${{ matrix.python-minor-version }}-packaging
+ python3${{ matrix.python-minor-version }}-pyparsing
+ python3${{ matrix.python-minor-version }}-sip
+ python3${{ matrix.python-minor-version }}-sphinx
+ python-cairo-devel
+ python3${{ matrix.python-minor-version }}-cairo
+ python3${{ matrix.python-minor-version }}-gi
+ python3${{ matrix.python-minor-version }}-scikit-plots
+ xorg-server-extra libxcb-icccm4 libxcb-image0
+ libxcb-keysyms1 libxcb-randr0 libxcb-render-util0
+ libxcb-xinerama0
+ make autoconf autoconf2.5 automake automake1.10 libtool m4
+ libqhull-devel libfreetype-devel
+ libjpeg-devel libwebp-devel
+
+ - name: Set runner username to root and id to 0
+ shell: bash.exe -eo pipefail -o igncr "{0}"
+ # GitHub Actions runs everything as Administrator. I don't
+ # know how to test for this, so set the uid for the CI job so
+ # that the existing unix root detection will work.
+ run: /bin/mkpasswd.exe -c | sed -e "s/$(id -u)/0/" >/etc/passwd
+
+ - name: Mark test repo safe
+ shell: bash.exe -eo pipefail -o igncr "{0}"
+ run: |
+ git.exe config --global --add safe.directory /proc/cygdrive/d/a/scikit-plots/scikit-plots
+ git config --global --add safe.directory /cygdrive/d/a/scikit-plots/scikit-plots
+ C:/cygwin/bin/git.exe config --global --add safe.directory D:/a/scikit-plots/scikit-plots
+ /usr/bin/git config --global --add safe.directory /cygdrive/d/a/scikit-plots/scikit-plots
+
+ - name: Use dash for /bin/sh
+ shell: bash.exe -eo pipefail -o igncr "{0}"
+ run: |
+ ls -l /bin/sh.exe /bin/bash.exe /bin/dash.exe
+ /bin/rm -f /bin/sh.exe || exit 1
+ cp -sf /bin/dash.exe /bin/sh.exe || exit 1
+ ls -l /bin/sh.exe /bin/bash.exe /bin/dash.exe
+ # FreeType build fails with bash, succeeds with dash
+
+ - name: Cache pip
+ uses: actions/cache@v4
+ with:
+ path: C:\cygwin\home\runneradmin\.cache\pip
+ key: Cygwin-py3.${{ matrix.python-minor-version }}-pip-${{ hashFiles('requirements/*/*.txt') }}
+          restore-keys: Cygwin-py3.${{ matrix.python-minor-version }}-pip-
+
+ - name: Cache ccache
+ uses: actions/cache@v4
+ with:
+ path: C:\cygwin\home\runneradmin\.ccache
+ key: Cygwin-py3.${{ matrix.python-minor-version }}-ccache-${{ hashFiles('src/*') }}
+ restore-keys: Cygwin-py3.${{ matrix.python-minor-version }}-ccache-
+
+ - name: Cache Matplotlib
+ uses: actions/cache@v4
+ with:
+ path: |
+ C:\cygwin\home\runneradmin\.cache\scikit-plots
+ !C:\cygwin\home\runneradmin\.cache\scikit-plots\tex.cache
+ !C:\cygwin\home\runneradmin\.cache\scikit-plots\test_cache
+ key: 1-Cygwin-py3.${{ matrix.python-minor-version }}-mpl-${{ github.ref }}-${{ github.sha }}
+ restore-keys: |
+ 1-Cygwin-py3.${{ matrix.python-minor-version }}-mpl-${{ github.ref }}-
+ 1-Cygwin-py3.${{ matrix.python-minor-version }}-mpl-
+
+ - name: Ensure correct Python version
+ shell: bash.exe -eo pipefail -o igncr "{0}"
+ run: |
+ /usr/sbin/alternatives --set python /usr/bin/python3.${{ matrix.python-minor-version }}
+ /usr/sbin/alternatives --set python3 /usr/bin/python3.${{ matrix.python-minor-version }}
+
+ - name: Install Python dependencies
+ shell: bash.exe -eo pipefail -o igncr "{0}"
+ run: |
+ python -m pip install --upgrade pip setuptools wheel
+ python -m pip install kiwisolver 'numpy>=1.22,<1.26' pillow importlib_resources
+ grep -v -F -e psutil requirements/testing/all.txt >requirements_test.txt
+ python -m pip install meson-python pybind11
+ export PATH="/usr/local/bin:$PATH"
+ python -m pip install --no-build-isolation 'contourpy>=1.0.1'
+ python -m pip install --upgrade cycler fonttools \
+ packaging pyparsing python-dateutil setuptools-scm \
+ -r requirements_test.txt sphinx ipython
+ python -m pip install --upgrade pycairo 'cairocffi>=0.8' PyGObject &&
+ python -c 'import gi; gi.require_version("Gtk", "3.0"); from gi.repository import Gtk' &&
+ echo 'PyGObject is available' ||
+ echo 'PyGObject is not available'
+ python -m pip install --upgrade pyqt5 &&
+ python -c 'import PyQt5.QtCore' &&
+ echo 'PyQt5 is available' ||
+ echo 'PyQt5 is not available'
+          python -m pip install --upgrade pyside2 &&
+ python -c 'import PySide2.QtCore' &&
+ echo 'PySide2 is available' ||
+ echo 'PySide2 is not available'
+ python -m pip uninstall --yes wxpython || echo 'wxPython already uninstalled'
+
+ - name: Install scikit-plots
+ shell: bash.exe -eo pipefail -o igncr "{0}"
+ env:
+ AUTOCONF: /usr/bin/autoconf-2.69
+ MAKEFLAGS: dw
+ run: |
+ export PATH="/usr/local/bin:$PATH"
+ ccache -s
+ git describe
+ # All dependencies must have been pre-installed, so that the minver
+ # constraints are held.
+ python -m pip install --no-deps --no-build-isolation --verbose \
+ --config-settings=setup-args="-DrcParams-backend=Agg" \
+ --editable .[dev]
+
+ - name: Find DLLs to rebase
+ shell: bash.exe -eo pipefail -o igncr "{0}"
+ run: |
+ find {/usr,/usr/local}/{bin,lib/python3.*/site-packages} /usr/lib/lapack . \
+ -name \*.exe -o -name \*.dll -print >files_to_rebase.txt
+
+ - name: Rebase DLL list
+ shell: ash.exe "{0}"
+ run: "rebase --database --filelist=files_to_rebase.txt"
+ # Inplace modification of DLLs to assign non-overlapping load
+ # addresses so fork() works as expected. Ash is used as it
+ # does not link against any Cygwin DLLs that might need to be
+ # rebased.
+
+ - name: Check that scikit-plots imports
+ shell: bash.exe -eo pipefail -o igncr "{0}"
+ run: |
+ /usr/bin/python -c "import matplotlib as mpl; import matplotlib.pyplot as plt"
+
+ - name: Set ffmpeg path
+ shell: bash.exe -eo pipefail -o igncr "{0}"
+ run: |
+ oldmplrc=$(python -c "from matplotlib import matplotlib_fname as mplrc_file; print(mplrc_file())")
+ echo "${oldmplrc}"
+ mkdir -p ~/.matplotlib/
+ sed -E \
+ -e 's~#animation\.ffmpeg_path:.+~animation.ffmpeg_path: /usr/bin/ffmpeg.exe~' \
+ "${oldmplrc}" >~/.matplotlib/matplotlibrc
+
+ - name: Run pytest
+ shell: bash.exe -eo pipefail -o igncr "{0}"
+ id: cygwin-run-pytest
+ run: |
+ xvfb-run pytest-3.${{ matrix.python-minor-version }} -rfEsXR -n auto \
+ --maxfail=50 --timeout=300 --durations=25 \
+ --cov-report=term --cov=lib --log-level=DEBUG --color=yes
diff --git a/.github/workflows/do_not_merge.yml b/.github/workflows/do_not_merge.yml
new file mode 100644
index 0000000..dde5bfb
--- /dev/null
+++ b/.github/workflows/do_not_merge.yml
@@ -0,0 +1,30 @@
+---
+name: Do Not Merge
+
+# action to block merging on specific labels
+on:
+ pull_request:
+ types: [synchronize, opened, reopened, labeled, unlabeled]
+
+permissions: {}
+
+jobs:
+ do-not-merge:
+ name: Prevent Merging
+ runs-on: ubuntu-latest
+ env:
+ has_tag: >-
+ ${{contains(github.event.pull_request.labels.*.name, 'status: needs comment/discussion') ||
+ contains(github.event.pull_request.labels.*.name, 'status: waiting for other PR')}}
+ steps:
+ - name: Check for label
+ if: ${{'true' == env.has_tag}}
+ run: |
+ echo "This PR cannot be merged because it has one of the following labels: "
+ echo "* status: needs comment/discussion"
+ echo "* status: waiting for other PR"
+ echo "${{env.has_tag}}"
+ exit 1
+ - name: Allow merging
+ if: ${{'false' == env.has_tag}}
+ run: exit 0
diff --git a/.github/workflows/good-first-issue.yml b/.github/workflows/good-first-issue.yml
new file mode 100644
index 0000000..8905511
--- /dev/null
+++ b/.github/workflows/good-first-issue.yml
@@ -0,0 +1,30 @@
+---
+name: Add comment on good first issues
+on:
+ issues:
+ types:
+ - labeled
+jobs:
+ add-comment:
+ if: github.event.label.name == 'Good first issue'
+ runs-on: ubuntu-latest
+ permissions:
+ issues: write
+ steps:
+ - name: Add comment
+ uses: peter-evans/create-or-update-comment@v4
+ with:
+ issue-number: ${{ github.event.issue.number }}
+ body: |
+ ### Good first issue - notes for new contributors
+
+ This issue is suited to new contributors because it does not require understanding of the
+ Matplotlib internals. To get started, please see our [contributing
+ guide](https://matplotlib.org/stable/devel/index).
+
+ **We do not assign issues**. Check the *Development* section in the sidebar for linked pull
+ requests (PRs). If there are none, feel free to start working on it. If there is an open PR, please
+ collaborate on the work by reviewing it rather than duplicating it in a competing PR.
+
+ If something is unclear, please reach out on any of our [communication
+ channels](https://matplotlib.org/stable/devel/contributing.html#get-connected).
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
new file mode 100644
index 0000000..dc7a071
--- /dev/null
+++ b/.github/workflows/labeler.yml
@@ -0,0 +1,15 @@
+---
+name: "Pull Request Labeler"
+on:
+ - pull_request_target
+
+jobs:
+ labeler:
+ permissions:
+ contents: read
+ pull-requests: write
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/labeler@v5
+ with:
+ sync-labels: true
diff --git a/.github/workflows/mypy-stubtest.yml b/.github/workflows/mypy-stubtest.yml
new file mode 100644
index 0000000..5b29a93
--- /dev/null
+++ b/.github/workflows/mypy-stubtest.yml
@@ -0,0 +1,44 @@
+---
+name: Mypy Stubtest
+on: [pull_request]
+
+permissions:
+ contents: read
+ checks: write
+
+jobs:
+ mypy-stubtest:
+ name: mypy-stubtest
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python 3
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.10'
+
+ - name: Set up reviewdog
+ uses: reviewdog/action-setup@v1
+
+ - name: Install tox
+ run: python -m pip install tox
+
+ - name: Run mypy stubtest
+ env:
+ REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ run: |
+ set -o pipefail
+ tox -e stubtest | \
+ sed -e "s!.tox/stubtest/lib/python3.10/site-packages!lib!g" | \
+ reviewdog \
+ -efm '%Eerror: %m' \
+ -efm '%CStub: in file %f:%l' \
+ -efm '%CStub: in file %f' \
+ -efm '%+CRuntime:%.%#' \
+ -efm '%+CMISSING' \
+ -efm '%+Cdef %.%#' \
+ -efm '%+C<%.%#>' \
+ -efm '%Z' \
+ -reporter=github-check -tee -name=mypy-stubtest \
+ -filter-mode=nofilter
diff --git a/.github/workflows/nightlies.yml b/.github/workflows/nightlies.yml
new file mode 100644
index 0000000..54e81f0
--- /dev/null
+++ b/.github/workflows/nightlies.yml
@@ -0,0 +1,65 @@
+---
+name: Upload nightly wheels to Anaconda Cloud
+
+on:
+ # Run daily at 1:23 UTC to upload nightly wheels to Anaconda Cloud
+ schedule:
+ - cron: '23 1 * * *'
+ # Run on demand with workflow dispatch
+ workflow_dispatch:
+
+permissions:
+ actions: read
+
+jobs:
+ upload_nightly_wheels:
+ name: Upload nightly wheels to Anaconda Cloud
+ runs-on: ubuntu-latest
+ defaults:
+ run:
+ # The login shell is necessary for the setup-micromamba setup
+ # to work in subsequent jobs.
+ # https://github.com/mamba-org/setup-micromamba#about-login-shells
+ shell: bash -e -l {0}
+ if: github.repository_owner == 'matplotlib'
+
+ steps:
+ # https://github.com/actions/download-artifact/issues/3#issuecomment-1017141067
+ - name: Download wheel artifacts from last build on 'main'
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ run: |
+ PROJECT_REPO="matplotlib/matplotlib"
+ BRANCH="main"
+ WORKFLOW_NAME="cibuildwheel.yml"
+ ARTIFACT_PATTERN="cibw-wheels-*"
+
+ gh run --repo "${PROJECT_REPO}" \
+ list --branch "${BRANCH}" \
+ --workflow "${WORKFLOW_NAME}" \
+ --json event,status,conclusion,databaseId > runs.json
+ RUN_ID=$(
+ jq --compact-output \
+ '[
+ .[] |
+ # Filter on "push" events to main (merged PRs) ...
+ select(.event == "push") |
+ # that have completed successfully ...
+ select(.status == "completed" and .conclusion == "success")
+ ] |
+ # and get ID of latest build of wheels.
+ sort_by(.databaseId) | reverse | .[0].databaseId' runs.json
+ )
+ gh run --repo "${PROJECT_REPO}" view "${RUN_ID}"
+ gh run --repo "${PROJECT_REPO}" \
+ download "${RUN_ID}" --pattern "${ARTIFACT_PATTERN}"
+
+ mkdir dist
+ mv ${ARTIFACT_PATTERN}/*.whl dist/
+ ls -l dist/
+
+ - name: Upload wheels to Anaconda Cloud as nightlies
+ uses: scientific-python/upload-nightly-action@b67d7fcc0396e1128a474d1ab2b48aa94680f9fc # 0.5.0
+ with:
+ artifacts_path: dist
+ anaconda_nightly_upload_token: ${{ secrets.ANACONDA_ORG_UPLOAD_TOKEN }}
diff --git a/.github/workflows/pr_welcome.yml b/.github/workflows/pr_welcome.yml
new file mode 100644
index 0000000..533f676
--- /dev/null
+++ b/.github/workflows/pr_welcome.yml
@@ -0,0 +1,39 @@
+---
+name: PR Greetings
+
+on: [pull_request_target]
+
+permissions:
+ pull-requests: write
+
+jobs:
+ greeting:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/first-interaction@v1
+ with:
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
+ pr-message: >+
+ Thank you for opening your first PR into Matplotlib!
+
+
+ If you have not heard from us in a week or so, please leave a new
+ comment below and that should bring it to our attention.
+ Most of our reviewers are volunteers and sometimes things fall
+ through the cracks.
+
+
+ You can also join us [on
+ gitter](https://gitter.im/matplotlib/matplotlib) for real-time
+ discussion.
+
+
+ For details on testing, writing docs, and our review process,
+ please see [the developer
+ guide](https://matplotlib.org/devdocs/devel/index.html)
+
+
+ We strive to be a welcoming and open project. Please follow our
+ [Code of
+ Conduct](https://github.com/matplotlib/matplotlib/blob/main/CODE_OF_CONDUCT.md).
diff --git a/.github/workflows/reviewdog.yml b/.github/workflows/reviewdog.yml
new file mode 100644
index 0000000..12b59d8
--- /dev/null
+++ b/.github/workflows/reviewdog.yml
@@ -0,0 +1,75 @@
+---
+name: Linting
+on: [pull_request]
+
+permissions:
+ contents: read
+ checks: write
+ pull-requests: write
+
+jobs:
+ flake8:
+ name: flake8
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python 3
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.10'
+
+ - name: Install flake8
+ run: pip3 install -r requirements/testing/flake8.txt
+
+ - name: Set up reviewdog
+ uses: reviewdog/action-setup@v1
+
+ - name: Run flake8
+ env:
+ REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ run: |
+ set -o pipefail
+ flake8 --docstring-convention=all | \
+ reviewdog -f=pep8 -name=flake8 \
+ -tee -reporter=github-check -filter-mode nofilter
+ mypy:
+ name: mypy
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python 3
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.10'
+
+ - name: Install mypy
+ run: pip3 install -r requirements/testing/mypy.txt -r requirements/testing/all.txt
+
+ - name: Set up reviewdog
+ uses: reviewdog/action-setup@v1
+
+ - name: Run mypy
+ env:
+ REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ run: |
+ set -o pipefail
+ mypy --config pyproject.toml | \
+ reviewdog -f=mypy -name=mypy \
+ -tee -reporter=github-check -filter-mode nofilter
+
+
+ eslint:
+ name: eslint
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: eslint
+ uses: reviewdog/action-eslint@v1
+ with:
+ filter_mode: nofilter
+ github_token: ${{ secrets.GITHUB_TOKEN }}
+ reporter: github-check
+ workdir: 'lib/matplotlib/backends/web_backend/'
diff --git a/.github/workflows/stale-tidy.yml b/.github/workflows/stale-tidy.yml
new file mode 100644
index 0000000..92a81ee
--- /dev/null
+++ b/.github/workflows/stale-tidy.yml
@@ -0,0 +1,24 @@
+---
+name: 'Close inactive issues'
+on:
+ schedule:
+ - cron: '30 1 * * 2,4,6'
+
+jobs:
+ stale:
+ if: github.repository == 'matplotlib/matplotlib'
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/stale@v9
+ with:
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
+ operations-per-run: 300
+ days-before-stale: -1
+ stale-pr-label: "status: inactive"
+ days-before-pr-close: -1
+ stale-issue-label: "status: inactive"
+ close-issue-label: "status: closed as inactive"
+ days-before-issue-close: 30
+ ascending: true
+ exempt-issue-labels: "keep"
+ exempt-pr-labels: "keep,status: orphaned PR"
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
new file mode 100644
index 0000000..c606d42
--- /dev/null
+++ b/.github/workflows/stale.yml
@@ -0,0 +1,38 @@
+---
+name: 'Label inactive PRs'
+on:
+ schedule:
+ - cron: '30 1 * * 1,3,5'
+
+jobs:
+ stale:
+ if: github.repository == 'matplotlib/matplotlib'
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/stale@v9
+ with:
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
+ operations-per-run: 20
+ stale-pr-message: >-
+ Since this Pull Request has not been updated in 60 days, it has been marked "inactive." This does
+ not mean that it will be closed, though it may be moved to a "Draft" state. This helps maintainers
+ prioritize their reviewing efforts. You can pick the PR back up anytime - please ping us if you
+ need a review or guidance to move the PR forward! If you do not plan on continuing the work, please
+ let us know so that we can either find someone to take the PR over, or close it.
+ stale-pr-label: "status: inactive"
+ days-before-pr-stale: 60
+ days-before-pr-close: -1
+ stale-issue-message: >-
+ This issue has been marked "inactive" because it has been 365 days since the last comment. If this
+ issue is still present in recent Matplotlib releases, or the feature request is still wanted,
+ please leave a comment and this label will be removed. If there are no updates in another 30 days,
+ this issue will be automatically closed, but you are free to re-open or create a new issue if
+ needed. We value issue reports, and this procedure is meant to help us resurface and prioritize
+ issues that have not been addressed yet, not make them disappear. Thanks for your help!
+ stale-issue-label: "status: inactive"
+ close-issue-label: "status: closed as inactive"
+ days-before-issue-stale: 365
+ days-before-issue-close: 30
+ ascending: true
+ exempt-issue-labels: "keep"
+ exempt-pr-labels: "keep,status: orphaned PR"
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 0000000..4de46a1
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,452 @@
+---
+name: Tests
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.number }}-${{ github.event.ref }}
+ cancel-in-progress: true
+
+on:
+ push:
+ branches-ignore:
+ - auto-backport-of-pr-[0-9]+
+ - v[0-9]+.[0-9]+.[0-9x]+-doc
+ - dependabot/**
+ pull_request:
+ branches-ignore:
+ - v[0-9]+.[0-9]+.[0-9x]+-doc
+ paths-ignore:
+ # Skip running tests if changes are only in documentation directories
+ - 'doc/**'
+ - 'galleries/**'
+ schedule:
+ # 5:47 UTC on Saturdays
+ - cron: "47 5 * * 6"
+ workflow_dispatch:
+ workflow: "*"
+
+env:
+ NO_AT_BRIDGE: 1 # Necessary for GTK3 interactive test.
+ OPENBLAS_NUM_THREADS: 1
+ PYTHONFAULTHANDLER: 1
+
+jobs:
+ test:
+ if: >-
+ github.event_name == 'workflow_dispatch' ||
+ (
+ github.repository == 'matplotlib/matplotlib' &&
+ !contains(github.event.head_commit.message, '[ci skip]') &&
+ !contains(github.event.head_commit.message, '[skip ci]') &&
+ !contains(github.event.head_commit.message, '[skip github]') &&
+ !contains(github.event.head_commit.message, '[ci doc]')
+ )
+ permissions:
+ contents: read
+ name: "Python ${{ matrix.python-version }} on ${{ matrix.os }} ${{ matrix.name-suffix }}"
+ runs-on: ${{ matrix.os }}
+
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - name-suffix: "(Minimum Versions)"
+ os: ubuntu-20.04
+ python-version: '3.10'
+ extra-requirements: '-c requirements/testing/minver.txt'
+ delete-font-cache: true
+ # Oldest versions with Py3.10 wheels.
+ pyqt5-ver: '==5.15.5 sip==6.3.0'
+ pyqt6-ver: '==6.2.0 PyQt6-Qt6==6.2.0'
+ pyside2-ver: '==5.15.2.1'
+ pyside6-ver: '==6.2.0'
+ - os: ubuntu-20.04
+ python-version: '3.10'
+ # One CI run tests ipython/matplotlib-inline before backend mapping moved to mpl
+ extra-requirements:
+ -r requirements/testing/extra.txt
+ "ipython==7.29.0"
+ "ipykernel==5.5.6"
+ "matplotlib-inline<0.1.7"
+ CFLAGS: "-fno-lto" # Ensure that disabling LTO works.
+ # https://github.com/matplotlib/matplotlib/pull/26052#issuecomment-1574595954
+ # https://www.riverbankcomputing.com/pipermail/pyqt/2023-November/045606.html
+ pyqt6-ver: '!=6.5.1,!=6.6.0,!=6.7.1'
+ # https://bugreports.qt.io/projects/PYSIDE/issues/PYSIDE-2346
+ pyside6-ver: '!=6.5.1'
+ - os: ubuntu-22.04
+ python-version: '3.11'
+ # https://www.riverbankcomputing.com/pipermail/pyqt/2023-November/045606.html
+ pyqt6-ver: '!=6.6.0'
+ # https://bugreports.qt.io/projects/PYSIDE/issues/PYSIDE-2346
+ pyside6-ver: '!=6.5.1'
+ extra-requirements: '-r requirements/testing/extra.txt'
+ - os: ubuntu-22.04
+ python-version: '3.12'
+ # https://www.riverbankcomputing.com/pipermail/pyqt/2023-November/045606.html
+ pyqt6-ver: '!=6.6.0'
+ # https://bugreports.qt.io/projects/PYSIDE/issues/PYSIDE-2346
+ pyside6-ver: '!=6.5.1'
+ - os: ubuntu-22.04
+ python-version: '3.13'
+ # https://www.riverbankcomputing.com/pipermail/pyqt/2023-November/045606.html
+ pyqt6-ver: '!=6.6.0'
+ # https://bugreports.qt.io/projects/PYSIDE/issues/PYSIDE-2346
+ pyside6-ver: '!=6.5.1'
+ - name-suffix: "Free-threaded"
+ os: ubuntu-22.04
+ python-version: '3.13t'
+ # https://www.riverbankcomputing.com/pipermail/pyqt/2023-November/045606.html
+ pyqt6-ver: '!=6.6.0'
+ # https://bugreports.qt.io/projects/PYSIDE/issues/PYSIDE-2346
+ pyside6-ver: '!=6.5.1'
+ - os: macos-12 # This runner is on Intel chips.
+ python-version: '3.10'
+ # https://bugreports.qt.io/projects/PYSIDE/issues/PYSIDE-2346
+ pyside6-ver: '!=6.5.1'
+ - os: macos-14 # This runner is on M1 (arm64) chips.
+ python-version: '3.12'
+ # https://bugreports.qt.io/projects/PYSIDE/issues/PYSIDE-2346
+ pyside6-ver: '!=6.5.1'
+ - os: macos-14 # This runner is on M1 (arm64) chips.
+ python-version: '3.13'
+ # https://bugreports.qt.io/projects/PYSIDE/issues/PYSIDE-2346
+ pyside6-ver: '!=6.5.1'
+
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ if: matrix.python-version != '3.13t'
+ with:
+ python-version: ${{ matrix.python-version }}
+ allow-prereleases: true
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: deadsnakes/action@6c8b9b82fe0b4344f4b98f2775fcc395df45e494 # v3.1.0
+ if: matrix.python-version == '3.13t'
+ with:
+ python-version: '3.13'
+ nogil: true
+
+ - name: Install OS dependencies
+ run: |
+ case "${{ runner.os }}" in
+ Linux)
+ echo 'Acquire::Retries "3";' | sudo tee /etc/apt/apt.conf.d/80-retries
+ sudo apt-get update -yy
+ sudo apt-get install -yy --no-install-recommends \
+ ccache \
+ cm-super \
+ dvipng \
+ ffmpeg \
+ fonts-freefont-otf \
+ fonts-noto-cjk \
+ fonts-wqy-zenhei \
+ gdb \
+ gir1.2-gtk-3.0 \
+ graphviz \
+ inkscape \
+ language-pack-de \
+ lcov \
+ libcairo2 \
+ libcairo2-dev \
+ libffi-dev \
+ libgeos-dev \
+ libgirepository1.0-dev \
+ libsdl2-2.0-0 \
+ libxkbcommon-x11-0 \
+ libxcb-cursor0 \
+ libxcb-icccm4 \
+ libxcb-image0 \
+ libxcb-keysyms1 \
+ libxcb-randr0 \
+ libxcb-render-util0 \
+ libxcb-xinerama0 \
+ lmodern \
+ ninja-build \
+ pkg-config \
+ qtbase5-dev \
+ texlive-fonts-recommended \
+ texlive-latex-base \
+ texlive-latex-extra \
+ texlive-latex-recommended \
+ texlive-luatex \
+ texlive-pictures \
+ texlive-xetex
+ if [[ "${{ matrix.python-version }}" = '3.13t' ]]; then
+ # TODO: Remove this once setup-python supports nogil distributions.
+ sudo apt-get install -yy --no-install-recommends \
+ python3.13-tk-nogil
+ fi
+ if [[ "${{ matrix.os }}" = ubuntu-20.04 ]]; then
+ sudo apt-get install -yy --no-install-recommends libopengl0
+ else # ubuntu-22.04
+ sudo apt-get install -yy --no-install-recommends \
+ gir1.2-gtk-4.0 libnotify4
+ fi
+ ;;
+ macOS)
+ brew update
+ export HOMEBREW_NO_INSTALL_UPGRADE=1 HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1
+ brew install ccache ghostscript ninja
+ # The macOS 12 images have an older Python, and this causes homebrew to generate conflicts.
+ # We'll just skip GTK for now, to not pull in Python updates.
+ if [[ "${{ matrix.os }}" = macos-14 ]]; then
+ brew install gobject-introspection gtk4
+ fi
+ brew install --cask font-noto-sans-cjk inkscape
+ ;;
+ esac
+
+ - name: Cache pip
+ uses: actions/cache@v4
+ if: startsWith(runner.os, 'Linux')
+ with:
+ path: ~/.cache/pip
+ key: ${{ matrix.os }}-py${{ matrix.python-version }}-pip-${{ hashFiles('requirements/*/*.txt') }}
+ restore-keys: |
+ ${{ matrix.os }}-py${{ matrix.python-version }}-pip-
+ - name: Cache pip
+ uses: actions/cache@v4
+ if: startsWith(runner.os, 'macOS')
+ with:
+ path: ~/Library/Caches/pip
+ key: ${{ matrix.os }}-py${{ matrix.python-version }}-pip-${{ hashFiles('requirements/*/*.txt') }}
+ restore-keys: |
+ ${{ matrix.os }}-py${{ matrix.python-version }}-pip-
+ - name: Cache ccache
+ uses: actions/cache@v4
+ with:
+ path: |
+ ~/.ccache
+ key: ${{ matrix.os }}-py${{ matrix.python-version }}-ccache-${{ hashFiles('src/*') }}
+ restore-keys: |
+ ${{ matrix.os }}-py${{ matrix.python-version }}-ccache-
+ - name: Cache Matplotlib
+ uses: actions/cache@v4
+ with:
+ path: |
+ ~/.cache/matplotlib
+ !~/.cache/matplotlib/tex.cache
+ !~/.cache/matplotlib/test_cache
+ key: 4-${{ runner.os }}-py${{ matrix.python-version }}-mpl-${{ github.ref }}-${{ github.sha }}
+ restore-keys: |
+ 4-${{ runner.os }}-py${{ matrix.python-version }}-mpl-${{ github.ref }}-
+ 4-${{ runner.os }}-py${{ matrix.python-version }}-mpl-
+
+ - name: Install the nightly dependencies
+ if: matrix.python-version == '3.13t'
+ run: |
+ python -m pip install pytz tzdata python-dateutil # Must be installed for Pandas.
+ python -m pip install \
+ --pre \
+ --index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \
+ --upgrade --only-binary=:all: numpy pandas pillow contourpy
+
+ - name: Install Python dependencies
+ run: |
+ # Upgrade pip and setuptools and wheel to get as clean an install as
+ # possible.
+ python -m pip install --upgrade pip setuptools wheel
+
+ # Install pre-release versions during our weekly upcoming dependency tests.
+ if [[ "${{ github.event_name }}" == 'schedule'
+ && "${{ matrix.name-suffix }}" != '(Minimum Versions)' ]]; then
+ PRE="--pre"
+ fi
+
+ # Install dependencies from PyPI.
+ # Preinstall build requirements to enable no-build-isolation builds.
+ python -m pip install --upgrade $PRE \
+ 'contourpy>=1.0.1' cycler fonttools kiwisolver importlib_resources \
+ numpy packaging pillow 'pyparsing!=3.1.0' python-dateutil setuptools-scm \
+ 'meson-python>=0.13.1' 'pybind11>=2.6' \
+ -r requirements/testing/all.txt \
+ ${{ matrix.extra-requirements }}
+
+ # Install optional dependencies from PyPI.
+ # Sphinx is needed to run sphinxext tests
+ python -m pip install --upgrade sphinx!=6.1.2
+
+ if [[ "${{ matrix.python-version }}" != '3.13t' ]]; then
+ # GUI toolkits are pip-installable only for some versions of Python
+ # so don't fail if we can't install them. Make it easier to check
+ # whether the install was successful by trying to import the toolkit
+ # (sometimes, the install appears to be successful but shared
+ # libraries cannot be loaded at runtime, so an actual import is a
+ # better check).
+ python -m pip install --upgrade pycairo 'cairocffi>=0.8' PyGObject &&
+ (
+ python -c 'import gi; gi.require_version("Gtk", "4.0"); from gi.repository import Gtk' &&
+ echo 'PyGObject 4 is available' || echo 'PyGObject 4 is not available'
+ ) && (
+ python -c 'import gi; gi.require_version("Gtk", "3.0"); from gi.repository import Gtk' &&
+ echo 'PyGObject 3 is available' || echo 'PyGObject 3 is not available'
+ )
+
+ python -mpip install --upgrade pyqt5${{ matrix.pyqt5-ver }} &&
+ python -c 'import PyQt5.QtCore' &&
+ echo 'PyQt5 is available' ||
+ echo 'PyQt5 is not available'
+ # Even though PySide2 wheels can be installed on Python 3.12+, they are broken and since PySide2 is
+ # deprecated, they are unlikely to be fixed. For the same deprecation reason, there are no wheels
+ # on M1 macOS, so don't bother there either.
+ if [[ "${{ matrix.os }}" != 'macos-14'
+ && "${{ matrix.python-version }}" != '3.12' && "${{ matrix.python-version }}" != '3.13' ]]; then
+ python -mpip install --upgrade pyside2${{ matrix.pyside2-ver }} &&
+ python -c 'import PySide2.QtCore' &&
+ echo 'PySide2 is available' ||
+ echo 'PySide2 is not available'
+ fi
+ python -mpip install --upgrade pyqt6${{ matrix.pyqt6-ver }} &&
+ python -c 'import PyQt6.QtCore' &&
+ echo 'PyQt6 is available' ||
+ echo 'PyQt6 is not available'
+ python -mpip install --upgrade pyside6${{ matrix.pyside6-ver }} &&
+ python -c 'import PySide6.QtCore' &&
+ echo 'PySide6 is available' ||
+ echo 'PySide6 is not available'
+
+ python -mpip install --upgrade \
+ -f "https://extras.wxpython.org/wxPython4/extras/linux/gtk3/${{ matrix.os }}" \
+ wxPython &&
+ python -c 'import wx' &&
+ echo 'wxPython is available' ||
+ echo 'wxPython is not available'
+
+ fi # Skip backends on Python 3.13t.
+
+ - name: Install the nightly dependencies
+ # Only install the nightly dependencies during the scheduled event
+ if: github.event_name == 'schedule' && matrix.name-suffix != '(Minimum Versions)'
+ run: |
+ python -m pip install pytz tzdata # Must be installed for Pandas.
+ python -m pip install \
+ --index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \
+ --upgrade --only-binary=:all: numpy pandas
+
+ - name: Install Matplotlib
+ run: |
+ ccache -s
+ git describe
+
+ # Set flag in a delayed manner to avoid issues with installing other
+ # packages
+ if [[ "${{ runner.os }}" == 'macOS' ]]; then
+ export CPPFLAGS='-fprofile-instr-generate=default.%m.profraw'
+ export CPPFLAGS="$CPPFLAGS -fcoverage-mapping"
+ else
+ export CPPFLAGS='--coverage -fprofile-abs-path'
+ fi
+
+ python -m pip install --no-deps --no-build-isolation --verbose \
+ --config-settings=setup-args="-DrcParams-backend=Agg" \
+ --editable .[dev]
+
+ if [[ "${{ runner.os }}" != 'macOS' ]]; then
+ unset CPPFLAGS
+ fi
+
+ - name: Clear font cache
+ run: |
+ rm -rf ~/.cache/matplotlib
+ if: matrix.delete-font-cache
+
+ - name: Run pytest
+ run: |
+ if [[ "${{ matrix.python-version }}" == '3.13t' ]]; then
+ export PYTHON_GIL=0
+ fi
+ pytest -rfEsXR -n auto \
+ --maxfail=50 --timeout=300 --durations=25 \
+ --cov-report=xml --cov=lib --log-level=DEBUG --color=yes
+
+ - name: Cleanup non-failed image files
+ if: failure()
+ run: |
+ function remove_files() {
+ local extension=$1
+ find ./result_images -type f -name "*-expected*.$extension" | while read file; do
+ if [[ $file == *"-expected_pdf"* ]]; then
+ base=${file%-expected_pdf.$extension}_pdf
+ elif [[ $file == *"-expected_eps"* ]]; then
+ base=${file%-expected_eps.$extension}_eps
+ elif [[ $file == *"-expected_svg"* ]]; then
+ base=${file%-expected_svg.$extension}_svg
+ else
+ base=${file%-expected.$extension}
+ fi
+ if [[ ! -e "${base}-failed-diff.$extension" ]]; then
+ if [[ -e "$file" ]]; then
+ rm "$file"
+ echo "Removed $file"
+ fi
+ if [[ -e "${base}.$extension" ]]; then
+ rm "${base}.$extension"
+ echo " Removed ${base}.$extension"
+ fi
+ fi
+ done
+ }
+
+ remove_files "png"; remove_files "svg"; remove_files "pdf"; remove_files "eps";
+
+ if [ "$(find ./result_images -mindepth 1 -type d)" ]; then
+ find ./result_images/* -type d -empty -delete
+ fi
+
+ - name: Filter C coverage
+ if: ${{ !cancelled() && github.event_name != 'schedule' }}
+ run: |
+ if [[ "${{ runner.os }}" != 'macOS' ]]; then
+ lcov --rc lcov_branch_coverage=1 --capture --directory . \
+ --output-file coverage.info
+ lcov --rc lcov_branch_coverage=1 --output-file coverage.info \
+ --extract coverage.info $PWD/src/'*' $PWD/lib/'*'
+ lcov --rc lcov_branch_coverage=1 --list coverage.info
+ find . -name '*.gc*' -delete
+ else
+ xcrun llvm-profdata merge -sparse default.*.profraw \
+ -o default.profdata
+ xcrun llvm-cov export -format="lcov" build/*/src/*.so \
+ -instr-profile default.profdata > info.lcov
+ fi
+ - name: Upload code coverage
+ if: ${{ !cancelled() && github.event_name != 'schedule' }}
+ uses: codecov/codecov-action@v4
+ with:
+ name: "${{ matrix.python-version }} ${{ matrix.os }} ${{ matrix.name-suffix }}"
+ token: ${{ secrets.CODECOV_TOKEN }}
+
+ - uses: actions/upload-artifact@v4
+ if: failure()
+ with:
+ name: "${{ matrix.python-version }} ${{ matrix.os }} ${{ matrix.name-suffix }} result images"
+ path: ./result_images
+
+ # Separate dependent job to only upload one issue from the matrix of jobs
+ create-issue:
+ if: ${{ failure() && github.event_name == 'schedule' }}
+ needs: [test]
+ permissions:
+ issues: write
+ runs-on: ubuntu-latest
+ name: "Create issue on failure"
+
+ steps:
+ - name: Create issue on failure
+ uses: imjohnbo/issue-bot@v3
+ with:
+ title: "[TST] Upcoming dependency test failures"
+ body: |
+ The weekly build with nightly wheels from numpy and pandas
+ has failed. Check the logs for any updates that need to be
+ made in matplotlib.
+ https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}
+
+ pinned: false
+ close-previous: false
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.gitignore b/.gitignore
index 763513e..a8e7085 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,100 @@
+# Jupyter Notebook
.ipynb_checkpoints
+
+# Sphinx documentation
+docs/_build/
+
+# virtualenv
+.venv
+venv/
+ENV/
+
+# pyenv
+.python-version
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+oryx-build-commands.txt
+pip-log.txt
+pip-delete-this-directory.txt
+
+# PyTest
+.pytest_cache
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+
+# IDE/editor droppings
+*.swp
+*.swo
+.vscode
+.vscode/settings.json
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# PyCharm
+.idea/workspace.xml
+.idea/tasks.xml
+.idea/dictionaries
+.idea/vcs.xml
+.idea/jsLibraryMappings.xml
+.idea/dataSources.ids
+.idea/dataSources.xml
+.idea/dataSources.local.xml
+.idea/sqlDataSources.xml
+.idea/dynamic.xml
+.idea/uiDesigner.xml
+.idea/gradle.xml
+.idea/libraries
+.idea/mongoSettings.xml
+*.iws
+/out/
+.idea_modules/
+.idea
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# OS droppings
+.DS_Store
\ No newline at end of file
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..49260a3
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,20 @@
+[submodule "third_party/array-api-compat"]
+ path = third_party/array-api-compat
+ url = https://github.com/scikit-plots/array-api-compat
+ branch = gh_array_api_compat
+[submodule "third_party/math"]
+ path = third_party/math
+ url = https://github.com/scikit-plots/math.git
+ branch = gh_boost_math
+[submodule "third_party/boost"]
+ path = third_party/boost
+ url = https://github.com/scikit-plots/boost.git
+ branch = gh_boost
+[submodule "third_party/NumCpp"]
+ path = third_party/NumCpp
+ url = https://github.com/scikit-plots/NumCpp
+ branch = gh_numcpp
+[submodule "third_party/xla"]
+ path = third_party/xla
+ url = https://github.com/scikit-plots/xla.git
+ branch = gh_xla
diff --git a/.readthedocs.yml b/.readthedocs.yml
index 4c77a45..12ba31c 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -1,5 +1,45 @@
-conda:
- file: environment.yml
+## Read the Docs configuration file
+## See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+## Required
+version: 2
+formats:
+ - htmlzip
+
+## Set the OS, Python version, and other tools you might need
+build:
+ os: ubuntu-22.04
+ tools:
+ python: "3.12"
+ # apt_packages:
+ # - graphviz
+ # jobs:
+ ## before building the doc
+ # post_install:
+ # - pip install -r requirements.txt
+ # commands:
+ # - pip install -r requirements.txt
+ # - sphinx-build -b html docs/source/ docs/build/
+
+# # Build documentation in the "docs/" directory with Sphinx
+# sphinx:
+# configuration: docs/source/conf.py
+# # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
+# builder: "html"
+# # Fail on all warnings to avoid broken references
+# # fail_on_warning: true
-requirements_file:
- requirements.txt
+# Optionally build your docs in additional formats such as PDF and ePub
+# formats:
+# - pdf
+# - epub
+
+# Optionally, but recommended,
+# declare the Python requirements required to build your documentation
+# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
+python:
+ install:
+ - requirements: requirements.txt
+
+conda:
+ environment: environment.yml
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
index c7f34db..7c8160f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,8 +1,7 @@
language: python
python:
- - "2.7"
- "3.5"
- - "3.6"
+ - "3.10"
# command to install dependencies
before_script: # configure a headless display to test plot generation
- "export DISPLAY=:99.0"
diff --git a/CITATION.bib b/CITATION.bib
new file mode 100644
index 0000000..9f5f3c3
--- /dev/null
+++ b/CITATION.bib
@@ -0,0 +1,15 @@
+% ---------------------------------------------------------
+% CITATION.bib file for scikit-plots
+% This file provides citation information for users
+% who want to cite the library, related papers, and books.
+% ---------------------------------------------------------
+
+@misc{scikit-plots:vlatest,
+ author = { scikit-plots developers },
+ title = { scikit-plots: Machine Learning Visualization in Python },
+ year = { 2024 },
+ version = { latest },
+ url = { https://scikit-plots.github.io },
+ note = { Scikit-plot is the result of an unartistic data scientist's dreadful realization that visualization is one of the most crucial components in the data science process, not just a mere afterthought. },
+ doi = { 10.5281/zenodo.13367000 }
+}
\ No newline at end of file
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 0000000..798210a
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,32 @@
+# ---------------------------------------------------------
+# CITATION.cff file for scikit-plots
+# This file provides citation information for users
+# who want to cite the library, related papers, and books.
+# ---------------------------------------------------------
+
+cff-version: 1.2.0 # The version of the CFF format used.
+message: "If you use this software, please cite it using the following metadata."
+title: "scikit-plots: Machine Learning Visualization in Python"
+version: "latest"
+doi: "10.5281/zenodo.13367000"
+date-released: "2024-10-27"
+
+# Authors and contributors of the software project.
+authors:
+ - name: "scikit-plots developers"
+ website: "https://scikit-plots.github.io"
+
+# Repository information
+repository-code: "https://github.com/scikit-plots/scikit-plots"
+repository-artifact: "https://zenodo.org/records/13367000"
+
+# License for your software
+license: "BSD-3-Clause"
+type: software
+url: "https://scikit-plots.github.io"
+
+# Keywords related to your software
+keywords:
+ - Software
+ - Python
+ - scikit-plots
+ - Machine Learning Visualization
\ No newline at end of file
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..9b7fe6c
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,6 @@
+
+
+Our Code of Conduct is at
+https://scikit-plots.github.io/stable/project/code_of_conduct.html
+
+It is rendered from `doc/source/project/code_of_conduct.rst`
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b12c0f7..17b82c6 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -6,14 +6,49 @@ Fixing a bug you found in Scikit-plot? Suggesting a feature? Adding your own plo
2. **Fork the repository**. If you're contributing code, clone the forked repository into your local machine.
+- If you are a first-time contributor:
+ - Go to GitHub and click the “fork” button to create your own copy of the project.
+ - Clone the project to your local computer:
+ ```bash
+ git clone --recurse-submodules https://github.com/your-username/scikit-plot.git
+ ```
+ - Add an `upstream` remote that points at the main repository, for example `git remote add upstream https://github.com/scikit-plots/scikit-plots.git`. After that, `git remote -v` will show two remote repositories named:
+ - upstream, which refers to the scikit-plots repository
+ - origin, which refers to your personal fork
+ - Pull the latest changes from upstream, including tags:
+ ```bash
+ git checkout main
+ git pull upstream main --tags
+ ```
+ - Initialize submodules:
+ ```bash
+ git submodule update --init
+ ```
3. **Run the tests** to make sure they pass on your machine. Simply run `pytest` at the root folder and make sure all tests pass.
4. **Create a new branch**. Please do not commit directly to the master branch. Create your own branch and place your additions there.
-5. **Write your code**. Please follow PEP8 coding standards. Also, if you're adding a function, you must [write a docstring using the Google format](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) detailing the API of your function. Take a look at the docstrings of the other Scikit-plot functions to get an idea of what the docstring of yours should look like.
+- Develop your contribution:
+ - Create a branch for the feature you want to work on. Since the branch name will appear in the merge message, use a sensible name such as 'linspace-speedups':
+ ```bash
+ git checkout -b linspace-speedups
+ ```
+5. **Write your code**. Please follow PEP8 coding standards. Also, if you're adding a function, you must currently [write a docstring using the Google format](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) detailing the API of your function; in the future, the [NumPy docstring standard](https://numpy.org/devdocs/dev/howto-docs.html#howto-document) will be adopted. Take a look at the docstrings of the other Scikit-plot functions to get an idea of what yours should look like; see the example sketched below.
+
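+- For instance, here is a minimal sketch of a Google-style docstring; the function name, its parameters, and the tiny plotting body are hypothetical and only illustrate the layout:
+ ```python
+ import matplotlib.pyplot as plt
+
+ def plot_example(y_true, y_pred, ax=None):
+     """Plot a hypothetical comparison of true and predicted values.
+
+     Args:
+         y_true (array-like): Ground-truth target values.
+         y_pred (array-like): Predicted target values.
+         ax (matplotlib.axes.Axes, optional): Axes to draw on. Defaults to
+             None, in which case the current axes are used.
+
+     Returns:
+         matplotlib.axes.Axes: The axes the plot was drawn onto.
+     """
+     ax = ax or plt.gca()
+     ax.scatter(y_true, y_pred)
+     return ax
+ ```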
+- Commit locally as you progress (`git add` and `git commit`). Use a properly formatted commit message, write tests that fail before your change and pass afterward, and run all the tests locally. Be sure to document any changed behavior in docstrings, keeping to the NumPy docstring [standard](https://numpy.org/devdocs/dev/howto-docs.html#howto-document).
6. **Write/modify the corresponding unit tests**. After adding in your code and the corresponding unit tests, run `pytest` again to make sure they pass.
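+
+- As an illustration of the kind of check `pytest` collects automatically, here is a small, self-contained sketch; it exercises a plain matplotlib call rather than any particular Scikit-plot function, so treat it only as an assumption about how your own test might look:
+ ```python
+ import numpy as np
+ import matplotlib
+ matplotlib.use("Agg")  # headless backend so the test runs without a display
+ import matplotlib.pyplot as plt
+
+
+ def test_scatter_draws_expected_points():
+     y_true = np.array([0, 1, 1, 0])
+     y_pred = np.array([0.1, 0.9, 0.8, 0.2])
+     fig, ax = plt.subplots()
+     ax.scatter(y_true, y_pred)
+     # Exactly one collection (the scatter) should have been added,
+     # holding one offset per plotted point.
+     assert len(ax.collections) == 1
+     assert ax.collections[0].get_offsets().shape == (4, 2)
+     plt.close(fig)
+ ```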
-7. **Submit a pull request**. After submitting a PR, if all tests pass, your code will be reviewed and merged promptly.
+7. **Submit a pull request**. After submitting a pull request (PR), if all tests pass, your code will be reviewed and merged promptly.
+
+- To submit your contribution:
+ - Push your changes back to your fork on GitHub:
+ ```bash
+ git push origin linspace-speedups
+ ```
+ - Go to GitHub. The new branch will show up with a green Pull Request button. Make sure the title and message are clear, concise, and self-explanatory. Then click the button to submit it.
+
+ - If your commit introduces a new feature or changes functionality, post on the mailing list to explain your changes. For bug fixes, documentation updates, etc., this is generally not necessary, though if you do not get any reaction, do feel free to ask for review.
+- Review process: a maintainer will review your pull request and may suggest changes; once all tests pass and the review is complete, it will be merged.
Thank you for taking the time to make Scikit-plot better!
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
index 72affcd..0ec7b9a 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,21 +1,28 @@
-MIT License
+BSD 3-Clause License
-Copyright (c) [2018] [Reiichiro Nakano]
+Copyright (c) [2024 - ], [scikit-plots Developers]
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/LICENSES/LICENSE_NumCpp b/LICENSES/LICENSE_NumCpp
new file mode 100644
index 0000000..811736c
--- /dev/null
+++ b/LICENSES/LICENSE_NumCpp
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (C) 2018-2023 David Pilger
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/LICENSES/LICENSE_OpenXLA-xla b/LICENSES/LICENSE_OpenXLA-xla
new file mode 100644
index 0000000..f49a4e1
--- /dev/null
+++ b/LICENSES/LICENSE_OpenXLA-xla
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
\ No newline at end of file
diff --git a/LICENSES/LICENSE_appdirs b/LICENSES/LICENSE_appdirs
new file mode 100644
index 0000000..8367467
--- /dev/null
+++ b/LICENSES/LICENSE_appdirs
@@ -0,0 +1,31 @@
+Copyright (c) 2005-2010 ActiveState Software Inc.
+Copyright (c) 2013 Eddy Petrișor
+
+This file is directly from
+https://github.com/ActiveState/appdirs/blob/3fe6a83776843a46f20c2e5587afcffe05e03b39/appdirs.py
+
+The license of https://github.com/ActiveState/appdirs copied below:
+
+
+# This is the MIT license
+
+Copyright (c) 2010 ActiveState Software Inc.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/LICENSES/LICENSE_array-api-compat b/LICENSES/LICENSE_array-api-compat
new file mode 100644
index 0000000..ca9f2fe
--- /dev/null
+++ b/LICENSES/LICENSE_array-api-compat
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Consortium for Python Data API Standards
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/LICENSES/LICENSE_boost b/LICENSES/LICENSE_boost
new file mode 100644
index 0000000..36b7cd9
--- /dev/null
+++ b/LICENSES/LICENSE_boost
@@ -0,0 +1,23 @@
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/LICENSES/LICENSE_boost-math b/LICENSES/LICENSE_boost-math
new file mode 100644
index 0000000..36b7cd9
--- /dev/null
+++ b/LICENSES/LICENSE_boost-math
@@ -0,0 +1,23 @@
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/LICENSES/LICENSE_husl b/LICENSES/LICENSE_husl
new file mode 100644
index 0000000..aa2a242
--- /dev/null
+++ b/LICENSES/LICENSE_husl
@@ -0,0 +1,19 @@
+Copyright (C) 2012 Alexei Boronine
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/LICENSES/LICENSE_kds b/LICENSES/LICENSE_kds
new file mode 100644
index 0000000..8b265f6
--- /dev/null
+++ b/LICENSES/LICENSE_kds
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) [2021] [Prateek Sharma]
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/LICENSES/LICENSE_matplotlib b/LICENSES/LICENSE_matplotlib
new file mode 100644
index 0000000..ec51537
--- /dev/null
+++ b/LICENSES/LICENSE_matplotlib
@@ -0,0 +1,99 @@
+License agreement for matplotlib versions 1.3.0 and later
+=========================================================
+
+1. This LICENSE AGREEMENT is between the Matplotlib Development Team
+("MDT"), and the Individual or Organization ("Licensee") accessing and
+otherwise using matplotlib software in source or binary form and its
+associated documentation.
+
+2. Subject to the terms and conditions of this License Agreement, MDT
+hereby grants Licensee a nonexclusive, royalty-free, world-wide license
+to reproduce, analyze, test, perform and/or display publicly, prepare
+derivative works, distribute, and otherwise use matplotlib
+alone or in any derivative version, provided, however, that MDT's
+License Agreement and MDT's notice of copyright, i.e., "Copyright (c)
+2012- Matplotlib Development Team; All Rights Reserved" are retained in
+matplotlib alone or in any derivative version prepared by
+Licensee.
+
+3. In the event Licensee prepares a derivative work that is based on or
+incorporates matplotlib or any part thereof, and wants to
+make the derivative work available to others as provided herein, then
+Licensee hereby agrees to include in any such work a brief summary of
+the changes made to matplotlib .
+
+4. MDT is making matplotlib available to Licensee on an "AS
+IS" basis. MDT MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, MDT MAKES NO AND
+DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB
+WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
+
+5. MDT SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB
+ FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR
+LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING
+MATPLOTLIB , OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF
+THE POSSIBILITY THEREOF.
+
+6. This License Agreement will automatically terminate upon a material
+breach of its terms and conditions.
+
+7. Nothing in this License Agreement shall be deemed to create any
+relationship of agency, partnership, or joint venture between MDT and
+Licensee. This License Agreement does not grant permission to use MDT
+trademarks or trade name in a trademark sense to endorse or promote
+products or services of Licensee, or any third party.
+
+8. By copying, installing or otherwise using matplotlib ,
+Licensee agrees to be bound by the terms and conditions of this License
+Agreement.
+
+License agreement for matplotlib versions prior to 1.3.0
+========================================================
+
+1. This LICENSE AGREEMENT is between John D. Hunter ("JDH"), and the
+Individual or Organization ("Licensee") accessing and otherwise using
+matplotlib software in source or binary form and its associated
+documentation.
+
+2. Subject to the terms and conditions of this License Agreement, JDH
+hereby grants Licensee a nonexclusive, royalty-free, world-wide license
+to reproduce, analyze, test, perform and/or display publicly, prepare
+derivative works, distribute, and otherwise use matplotlib
+alone or in any derivative version, provided, however, that JDH's
+License Agreement and JDH's notice of copyright, i.e., "Copyright (c)
+2002-2011 John D. Hunter; All Rights Reserved" are retained in
+matplotlib alone or in any derivative version prepared by
+Licensee.
+
+3. In the event Licensee prepares a derivative work that is based on or
+incorporates matplotlib or any part thereof, and wants to
+make the derivative work available to others as provided herein, then
+Licensee hereby agrees to include in any such work a brief summary of
+the changes made to matplotlib.
+
+4. JDH is making matplotlib available to Licensee on an "AS
+IS" basis. JDH MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
+IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, JDH MAKES NO AND
+DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
+FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB
+WILL NOT INFRINGE ANY THIRD PARTY RIGHTS.
+
+5. JDH SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB
+ FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR
+LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING
+MATPLOTLIB , OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF
+THE POSSIBILITY THEREOF.
+
+6. This License Agreement will automatically terminate upon a material
+breach of its terms and conditions.
+
+7. Nothing in this License Agreement shall be deemed to create any
+relationship of agency, partnership, or joint venture between JDH and
+Licensee. This License Agreement does not grant permission to use JDH
+trademarks or trade name in a trademark sense to endorse or promote
+products or services of Licensee, or any third party.
+
+8. By copying, installing or otherwise using matplotlib,
+Licensee agrees to be bound by the terms and conditions of this License
+Agreement.
\ No newline at end of file
diff --git a/LICENSES/LICENSE_modelplotpy b/LICENSES/LICENSE_modelplotpy
new file mode 100644
index 0000000..20d40b6
--- /dev/null
+++ b/LICENSES/LICENSE_modelplotpy
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
\ No newline at end of file
diff --git a/LICENSES/LICENSE_mpl-probscale b/LICENSES/LICENSE_mpl-probscale
new file mode 100644
index 0000000..7c388ec
--- /dev/null
+++ b/LICENSES/LICENSE_mpl-probscale
@@ -0,0 +1,28 @@
+Copyright (c) 2015, Paul Hobson
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+* Neither the name of mpl-probscale nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/LICENSES/LICENSE_numpydoc b/LICENSES/LICENSE_numpydoc
new file mode 100644
index 0000000..21b55c9
--- /dev/null
+++ b/LICENSES/LICENSE_numpydoc
@@ -0,0 +1,24 @@
+Copyright (C) 2008 Stefan van der Walt , Pauli Virtanen
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/LICENSES/LICENSE_packaging b/LICENSES/LICENSE_packaging
new file mode 100644
index 0000000..1144d71
--- /dev/null
+++ b/LICENSES/LICENSE_packaging
@@ -0,0 +1,23 @@
+Copyright (c) Donald Stufft and individual contributors.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/LICENSES/LICENSE_scikit-plot b/LICENSES/LICENSE_scikit-plot
new file mode 100644
index 0000000..72affcd
--- /dev/null
+++ b/LICENSES/LICENSE_scikit-plot
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) [2018] [Reiichiro Nakano]
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/LICENSES/LICENSE_scikit-plots b/LICENSES/LICENSE_scikit-plots
new file mode 100644
index 0000000..0ec7b9a
--- /dev/null
+++ b/LICENSES/LICENSE_scikit-plots
@@ -0,0 +1,28 @@
+BSD-3 Clause License
+
+Copyright (c) [2024 - ], [scikit-plots Developers]
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/LICENSES/LICENSE_scipy b/LICENSES/LICENSE_scipy
new file mode 100644
index 0000000..03718db
--- /dev/null
+++ b/LICENSES/LICENSE_scipy
@@ -0,0 +1,30 @@
+Copyright (c) 2001-2002 Enthought, Inc. 2003-2019, SciPy Developers.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/LICENSES/LICENSE_seaborn b/LICENSES/LICENSE_seaborn
new file mode 100644
index 0000000..266921c
--- /dev/null
+++ b/LICENSES/LICENSE_seaborn
@@ -0,0 +1,27 @@
+Copyright (c) 2012-2023, Michael L. Waskom
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+* Neither the name of the project nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/MANIFEST.in b/MANIFEST.in
index e149bfc..272cfbf 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,3 +1,18 @@
-include README.md
+## Optional file to specify additional files for packaging.
+## Setuptools Build System
+
+# Include the top-level LICENSE file and everything under the LICENSES directory
include LICENSE
-include requirements.txt
\ No newline at end of file
+graft LICENSES
+
+# Include other files as needed
+include README.md
+include requirements.txt
+include setup.cfg
+include pyproject.toml
+
+# Include all files under the tests directory
+recursive-include tests *
+
+# Include the package source code
+recursive-include scikitplot *
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..56eafe7
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,173 @@
+## Makefile
+## This Makefile contains various targets for project management tasks such as running the project,
+## cleaning up build files, running tests, building Docker images, and more.
+## Phony targets are used to avoid conflicts with files of the same name.
+## Declare phony targets to indicate these are not files but commands to be executed.
+.PHONY: all help tree clean_basic clean test examples build tag push-tag release publish publish_docker
+## target: The name of the file or action to be created.
+## dependencies: The files that are needed to create the target.
+## command: The commands to execute, indented with a tab (not spaces).
+# target: dependencies
+# command
+
+## all target: A convenience target that runs 'test', 'clean', and 'publish' in sequence.
+## Useful for validating, cleaning up, and publishing the project in one step.
+all: test clean publish
+ @echo "all completed."
+
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+ @echo " clean to remove build artifacts and temporary files"
+ @echo " examples to execute py scripts under 'examples/' folder"
+ @echo " test to run unit tests after 'clean'"
+ @echo " publish to build the project"
+ @echo " all to run 'test clean publish'"
+
+## get project structure
+tree:
+ # tree
+ find . -type d
+ find . | sed -e "s/[^-][^\/]*\// |/g" -e "s/|\([^ ]\)/|-\1/"
+
+## Clean up all the generated files
+## clean target: Removes build artifacts and cleans up the project directory.
+## Useful for ensuring a fresh build environment.
+## basic cleaning without 'third_party'
+clean_basic:
+	@# Command substitution: the backticks below run 'find' and insert its output in place.
+ @rm -rf `find -L . -type d -name ".ipynb_checkpoints" -not -path "./third_party/*"`
+ @rm -rf "./third_party/.ipynb_checkpoints"
+ @echo "Removed all '.ipynb_checkpoints'"
+ @rm -rf `find -L . -type d -name "__pycache__" -not -path "./third_party/*"`
+ @echo "Removed all '__pycache__'"
+ @rm -rf `find -L . -type d -name ".pytest_cache" -not -path "./third_party/*"`
+ @echo "Removed all '.pytest_cache'"
+ @rm -rf `find -L . -type d -name "__MACOSX" -not -path "./third_party/*"`
+ @echo "Removed zip file leftovers '__MACOSX'"
+ @echo "basic cleaning completed."
+
+## pypi cleaning in 'build dirs'
+clean: clean_basic
+ @pip cache purge
+	@rm -rf build build_dir builddir dist *.egg-info
+ @echo "Removed folder 'build, egg etc.'"
+ @find -L -type f -name "*.so" -path "*/build*"
+ @echo "Modules '*.so' files in 'build dirs'"
+ @# find -L -type f -name "*.so" | xargs rm -rf
+ @find -L -type f -name "*.so" -path "*/build*" -exec rm -rf {} +
+ @echo "Removed all '*.so' files in 'build dirs'"
+ @echo "pypi cleaning completed."
+
+## test target: Runs pytest on the 'tests/' directory.
+## Run this target to execute unit tests.
+test: clean_basic
+ @cd scikitplot && pytest tests/
+ @echo "pytest completed."
+
+## examples target: Runs the example scripts under the 'galleries/examples/' directory.
+## Run this target to regenerate the plot images produced by the scripts.
+examples:
+ @cd galleries/examples && python calibration/plot_calibration_script.py
+ @cd galleries/examples && python classification/plot_classifier_eval_script.py
+ @cd galleries/examples && python classification/plot_confusion_matrix_script.py
+ @cd galleries/examples && python classification/plot_feature_importances_script.py
+ @cd galleries/examples && python classification/plot_learning_curve_script.py
+ @cd galleries/examples && python classification/plot_precision_recall_script.py
+ @cd galleries/examples && python classification/plot_roc_script.py
+ @cd galleries/examples && python clustering/plot_elbow_script.py
+ @cd galleries/examples && python clustering/plot_silhouette_script.py
+ @cd galleries/examples && python decomposition/plot_pca_2d_projection_script.py
+ @cd galleries/examples && python decomposition/plot_pca_component_variance_script.py
+ @cd galleries/examples && python kds/plot_cumulative_gain_script.py
+ @cd galleries/examples && python kds/plot_ks_statistic_script.py
+ @cd galleries/examples && python kds/plot_lift_script.py
+ @echo "All py Script executed."
+
+## Build the PyPI packages.
+## Run the 'clean' and 'test' targets beforehand for a fresh build.
+build:
+ @## https://mesonbuild.com/meson-python/how-to-guides/editable-installs.html#editable-installs
+ @## via 'setup.py' with setuptools
+ @## python setup.py build_ext --inplace --verbose
+ @# python setup.py sdist
+ @# python setup.py bdist_wheel
+ @# python setup.py sdist bdist_wheel
+ @# python -m pip install --no-build-isolation --no-cache-dir .
+ @# python -m pip install --no-build-isolation --no-cache-dir --editable .
+
+ @## Via 'build' or installer need 'pyproject.toml' with setuptools
+ @# pip install build
+ @# python -m build --sdist
+ @# python -m build --wheel
+ @# python -m build
+ @## python -m pip install --use-pep517 .
+ @# python -m pip install --no-build-isolation --no-cache-dir -e .
+ @# python -m pip install --no-build-isolation --no-cache-dir -e . -vvv
+
+ @## Via 'build' or installer need 'pyproject.toml' with Meson and Ninja
+ @# pip install build meson
+ @## Create a build directory
+ @# meson setup builddir
+ @## Clean previous build artifacts
+ @# meson clean -C builddir
+ @## Reconfigure the build directory
+ @# meson setup --reconfigure builddir
+ @# meson setup --wipe builddir
+ @## Compile the build directory
+ @# meson compile -C builddir
+ @# meson compile --clean
+ @## Build the project
+ @# ninja -C builddir
+ @## (Optional) Run tests
+ @# ninja -C builddir test
+ @python -m pip install --no-build-isolation --no-cache-dir -e .
+
+ @## Create a Tag for the Release
+ @git tag -a v0.4.0 -m "Release version 0.4.0"
+	@git push origin v0.4.0
+
+ @## Build the PyPI Package
+ @git config --global --add safe.directory /home/jovyan/work/contribution/scikit-plots/third_party/NumCpp
+ @git submodule sync
+ @git submodule update --init --recursive
+ @python -m build
+
+ @## *twine* for the upload
+ @twine check dist/*
+ @twine upload dist/*
+ @echo "pypi publish completed."
+
+## Generate a version based on the short commit hash and message
+LAST_COMMIT_ID = $(shell git rev-parse --short HEAD)
+LAST_COMMIT_MESSAGE = $(shell git log -1 --pretty=%B)
+
+## Tagging the latest commit
+tag:
+ @echo "Creating tag v$(LAST_COMMIT_ID) with message: $(LAST_COMMIT_MESSAGE)"
+ @git tag
+ @git tag -a v$(LAST_COMMIT_ID) -m "$(LAST_COMMIT_MESSAGE)"
+ @echo "Tag v$(LAST_COMMIT_ID) created with message: $(LAST_COMMIT_MESSAGE)."
+
+## Push the tag to the remote repository
+push-tag:
+ @echo "Pushing tag v$(LAST_COMMIT_ID) to the remote repository."
+ @git push origin v$(LAST_COMMIT_ID)
+ @echo "Tag v$(LAST_COMMIT_ID) pushed to the remote repository."
+
+## Release combines tagging and pushing the tag to remote
+release: tag push-tag
+ @echo "Release v$(LAST_COMMIT_ID) is ready."
+
+# Publish to PyPI (example for Python projects)
+publish:
+ @echo "Checking the distribution files with twine."
+ @twine check dist/*
+ @echo "Uploading the distribution files to PyPI."
+ @twine upload dist/*
+ @echo "PyPI publish completed."
+
+publish_docker:
+ # docker login
+	## docker tag <local-image>:<tag> <dockerhub-user>/<repo>:<tag>
+ # docker tag jupyter_notebook-base_notebook:latest skplt/scikit-plots-dev:latest
+ # docker push skplt/scikit-plots-dev:latest
\ No newline at end of file
diff --git a/README.md b/README.md
index 6c2b239..d1eb406 100644
--- a/README.md
+++ b/README.md
@@ -1,130 +1,168 @@
-# Welcome to Scikit-plot
-
-[](https://badge.fury.io/py/scikit-plot)
-[]()
-[](https://travis-ci.org/reiinakano/scikit-plot)
-[]()
-[](https://doi.org/10.5281/zenodo.293191)
-
-### Single line functions for detailed visualizations
-
-### The quickest and easiest way to go from analysis...
-
-
-
-### ...to this.
-
-Scikit-plot is the result of an unartistic data scientist's dreadful realization that *visualization is one of the most crucial components in the data science process, not just a mere afterthought*.
+# Welcome to Scikit-plots 101
+
+## Single line functions for detailed visualizations
+
+The quickest and easiest way to go from analysis...
+
+## Sample Plots
+
+<!-- Gallery of sample plot images: Sample Plot 1, Sample Plot 2 -->
+
+Scikit-plots is the result of an unartistic data scientist's dreadful realization that *visualization is one of the most crucial components in the data science process, not just a mere afterthought*.
Gaining insights is simply a lot easier when you're looking at a colored heatmap of a confusion matrix complete with class labels rather than a single-line dump of numbers enclosed in brackets. Besides, if you ever need to present your results to someone (virtually any time anybody hires you to do data science), you show them visualizations, not a bunch of numbers in Excel.
-That said, there are a number of visualizations that frequently pop up in machine learning. Scikit-plot is a humble attempt to provide aesthetically-challenged programmers (such as myself) the opportunity to generate quick and beautiful graphs and plots with as little boilerplate as possible.
+That said, there are a number of visualizations that frequently pop up in machine learning. Scikit-plots is a humble attempt to provide aesthetically-challenged programmers (such as myself) the opportunity to generate quick and beautiful graphs and plots with as little boilerplate as possible.
## Okay then, prove it. Show us an example.
-Say we use Naive Bayes in multi-class classification and decide we want to visualize the results of a common classification metric, the Area under the Receiver Operating Characteristic curve. Since the ROC is only valid in binary classification, we want to show the respective ROC of each class if it were the positive class. As an added bonus, let's show the micro-averaged and macro-averaged curve in the plot as well.
+Say we use [Keras Classifier](https://keras.io/api/models/sequential/) in multi-class classification and decide we want to visualize the results of a common classification metric, such as sklearn's [classification report](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.classification_report.html) with a [confusion matrix](https://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html).
-Let's use scikit-plot with the sample digits dataset from scikit-learn.
+Let’s start with a basic example where we use a Keras classifier to evaluate the digits dataset provided by Scikit-learn.
```python
-# The usual train-test split mumbo-jumbo
+import os
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Suppress TensorFlow warnings
+import numpy as np
+import tensorflow as tf
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
-from sklearn.naive_bayes import GaussianNB
+import matplotlib.pyplot as plt
+import scikitplot as skplt
+# Load the digits dataset
X, y = load_digits(return_X_y=True)
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
-nb = GaussianNB()
-nb.fit(X_train, y_train)
-predicted_probas = nb.predict_proba(X_test)
-# The magic happens here
-import matplotlib.pyplot as plt
-import scikitplot as skplt
-skplt.metrics.plot_roc(y_test, predicted_probas)
+# Split the dataset into training and validation sets
+X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=1)
+
+# Convert labels to one-hot encoding
+Y_train = tf.keras.utils.to_categorical(y_train)
+Y_val = tf.keras.utils.to_categorical(y_val)
+
+# Define a simple TensorFlow model
+model = tf.keras.Sequential([
+ tf.keras.layers.Input(shape=(X_train.shape[1],)),
+ tf.keras.layers.Dense(64, activation='relu'),
+ tf.keras.layers.Dense(32, activation='relu'),
+ tf.keras.layers.Dense(10, activation='softmax')
+])
+
+# Compile the model
+model.compile(optimizer='adam',
+ loss='categorical_crossentropy',
+ metrics=['accuracy'])
+
+# Train the model
+model.fit(
+ X_train, Y_train,
+ batch_size=64,
+ epochs=10,
+ validation_data=(X_val, Y_val),
+ verbose=0
+)
+
+# Predict probabilities on the validation set
+y_probas = model.predict(X_val)
+
+# Plot precision-recall curves
+skplt.metrics.plot_precision_recall(y_val, y_probas)
plt.show()
```
-
-
-Pretty.
-And... That's it. Encaptured in that small example is the entire philosophy of Scikit-plot: **single line functions for detailed visualization**. You simply browse the plots available in the documentation, and call the function with the necessary arguments. Scikit-plot tries to stay out of your way as much as possible. No unnecessary bells and whistles. And when you *do* need the bells and whistles, each function offers a myriad of parameters for customizing various elements in your plots.
+
+
+
-Finally, compare and [view the non-scikit-plot way of plotting the multi-class ROC curve](http://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html). Which one would you rather do?
+Pretty.
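+
+If you would also like the confusion matrix mentioned above, here is a short follow-up sketch that reuses the fitted model from the example (the `plot_confusion_matrix` signature, including `normalize=True`, is assumed from scikit-plot's long-standing API):
+
+```python
+# Turn the predicted probabilities from the example above into hard labels
+y_pred = np.argmax(y_probas, axis=1)
+
+# One more single-line call, this time for a normalized confusion matrix
+skplt.metrics.plot_confusion_matrix(y_val, y_pred, normalize=True)
+plt.show()
+```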
## Maximum flexibility. Compatibility with non-scikit-learn objects.
-Although Scikit-plot is loosely based around the scikit-learn interface, you don't actually need Scikit-learn objects to use the available functions. As long as you provide the functions what they're asking for, they'll happily draw the plots for you.
-
-Here's a quick example to generate the precision-recall curves of a Keras classifier on a sample dataset.
-
-```python
-# Import what's needed for the Functions API
-import matplotlib.pyplot as plt
-import scikitplot as skplt
-
-# This is a Keras classifier. We'll generate probabilities on the test set.
-keras_clf.fit(X_train, y_train, batch_size=64, nb_epoch=10, verbose=2)
-probas = keras_clf.predict_proba(X_test, batch_size=64)
-
-# Now plot.
-skplt.metrics.plot_precision_recall_curve(y_test, probas)
-plt.show()
-```
-
-
-You can see clearly here that `skplt.metrics.plot_precision_recall_curve` needs only the ground truth y-values and the predicted probabilities to generate the plot. This lets you use *anything* you want as the classifier, from Keras NNs to NLTK Naive Bayes to that groundbreaking classifier algorithm you just wrote.
+Although Scikit-plots is loosely based around the scikit-learn interface, you don't actually need scikit-learn objects to use the available functions.
+As long as you provide the functions what they're asking for, they'll happily draw the plots for you.
The possibilities are endless.
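+
+For instance, here is a minimal sketch (assuming only the `skplt.metrics` functions already shown above) where the labels and probabilities do not come from a scikit-learn estimator at all:
+
+```python
+import numpy as np
+import matplotlib.pyplot as plt
+import scikitplot as skplt
+
+# Ground truth and predicted probabilities can come from any source:
+# a Keras model, an NLTK classifier, or, as here, a synthetic baseline.
+rng = np.random.default_rng(0)
+y_true = rng.integers(0, 3, size=300)                      # three classes
+y_probas = rng.dirichlet(alpha=(1.0, 1.0, 1.0), size=300)  # rows sum to 1
+
+# The plotting call only needs these two arrays.
+skplt.metrics.plot_precision_recall(y_true, y_probas)
+plt.show()
+```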
-## Installation
+## User Installation
-Installation is simple! First, make sure you have the dependencies [Scikit-learn](http://scikit-learn.org) and [Matplotlib](http://matplotlib.org/) installed.
+1. **Install Scikit-plots**:
+ - Use pip to install Scikit-plots:
-Then just run:
-```bash
-pip install scikit-plot
-```
+ ```bash
+ pip install scikit-plots
+ ```
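+
+   To verify that the install worked, here is a quick sanity check (a minimal sketch; the `scikitplot` import name matches the example above, and the `__version__` attribute is an assumption):
+
+   ```python
+   import scikitplot as skplt
+   print(skplt.__version__)  # assumed attribute; prints the installed version
+   ```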
-Or if you want the latest development version, clone this repo and run
-```bash
-python setup.py install
-```
-at the root folder.
+## Release Notes
-If using conda, you can install Scikit-plot by running:
-```bash
-conda install -c conda-forge scikit-plot
-```
+See the [changelog](https://scikit-plots.github.io/stable/whats_new/whats_new.html)
+for a history of notable changes to scikit-plots.
## Documentation and Examples
Explore the full features of Scikit-plot.
-You can find detailed documentation [here](http://scikit-plot.readthedocs.io).
-
-Examples are found in the [examples folder of this repo](examples/).
-
-## Contributing to Scikit-plot
-
-Reporting a bug? Suggesting a feature? Want to add your own plot to the library? Visit our [contributor guidelines](CONTRIBUTING.md).
-
-## Citing Scikit-plot
-
-Are you using Scikit-plot in an academic paper? You should be! Reviewers love eye candy.
-
-If so, please consider citing Scikit-plot with DOI [](https://doi.org/10.5281/zenodo.293191)
-
-#### APA
-
-> Reiichiro Nakano. (2018). reiinakano/scikit-plot: 0.3.7 [Data set]. Zenodo. http://doi.org/10.5281/zenodo.293191
-
-#### IEEE
+## Contributing to scikit-plots
-> [1]Reiichiro Nakano, “reiinakano/scikit-plot: 0.3.7”. Zenodo, 19-Feb-2017.
+Reporting a bug? Suggesting a feature? Want to add your own plot to the library? Visit our [contributor guidelines](CONTRIBUTING.md).
-#### ACM
+## Citing scikit-plots
-> [1]Reiichiro Nakano 2018. reiinakano/scikit-plot: 0.3.7. Zenodo.
+1. scikit-plots, “scikit-plots: vlatest”. Zenodo, Aug. 23, 2024.
+ DOI: [10.5281/zenodo.13367000](https://doi.org/10.5281/zenodo.13367000).
-Happy plotting!
+2. scikit-plots, “scikit-plots: v0.3.8dev0”. Zenodo, Aug. 23, 2024.
+ DOI: [10.5281/zenodo.13367001](https://doi.org/10.5281/zenodo.13367001).
\ No newline at end of file
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000..d46888a
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,26 @@
+# Security Policy
+
+## Supported Versions
+
+The following table lists versions and whether they are supported. Security
+vulnerability reports will be accepted and acted upon for all supported
+versions.
+
+| Version | Supported |
+| ------- | ------------------ |
+| 0.4.x | :white_check_mark: |
+| 0.3.x | :x: |
+| < 0.3 | :x: |
+
+
+## Reporting a Vulnerability
+
+
+To report a security vulnerability, please use the [Tidelift security
+contact](https://tidelift.com/security). Tidelift will coordinate the fix and
+disclosure.
+
+If you have found a security vulnerability, in order to keep it confidential,
+please do not report an issue on GitHub.
+
+We do not award bounties for security vulnerabilities.
\ No newline at end of file
diff --git a/auto_building_tools/.circleci/config.yml b/auto_building_tools/.circleci/config.yml
new file mode 100644
index 0000000..7a98f88
--- /dev/null
+++ b/auto_building_tools/.circleci/config.yml
@@ -0,0 +1,129 @@
+version: 2.1
+
+jobs:
+ lint:
+ docker:
+ - image: cimg/python:3.9.18
+ steps:
+ - checkout
+ - run:
+ name: dependencies
+ command: |
+ source build_tools/shared.sh
+ # Include pytest compatibility with mypy
+ pip install pytest $(get_dep ruff min) $(get_dep mypy min) $(get_dep black min) cython-lint
+ - run:
+ name: linting
+ command: ./build_tools/linting.sh
+
+ doc-min-dependencies:
+ docker:
+ - image: cimg/python:3.9.18
+ environment:
+ - MKL_NUM_THREADS: 2
+ - OPENBLAS_NUM_THREADS: 2
+ - CONDA_ENV_NAME: testenv
+ - LOCK_FILE: build_tools/circle/doc_min_dependencies_linux-64_conda.lock
+ # Do not fail if the documentation build generates warnings with minimum
+ # dependencies as long as we can avoid raising warnings with more recent
+ # versions of the same dependencies.
+ - SKLEARN_WARNINGS_AS_ERRORS: '0'
+ steps:
+ - checkout
+ - run: ./build_tools/circle/checkout_merge_commit.sh
+ - restore_cache:
+ key: v1-doc-min-deps-datasets-{{ .Branch }}
+ - restore_cache:
+ keys:
+ - doc-min-deps-ccache-{{ .Branch }}
+ - doc-min-deps-ccache
+ - run: ./build_tools/circle/build_doc.sh
+ - save_cache:
+ key: doc-min-deps-ccache-{{ .Branch }}-{{ .BuildNum }}
+ paths:
+ - ~/.ccache
+ - ~/.cache/pip
+ - save_cache:
+ key: v1-doc-min-deps-datasets-{{ .Branch }}
+ paths:
+ - ~/scikit_learn_data
+ - store_artifacts:
+ path: doc/_build/html/stable
+ destination: doc
+ - store_artifacts:
+ path: ~/log.txt
+ destination: log.txt
+
+ doc:
+ docker:
+ - image: cimg/python:3.9.18
+ environment:
+ - MKL_NUM_THREADS: 2
+ - OPENBLAS_NUM_THREADS: 2
+ - CONDA_ENV_NAME: testenv
+ - LOCK_FILE: build_tools/circle/doc_linux-64_conda.lock
+ # Make sure that we fail if the documentation build generates warnings with
+ # recent versions of the dependencies.
+ - SKLEARN_WARNINGS_AS_ERRORS: '1'
+ steps:
+ - checkout
+ - run: ./build_tools/circle/checkout_merge_commit.sh
+ - restore_cache:
+ key: v1-doc-datasets-{{ .Branch }}
+ - restore_cache:
+ keys:
+ - doc-ccache-{{ .Branch }}
+ - doc-ccache
+ - run: ./build_tools/circle/build_doc.sh
+ - save_cache:
+ key: doc-ccache-{{ .Branch }}-{{ .BuildNum }}
+ paths:
+ - ~/.ccache
+ - ~/.cache/pip
+ - save_cache:
+ key: v1-doc-datasets-{{ .Branch }}
+ paths:
+ - ~/scikit_learn_data
+ - store_artifacts:
+ path: doc/_build/html/stable
+ destination: doc
+ - store_artifacts:
+ path: ~/log.txt
+ destination: log.txt
+ # Persists generated documentation so that it can be attached and deployed
+ # in the 'deploy' step.
+ - persist_to_workspace:
+ root: doc/_build/html
+ paths: .
+
+ deploy:
+ docker:
+ - image: cimg/python:3.9.18
+ steps:
+ - checkout
+ - run: ./build_tools/circle/checkout_merge_commit.sh
+ # Attach documentation generated in the 'doc' step so that it can be
+ # deployed.
+ - attach_workspace:
+ at: doc/_build/html
+ - run: ls -ltrh doc/_build/html/stable
+ - deploy:
+ command: |
+ if [[ "${CIRCLE_BRANCH}" =~ ^main$|^[0-9]+\.[0-9]+\.X$ ]]; then
+ bash build_tools/circle/push_doc.sh doc/_build/html/stable
+ fi
+
+workflows:
+ version: 2
+ build-doc-and-deploy:
+ jobs:
+ - lint
+ - doc:
+ requires:
+ - lint
+ - doc-min-dependencies:
+ requires:
+ - lint
+ - deploy:
+ requires:
+ - doc
diff --git a/auto_building_tools/.github/FUNDING.yml b/auto_building_tools/.github/FUNDING.yml
new file mode 100644
index 0000000..5662909
--- /dev/null
+++ b/auto_building_tools/.github/FUNDING.yml
@@ -0,0 +1,12 @@
+# These are supported funding model platforms
+
+github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
+patreon: # Replace with a single Patreon username
+open_collective: # Replace with a single Open Collective username
+ko_fi: # Replace with a single Ko-fi username
+tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
+community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
+liberapay: # Replace with a single Liberapay username
+issuehunt: # Replace with a single IssueHunt username
+otechie: # Replace with a single Otechie username
+custom: ['https://numfocus.org/donate-to-scikit-learn']
diff --git a/auto_building_tools/.github/ISSUE_TEMPLATE/bug_report.yml b/auto_building_tools/.github/ISSUE_TEMPLATE/bug_report.yml
new file mode 100644
index 0000000..bc8e5b5
--- /dev/null
+++ b/auto_building_tools/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,95 @@
+name: Bug Report
+description: Create a report to help us reproduce and correct the bug
+labels: ['Bug', 'Needs Triage']
+
+body:
+- type: markdown
+ attributes:
+ value: >
+ #### Before submitting a bug, please make sure the issue hasn't been already
+ addressed by searching through [the past issues](https://github.com/scikit-learn/scikit-learn/issues).
+- type: textarea
+ attributes:
+ label: Describe the bug
+ description: >
+ A clear and concise description of what the bug is.
+ validations:
+ required: true
+- type: textarea
+ attributes:
+ label: Steps/Code to Reproduce
+ description: |
+ Please add a [minimal code example](https://scikit-learn.org/dev/developers/minimal_reproducer.html) that can reproduce the error when running it. Be as succinct as possible, **do not depend on external data files**: instead you can generate synthetic data using `numpy.random`, [sklearn.datasets.make_regression](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_regression.html), [sklearn.datasets.make_classification](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html) or a few lines of Python code. Example:
+
+ ```python
+ from sklearn.feature_extraction.text import CountVectorizer
+ from sklearn.decomposition import LatentDirichletAllocation
+ docs = ["Help I have a bug" for i in range(1000)]
+ vectorizer = CountVectorizer(input=docs, analyzer='word')
+ lda_features = vectorizer.fit_transform(docs)
+ lda_model = LatentDirichletAllocation(
+ n_topics=10,
+ learning_method='online',
+ evaluate_every=10,
+ n_jobs=4,
+ )
+ model = lda_model.fit(lda_features)
+ ```
+
+ If the code is too long, feel free to put it in a public gist and link it in the issue: https://gist.github.com.
+
+ In short, **we are going to copy-paste your code** to run it and we expect to get the same result as you.
+
+ We acknowledge that crafting a [minimal reproducible code example](https://scikit-learn.org/dev/developers/minimal_reproducer.html) requires some effort on your side but it really helps the maintainers quickly reproduce the problem and analyze its cause without any ambiguity. Ambiguous bug reports tend to be slower to fix because they will require more effort and back and forth discussion between the maintainers and the reporter to pin-point the precise conditions necessary to reproduce the problem.
+ placeholder: |
+ ```
+ Sample code to reproduce the problem
+ ```
+ validations:
+ required: true
+- type: textarea
+ attributes:
+ label: Expected Results
+ description: >
+ Please paste or describe the expected results.
+ placeholder: >
+ Example: No error is thrown.
+ validations:
+ required: true
+- type: textarea
+ attributes:
+ label: Actual Results
+ description: |
+ Please paste or describe the results you observe instead of the expected results. If you observe an error, please paste the error message including the **full traceback** of the exception. For instance the code above raises the following exception:
+
+ ```python-traceback
+ ---------------------------------------------------------------------------
+ TypeError Traceback (most recent call last)
+ in
+ 4 vectorizer = CountVectorizer(input=docs, analyzer='word')
+ 5 lda_features = vectorizer.fit_transform(docs)
+ ----> 6 lda_model = LatentDirichletAllocation(
+ 7 n_topics=10,
+ 8 learning_method='online',
+
+ TypeError: __init__() got an unexpected keyword argument 'n_topics'
+ ```
+ placeholder: >
+ Please paste or specifically describe the actual output or traceback.
+ validations:
+ required: true
+- type: textarea
+ attributes:
+ label: Versions
+ render: shell
+ description: |
+ Please run the following and paste the output below.
+ ```python
+ import sklearn; sklearn.show_versions()
+ ```
+ validations:
+ required: true
+- type: markdown
+ attributes:
+ value: >
+ Thanks for contributing 🎉!
diff --git a/auto_building_tools/.github/ISSUE_TEMPLATE/config.yml b/auto_building_tools/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000..8d9c592
--- /dev/null
+++ b/auto_building_tools/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,17 @@
+blank_issues_enabled: true
+contact_links:
+ - name: Discussions
+ url: https://github.com/scikit-learn/scikit-learn/discussions/new
+ about: Ask questions and discuss with other scikit-learn community members
+ - name: Stack Overflow
+ url: https://stackoverflow.com/questions/tagged/scikit-learn
+ about: Please ask and answer usage questions on Stack Overflow
+ - name: Mailing list
+ url: https://mail.python.org/mailman/listinfo/scikit-learn
+ about: General discussions and announcements on the mailing list
+ - name: Discord server
+ url: https://discord.gg/h9qyrK8Jc8
+ about: Developers and users can be found on the Discord server
+ - name: Blank issue
+ url: https://github.com/scikit-learn/scikit-learn/issues/new
+ about: Please note that GitHub Discussions should be used in most cases instead
diff --git a/auto_building_tools/.github/ISSUE_TEMPLATE/doc_improvement.yml b/auto_building_tools/.github/ISSUE_TEMPLATE/doc_improvement.yml
new file mode 100644
index 0000000..48d0c3d
--- /dev/null
+++ b/auto_building_tools/.github/ISSUE_TEMPLATE/doc_improvement.yml
@@ -0,0 +1,17 @@
+name: Documentation improvement
+description: Create a report to help us improve the documentation. Alternatively you can just open a pull request with the suggested change.
+labels: [Documentation, 'Needs Triage']
+
+body:
+- type: textarea
+ attributes:
+ label: Describe the issue linked to the documentation
+ description: >
+ Tell us about the confusion introduced in the documentation.
+ validations:
+ required: true
+- type: textarea
+ attributes:
+ label: Suggest a potential alternative/fix
+ description: >
+ Tell us how we could improve the documentation in this regard.
diff --git a/auto_building_tools/.github/ISSUE_TEMPLATE/feature_request.yml b/auto_building_tools/.github/ISSUE_TEMPLATE/feature_request.yml
new file mode 100644
index 0000000..51a2cdd
--- /dev/null
+++ b/auto_building_tools/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,25 @@
+name: Feature request
+description: Suggest a new algorithm, enhancement to an existing algorithm, etc.
+labels: ['New Feature', 'Needs Triage']
+
+body:
+- type: markdown
+ attributes:
+ value: >
+ #### If you want to propose a new algorithm, please refer first to the [scikit-learn inclusion criterion](https://scikit-learn.org/stable/faq.html#what-are-the-inclusion-criteria-for-new-algorithms).
+- type: textarea
+ attributes:
+ label: Describe the workflow you want to enable
+ validations:
+ required: true
+- type: textarea
+ attributes:
+ label: Describe your proposed solution
+ validations:
+ required: true
+- type: textarea
+ attributes:
+ label: Describe alternatives you've considered, if relevant
+- type: textarea
+ attributes:
+ label: Additional context
diff --git a/auto_building_tools/.github/PULL_REQUEST_TEMPLATE.md b/auto_building_tools/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..f59f9bc
--- /dev/null
+++ b/auto_building_tools/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,32 @@
+
+
+#### Reference Issues/PRs
+
+
+
+#### What does this implement/fix? Explain your changes.
+
+
+#### Any other comments?
+
+
+
diff --git a/auto_building_tools/.github/dependabot.yml b/auto_building_tools/.github/dependabot.yml
new file mode 100644
index 0000000..7ac17eb
--- /dev/null
+++ b/auto_building_tools/.github/dependabot.yml
@@ -0,0 +1,21 @@
+version: 2
+updates:
+ # Maintain dependencies for GitHub Actions as recommended in SPEC8:
+ # https://github.com/scientific-python/specs/pull/325
+ # At the time of writing, release critical workflows such as
+ # pypa/gh-action-pypi-publish should use hash-based versioning for security
+ # reasons. This strategy may be generalized to all other github actions
+ # in the future.
+ - package-ecosystem: "github-actions"
+ directory: "/"
+ schedule:
+ interval: "monthly"
+ groups:
+ actions:
+ patterns:
+ - "*"
+ labels:
+ - "Build / CI"
+ - "dependencies"
+ reviewers:
+ - "scikit-learn/core-devs"
diff --git a/auto_building_tools/.github/labeler-file-extensions.yml b/auto_building_tools/.github/labeler-file-extensions.yml
new file mode 100644
index 0000000..63fcfca
--- /dev/null
+++ b/auto_building_tools/.github/labeler-file-extensions.yml
@@ -0,0 +1,8 @@
+cython:
+- sklearn/**/*.pyx
+- sklearn/**/*.pxd
+- sklearn/**/*.pxi
+# Tempita templates
+- sklearn/**/*.pyx.tp
+- sklearn/**/*.pxd.tp
+- sklearn/**/*.pxi.tp
diff --git a/auto_building_tools/.github/labeler-module.yml b/auto_building_tools/.github/labeler-module.yml
new file mode 100644
index 0000000..faf2acd
--- /dev/null
+++ b/auto_building_tools/.github/labeler-module.yml
@@ -0,0 +1,80 @@
+module:cluster:
+- sklearn/cluster/**/*
+
+module:common:
+- sklearn/common/**/*
+
+module:compose:
+- sklearn/compose/**/*
+
+module:covariance:
+- sklearn/covariance/**/*
+
+module:cross_decomposition:
+- sklearn/cross_decomposition/**/*
+
+module:datasets:
+- sklearn/datasets/**/*
+
+module:decomposition:
+- sklearn/decomposition/**/*
+
+module:ensemble:
+- sklearn/ensemble/**/*
+
+module:feature_extraction:
+- sklearn/feature_extraction/**/*
+
+module:feature_selection:
+- sklearn/feature_selection/**/*
+
+module:gaussian_process:
+- sklearn/gaussian_process/**/*
+
+module:impute:
+- sklearn/impute/**/*
+
+module:inspection:
+- sklearn/inspection/**/*
+
+module:linear_model:
+- sklearn/linear_model/**/*
+
+module:manifold:
+- sklearn/manifold/**/*
+
+module:metrics:
+- sklearn/metrics/**/*
+
+module:mixture:
+- sklearn/mixture/**/*
+
+module:model_selection:
+- sklearn/model_selection/**/*
+
+module:naive_bayes:
+- sklearn/naive_bayes.py
+
+module:neighbors:
+- sklearn/neighbors/**/*
+
+module:neural_network:
+- sklearn/neural_network/**/*
+
+module:pipeline:
+- sklearn/pipeline.py
+
+module:preprocessing:
+- sklearn/preprocessing/**/*
+
+module:semi_supervised:
+- sklearn/semi_supervised/**/*
+
+module:svm:
+- sklearn/svm/**/*
+
+module:tree:
+- sklearn/tree/**/*
+
+module:utils:
+- sklearn/utils/**/*
diff --git a/auto_building_tools/.github/scripts/label_title_regex.py b/auto_building_tools/.github/scripts/label_title_regex.py
new file mode 100644
index 0000000..9a689b8
--- /dev/null
+++ b/auto_building_tools/.github/scripts/label_title_regex.py
@@ -0,0 +1,25 @@
+"""Labels PRs based on title. Must be run in a github action with the
+pull_request_target event."""
+
+import json
+import os
+import re
+
+from github import Github
+
+context_dict = json.loads(os.getenv("CONTEXT_GITHUB"))
+
+repo = context_dict["repository"]
+g = Github(context_dict["token"])
+repo = g.get_repo(repo)
+pr_number = context_dict["event"]["number"]
+issue = repo.get_issue(number=pr_number)
+title = issue.title
+
+
+regex_to_labels = [(r"\bDOC\b", "Documentation"), (r"\bCI\b", "Build / CI")]
+
+labels_to_add = [label for regex, label in regex_to_labels if re.search(regex, title)]
+
+if labels_to_add:
+ issue.add_to_labels(*labels_to_add)
diff --git a/auto_building_tools/.github/workflows/artifact-redirector.yml b/auto_building_tools/.github/workflows/artifact-redirector.yml
new file mode 100644
index 0000000..690cace
--- /dev/null
+++ b/auto_building_tools/.github/workflows/artifact-redirector.yml
@@ -0,0 +1,24 @@
+name: CircleCI artifacts redirector
+on: [status]
+
+# Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this
+# github actions workflow:
+# https://docs.github.com/en/actions/security-guides/automatic-token-authentication
+permissions:
+ statuses: write
+
+jobs:
+ circleci_artifacts_redirector_job:
+ runs-on: ubuntu-latest
+    # For testing this action on a fork, remove the "github.repository ==" condition.
+ if: "github.repository == 'scikit-learn/scikit-learn' && github.event.context == 'ci/circleci: doc'"
+ name: Run CircleCI artifacts redirector
+ steps:
+ - name: GitHub Action step
+ uses: scientific-python/circleci-artifacts-redirector-action@v1
+ with:
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
+ api-token: ${{ secrets.CIRCLECI_TOKEN }}
+ artifact-path: 0/doc/_changed.html
+ circleci-jobs: doc
+ job-title: Check the rendered docs here!
diff --git a/auto_building_tools/.github/workflows/assign.yml b/auto_building_tools/.github/workflows/assign.yml
new file mode 100644
index 0000000..a69b60e
--- /dev/null
+++ b/auto_building_tools/.github/workflows/assign.yml
@@ -0,0 +1,30 @@
+
+name: Assign
+on:
+ issue_comment:
+ types: created
+
+# Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this
+# github actions workflow:
+# https://docs.github.com/en/actions/security-guides/automatic-token-authentication
+permissions:
+ issues: write
+
+jobs:
+ one:
+ runs-on: ubuntu-latest
+    # Note that string comparison is not case sensitive.
+ if: >-
+ startsWith(github.event.comment.body, '/take')
+ && !github.event.issue.assignee
+ steps:
+ - run: |
+ # Using REST API directly because assigning through gh has some severe limitations. For more details, see
+ # https://github.com/scikit-learn/scikit-learn/issues/29395#issuecomment-2206776963
+ echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}"
+ curl -H "Authorization: token $GH_TOKEN" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' \
+ https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees
+ gh issue edit $ISSUE --remove-label "help wanted"
+ env:
+ GH_TOKEN: ${{ github.token }}
+ ISSUE: ${{ github.event.issue.html_url }}
diff --git a/auto_building_tools/.github/workflows/check-changelog.yml b/auto_building_tools/.github/workflows/check-changelog.yml
new file mode 100644
index 0000000..2c07921
--- /dev/null
+++ b/auto_building_tools/.github/workflows/check-changelog.yml
@@ -0,0 +1,68 @@
+name: Check Changelog
+# This check makes sure that the changelog is properly updated
+# when a PR introduces a change in a test file.
+# To bypass this check, label the PR with "No Changelog Needed".
+on:
+ pull_request:
+ types: [opened, edited, labeled, unlabeled, synchronize]
+
+jobs:
+ check:
+ name: A reviewer will let you know if it is required or can be bypassed
+ runs-on: ubuntu-latest
+ if: ${{ contains(github.event.pull_request.labels.*.name, 'No Changelog Needed') == 0 }}
+ steps:
+ - name: Get PR number and milestone
+ run: |
+ echo "PR_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
+ echo "TAGGED_MILESTONE=${{ github.event.pull_request.milestone.title }}" >> $GITHUB_ENV
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: '0'
+ - name: Check the changelog entry
+ run: |
+ set -xe
+ changed_files=$(git diff --name-only origin/main)
+ # Changelog should be updated only if tests have been modified
+ if [[ ! "$changed_files" =~ tests ]]
+ then
+ exit 0
+ fi
+ all_changelogs=$(cat ./doc/whats_new/v*.rst)
+ if [[ "$all_changelogs" =~ :pr:\`$PR_NUMBER\` ]]
+ then
+ echo "Changelog has been updated."
+ # If the pull request is milestoned check the correspondent changelog
+          if [[ -f ./doc/whats_new/v${TAGGED_MILESTONE:0:4}.rst ]]
+ then
+ expected_changelog=$(cat ./doc/whats_new/v${TAGGED_MILESTONE:0:4}.rst)
+ if [[ "$expected_changelog" =~ :pr:\`$PR_NUMBER\` ]]
+ then
+ echo "Changelog and milestone correspond."
+ else
+ echo "Changelog and milestone do not correspond."
+ echo "If you see this error make sure that the tagged milestone for the PR"
+ echo "and the edited changelog filename properly match."
+ exit 1
+ fi
+ fi
+ else
+ echo "A Changelog entry is missing."
+ echo ""
+ echo "Please add an entry to the changelog at 'doc/whats_new/v*.rst'"
+ echo "to document your change assuming that the PR will be merged"
+ echo "in time for the next release of scikit-learn."
+ echo ""
+ echo "Look at other entries in that file for inspiration and please"
+ echo "reference this pull request using the ':pr:' directive and"
+ echo "credit yourself (and other contributors if applicable) with"
+ echo "the ':user:' directive."
+ echo ""
+ echo "If you see this error and there is already a changelog entry,"
+ echo "check that the PR number is correct."
+ echo ""
+ echo "If you believe that this PR does not warrant a changelog"
+ echo "entry, say so in a comment so that a maintainer will label"
+ echo "the PR with 'No Changelog Needed' to bypass this check."
+ exit 1
+ fi
diff --git a/auto_building_tools/.github/workflows/check-sdist.yml b/auto_building_tools/.github/workflows/check-sdist.yml
new file mode 100644
index 0000000..81a1329
--- /dev/null
+++ b/auto_building_tools/.github/workflows/check-sdist.yml
@@ -0,0 +1,33 @@
+name: "Check sdist"
+
+on:
+ schedule:
+ - cron: '0 0 * * *'
+
+jobs:
+ check-sdist:
+ # Don't run on forks
+ if: github.repository == 'scikit-learn/scikit-learn'
+
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
+ with:
+ python-version: '3.9'
+ - name: Install dependencies
+ # scipy and cython are required to build sdist
+ run: |
+ python -m pip install --upgrade pip
+ pip install check-sdist
+ - run: |
+ check-sdist --inject-junk
+
+ update-tracker:
+ uses: ./.github/workflows/update_tracking_issue.yml
+ if: ${{ always() }}
+ needs: [check-sdist]
+ with:
+ job_status: ${{ needs.check-sdist.result }}
+ secrets:
+ BOT_GITHUB_TOKEN: ${{ secrets.BOT_GITHUB_TOKEN }}
diff --git a/auto_building_tools/.github/workflows/codeql.yml b/auto_building_tools/.github/workflows/codeql.yml
new file mode 100644
index 0000000..4d38b22
--- /dev/null
+++ b/auto_building_tools/.github/workflows/codeql.yml
@@ -0,0 +1,73 @@
+name: "CodeQL"
+
+on:
+ push:
+ branches: [ "main", "*.X" ]
+ pull_request:
+ branches: [ "main", "*.X" ]
+ schedule:
+ - cron: '0 6 * * 1'
+
+jobs:
+ analyze:
+ name: Analyze
+ # Runner size impacts CodeQL analysis time. To learn more, please see:
+ # - https://gh.io/recommended-hardware-resources-for-running-codeql
+ # - https://gh.io/supported-runners-and-hardware-resources
+ # - https://gh.io/using-larger-runners
+ # Consider using larger runners for possible analysis time improvements.
+ runs-on: 'ubuntu-latest'
+ timeout-minutes: 360
+ permissions:
+ # required for all workflows
+ security-events: write
+
+ # only required for workflows in private repositories
+ actions: read
+ contents: read
+
+ strategy:
+ fail-fast: false
+ matrix:
+ language: [ 'javascript-typescript', 'python' ]
+ # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ]
+ # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both
+ # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
+ # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ # Initializes the CodeQL tools for scanning.
+ - name: Initialize CodeQL
+ uses: github/codeql-action/init@v3
+ with:
+ languages: ${{ matrix.language }}
+ # If you wish to specify custom queries, you can do so here or in a config file.
+ # By default, queries listed here will override any specified in a config file.
+ # Prefix the list here with "+" to use these queries and those in the config file.
+
+ # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
+ # queries: security-extended,security-and-quality
+
+
+ # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
+ # If this step fails, then you should remove it and run the build manually (see below)
+ - name: Autobuild
+ uses: github/codeql-action/autobuild@v3
+
+ # ℹ️ Command-line programs to run using the OS shell.
+ # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
+
+ # If the Autobuild fails above, remove it and uncomment the following three lines.
+ # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
+
+ # - run: |
+ # echo "Run, Build Application using script"
+ # ./location_of_script_within_repo/buildscript.sh
+
+ - name: Perform CodeQL Analysis
+ uses: github/codeql-action/analyze@v3
+ with:
+ category: "/language:${{matrix.language}}"
diff --git a/auto_building_tools/.github/workflows/cuda-ci.yml b/auto_building_tools/.github/workflows/cuda-ci.yml
new file mode 100644
index 0000000..999fd17
--- /dev/null
+++ b/auto_building_tools/.github/workflows/cuda-ci.yml
@@ -0,0 +1,76 @@
+name: CUDA GPU
+
+# Only run this workflow when a Pull Request is labeled with the
+# 'CUDA CI' label.
+on:
+ pull_request:
+ types:
+ - labeled
+
+jobs:
+ build_wheel:
+ if: contains(github.event.pull_request.labels.*.name, 'CUDA CI')
+ runs-on: "ubuntu-latest"
+ name: Build wheel for Pull Request
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Build wheels
+ uses: pypa/cibuildwheel@v2.20.0
+ env:
+ CIBW_BUILD: cp312-manylinux_x86_64
+ CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014
+ CIBW_BUILD_VERBOSITY: 1
+ CIBW_ARCHS: x86_64
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: cibw-wheels
+ path: ./wheelhouse/*.whl
+
+ tests:
+ if: contains(github.event.pull_request.labels.*.name, 'CUDA CI')
+ needs: [build_wheel]
+ runs-on:
+ group: cuda-gpu-runner-group
+    # Set this high enough so that the tests can run comfortably. We set a
+ # timeout to make abusing this workflow less attractive.
+ timeout-minutes: 20
+ name: Run Array API unit tests
+ steps:
+ - uses: actions/download-artifact@v4
+ with:
+ pattern: cibw-wheels
+ path: ~/dist
+
+ - uses: actions/setup-python@v5
+ with:
+ # XXX: The 3.12.4 release of Python on GitHub Actions is corrupted:
+ # https://github.com/actions/setup-python/issues/886
+ python-version: '3.12.3'
+ - name: Checkout main repository
+ uses: actions/checkout@v4
+ - name: Cache conda environment
+ id: cache-conda
+ uses: actions/cache@v4
+ with:
+ path: ~/conda
+ key: ${{ runner.os }}-build-${{ hashFiles('build_tools/github/create_gpu_environment.sh') }}-${{ hashFiles('build_tools/github/pylatest_conda_forge_cuda_array-api_linux-64_conda.lock') }}
+ - name: Install miniforge
+ if: ${{ steps.cache-conda.outputs.cache-hit != 'true' }}
+ run: bash build_tools/github/create_gpu_environment.sh
+ - name: Install scikit-learn
+ run: |
+ source "${HOME}/conda/etc/profile.d/conda.sh"
+ conda activate sklearn
+ pip install ~/dist/cibw-wheels/$(ls ~/dist/cibw-wheels)
+
+ - name: Run array API tests
+ run: |
+ source "${HOME}/conda/etc/profile.d/conda.sh"
+ conda activate sklearn
+ python -c "import sklearn; sklearn.show_versions()"
+
+ SCIPY_ARRAY_API=1 pytest --pyargs sklearn -k 'array_api'
+ # Run in /home/runner to not load sklearn from the checkout repo
+ working-directory: /home/runner
diff --git a/auto_building_tools/.github/workflows/cuda-label-remover.yml b/auto_building_tools/.github/workflows/cuda-label-remover.yml
new file mode 100644
index 0000000..f6a65a2
--- /dev/null
+++ b/auto_building_tools/.github/workflows/cuda-label-remover.yml
@@ -0,0 +1,23 @@
+name: Remove "CUDA CI" Label
+
+# This workflow removes the "CUDA CI" label that triggers the actual
+# CUDA CI. It is separate so that we can use the `pull_request_target`
+# trigger which has an API token with write access.
+on:
+ pull_request_target:
+ types:
+ - labeled
+
+# In order to remove the "CUDA CI" label we need to have write permissions for PRs
+permissions:
+ pull-requests: write
+
+jobs:
+ label-remover:
+ if: contains(github.event.pull_request.labels.*.name, 'CUDA CI')
+ name: Remove "CUDA CI" Label
+ runs-on: ubuntu-20.04
+ steps:
+ - uses: actions-ecosystem/action-remove-labels@v1
+ with:
+ labels: CUDA CI
diff --git a/auto_building_tools/.github/workflows/label-blank-issue.yml b/auto_building_tools/.github/workflows/label-blank-issue.yml
new file mode 100644
index 0000000..fce4fe6
--- /dev/null
+++ b/auto_building_tools/.github/workflows/label-blank-issue.yml
@@ -0,0 +1,14 @@
+name: Labels Blank issues
+
+on:
+ issues:
+ types: [opened]
+
+jobs:
+ label-blank-issues:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: andymckay/labeler@1.0.4
+ with:
+ add-labels: "Needs Triage"
+ ignore-if-labeled: true
diff --git a/auto_building_tools/.github/workflows/labeler-module.yml b/auto_building_tools/.github/workflows/labeler-module.yml
new file mode 100644
index 0000000..468d328
--- /dev/null
+++ b/auto_building_tools/.github/workflows/labeler-module.yml
@@ -0,0 +1,33 @@
+name: "Pull Request Labeler"
+on:
+ pull_request_target:
+ types: [opened]
+
+# Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this
+# github actions workflow:
+# https://docs.github.com/en/actions/security-guides/automatic-token-authentication
+permissions:
+ contents: read
+ pull-requests: write
+
+jobs:
+ triage:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: thomasjpfan/labeler@v2.5.1
+ continue-on-error: true
+ if: github.repository == 'scikit-learn/scikit-learn'
+ with:
+ repo-token: "${{ secrets.GITHUB_TOKEN }}"
+ max-labels: "3"
+ configuration-path: ".github/labeler-module.yml"
+
+ triage_file_extensions:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: thomasjpfan/labeler@v2.5.1
+ continue-on-error: true
+ if: github.repository == 'scikit-learn/scikit-learn'
+ with:
+ repo-token: "${{ secrets.GITHUB_TOKEN }}"
+ configuration-path: ".github/labeler-file-extensions.yml"
diff --git a/auto_building_tools/.github/workflows/labeler-title-regex.yml b/auto_building_tools/.github/workflows/labeler-title-regex.yml
new file mode 100644
index 0000000..03de57d
--- /dev/null
+++ b/auto_building_tools/.github/workflows/labeler-title-regex.yml
@@ -0,0 +1,27 @@
+name: Pull Request Regex Title Labeler
+on:
+ pull_request_target:
+ types: [opened, edited]
+
+# Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this
+# github actions workflow:
+# https://docs.github.com/en/actions/security-guides/automatic-token-authentication
+permissions:
+ contents: read
+ pull-requests: write
+
+jobs:
+
+ labeler:
+ runs-on: ubuntu-20.04
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
+ with:
+ python-version: '3.9'
+ - name: Install PyGithub
+ run: pip install -Uq PyGithub
+ - name: Label pull request
+ run: python .github/scripts/label_title_regex.py
+ env:
+ CONTEXT_GITHUB: ${{ toJson(github) }}
diff --git a/auto_building_tools/.github/workflows/lint.yml b/auto_building_tools/.github/workflows/lint.yml
new file mode 100644
index 0000000..e2de3bb
--- /dev/null
+++ b/auto_building_tools/.github/workflows/lint.yml
@@ -0,0 +1,103 @@
+# This linter job on GH actions is used to trigger the commenter bot
+# in bot-lint-comment.yml file. It stores the output of the linter to be used
+# by the commenter bot.
+name: linter
+
+on:
+ - pull_request_target
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.head_ref }}
+ cancel-in-progress: true
+
+jobs:
+ lint:
+ runs-on: ubuntu-latest
+
+ # setting any permission will set everything else to none for GITHUB_TOKEN
+ permissions:
+ pull-requests: none
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+ with:
+ ref: ${{ github.event.pull_request.head.sha }}
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: 3.11
+
+ - name: Install dependencies
+ run: |
+ source build_tools/shared.sh
+ # Include pytest compatibility with mypy
+ pip install pytest $(get_dep ruff min) $(get_dep mypy min) $(get_dep black min) cython-lint
+ # we save the versions of the linters to be used in the error message later.
+ python -c "from importlib.metadata import version; print(f\"ruff={version('ruff')}\")" >> /tmp/versions.txt
+ python -c "from importlib.metadata import version; print(f\"mypy={version('mypy')}\")" >> /tmp/versions.txt
+ python -c "from importlib.metadata import version; print(f\"black={version('black')}\")" >> /tmp/versions.txt
+ python -c "from importlib.metadata import version; print(f\"cython-lint={version('cython-lint')}\")" >> /tmp/versions.txt
+
+ - name: Run linting
+ id: lint-script
+ # We download the linting script from main, since this workflow is run
+ # from main itself.
+ run: |
+ curl https://raw.githubusercontent.com/${{ github.repository }}/main/build_tools/linting.sh --retry 5 -o ./build_tools/linting.sh
+ set +e
+ ./build_tools/linting.sh &> /tmp/linting_output.txt
+ cat /tmp/linting_output.txt
+
+ - name: Upload Artifact
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: lint-log
+ path: |
+ /tmp/linting_output.txt
+ /tmp/versions.txt
+ retention-days: 1
+
+ comment:
+ needs: lint
+ if: ${{ !cancelled() }}
+ runs-on: ubuntu-latest
+
+ # We need these permissions to be able to post / update comments
+ permissions:
+ pull-requests: write
+ issues: write
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: 3.11
+
+ - name: Install dependencies
+ run: python -m pip install requests
+
+ - name: Download artifact
+ id: download-artifact
+ uses: actions/download-artifact@v4
+ with:
+ name: lint-log
+
+ - name: Print log
+ run: cat linting_output.txt
+
+ - name: Process Comments
+ id: process-comments
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ PR_NUMBER: ${{ github.event.pull_request.number }}
+ BRANCH_SHA: ${{ github.event.pull_request.head.sha }}
+ RUN_ID: ${{ github.run_id }}
+ LOG_FILE: linting_output.txt
+ VERSIONS_FILE: versions.txt
+ run: python ./build_tools/get_comment.py
diff --git a/auto_building_tools/.github/workflows/publish_pypi.yml b/auto_building_tools/.github/workflows/publish_pypi.yml
new file mode 100644
index 0000000..d21d3d4
--- /dev/null
+++ b/auto_building_tools/.github/workflows/publish_pypi.yml
@@ -0,0 +1,51 @@
+name: Publish to Pypi
+on:
+ workflow_dispatch:
+ inputs:
+ version:
+ description: 'Version upload to pypi'
+ required: true
+ pypi_repo:
+ description: 'Repo to upload to (testpypi or pypi)'
+ default: 'testpypi'
+ required: true
+
+jobs:
+ publish:
+ runs-on: ubuntu-latest
+ environment: publish_pypi
+ permissions:
+ # IMPORTANT: this permission is mandatory for trusted publishing
+ id-token: write
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
+ with:
+ python-version: '3.8'
+ - name: Install dependencies
+ run: |
+ pip install -U wheelhouse_uploader pyyaml
+ - name: Downloading wheels and sdist from staging
+ env:
+ SKLEARN_VERSION: ${{ github.event.inputs.version }}
+ run: |
+ echo "Download $SKLEARN_VERSION wheels and sdist"
+ python -m wheelhouse_uploader fetch \
+ --version $SKLEARN_VERSION \
+ --local-folder dist/ \
+ scikit-learn \
+ https://pypi.anaconda.org/scikit-learn-wheels-staging/simple/scikit-learn/
+ - name: Check dist has the correct number of artifacts
+ run: |
+ python build_tools/github/check_wheels.py
+ - name: Publish package to TestPyPI
+ uses: pypa/gh-action-pypi-publish@ec4db0b4ddc65acdf4bff5fa45ac92d78b56bdf0 # v1.9.0
+ with:
+ repository-url: https://test.pypi.org/legacy/
+ print-hash: true
+ if: ${{ github.event.inputs.pypi_repo == 'testpypi' }}
+ - name: Publish package to PyPI
+ uses: pypa/gh-action-pypi-publish@ec4db0b4ddc65acdf4bff5fa45ac92d78b56bdf0 # v1.9.0
+ if: ${{ github.event.inputs.pypi_repo == 'pypi' }}
+ with:
+ print-hash: true
diff --git a/auto_building_tools/.github/workflows/unassign.yml b/auto_building_tools/.github/workflows/unassign.yml
new file mode 100644
index 0000000..94a50d4
--- /dev/null
+++ b/auto_building_tools/.github/workflows/unassign.yml
@@ -0,0 +1,24 @@
+name: Unassign
+# Runs when a contributor has unassigned themselves from the issue and adds 'help wanted'
+on:
+ issues:
+ types: unassigned
+
+# Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this
+# github actions workflow:
+# https://docs.github.com/en/actions/security-guides/automatic-token-authentication
+permissions:
+ issues: write
+
+jobs:
+ one:
+ runs-on: ubuntu-latest
+ steps:
+ - name:
+ if: github.event.issue.state == 'open'
+ run: |
+ echo "Marking issue ${{ github.event.issue.number }} as help wanted"
+ gh issue edit $ISSUE --add-label "help wanted"
+ env:
+ GH_TOKEN: ${{ github.token }}
+ ISSUE: ${{ github.event.issue.html_url }}
diff --git a/auto_building_tools/.github/workflows/update-lock-files-pr.yml b/auto_building_tools/.github/workflows/update-lock-files-pr.yml
new file mode 100644
index 0000000..ca44cab
--- /dev/null
+++ b/auto_building_tools/.github/workflows/update-lock-files-pr.yml
@@ -0,0 +1,59 @@
+# Workflow to update lock files in a PR, triggered by specific PR comments
+name: Update lock files in PR
+on:
+ issue_comment:
+ types: [created]
+
+permissions:
+ contents: write
+
+jobs:
+ update-lock-files:
+ if: >-
+ github.repository == 'scikit-learn/scikit-learn'
+ && github.event.issue.pull_request
+ && startsWith(github.event.comment.body, '@scikit-learn-bot update lock-files')
+ runs-on: ubuntu-latest
+
+ steps:
+ # There is no direct way to get the HEAD information directly from issue_comment
+ # event, so we use the GitHub CLI to get the PR head ref and repository
+ - name: Get pull request HEAD information
+ id: pr-head-info
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ run: |
+ pr_info=$(gh pr view ${{ github.event.issue.number }} --repo ${{ github.repository }} --json headRefName,headRepository,headRepositoryOwner)
+ pr_head_ref=$(echo "$pr_info" | jq -r '.headRefName')
+ pr_head_repository=$(echo "$pr_info" | jq -r '.headRepositoryOwner.login + "/" + .headRepository.name')
+ echo "pr_head_ref=$pr_head_ref" >> $GITHUB_OUTPUT
+ echo "pr_head_repository=$pr_head_repository" >> $GITHUB_OUTPUT
+
+ - name: Check out the PR branch
+ uses: actions/checkout@v4
+ with:
+ ref: ${{ steps.pr-head-info.outputs.pr_head_ref }}
+ repository: ${{ steps.pr-head-info.outputs.pr_head_repository }}
+
+ # We overwrite all the scripts we are going to use in this workflow with their
+ # versions on main; since this workflow has the write permissions this is to avoid
+ # malicious changes to these scripts in PRs to be executed
+ - name: Download scripts from main
+ run: |
+ curl https://raw.githubusercontent.com/${{ github.repository }}/main/build_tools/shared.sh --retry 5 -o ./build_tools/shared.sh
+ curl https://raw.githubusercontent.com/${{ github.repository }}/main/build_tools/update_environments_and_lock_files.py --retry 5 -o ./build_tools/update_environments_and_lock_files.py
+ curl https://raw.githubusercontent.com/${{ github.repository }}/main/build_tools/on_pr_comment_update_environments_and_lock_files.py --retry 5 -o ./build_tools/on_pr_comment_update_environments_and_lock_files.py
+
+ - name: Update lock files
+ env:
+ COMMENT: ${{ github.event.comment.body }}
+ # We download the lock files update scripts from main, since this workflow is
+ # run from main itself
+ run: |
+ source build_tools/shared.sh
+ source $CONDA/bin/activate
+ conda install -n base conda conda-libmamba-solver -y
+ conda config --set solver libmamba
+ conda install -c conda-forge "$(get_dep conda-lock min)" -y
+
+ python build_tools/on_pr_comment_update_environments_and_lock_files.py
diff --git a/auto_building_tools/.github/workflows/update-lock-files.yml b/auto_building_tools/.github/workflows/update-lock-files.yml
new file mode 100644
index 0000000..4f149f5
--- /dev/null
+++ b/auto_building_tools/.github/workflows/update-lock-files.yml
@@ -0,0 +1,82 @@
+# Workflow to update lock files
+name: Update lock files
+
+on:
+ workflow_dispatch:
+ schedule:
+ - cron: '0 5 * * 1'
+
+# In order to add the "CUDA CI" label we need to have write permissions for PRs
+permissions:
+ pull-requests: write
+
+jobs:
+ update_lock_files:
+ if: github.repository == 'scikit-learn/scikit-learn'
+ runs-on: ubuntu-latest
+
+ strategy:
+ # Ensure that each build will continue even if one build in the matrix fails
+ fail-fast: false
+ matrix:
+ include:
+ - name: main
+ update_script_args: "--select-tag main-ci"
+ additional_commit_message: "[doc build]"
+ - name: scipy-dev
+ update_script_args: "--select-tag scipy-dev"
+ additional_commit_message: "[scipy-dev]"
+ - name: cirrus-arm
+ update_script_args: "--select-tag arm"
+ additional_commit_message: "[cirrus arm]"
+ - name: array-api
+ update_script_args: "--select-tag cuda"
+
+ steps:
+ - uses: actions/checkout@v4
+ - name: Generate lock files
+ run: |
+ source build_tools/shared.sh
+ source $CONDA/bin/activate
+ conda install -n base conda conda-libmamba-solver -y
+ conda config --set solver libmamba
+ conda install -c conda-forge "$(get_dep conda-lock min)" -y
+
+ python build_tools/update_environments_and_lock_files.py ${{ matrix.update_script_args }}
+
+ - name: Create Pull Request
+ id: cpr
+ uses: peter-evans/create-pull-request@v6
+ with:
+ token: ${{ secrets.BOT_GITHUB_TOKEN }}
+ push-to-fork: scikit-learn-bot/scikit-learn
+ commit-message: Update CI lock files ${{ matrix.additional_commit_message }}
+ committer: "Lock file bot "
+ author: "Lock file bot "
+ delete-branch: true
+ branch: auto-update-lock-files-${{ matrix.name }}
+ title: ":lock: :robot: CI Update lock files for ${{ matrix.name }} CI build(s) :lock: :robot:"
+ body: |
+ Update lock files.
+
+ ### Note
+ If the CI tasks fail, create a new branch based on this PR and add the required fixes to that branch.
+
+ # The CUDA workflow needs to be triggered explicitly as it uses an expensive runner
+ - name: Trigger additional tests
+ if: steps.cpr.outputs.pull-request-number != '' && matrix.name == 'array-api'
+ env:
+ GH_TOKEN: ${{ github.token }}
+ run: |
+ gh pr edit ${{steps.cpr.outputs.pull-request-number}} --add-label "CUDA CI"
+
+ - name: Check Pull Request
+ if: steps.cpr.outputs.pull-request-number != ''
+ run: |
+ echo "### :rocket: Pull-Request Summary" >> ${GITHUB_STEP_SUMMARY}
+ echo "" >> ${GITHUB_STEP_SUMMARY}
+ echo "The following lock files pull-request has been auto-generated:"
+ echo "- **PR** #${{ steps.cpr.outputs.pull-request-number }}" >> ${GITHUB_STEP_SUMMARY}
+ echo "- **URL** ${{ steps.cpr.outputs.pull-request-url }}" >> ${GITHUB_STEP_SUMMARY}
+ echo "- **Operation** [${{ steps.cpr.outputs.pull-request-operation }}]" >> ${GITHUB_STEP_SUMMARY}
+ echo "- **SHA** ${{ steps.cpr.outputs.pull-request-head-sha }}" >> ${GITHUB_STEP_SUMMARY}
diff --git a/auto_building_tools/.github/workflows/update_tracking_issue.yml b/auto_building_tools/.github/workflows/update_tracking_issue.yml
new file mode 100644
index 0000000..2039089
--- /dev/null
+++ b/auto_building_tools/.github/workflows/update_tracking_issue.yml
@@ -0,0 +1,48 @@
+# For workflows to use this workflow include the following:
+#
+# update-tracker:
+# uses: ./.github/workflows/update_tracking_issue.yml
+# if: ${{ always() }}
+# needs: [JOB_NAME]
+# with:
+# job_status: ${{ needs.JOB_NAME.result }}
+# secrets:
+# BOT_GITHUB_TOKEN: ${{ secrets.BOT_GITHUB_TOKEN }}
+# Where JOB_NAME is the job whose status you are interested in
+
+name: "Update tracking issue"
+on:
+ workflow_call:
+ inputs:
+ job_status:
+ required: true
+ type: string
+ secrets:
+ BOT_GITHUB_TOKEN:
+ required: true
+
+jobs:
+ update_tracking_issue:
+ runs-on: ubuntu-latest
+ if: github.repository == 'scikit-learn/scikit-learn' && github.event_name == 'schedule'
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
+ with:
+ python-version: '3.9'
+ - name: Update tracking issue on GitHub
+ run: |
+ set -ex
+ if [[ ${{ inputs.job_status }} == "success" ]]; then
+ TESTS_PASSED=true
+ else
+ TESTS_PASSED=false
+ fi
+
+ pip install defusedxml PyGithub
+ python maint_tools/update_tracking_issue.py \
+ ${{ secrets.BOT_GITHUB_TOKEN }} \
+ "$GITHUB_WORKFLOW" \
+ "$GITHUB_REPOSITORY" \
+ https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID \
+ --tests-passed $TESTS_PASSED
diff --git a/auto_building_tools/.github/workflows/wheels.yml b/auto_building_tools/.github/workflows/wheels.yml
new file mode 100644
index 0000000..87e249d
--- /dev/null
+++ b/auto_building_tools/.github/workflows/wheels.yml
@@ -0,0 +1,260 @@
+# Workflow to build and test wheels
+name: Wheel builder
+
+on:
+ schedule:
+ # Nightly build at 3:42 A.M.
+ - cron: "42 3 */1 * *"
+ push:
+ branches:
+ - main
+ # Release branches
+ - "[0-9]+.[0-9]+.X"
+ pull_request:
+ branches:
+ - main
+ - "[0-9]+.[0-9]+.X"
+ # Manual run
+ workflow_dispatch:
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+ cancel-in-progress: true
+
+jobs:
+ # Check whether to build the wheels and the source tarball
+ check_build_trigger:
+ name: Check build trigger
+ runs-on: ubuntu-latest
+ if: github.repository == 'scikit-learn/scikit-learn'
+ outputs:
+ build: ${{ steps.check_build_trigger.outputs.build }}
+
+ steps:
+ - name: Checkout scikit-learn
+ uses: actions/checkout@v4
+ with:
+ ref: ${{ github.event.pull_request.head.sha }}
+
+ - id: check_build_trigger
+ name: Check build trigger
+ run: bash build_tools/github/check_build_trigger.sh
+
+ # Build the wheels for Linux, Windows and macOS for Python 3.9 and newer
+ build_wheels:
+ name: Build wheel for cp${{ matrix.python }}-${{ matrix.platform_id }}-${{ matrix.manylinux_image }}
+ runs-on: ${{ matrix.os }}
+ needs: check_build_trigger
+ if: needs.check_build_trigger.outputs.build
+
+ strategy:
+ # Ensure that a wheel builder finishes even if another fails
+ fail-fast: false
+ matrix:
+ include:
+          # Windows 64 bit
+ - os: windows-latest
+ python: 39
+ platform_id: win_amd64
+ - os: windows-latest
+ python: 310
+ platform_id: win_amd64
+ - os: windows-latest
+ python: 311
+ platform_id: win_amd64
+ - os: windows-latest
+ python: 312
+ platform_id: win_amd64
+
+ # Linux 64 bit manylinux2014
+ - os: ubuntu-latest
+ python: 39
+ platform_id: manylinux_x86_64
+ manylinux_image: manylinux2014
+
+ # NumPy on Python 3.10 only supports 64bit and is only available with manylinux2014
+ - os: ubuntu-latest
+ python: 310
+ platform_id: manylinux_x86_64
+ manylinux_image: manylinux2014
+
+ - os: ubuntu-latest
+ python: 311
+ platform_id: manylinux_x86_64
+ manylinux_image: manylinux2014
+ - os: ubuntu-latest
+ python: 312
+ platform_id: manylinux_x86_64
+ manylinux_image: manylinux2014
+ - os: ubuntu-latest
+ python: 313t
+ platform_id: manylinux_x86_64
+ manylinux_image: manylinux2014
+ # TODO: remove next line when Python 3.13 is released
+ prerelease_pythons: True
+ free_threaded_support: True
+
+ # MacOS x86_64
+ - os: macos-12
+ python: 39
+ platform_id: macosx_x86_64
+ - os: macos-12
+ python: 310
+ platform_id: macosx_x86_64
+ - os: macos-12
+ python: 311
+ platform_id: macosx_x86_64
+ - os: macos-12
+ python: 312
+ platform_id: macosx_x86_64
+
+ # MacOS arm64
+ - os: macos-14
+ python: 39
+ platform_id: macosx_arm64
+ - os: macos-14
+ python: 310
+ platform_id: macosx_arm64
+ - os: macos-14
+ python: 311
+ platform_id: macosx_arm64
+ - os: macos-14
+ python: 312
+ platform_id: macosx_arm64
+
+ steps:
+ - name: Checkout scikit-learn
+ uses: actions/checkout@v4
+
+ - name: Setup Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.11" # update once build dependencies are available
+
+ - name: Install conda for macos arm64
+ if: ${{ matrix.platform_id == 'macosx_arm64' }}
+ run: |
+ set -ex
+          # macos arm64 runners do not have conda installed. Thus we must install conda manually
+ EXPECTED_SHA="dd832d8a65a861b5592b2cf1d55f26031f7c1491b30321754443931e7b1e6832"
+ MINIFORGE_URL="https://github.com/conda-forge/miniforge/releases/download/23.11.0-0/Mambaforge-23.11.0-0-MacOSX-arm64.sh"
+ curl -L --retry 10 $MINIFORGE_URL -o miniforge.sh
+
+ # Check SHA
+ file_sha=$(shasum -a 256 miniforge.sh | awk '{print $1}')
+ if [ "$EXPECTED_SHA" != "$file_sha" ]; then
+ echo "SHA values did not match!"
+ exit 1
+ fi
+
+ # Install miniforge
+ MINIFORGE_PATH=$HOME/miniforge
+ bash ./miniforge.sh -b -p $MINIFORGE_PATH
+ echo "$MINIFORGE_PATH/bin" >> $GITHUB_PATH
+ echo "CONDA_HOME=$MINIFORGE_PATH" >> $GITHUB_ENV
+
+ - name: Set conda environment for non-macos arm64 environments
+ if: ${{ matrix.platform_id != 'macosx_arm64' }}
+ run: |
+          # Non-macos arm64 environments already have conda installed
+ echo "CONDA_HOME=/usr/local/miniconda" >> $GITHUB_ENV
+
+ - name: Build and test wheels
+ env:
+ CIBW_PRERELEASE_PYTHONS: ${{ matrix.prerelease_pythons }}
+ CIBW_FREE_THREADED_SUPPORT: ${{ matrix.free_threaded_support }}
+ CIBW_ENVIRONMENT: SKLEARN_SKIP_NETWORK_TESTS=1
+ CIBW_BUILD: cp${{ matrix.python }}-${{ matrix.platform_id }}
+ CIBW_ARCHS: all
+ CIBW_MANYLINUX_X86_64_IMAGE: ${{ matrix.manylinux_image }}
+ CIBW_MANYLINUX_I686_IMAGE: ${{ matrix.manylinux_image }}
+ # Needed on Windows CI to compile with Visual Studio compiler
+        # otherwise Meson detects a MINGW64 platform and uses the MINGW64
+ # toolchain
+ CIBW_CONFIG_SETTINGS_WINDOWS: "setup-args=--vsenv"
+ CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: bash build_tools/github/repair_windows_wheels.sh {wheel} {dest_dir}
+ CIBW_BEFORE_TEST_WINDOWS: bash build_tools/github/build_minimal_windows_image.sh ${{ matrix.python }}
+ CIBW_BEFORE_TEST: bash {project}/build_tools/wheels/cibw_before_test.sh
+ CIBW_TEST_REQUIRES: pytest pandas
+ CIBW_TEST_COMMAND: bash {project}/build_tools/wheels/test_wheels.sh
+ CIBW_TEST_COMMAND_WINDOWS: bash {project}/build_tools/github/test_windows_wheels.sh ${{ matrix.python }}
+ CIBW_BUILD_VERBOSITY: 1
+
+ run: bash build_tools/wheels/build_wheels.sh
+
+ - name: Store artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ name: cibw-wheels-cp${{ matrix.python }}-${{ matrix.platform_id }}
+ path: wheelhouse/*.whl
+
+ update-tracker:
+ uses: ./.github/workflows/update_tracking_issue.yml
+ if: ${{ always() }}
+ needs: [build_wheels]
+ with:
+ job_status: ${{ needs.build_wheels.result }}
+ secrets:
+ BOT_GITHUB_TOKEN: ${{ secrets.BOT_GITHUB_TOKEN }}
+
+ # Build the source distribution under Linux
+ build_sdist:
+ name: Source distribution
+ runs-on: ubuntu-latest
+ needs: check_build_trigger
+ if: needs.check_build_trigger.outputs.build
+
+ steps:
+ - name: Checkout scikit-learn
+ uses: actions/checkout@v4
+
+ - name: Setup Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.9" # update once build dependencies are available
+
+ - name: Build source distribution
+ run: bash build_tools/github/build_source.sh
+
+ - name: Test source distribution
+ run: bash build_tools/github/test_source.sh
+ env:
+ SKLEARN_SKIP_NETWORK_TESTS: 1
+
+ - name: Store artifacts
+ uses: actions/upload-artifact@v4
+ with:
+ name: cibw-sdist
+ path: dist/*.tar.gz
+
+ # Upload the wheels and the source distribution
+ upload_anaconda:
+ name: Upload to Anaconda
+ runs-on: ubuntu-latest
+ environment: upload_anaconda
+ needs: [build_wheels, build_sdist]
+ # The artifacts cannot be uploaded on PRs
+ if: github.event_name != 'pull_request'
+
+ steps:
+ - name: Checkout scikit-learn
+ uses: actions/checkout@v4
+
+ - name: Download artifacts
+ uses: actions/download-artifact@v4
+ with:
+ pattern: cibw-*
+ path: dist
+ merge-multiple: true
+
+ - name: Setup Python
+ uses: actions/setup-python@v5
+
+ - name: Upload artifacts
+ env:
+ # Secret variables need to be mapped to environment variables explicitly
+ SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN: ${{ secrets.SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN }}
+ SCIKIT_LEARN_STAGING_UPLOAD_TOKEN: ${{ secrets.SCIKIT_LEARN_STAGING_UPLOAD_TOKEN }}
+ ARTIFACTS_PATH: dist
+ # Force a replacement if the remote file already exists
+ run: bash build_tools/github/upload_anaconda.sh
diff --git a/auto_building_tools/.spin/cmds.py b/auto_building_tools/.spin/cmds.py
new file mode 100644
index 0000000..954749b
--- /dev/null
+++ b/auto_building_tools/.spin/cmds.py
@@ -0,0 +1,29 @@
+import shutil
+import sys
+
+import click
+from spin.cmds import util
+
+
+@click.command()
+def clean():
+ """🪥 Clean build folder.
+
+ Very rarely needed since meson-python recompiles as needed when sklearn is
+ imported.
+
+ One known use case where "spin clean" is useful: avoid compilation errors
+ when switching from numpy<2 to numpy>=2 in the same conda environment or
+ virtualenv.
+ """
+ util.run([sys.executable, "-m", "pip", "uninstall", "scikit-learn", "-y"])
+ default_meson_build_dir = (
+ f"build/cp{sys.version_info.major}{sys.version_info.minor}"
+ )
+ click.secho(
+ f"removing default Meson build dir: {default_meson_build_dir}",
+ bold=True,
+ fg="bright_blue",
+ )
+
+ shutil.rmtree(default_meson_build_dir, ignore_errors=True)
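+
+
+# Usage sketch (assuming the `spin` tool is installed and the command is run
+# from the repository root):
+#
+#     spin clean
+#
+# This pip-uninstalls scikit-learn and removes the default Meson build
+# directory for the current interpreter, e.g. build/cp311 on Python 3.11.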
diff --git a/auto_building_tools/README.md b/auto_building_tools/README.md
new file mode 100644
index 0000000..3db28e1
--- /dev/null
+++ b/auto_building_tools/README.md
@@ -0,0 +1,4 @@
+# auto building tools scripts
+
+References: scikit-learn.org
+References: matplotlib.org
\ No newline at end of file
diff --git a/auto_building_tools/asv_benchmarks/.gitignore b/auto_building_tools/asv_benchmarks/.gitignore
new file mode 100644
index 0000000..a3fecdb
--- /dev/null
+++ b/auto_building_tools/asv_benchmarks/.gitignore
@@ -0,0 +1,6 @@
+*__pycache__*
+env/
+html/
+results/
+scikit-learn/
+benchmarks/cache/
diff --git a/auto_building_tools/asv_benchmarks/asv.conf.json b/auto_building_tools/asv_benchmarks/asv.conf.json
new file mode 100644
index 0000000..21770d6
--- /dev/null
+++ b/auto_building_tools/asv_benchmarks/asv.conf.json
@@ -0,0 +1,163 @@
+{
+ // The version of the config file format. Do not change, unless
+ // you know what you are doing.
+ "version": 1,
+
+ // The name of the project being benchmarked
+ "project": "scikit-learn",
+
+ // The project's homepage
+ "project_url": "scikit-learn.org/",
+
+ // The URL or local path of the source code repository for the
+ // project being benchmarked
+ "repo": "..",
+
+ // The Python project's subdirectory in your repo. If missing or
+ // the empty string, the project is assumed to be located at the root
+ // of the repository.
+ // "repo_subdir": "",
+
+ // Customizable commands for building, installing, and
+ // uninstalling the project. See asv.conf.json documentation.
+ "install_command": ["python -mpip install {wheel_file}"],
+ "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
+ "build_command": ["python -m build --wheel -o {build_cache_dir} {build_dir}"],
+
+ // List of branches to benchmark. If not provided, defaults to "master"
+ // (for git) or "default" (for mercurial).
+ "branches": ["main"],
+ // "branches": ["default"], // for mercurial
+
+ // The DVCS being used. If not set, it will be automatically
+ // determined from "repo" by looking at the protocol in the URL
+ // (if remote), or by looking for special directories, such as
+ // ".git" (if local).
+ // "dvcs": "git",
+
+ // The tool to use to create environments. May be "conda",
+ // "virtualenv" or other value depending on the plugins in use.
+ // If missing or the empty string, the tool will be automatically
+ // determined by looking for tools on the PATH environment
+ // variable.
+ "environment_type": "conda",
+
+ // timeout in seconds for installing any dependencies in environment
+ // defaults to 10 min
+ //"install_timeout": 600,
+
+ // the base URL to show a commit for the project.
+ "show_commit_url": "https://github.com/scikit-learn/scikit-learn/commit/",
+
+ // The Pythons you'd like to test against. If not provided, defaults
+ // to the current version of Python used to run `asv`.
+ // "pythons": ["3.6"],
+
+ // The list of conda channel names to be searched for benchmark
+ // dependency packages in the specified order
+ // "conda_channels": ["conda-forge", "defaults"]
+
+ // The matrix of dependencies to test. Each key is the name of a
+ // package (in PyPI) and the values are version numbers. An empty
+ // list or empty string indicates to just test against the default
+ // (latest) version. null indicates that the package is to not be
+ // installed. If the package to be tested is only available from
+ // PyPi, and the 'environment_type' is conda, then you can preface
+ // the package name by 'pip+', and the package will be installed via
+ // pip (with all the conda available packages installed first,
+ // followed by the pip installed packages).
+ //
+ // The versions of the dependencies should be bumped in a dedicated commit
+ // to easily identify regressions/improvements due to code changes from
+ // those due to dependency changes.
+ //
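+ // For example (hypothetical package name), a PyPI-only dependency could be
+ // listed as "pip+some_pypi_only_package": [] to benchmark against its
+ // latest release.
+ //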
+ "matrix": {
+ "numpy": ["2.0.0"],
+ "scipy": ["1.14.0"],
+ "cython": ["3.0.10"],
+ "joblib": ["1.3.2"],
+ "threadpoolctl": ["3.2.0"],
+ "pandas": ["2.2.2"]
+ },
+
+ // Combinations of libraries/python versions can be excluded/included
+ // from the set to test. Each entry is a dictionary containing additional
+ // key-value pairs to include/exclude.
+ //
+ // An exclude entry excludes entries where all values match. The
+ // values are regexps that should match the whole string.
+ //
+ // An include entry adds an environment. Only the packages listed
+ // are installed. The 'python' key is required. The exclude rules
+ // do not apply to includes.
+ //
+ // In addition to package names, the following keys are available:
+ //
+ // - python
+ // Python version, as in the *pythons* variable above.
+ // - environment_type
+ // Environment type, as above.
+ // - sys_platform
+ // Platform, as in sys.platform. Possible values for the common
+ // cases: 'linux2', 'win32', 'cygwin', 'darwin'.
+ //
+ // "exclude": [
+ // {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
+ // {"environment_type": "conda", "six": null}, // don't run without six on conda
+ // ],
+ //
+ // "include": [
+ // // additional env for python2.7
+ // {"python": "2.7", "numpy": "1.8"},
+ // // additional env if run on windows+conda
+ // {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
+ // ],
+
+ // The directory (relative to the current directory) that benchmarks are
+ // stored in. If not provided, defaults to "benchmarks"
+ // "benchmark_dir": "benchmarks",
+
+ // The directory (relative to the current directory) to cache the Python
+ // environments in. If not provided, defaults to "env"
+ // "env_dir": "env",
+
+ // The directory (relative to the current directory) that raw benchmark
+ // results are stored in. If not provided, defaults to "results".
+ // "results_dir": "results",
+
+ // The directory (relative to the current directory) that the html tree
+ // should be written to. If not provided, defaults to "html".
+ // "html_dir": "html",
+
+ // The number of characters to retain in the commit hashes.
+ // "hash_length": 8,
+
+ // `asv` will cache results of the recent builds in each
+ // environment, making them faster to install next time. This is
+ // the number of builds to keep, per environment.
+ // "build_cache_size": 2,
+
+ // The commits after which the regression search in `asv publish`
+ // should start looking for regressions. Dictionary whose keys are
+ // regexps matching to benchmark names, and values corresponding to
+ // the commit (exclusive) after which to start looking for
+ // regressions. The default is to start from the first commit
+ // with results. If the commit is `null`, regression detection is
+ // skipped for the matching benchmark.
+ //
+ // "regressions_first_commits": {
+ // "some_benchmark": "352cdf", // Consider regressions only after this commit
+ // "another_benchmark": null, // Skip regression detection altogether
+ // },
+
+ // The thresholds for relative change in results, after which `asv
+ // publish` starts reporting regressions. Dictionary of the same
+ // form as in ``regressions_first_commits``, with values
+ // indicating the thresholds. If multiple entries match, the
+ // maximum is taken. If no entry matches, the default is 5%.
+ //
+ // "regressions_thresholds": {
+ // "some_benchmark": 0.01, // Threshold of 1%
+ // "another_benchmark": 0.5, // Threshold of 50%
+ // },
+}
diff --git a/auto_building_tools/asv_benchmarks/benchmarks/__init__.py b/auto_building_tools/asv_benchmarks/benchmarks/__init__.py
new file mode 100644
index 0000000..27dd476
--- /dev/null
+++ b/auto_building_tools/asv_benchmarks/benchmarks/__init__.py
@@ -0,0 +1 @@
+"""Benchmark suite for scikit-learn using ASV"""
diff --git a/auto_building_tools/asv_benchmarks/benchmarks/cluster.py b/auto_building_tools/asv_benchmarks/benchmarks/cluster.py
new file mode 100644
index 0000000..457a15d
--- /dev/null
+++ b/auto_building_tools/asv_benchmarks/benchmarks/cluster.py
@@ -0,0 +1,104 @@
+from sklearn.cluster import KMeans, MiniBatchKMeans
+
+from .common import Benchmark, Estimator, Predictor, Transformer
+from .datasets import _20newsgroups_highdim_dataset, _blobs_dataset
+from .utils import neg_mean_inertia
+
+
+class KMeansBenchmark(Predictor, Transformer, Estimator, Benchmark):
+ """
+ Benchmarks for KMeans.
+ """
+
+ param_names = ["representation", "algorithm", "init"]
+ params = (["dense", "sparse"], ["lloyd", "elkan"], ["random", "k-means++"])
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ representation, algorithm, init = params
+
+ if representation == "sparse":
+ data = _20newsgroups_highdim_dataset(n_samples=8000)
+ else:
+ data = _blobs_dataset(n_clusters=20)
+
+ return data
+
+ def make_estimator(self, params):
+ representation, algorithm, init = params
+
+ max_iter = 30 if representation == "sparse" else 100
+
+ estimator = KMeans(
+ n_clusters=20,
+ algorithm=algorithm,
+ init=init,
+ n_init=1,
+ max_iter=max_iter,
+ tol=0,
+ random_state=0,
+ )
+
+ return estimator
+
+ def make_scorers(self):
+ self.train_scorer = lambda _, __: neg_mean_inertia(
+ self.X, self.estimator.predict(self.X), self.estimator.cluster_centers_
+ )
+ self.test_scorer = lambda _, __: neg_mean_inertia(
+ self.X_val,
+ self.estimator.predict(self.X_val),
+ self.estimator.cluster_centers_,
+ )
+
+
+class MiniBatchKMeansBenchmark(Predictor, Transformer, Estimator, Benchmark):
+ """
+ Benchmarks for MiniBatchKMeans.
+ """
+
+ param_names = ["representation", "init"]
+ params = (["dense", "sparse"], ["random", "k-means++"])
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ representation, init = params
+
+ if representation == "sparse":
+ data = _20newsgroups_highdim_dataset()
+ else:
+ data = _blobs_dataset(n_clusters=20)
+
+ return data
+
+ def make_estimator(self, params):
+ representation, init = params
+
+ max_iter = 5 if representation == "sparse" else 2
+
+ estimator = MiniBatchKMeans(
+ n_clusters=20,
+ init=init,
+ n_init=1,
+ max_iter=max_iter,
+ batch_size=1000,
+ max_no_improvement=None,
+ compute_labels=False,
+ random_state=0,
+ )
+
+ return estimator
+
+ def make_scorers(self):
+ self.train_scorer = lambda _, __: neg_mean_inertia(
+ self.X, self.estimator.predict(self.X), self.estimator.cluster_centers_
+ )
+ self.test_scorer = lambda _, __: neg_mean_inertia(
+ self.X_val,
+ self.estimator.predict(self.X_val),
+ self.estimator.cluster_centers_,
+ )
diff --git a/auto_building_tools/asv_benchmarks/benchmarks/common.py b/auto_building_tools/asv_benchmarks/benchmarks/common.py
new file mode 100644
index 0000000..c12da55
--- /dev/null
+++ b/auto_building_tools/asv_benchmarks/benchmarks/common.py
@@ -0,0 +1,256 @@
+import itertools
+import json
+import os
+import pickle
+import timeit
+from abc import ABC, abstractmethod
+from multiprocessing import cpu_count
+from pathlib import Path
+
+import numpy as np
+
+
+def get_from_config():
+ """Get benchmarks configuration from the config.json file"""
+ current_path = Path(__file__).resolve().parent
+
+ config_path = current_path / "config.json"
+ with open(config_path, "r") as config_file:
+ config_file = "".join(line for line in config_file if line and "//" not in line)
+ config = json.loads(config_file)
+
+ profile = os.getenv("SKLBENCH_PROFILE", config["profile"])
+
+ n_jobs_vals_env = os.getenv("SKLBENCH_NJOBS")
+ if n_jobs_vals_env:
+ n_jobs_vals = json.loads(n_jobs_vals_env)
+ else:
+ n_jobs_vals = config["n_jobs_vals"]
+ if not n_jobs_vals:
+ n_jobs_vals = list(range(1, 1 + cpu_count()))
+
+ cache_path = current_path / "cache"
+ cache_path.mkdir(exist_ok=True)
+ (cache_path / "estimators").mkdir(exist_ok=True)
+ (cache_path / "tmp").mkdir(exist_ok=True)
+
+ save_estimators = os.getenv("SKLBENCH_SAVE_ESTIMATORS", config["save_estimators"])
+ save_dir = os.getenv("ASV_COMMIT", "new")[:8]
+
+ if save_estimators:
+ (cache_path / "estimators" / save_dir).mkdir(exist_ok=True)
+
+ base_commit = os.getenv("SKLBENCH_BASE_COMMIT", config["base_commit"])
+
+ bench_predict = os.getenv("SKLBENCH_PREDICT", config["bench_predict"])
+ bench_transform = os.getenv("SKLBENCH_TRANSFORM", config["bench_transform"])
+
+ return (
+ profile,
+ n_jobs_vals,
+ save_estimators,
+ save_dir,
+ base_commit,
+ bench_predict,
+ bench_transform,
+ )
+
+
+def get_estimator_path(benchmark, directory, params, save=False):
+ """Get path of pickled fitted estimator"""
+ path = Path(__file__).resolve().parent / "cache"
+ path = (path / "estimators" / directory) if save else (path / "tmp")
+
+ filename = (
+ benchmark.__class__.__name__
+ + "_estimator_"
+ + "_".join(list(map(str, params)))
+ + ".pkl"
+ )
+
+ return path / filename
+
+
+def clear_tmp():
+ """Clean the tmp directory"""
+ path = Path(__file__).resolve().parent / "cache" / "tmp"
+ for child in path.iterdir():
+ child.unlink()
+
+
+class Benchmark(ABC):
+ """Abstract base class for all the benchmarks"""
+
+ timer = timeit.default_timer # wall time
+ processes = 1
+ timeout = 500
+
+ (
+ profile,
+ n_jobs_vals,
+ save_estimators,
+ save_dir,
+ base_commit,
+ bench_predict,
+ bench_transform,
+ ) = get_from_config()
+
+ if profile == "fast":
+ warmup_time = 0
+ repeat = 1
+ number = 1
+ min_run_count = 1
+ data_size = "small"
+ elif profile == "regular":
+ warmup_time = 1
+ repeat = (3, 100, 30)
+ data_size = "small"
+ elif profile == "large_scale":
+ warmup_time = 1
+ repeat = 3
+ number = 1
+ data_size = "large"
+
+ @property
+ @abstractmethod
+ def params(self):
+ pass
+
+
+class Estimator(ABC):
+ """Abstract base class for all benchmarks of estimators"""
+
+ @abstractmethod
+ def make_data(self, params):
+ """Return the dataset for a combination of parameters"""
+ # The datasets are cached using joblib.Memory so it's fast and can be
+ # called for each repeat
+ pass
+
+ @abstractmethod
+ def make_estimator(self, params):
+ """Return an instance of the estimator for a combination of parameters"""
+ pass
+
+ def skip(self, params):
+ """Return True if the benchmark should be skipped for these params"""
+ return False
+
+ def setup_cache(self):
+ """Pickle a fitted estimator for all combinations of parameters"""
+ # This is run once per benchmark class.
+
+ clear_tmp()
+
+ param_grid = list(itertools.product(*self.params))
+
+ for params in param_grid:
+ if self.skip(params):
+ continue
+
+ estimator = self.make_estimator(params)
+ X, _, y, _ = self.make_data(params)
+
+ estimator.fit(X, y)
+
+ est_path = get_estimator_path(
+ self, Benchmark.save_dir, params, Benchmark.save_estimators
+ )
+ with est_path.open(mode="wb") as f:
+ pickle.dump(estimator, f)
+
+ def setup(self, *params):
+ """Generate dataset and load the fitted estimator"""
+ # This is run once per combination of parameters and per repeat so we
+ # need to avoid doing expensive operations there.
+
+ if self.skip(params):
+ raise NotImplementedError
+
+ self.X, self.X_val, self.y, self.y_val = self.make_data(params)
+
+ est_path = get_estimator_path(
+ self, Benchmark.save_dir, params, Benchmark.save_estimators
+ )
+ with est_path.open(mode="rb") as f:
+ self.estimator = pickle.load(f)
+
+ self.make_scorers()
+
+ def time_fit(self, *args):
+ self.estimator.fit(self.X, self.y)
+
+ def peakmem_fit(self, *args):
+ self.estimator.fit(self.X, self.y)
+
+ def track_train_score(self, *args):
+ if hasattr(self.estimator, "predict"):
+ y_pred = self.estimator.predict(self.X)
+ else:
+ y_pred = None
+ return float(self.train_scorer(self.y, y_pred))
+
+ def track_test_score(self, *args):
+ if hasattr(self.estimator, "predict"):
+ y_val_pred = self.estimator.predict(self.X_val)
+ else:
+ y_val_pred = None
+ return float(self.test_scorer(self.y_val, y_val_pred))
+
+
+class Predictor(ABC):
+ """Abstract base class for benchmarks of estimators implementing predict"""
+
+ if Benchmark.bench_predict:
+
+ def time_predict(self, *args):
+ self.estimator.predict(self.X)
+
+ def peakmem_predict(self, *args):
+ self.estimator.predict(self.X)
+
+ if Benchmark.base_commit is not None:
+
+ def track_same_prediction(self, *args):
+ est_path = get_estimator_path(self, Benchmark.base_commit, args, True)
+ with est_path.open(mode="rb") as f:
+ estimator_base = pickle.load(f)
+
+ y_val_pred_base = estimator_base.predict(self.X_val)
+ y_val_pred = self.estimator.predict(self.X_val)
+
+ return np.allclose(y_val_pred_base, y_val_pred)
+
+ @property
+ @abstractmethod
+ def params(self):
+ pass
+
+
+class Transformer(ABC):
+ """Abstract base class for benchmarks of estimators implementing transform"""
+
+ if Benchmark.bench_transform:
+
+ def time_transform(self, *args):
+ self.estimator.transform(self.X)
+
+ def peakmem_transform(self, *args):
+ self.estimator.transform(self.X)
+
+ if Benchmark.base_commit is not None:
+
+ def track_same_transform(self, *args):
+ est_path = get_estimator_path(self, Benchmark.base_commit, args, True)
+ with est_path.open(mode="rb") as f:
+ estimator_base = pickle.load(f)
+
+ X_val_t_base = estimator_base.transform(self.X_val)
+ X_val_t = self.estimator.transform(self.X_val)
+
+ return np.allclose(X_val_t_base, X_val_t)
+
+ @property
+ @abstractmethod
+ def params(self):
+ pass
diff --git a/auto_building_tools/asv_benchmarks/benchmarks/config.json b/auto_building_tools/asv_benchmarks/benchmarks/config.json
new file mode 100644
index 0000000..f50827c
--- /dev/null
+++ b/auto_building_tools/asv_benchmarks/benchmarks/config.json
@@ -0,0 +1,33 @@
+{
+ // "regular": Bencharks are run on small to medium datasets. Each benchmark
+ // is run multiple times and averaged.
+ // "fast": Benchmarks are run on small to medium datasets. Each benchmark
+ // is run only once. May provide unstable benchmarks.
+ // "large_scale": Benchmarks are run on large datasets. Each benchmark is
+ // run multiple times and averaged. This profile is meant to
+ // benchmark scalability and will take hours on a single core.
+ // Can be overridden by environment variable SKLBENCH_PROFILE.
+ "profile": "regular",
+
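+ // For example (assuming `asv run` is launched from this asv_benchmarks
+ // directory), the fast profile can be selected for a quick smoke run
+ // without editing this file:
+ //   SKLBENCH_PROFILE=fast asv run
+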
+ // List of values of n_jobs to use for estimators which accept this
+ // parameter (-1 means all cores). An empty list means all values from 1 to
+ // the maximum number of available cores.
+ // Can be overridden by environment variable SKLBENCH_NJOBS.
+ "n_jobs_vals": [1],
+
+ // If true, fitted estimators are saved in ./cache/estimators/
+ // Can be overridden by environment variable SKLBENCH_SAVE_ESTIMATORS.
+ "save_estimators": false,
+
+ // Commit hash to compare estimator predictions with.
+ // If null, predictions are not compared.
+ // Can be overridden by environment variable SKLBENCH_BASE_COMMIT.
+ "base_commit": null,
+
+ // If false, the predict (resp. transform) method of the estimators won't
+ // be benchmarked.
+ // Can be overridden by environment variables SKLBENCH_PREDICT and
+ // SKLBENCH_TRANSFORM.
+ "bench_predict": true,
+ "bench_transform": true
+}
diff --git a/auto_building_tools/asv_benchmarks/benchmarks/datasets.py b/auto_building_tools/asv_benchmarks/benchmarks/datasets.py
new file mode 100644
index 0000000..bbf5029
--- /dev/null
+++ b/auto_building_tools/asv_benchmarks/benchmarks/datasets.py
@@ -0,0 +1,168 @@
+from pathlib import Path
+
+import numpy as np
+import scipy.sparse as sp
+from joblib import Memory
+
+from sklearn.datasets import (
+ fetch_20newsgroups,
+ fetch_olivetti_faces,
+ fetch_openml,
+ load_digits,
+ make_blobs,
+ make_classification,
+ make_regression,
+)
+from sklearn.decomposition import TruncatedSVD
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import MaxAbsScaler, StandardScaler
+
+# memory location for caching datasets
+M = Memory(location=str(Path(__file__).resolve().parent / "cache"))
+
+
+@M.cache
+def _blobs_dataset(n_samples=500000, n_features=3, n_clusters=100, dtype=np.float32):
+ X, _ = make_blobs(
+ n_samples=n_samples, n_features=n_features, centers=n_clusters, random_state=0
+ )
+ X = X.astype(dtype, copy=False)
+
+ X, X_val = train_test_split(X, test_size=0.1, random_state=0)
+ return X, X_val, None, None
+
+
+@M.cache
+def _20newsgroups_highdim_dataset(n_samples=None, ngrams=(1, 1), dtype=np.float32):
+ newsgroups = fetch_20newsgroups(random_state=0)
+ vectorizer = TfidfVectorizer(ngram_range=ngrams, dtype=dtype)
+ X = vectorizer.fit_transform(newsgroups.data[:n_samples])
+ y = newsgroups.target[:n_samples]
+
+ X, X_val, y, y_val = train_test_split(X, y, test_size=0.1, random_state=0)
+ return X, X_val, y, y_val
+
+
+@M.cache
+def _20newsgroups_lowdim_dataset(n_components=100, ngrams=(1, 1), dtype=np.float32):
+ newsgroups = fetch_20newsgroups()
+ vectorizer = TfidfVectorizer(ngram_range=ngrams)
+ X = vectorizer.fit_transform(newsgroups.data)
+ X = X.astype(dtype, copy=False)
+ svd = TruncatedSVD(n_components=n_components)
+ X = svd.fit_transform(X)
+ y = newsgroups.target
+
+ X, X_val, y, y_val = train_test_split(X, y, test_size=0.1, random_state=0)
+ return X, X_val, y, y_val
+
+
+@M.cache
+def _mnist_dataset(dtype=np.float32):
+ X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)
+ X = X.astype(dtype, copy=False)
+ X = MaxAbsScaler().fit_transform(X)
+
+ X, X_val, y, y_val = train_test_split(X, y, test_size=0.1, random_state=0)
+ return X, X_val, y, y_val
+
+
+@M.cache
+def _digits_dataset(n_samples=None, dtype=np.float32):
+ X, y = load_digits(return_X_y=True)
+ X = X.astype(dtype, copy=False)
+ X = MaxAbsScaler().fit_transform(X)
+ X = X[:n_samples]
+ y = y[:n_samples]
+
+ X, X_val, y, y_val = train_test_split(X, y, test_size=0.1, random_state=0)
+ return X, X_val, y, y_val
+
+
+@M.cache
+def _synth_regression_dataset(n_samples=100000, n_features=100, dtype=np.float32):
+ X, y = make_regression(
+ n_samples=n_samples,
+ n_features=n_features,
+ n_informative=n_features // 10,
+ noise=50,
+ random_state=0,
+ )
+ X = X.astype(dtype, copy=False)
+ X = StandardScaler().fit_transform(X)
+
+ X, X_val, y, y_val = train_test_split(X, y, test_size=0.1, random_state=0)
+ return X, X_val, y, y_val
+
+
+@M.cache
+def _synth_regression_sparse_dataset(
+ n_samples=10000, n_features=10000, density=0.01, dtype=np.float32
+):
+ X = sp.random(
+ m=n_samples, n=n_features, density=density, format="csr", random_state=0
+ )
+ X.data = np.random.RandomState(0).randn(X.getnnz())
+ X = X.astype(dtype, copy=False)
+ coefs = sp.random(m=n_features, n=1, density=0.5, random_state=0)
+ coefs.data = np.random.RandomState(0).randn(coefs.getnnz())
+ y = X.dot(coefs.toarray()).reshape(-1)
+ y += 0.2 * y.std() * np.random.randn(n_samples)
+
+ X, X_val, y, y_val = train_test_split(X, y, test_size=0.1, random_state=0)
+ return X, X_val, y, y_val
+
+
+@M.cache
+def _synth_classification_dataset(
+ n_samples=1000, n_features=10000, n_classes=2, dtype=np.float32
+):
+ X, y = make_classification(
+ n_samples=n_samples,
+ n_features=n_features,
+ n_classes=n_classes,
+ random_state=0,
+ n_informative=n_features,
+ n_redundant=0,
+ )
+ X = X.astype(dtype, copy=False)
+ X = StandardScaler().fit_transform(X)
+
+ X, X_val, y, y_val = train_test_split(X, y, test_size=0.1, random_state=0)
+ return X, X_val, y, y_val
+
+
+@M.cache
+def _olivetti_faces_dataset():
+ dataset = fetch_olivetti_faces(shuffle=True, random_state=42)
+ faces = dataset.data
+ n_samples, n_features = faces.shape
+ faces_centered = faces - faces.mean(axis=0)
+ # local centering
+ faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)
+ X = faces_centered
+
+ X, X_val = train_test_split(X, test_size=0.1, random_state=0)
+ return X, X_val, None, None
+
+
+@M.cache
+def _random_dataset(
+ n_samples=1000, n_features=1000, representation="dense", dtype=np.float32
+):
+ if representation == "dense":
+ X = np.random.RandomState(0).random_sample((n_samples, n_features))
+ X = X.astype(dtype, copy=False)
+ else:
+ X = sp.random(
+ n_samples,
+ n_features,
+ density=0.05,
+ format="csr",
+ dtype=dtype,
+ random_state=0,
+ )
+
+ X, X_val = train_test_split(X, test_size=0.1, random_state=0)
+ return X, X_val, None, None
diff --git a/auto_building_tools/asv_benchmarks/benchmarks/decomposition.py b/auto_building_tools/asv_benchmarks/benchmarks/decomposition.py
new file mode 100644
index 0000000..0a7bb7a
--- /dev/null
+++ b/auto_building_tools/asv_benchmarks/benchmarks/decomposition.py
@@ -0,0 +1,96 @@
+from sklearn.decomposition import PCA, DictionaryLearning, MiniBatchDictionaryLearning
+
+from .common import Benchmark, Estimator, Transformer
+from .datasets import _mnist_dataset, _olivetti_faces_dataset
+from .utils import make_dict_learning_scorers, make_pca_scorers
+
+
+class PCABenchmark(Transformer, Estimator, Benchmark):
+ """
+ Benchmarks for PCA.
+ """
+
+ param_names = ["svd_solver"]
+ params = (["full", "arpack", "randomized"],)
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ return _mnist_dataset()
+
+ def make_estimator(self, params):
+ (svd_solver,) = params
+
+ estimator = PCA(n_components=32, svd_solver=svd_solver, random_state=0)
+
+ return estimator
+
+ def make_scorers(self):
+ make_pca_scorers(self)
+
+
+class DictionaryLearningBenchmark(Transformer, Estimator, Benchmark):
+ """
+ Benchmarks for DictionaryLearning.
+ """
+
+ param_names = ["fit_algorithm", "n_jobs"]
+ params = (["lars", "cd"], Benchmark.n_jobs_vals)
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ return _olivetti_faces_dataset()
+
+ def make_estimator(self, params):
+ fit_algorithm, n_jobs = params
+
+ estimator = DictionaryLearning(
+ n_components=15,
+ fit_algorithm=fit_algorithm,
+ alpha=0.1,
+ transform_alpha=1,
+ max_iter=20,
+ tol=1e-16,
+ random_state=0,
+ n_jobs=n_jobs,
+ )
+
+ return estimator
+
+ def make_scorers(self):
+ make_dict_learning_scorers(self)
+
+
+class MiniBatchDictionaryLearningBenchmark(Transformer, Estimator, Benchmark):
+ """
+ Benchmarks for MiniBatchDictionaryLearning.
+ """
+
+ param_names = ["fit_algorithm", "n_jobs"]
+ params = (["lars", "cd"], Benchmark.n_jobs_vals)
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ return _olivetti_faces_dataset()
+
+ def make_estimator(self, params):
+ fit_algorithm, n_jobs = params
+
+ estimator = MiniBatchDictionaryLearning(
+ n_components=15,
+ fit_algorithm=fit_algorithm,
+ alpha=0.1,
+ batch_size=3,
+ random_state=0,
+ n_jobs=n_jobs,
+ )
+
+ return estimator
+
+ def make_scorers(self):
+ make_dict_learning_scorers(self)
diff --git a/auto_building_tools/asv_benchmarks/benchmarks/ensemble.py b/auto_building_tools/asv_benchmarks/benchmarks/ensemble.py
new file mode 100644
index 0000000..c336d1e
--- /dev/null
+++ b/auto_building_tools/asv_benchmarks/benchmarks/ensemble.py
@@ -0,0 +1,121 @@
+from sklearn.ensemble import (
+ GradientBoostingClassifier,
+ HistGradientBoostingClassifier,
+ RandomForestClassifier,
+)
+
+from .common import Benchmark, Estimator, Predictor
+from .datasets import (
+ _20newsgroups_highdim_dataset,
+ _20newsgroups_lowdim_dataset,
+ _synth_classification_dataset,
+)
+from .utils import make_gen_classif_scorers
+
+
+class RandomForestClassifierBenchmark(Predictor, Estimator, Benchmark):
+ """
+ Benchmarks for RandomForestClassifier.
+ """
+
+ param_names = ["representation", "n_jobs"]
+ params = (["dense", "sparse"], Benchmark.n_jobs_vals)
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ representation, n_jobs = params
+
+ if representation == "sparse":
+ data = _20newsgroups_highdim_dataset()
+ else:
+ data = _20newsgroups_lowdim_dataset()
+
+ return data
+
+ def make_estimator(self, params):
+ representation, n_jobs = params
+
+ n_estimators = 500 if Benchmark.data_size == "large" else 100
+
+ estimator = RandomForestClassifier(
+ n_estimators=n_estimators,
+ min_samples_split=10,
+ max_features="log2",
+ n_jobs=n_jobs,
+ random_state=0,
+ )
+
+ return estimator
+
+ def make_scorers(self):
+ make_gen_classif_scorers(self)
+
+
+class GradientBoostingClassifierBenchmark(Predictor, Estimator, Benchmark):
+ """
+ Benchmarks for GradientBoostingClassifier.
+ """
+
+ param_names = ["representation"]
+ params = (["dense", "sparse"],)
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ (representation,) = params
+
+ if representation == "sparse":
+ data = _20newsgroups_highdim_dataset()
+ else:
+ data = _20newsgroups_lowdim_dataset()
+
+ return data
+
+ def make_estimator(self, params):
+ (representation,) = params
+
+ n_estimators = 100 if Benchmark.data_size == "large" else 10
+
+ estimator = GradientBoostingClassifier(
+ n_estimators=n_estimators,
+ max_features="log2",
+ subsample=0.5,
+ random_state=0,
+ )
+
+ return estimator
+
+ def make_scorers(self):
+ make_gen_classif_scorers(self)
+
+
+class HistGradientBoostingClassifierBenchmark(Predictor, Estimator, Benchmark):
+ """
+ Benchmarks for HistGradientBoostingClassifier.
+ """
+
+ param_names = []
+ params = ()
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ data = _synth_classification_dataset(
+ n_samples=10000, n_features=100, n_classes=5
+ )
+
+ return data
+
+ def make_estimator(self, params):
+ estimator = HistGradientBoostingClassifier(
+ max_iter=100, max_leaf_nodes=15, early_stopping=False, random_state=0
+ )
+
+ return estimator
+
+ def make_scorers(self):
+ make_gen_classif_scorers(self)
diff --git a/auto_building_tools/asv_benchmarks/benchmarks/linear_model.py b/auto_building_tools/asv_benchmarks/benchmarks/linear_model.py
new file mode 100644
index 0000000..2415389
--- /dev/null
+++ b/auto_building_tools/asv_benchmarks/benchmarks/linear_model.py
@@ -0,0 +1,257 @@
+from sklearn.linear_model import (
+ ElasticNet,
+ Lasso,
+ LinearRegression,
+ LogisticRegression,
+ Ridge,
+ SGDRegressor,
+)
+
+from .common import Benchmark, Estimator, Predictor
+from .datasets import (
+ _20newsgroups_highdim_dataset,
+ _20newsgroups_lowdim_dataset,
+ _synth_regression_dataset,
+ _synth_regression_sparse_dataset,
+)
+from .utils import make_gen_classif_scorers, make_gen_reg_scorers
+
+
+class LogisticRegressionBenchmark(Predictor, Estimator, Benchmark):
+ """
+ Benchmarks for LogisticRegression.
+ """
+
+ param_names = ["representation", "solver", "n_jobs"]
+ params = (["dense", "sparse"], ["lbfgs", "saga"], Benchmark.n_jobs_vals)
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ representation, solver, n_jobs = params
+
+ if Benchmark.data_size == "large":
+ if representation == "sparse":
+ data = _20newsgroups_highdim_dataset(n_samples=10000)
+ else:
+ data = _20newsgroups_lowdim_dataset(n_components=1e3)
+ else:
+ if representation == "sparse":
+ data = _20newsgroups_highdim_dataset(n_samples=2500)
+ else:
+ data = _20newsgroups_lowdim_dataset()
+
+ return data
+
+ def make_estimator(self, params):
+ representation, solver, n_jobs = params
+
+ penalty = "l2" if solver == "lbfgs" else "l1"
+
+ estimator = LogisticRegression(
+ solver=solver,
+ penalty=penalty,
+ tol=0.01,
+ n_jobs=n_jobs,
+ random_state=0,
+ )
+
+ return estimator
+
+ def make_scorers(self):
+ make_gen_classif_scorers(self)
+
+
+class RidgeBenchmark(Predictor, Estimator, Benchmark):
+ """
+ Benchmarks for Ridge.
+ """
+
+ param_names = ["representation", "solver"]
+ params = (
+ ["dense", "sparse"],
+ ["auto", "svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga"],
+ )
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ representation, solver = params
+
+ if representation == "dense":
+ data = _synth_regression_dataset(n_samples=500000, n_features=100)
+ else:
+ data = _synth_regression_sparse_dataset(
+ n_samples=100000, n_features=10000, density=0.005
+ )
+
+ return data
+
+ def make_estimator(self, params):
+ representation, solver = params
+
+ estimator = Ridge(solver=solver, fit_intercept=False, random_state=0)
+
+ return estimator
+
+ def make_scorers(self):
+ make_gen_reg_scorers(self)
+
+ def skip(self, params):
+ representation, solver = params
+
+ if representation == "sparse" and solver == "svd":
+ return True
+ return False
+
+
+class LinearRegressionBenchmark(Predictor, Estimator, Benchmark):
+ """
+ Benchmarks for Linear Regression.
+ """
+
+ param_names = ["representation"]
+ params = (["dense", "sparse"],)
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ (representation,) = params
+
+ if representation == "dense":
+ data = _synth_regression_dataset(n_samples=1000000, n_features=100)
+ else:
+ data = _synth_regression_sparse_dataset(
+ n_samples=10000, n_features=100000, density=0.01
+ )
+
+ return data
+
+ def make_estimator(self, params):
+ estimator = LinearRegression()
+
+ return estimator
+
+ def make_scorers(self):
+ make_gen_reg_scorers(self)
+
+
+class SGDRegressorBenchmark(Predictor, Estimator, Benchmark):
+ """
+ Benchmarks for SGDRegressor.
+ """
+
+ param_names = ["representation"]
+ params = (["dense", "sparse"],)
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ (representation,) = params
+
+ if representation == "dense":
+ data = _synth_regression_dataset(n_samples=100000, n_features=200)
+ else:
+ data = _synth_regression_sparse_dataset(
+ n_samples=100000, n_features=1000, density=0.01
+ )
+
+ return data
+
+ def make_estimator(self, params):
+ (representation,) = params
+
+ max_iter = 60 if representation == "dense" else 300
+
+ estimator = SGDRegressor(max_iter=max_iter, tol=None, random_state=0)
+
+ return estimator
+
+ def make_scorers(self):
+ make_gen_reg_scorers(self)
+
+
+class ElasticNetBenchmark(Predictor, Estimator, Benchmark):
+ """
+ Benchmarks for ElasticNet.
+ """
+
+ param_names = ["representation", "precompute"]
+ params = (["dense", "sparse"], [True, False])
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ representation, precompute = params
+
+ if representation == "dense":
+ data = _synth_regression_dataset(n_samples=1000000, n_features=100)
+ else:
+ data = _synth_regression_sparse_dataset(
+ n_samples=50000, n_features=5000, density=0.01
+ )
+
+ return data
+
+ def make_estimator(self, params):
+ representation, precompute = params
+
+ estimator = ElasticNet(precompute=precompute, alpha=0.001, random_state=0)
+
+ return estimator
+
+ def make_scorers(self):
+ make_gen_reg_scorers(self)
+
+ def skip(self, params):
+ representation, precompute = params
+
+ if representation == "sparse" and precompute is False:
+ return True
+ return False
+
+
+class LassoBenchmark(Predictor, Estimator, Benchmark):
+ """
+ Benchmarks for Lasso.
+ """
+
+ param_names = ["representation", "precompute"]
+ params = (["dense", "sparse"], [True, False])
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ representation, precompute = params
+
+ if representation == "dense":
+ data = _synth_regression_dataset(n_samples=1000000, n_features=100)
+ else:
+ data = _synth_regression_sparse_dataset(
+ n_samples=50000, n_features=5000, density=0.01
+ )
+
+ return data
+
+ def make_estimator(self, params):
+ representation, precompute = params
+
+ estimator = Lasso(precompute=precompute, alpha=0.001, random_state=0)
+
+ return estimator
+
+ def make_scorers(self):
+ make_gen_reg_scorers(self)
+
+ def skip(self, params):
+ representation, precompute = params
+
+ if representation == "sparse" and precompute is False:
+ return True
+ return False
diff --git a/auto_building_tools/asv_benchmarks/benchmarks/manifold.py b/auto_building_tools/asv_benchmarks/benchmarks/manifold.py
new file mode 100644
index 0000000..c32f3e0
--- /dev/null
+++ b/auto_building_tools/asv_benchmarks/benchmarks/manifold.py
@@ -0,0 +1,34 @@
+from sklearn.manifold import TSNE
+
+from .common import Benchmark, Estimator
+from .datasets import _digits_dataset
+
+
+class TSNEBenchmark(Estimator, Benchmark):
+ """
+ Benchmarks for t-SNE.
+ """
+
+ param_names = ["method"]
+ params = (["exact", "barnes_hut"],)
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ (method,) = params
+
+ n_samples = 500 if method == "exact" else None
+
+ return _digits_dataset(n_samples=n_samples)
+
+ def make_estimator(self, params):
+ (method,) = params
+
+ estimator = TSNE(random_state=0, method=method)
+
+ return estimator
+
+ def make_scorers(self):
+ self.train_scorer = lambda _, __: self.estimator.kl_divergence_
+ self.test_scorer = lambda _, __: self.estimator.kl_divergence_
diff --git a/auto_building_tools/asv_benchmarks/benchmarks/metrics.py b/auto_building_tools/asv_benchmarks/benchmarks/metrics.py
new file mode 100644
index 0000000..597e5dc
--- /dev/null
+++ b/auto_building_tools/asv_benchmarks/benchmarks/metrics.py
@@ -0,0 +1,45 @@
+from sklearn.metrics.pairwise import pairwise_distances
+
+from .common import Benchmark
+from .datasets import _random_dataset
+
+
+class PairwiseDistancesBenchmark(Benchmark):
+ """
+ Benchmarks for pairwise distances.
+ """
+
+ param_names = ["representation", "metric", "n_jobs"]
+ params = (
+ ["dense", "sparse"],
+ ["cosine", "euclidean", "manhattan", "correlation"],
+ Benchmark.n_jobs_vals,
+ )
+
+ def setup(self, *params):
+ representation, metric, n_jobs = params
+
+ if representation == "sparse" and metric == "correlation":
+ raise NotImplementedError
+
+ if Benchmark.data_size == "large":
+ if metric in ("manhattan", "correlation"):
+ n_samples = 8000
+ else:
+ n_samples = 24000
+ else:
+ if metric in ("manhattan", "correlation"):
+ n_samples = 4000
+ else:
+ n_samples = 12000
+
+ data = _random_dataset(n_samples=n_samples, representation=representation)
+ self.X, self.X_val, self.y, self.y_val = data
+
+ self.pdist_params = {"metric": metric, "n_jobs": n_jobs}
+
+ def time_pairwise_distances(self, *args):
+ pairwise_distances(self.X, **self.pdist_params)
+
+ def peakmem_pairwise_distances(self, *args):
+ pairwise_distances(self.X, **self.pdist_params)
diff --git a/auto_building_tools/asv_benchmarks/benchmarks/model_selection.py b/auto_building_tools/asv_benchmarks/benchmarks/model_selection.py
new file mode 100644
index 0000000..335ffe4
--- /dev/null
+++ b/auto_building_tools/asv_benchmarks/benchmarks/model_selection.py
@@ -0,0 +1,84 @@
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import GridSearchCV, cross_val_score
+
+from .common import Benchmark, Estimator, Predictor
+from .datasets import _synth_classification_dataset
+from .utils import make_gen_classif_scorers
+
+
+class CrossValidationBenchmark(Benchmark):
+ """
+ Benchmarks for Cross Validation.
+ """
+
+ timeout = 20000
+
+ param_names = ["n_jobs"]
+ params = (Benchmark.n_jobs_vals,)
+
+ def setup(self, *params):
+ (n_jobs,) = params
+
+ data = _synth_classification_dataset(n_samples=50000, n_features=100)
+ self.X, self.X_val, self.y, self.y_val = data
+
+ self.clf = RandomForestClassifier(n_estimators=50, max_depth=10, random_state=0)
+
+ cv = 16 if Benchmark.data_size == "large" else 4
+
+ self.cv_params = {"n_jobs": n_jobs, "cv": cv}
+
+ def time_crossval(self, *args):
+ cross_val_score(self.clf, self.X, self.y, **self.cv_params)
+
+ def peakmem_crossval(self, *args):
+ cross_val_score(self.clf, self.X, self.y, **self.cv_params)
+
+ def track_crossval(self, *args):
+ return float(cross_val_score(self.clf, self.X, self.y, **self.cv_params).mean())
+
+
+class GridSearchBenchmark(Predictor, Estimator, Benchmark):
+ """
+ Benchmarks for GridSearch.
+ """
+
+ timeout = 20000
+
+ param_names = ["n_jobs"]
+ params = (Benchmark.n_jobs_vals,)
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ data = _synth_classification_dataset(n_samples=10000, n_features=100)
+
+ return data
+
+ def make_estimator(self, params):
+ (n_jobs,) = params
+
+ clf = RandomForestClassifier(random_state=0)
+
+ if Benchmark.data_size == "large":
+ n_estimators_list = [10, 25, 50, 100, 500]
+ max_depth_list = [5, 10, None]
+ max_features_list = [0.1, 0.4, 0.8, 1.0]
+ else:
+ n_estimators_list = [10, 25, 50]
+ max_depth_list = [5, 10]
+ max_features_list = [0.1, 0.4, 0.8]
+
+ param_grid = {
+ "n_estimators": n_estimators_list,
+ "max_depth": max_depth_list,
+ "max_features": max_features_list,
+ }
+
+ estimator = GridSearchCV(clf, param_grid, n_jobs=n_jobs, cv=4)
+
+ return estimator
+
+ def make_scorers(self):
+ make_gen_classif_scorers(self)
diff --git a/auto_building_tools/asv_benchmarks/benchmarks/neighbors.py b/auto_building_tools/asv_benchmarks/benchmarks/neighbors.py
new file mode 100644
index 0000000..b0bf6ab
--- /dev/null
+++ b/auto_building_tools/asv_benchmarks/benchmarks/neighbors.py
@@ -0,0 +1,39 @@
+from sklearn.neighbors import KNeighborsClassifier
+
+from .common import Benchmark, Estimator, Predictor
+from .datasets import _20newsgroups_lowdim_dataset
+from .utils import make_gen_classif_scorers
+
+
+class KNeighborsClassifierBenchmark(Predictor, Estimator, Benchmark):
+ """
+ Benchmarks for KNeighborsClassifier.
+ """
+
+ param_names = ["algorithm", "dimension", "n_jobs"]
+ params = (["brute", "kd_tree", "ball_tree"], ["low", "high"], Benchmark.n_jobs_vals)
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ algorithm, dimension, n_jobs = params
+
+ if Benchmark.data_size == "large":
+ n_components = 40 if dimension == "low" else 200
+ else:
+ n_components = 10 if dimension == "low" else 50
+
+ data = _20newsgroups_lowdim_dataset(n_components=n_components)
+
+ return data
+
+ def make_estimator(self, params):
+ algorithm, dimension, n_jobs = params
+
+ estimator = KNeighborsClassifier(algorithm=algorithm, n_jobs=n_jobs)
+
+ return estimator
+
+ def make_scorers(self):
+ make_gen_classif_scorers(self)
diff --git a/auto_building_tools/asv_benchmarks/benchmarks/svm.py b/auto_building_tools/asv_benchmarks/benchmarks/svm.py
new file mode 100644
index 0000000..36d3066
--- /dev/null
+++ b/auto_building_tools/asv_benchmarks/benchmarks/svm.py
@@ -0,0 +1,30 @@
+from sklearn.svm import SVC
+
+from .common import Benchmark, Estimator, Predictor
+from .datasets import _synth_classification_dataset
+from .utils import make_gen_classif_scorers
+
+
+class SVCBenchmark(Predictor, Estimator, Benchmark):
+ """Benchmarks for SVC."""
+
+ param_names = ["kernel"]
+ params = (["linear", "poly", "rbf", "sigmoid"],)
+
+ def setup_cache(self):
+ super().setup_cache()
+
+ def make_data(self, params):
+ return _synth_classification_dataset()
+
+ def make_estimator(self, params):
+ (kernel,) = params
+
+ estimator = SVC(
+ max_iter=100, tol=1e-16, kernel=kernel, random_state=0, gamma="scale"
+ )
+
+ return estimator
+
+ def make_scorers(self):
+ make_gen_classif_scorers(self)
diff --git a/auto_building_tools/asv_benchmarks/benchmarks/utils.py b/auto_building_tools/asv_benchmarks/benchmarks/utils.py
new file mode 100644
index 0000000..fca3057
--- /dev/null
+++ b/auto_building_tools/asv_benchmarks/benchmarks/utils.py
@@ -0,0 +1,47 @@
+import numpy as np
+
+from sklearn.metrics import balanced_accuracy_score, r2_score
+
+
+def neg_mean_inertia(X, labels, centers):
+ return -(np.asarray(X - centers[labels]) ** 2).sum(axis=1).mean()
+
+
+def make_gen_classif_scorers(caller):
+ caller.train_scorer = balanced_accuracy_score
+ caller.test_scorer = balanced_accuracy_score
+
+
+def make_gen_reg_scorers(caller):
+ caller.test_scorer = r2_score
+ caller.train_scorer = r2_score
+
+
+def neg_mean_data_error(X, U, V):
+ return -np.sqrt(((X - U.dot(V)) ** 2).mean())
+
+
+def make_dict_learning_scorers(caller):
+ caller.train_scorer = lambda _, __: (
+ neg_mean_data_error(
+ caller.X, caller.estimator.transform(caller.X), caller.estimator.components_
+ )
+ )
+ caller.test_scorer = lambda _, __: (
+ neg_mean_data_error(
+ caller.X_val,
+ caller.estimator.transform(caller.X_val),
+ caller.estimator.components_,
+ )
+ )
+
+
+def explained_variance_ratio(Xt, X):
+ return np.var(Xt, axis=0).sum() / np.var(X, axis=0).sum()
+
+
+def make_pca_scorers(caller):
+ caller.train_scorer = lambda _, __: caller.estimator.explained_variance_ratio_.sum()
+ caller.test_scorer = lambda _, __: (
+ explained_variance_ratio(caller.estimator.transform(caller.X_val), caller.X_val)
+ )
diff --git a/auto_building_tools/benchmarks/.gitignore b/auto_building_tools/benchmarks/.gitignore
new file mode 100644
index 0000000..2b6f7ba
--- /dev/null
+++ b/auto_building_tools/benchmarks/.gitignore
@@ -0,0 +1,4 @@
+/bhtsne
+*.npy
+*.json
+/mnist_tsne_output/
diff --git a/auto_building_tools/benchmarks/bench_20newsgroups.py b/auto_building_tools/benchmarks/bench_20newsgroups.py
new file mode 100644
index 0000000..44a117f
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_20newsgroups.py
@@ -0,0 +1,94 @@
+import argparse
+from time import time
+
+import numpy as np
+
+from sklearn.datasets import fetch_20newsgroups_vectorized
+from sklearn.dummy import DummyClassifier
+from sklearn.ensemble import (
+ AdaBoostClassifier,
+ ExtraTreesClassifier,
+ RandomForestClassifier,
+)
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score
+from sklearn.naive_bayes import MultinomialNB
+from sklearn.utils.validation import check_array
+
+ESTIMATORS = {
+ "dummy": DummyClassifier(),
+ "random_forest": RandomForestClassifier(max_features="sqrt", min_samples_split=10),
+ "extra_trees": ExtraTreesClassifier(max_features="sqrt", min_samples_split=10),
+ "logistic_regression": LogisticRegression(),
+ "naive_bayes": MultinomialNB(),
+ "adaboost": AdaBoostClassifier(n_estimators=10, algorithm="SAMME"),
+}
+
+
+###############################################################################
+# Data
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "-e", "--estimators", nargs="+", required=True, choices=ESTIMATORS
+ )
+ args = vars(parser.parse_args())
+
+ data_train = fetch_20newsgroups_vectorized(subset="train")
+ data_test = fetch_20newsgroups_vectorized(subset="test")
+ X_train = check_array(data_train.data, dtype=np.float32, accept_sparse="csc")
+ X_test = check_array(data_test.data, dtype=np.float32, accept_sparse="csr")
+ y_train = data_train.target
+ y_test = data_test.target
+
+ print("20 newsgroups")
+ print("=============")
+ print(f"X_train.shape = {X_train.shape}")
+ print(f"X_train.format = {X_train.format}")
+ print(f"X_train.dtype = {X_train.dtype}")
+ print(f"X_train density = {X_train.nnz / np.prod(X_train.shape)}")
+ print(f"y_train {y_train.shape}")
+ print(f"X_test {X_test.shape}")
+ print(f"X_test.format = {X_test.format}")
+ print(f"X_test.dtype = {X_test.dtype}")
+ print(f"y_test {y_test.shape}")
+ print()
+ print("Classifier Training")
+ print("===================")
+ accuracy, train_time, test_time = {}, {}, {}
+ for name in sorted(args["estimators"]):
+ clf = ESTIMATORS[name]
+ try:
+ clf.set_params(random_state=0)
+ except (TypeError, ValueError):
+ pass
+
+ print("Training %s ... " % name, end="")
+ t0 = time()
+ clf.fit(X_train, y_train)
+ train_time[name] = time() - t0
+ t0 = time()
+ y_pred = clf.predict(X_test)
+ test_time[name] = time() - t0
+ accuracy[name] = accuracy_score(y_test, y_pred)
+ print("done")
+
+ print()
+ print("Classification performance:")
+ print("===========================")
+ print()
+ print("%s %s %s %s" % ("Classifier ", "train-time", "test-time", "Accuracy"))
+ print("-" * 44)
+ for name in sorted(accuracy, key=accuracy.get):
+ print(
+ "%s %s %s %s"
+ % (
+ name.ljust(16),
+ ("%.4fs" % train_time[name]).center(10),
+ ("%.4fs" % test_time[name]).center(10),
+ ("%.4f" % accuracy[name]).center(10),
+ )
+ )
+
+ print()
diff --git a/auto_building_tools/benchmarks/bench_covertype.py b/auto_building_tools/benchmarks/bench_covertype.py
new file mode 100644
index 0000000..243cce0
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_covertype.py
@@ -0,0 +1,234 @@
+"""
+===========================
+Covertype dataset benchmark
+===========================
+
+Benchmark stochastic gradient descent (SGD), Liblinear, Naive Bayes, CART
+(decision tree), RandomForest and Extra-Trees on the forest covertype dataset
+of Blackard, Jock, and Dean [1]. The dataset comprises 581,012 samples. It is
+low dimensional with 54 features and a sparsity of approx. 23%. Here, we
+consider the task of predicting class 1 (spruce/fir). The classification
+performance of SGD is competitive with Liblinear while being two orders of
+magnitude faster to train::
+
+ [..]
+ Classification performance:
+ ===========================
+ Classifier train-time test-time error-rate
+ --------------------------------------------
+ liblinear 15.9744s 0.0705s 0.2305
+ GaussianNB 3.0666s 0.3884s 0.4841
+ SGD 1.0558s 0.1152s 0.2300
+ CART 79.4296s 0.0523s 0.0469
+ RandomForest 1190.1620s 0.5881s 0.0243
+ ExtraTrees 640.3194s 0.6495s 0.0198
+
+The same task has been used in a number of papers including:
+
+ * :doi:`"SVM Optimization: Inverse Dependence on Training Set Size"
+ S. Shalev-Shwartz, N. Srebro - In Proceedings of ICML '08.
+ <10.1145/1390156.1390273>`
+
+ * :doi:`"Pegasos: Primal estimated sub-gradient solver for svm"
+ S. Shalev-Shwartz, Y. Singer, N. Srebro - In Proceedings of ICML '07.
+ <10.1145/1273496.1273598>`
+
+ * `"Training Linear SVMs in Linear Time"
+ `_
+ T. Joachims - In SIGKDD '06
+
+[1] https://archive.ics.uci.edu/ml/datasets/Covertype
+
+"""
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
+import argparse
+import os
+from time import time
+
+import numpy as np
+from joblib import Memory
+
+from sklearn.datasets import fetch_covtype, get_data_home
+from sklearn.ensemble import (
+ ExtraTreesClassifier,
+ GradientBoostingClassifier,
+ RandomForestClassifier,
+)
+from sklearn.linear_model import LogisticRegression, SGDClassifier
+from sklearn.metrics import zero_one_loss
+from sklearn.naive_bayes import GaussianNB
+from sklearn.svm import LinearSVC
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.utils import check_array
+
+# Memoize the data extraction and memory map the resulting
+# train / test splits in readonly mode
+memory = Memory(
+ os.path.join(get_data_home(), "covertype_benchmark_data"), mmap_mode="r"
+)
+
+
+@memory.cache
+def load_data(dtype=np.float32, order="C", random_state=13):
+ """Load the data, then cache and memmap the train/test split"""
+ ######################################################################
+ # Load dataset
+ print("Loading dataset...")
+ data = fetch_covtype(
+ download_if_missing=True, shuffle=True, random_state=random_state
+ )
+ X = check_array(data["data"], dtype=dtype, order=order)
+ y = (data["target"] != 1).astype(int)
+
+ # Create train-test split (as [Joachims, 2006])
+ print("Creating train-test split...")
+ n_train = 522911
+ X_train = X[:n_train]
+ y_train = y[:n_train]
+ X_test = X[n_train:]
+ y_test = y[n_train:]
+
+ # Standardize first 10 features (the numerical ones)
+ mean = X_train.mean(axis=0)
+ std = X_train.std(axis=0)
+ mean[10:] = 0.0
+ std[10:] = 1.0
+ X_train = (X_train - mean) / std
+ X_test = (X_test - mean) / std
+ return X_train, X_test, y_train, y_test
+
+
+ESTIMATORS = {
+ "GBRT": GradientBoostingClassifier(n_estimators=250),
+ "ExtraTrees": ExtraTreesClassifier(n_estimators=20),
+ "RandomForest": RandomForestClassifier(n_estimators=20),
+ "CART": DecisionTreeClassifier(min_samples_split=5),
+ "SGD": SGDClassifier(alpha=0.001),
+ "GaussianNB": GaussianNB(),
+ "liblinear": LinearSVC(loss="l2", penalty="l2", C=1000, dual=False, tol=1e-3),
+ "SAG": LogisticRegression(solver="sag", max_iter=2, C=1000),
+}
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--classifiers",
+ nargs="+",
+ choices=ESTIMATORS,
+ type=str,
+ default=["liblinear", "GaussianNB", "SGD", "CART"],
+ help="list of classifiers to benchmark.",
+ )
+ parser.add_argument(
+ "--n-jobs",
+ nargs="?",
+ default=1,
+ type=int,
+ help=(
+ "Number of concurrently running workers for "
+ "models that support parallelism."
+ ),
+ )
+ parser.add_argument(
+ "--order",
+ nargs="?",
+ default="C",
+ type=str,
+ choices=["F", "C"],
+ help="Allow to choose between fortran and C ordered data",
+ )
+ parser.add_argument(
+ "--random-seed",
+ nargs="?",
+ default=13,
+ type=int,
+ help="Common seed used by random number generator.",
+ )
+ args = vars(parser.parse_args())
+
+ print(__doc__)
+
+ X_train, X_test, y_train, y_test = load_data(
+ order=args["order"], random_state=args["random_seed"]
+ )
+
+ print("")
+ print("Dataset statistics:")
+ print("===================")
+ print("%s %d" % ("number of features:".ljust(25), X_train.shape[1]))
+ print("%s %d" % ("number of classes:".ljust(25), np.unique(y_train).size))
+ print("%s %s" % ("data type:".ljust(25), X_train.dtype))
+ print(
+ "%s %d (pos=%d, neg=%d, size=%dMB)"
+ % (
+ "number of train samples:".ljust(25),
+ X_train.shape[0],
+ np.sum(y_train == 1),
+ np.sum(y_train == 0),
+ int(X_train.nbytes / 1e6),
+ )
+ )
+ print(
+ "%s %d (pos=%d, neg=%d, size=%dMB)"
+ % (
+ "number of test samples:".ljust(25),
+ X_test.shape[0],
+ np.sum(y_test == 1),
+ np.sum(y_test == 0),
+ int(X_test.nbytes / 1e6),
+ )
+ )
+
+ print()
+ print("Training Classifiers")
+ print("====================")
+ error, train_time, test_time = {}, {}, {}
+ for name in sorted(args["classifiers"]):
+ print("Training %s ... " % name, end="")
+ estimator = ESTIMATORS[name]
+ estimator_params = estimator.get_params()
+
+ estimator.set_params(
+ **{
+ p: args["random_seed"]
+ for p in estimator_params
+ if p.endswith("random_state")
+ }
+ )
+
+ if "n_jobs" in estimator_params:
+ estimator.set_params(n_jobs=args["n_jobs"])
+
+ time_start = time()
+ estimator.fit(X_train, y_train)
+ train_time[name] = time() - time_start
+
+ time_start = time()
+ y_pred = estimator.predict(X_test)
+ test_time[name] = time() - time_start
+
+ error[name] = zero_one_loss(y_test, y_pred)
+
+ print("done")
+
+ print()
+ print("Classification performance:")
+ print("===========================")
+ print("%s %s %s %s" % ("Classifier ", "train-time", "test-time", "error-rate"))
+ print("-" * 44)
+ for name in sorted(args["classifiers"], key=error.get):
+ print(
+ "%s %s %s %s"
+ % (
+ name.ljust(12),
+ ("%.4fs" % train_time[name]).center(10),
+ ("%.4fs" % test_time[name]).center(10),
+ ("%.4f" % error[name]).center(10),
+ )
+ )
+
+ print()
diff --git a/auto_building_tools/benchmarks/bench_feature_expansions.py b/auto_building_tools/benchmarks/bench_feature_expansions.py
new file mode 100644
index 0000000..b9d9efb
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_feature_expansions.py
@@ -0,0 +1,58 @@
+from time import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+import scipy.sparse as sparse
+
+from sklearn.preprocessing import PolynomialFeatures
+
+degree = 2
+trials = 3
+num_rows = 1000
+dimensionalities = np.array([1, 2, 8, 16, 32, 64])
+densities = np.array([0.01, 0.1, 1.0])
+csr_times = {d: np.zeros(len(dimensionalities)) for d in densities}
+dense_times = {d: np.zeros(len(dimensionalities)) for d in densities}
+transform = PolynomialFeatures(
+ degree=degree, include_bias=False, interaction_only=False
+)
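+# For reference: with degree=2, include_bias=False and interaction_only=False,
+# an input with two features [a, b] is expanded to [a, b, a^2, a*b, b^2].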
+
+for trial in range(trials):
+ for density in densities:
+ for dim_index, dim in enumerate(dimensionalities):
+ print(trial, density, dim)
+ X_csr = sparse.random(num_rows, dim, density).tocsr()
+ X_dense = X_csr.toarray()
+ # CSR
+ t0 = time()
+ transform.fit_transform(X_csr)
+ csr_times[density][dim_index] += time() - t0
+ # Dense
+ t0 = time()
+ transform.fit_transform(X_dense)
+ dense_times[density][dim_index] += time() - t0
+
+csr_linestyle = (0, (3, 1, 1, 1, 1, 1)) # densely dashdotdotted
+dense_linestyle = (0, ()) # solid
+
+fig, axes = plt.subplots(nrows=len(densities), ncols=1, figsize=(8, 10))
+for density, ax in zip(densities, axes):
+ ax.plot(
+ dimensionalities,
+ csr_times[density] / trials,
+ label="csr",
+ linestyle=csr_linestyle,
+ )
+ ax.plot(
+ dimensionalities,
+ dense_times[density] / trials,
+ label="dense",
+ linestyle=dense_linestyle,
+ )
+ ax.set_title("density %0.2f, degree=%d, n_samples=%d" % (density, degree, num_rows))
+ ax.legend()
+ ax.set_xlabel("Dimensionality")
+ ax.set_ylabel("Time (seconds)")
+
+plt.tight_layout()
+plt.show()
diff --git a/auto_building_tools/benchmarks/bench_glm.py b/auto_building_tools/benchmarks/bench_glm.py
new file mode 100644
index 0000000..84cf318
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_glm.py
@@ -0,0 +1,57 @@
+"""
+A comparison of different methods in GLM
+
+Data comes from a random square matrix.
+
+"""
+
+from datetime import datetime
+
+import numpy as np
+
+from sklearn import linear_model
+
+if __name__ == "__main__":
+ import matplotlib.pyplot as plt
+
+ n_iter = 40
+
+ time_ridge = np.empty(n_iter)
+ time_ols = np.empty(n_iter)
+ time_lasso = np.empty(n_iter)
+
+ dimensions = 10 * np.arange(n_iter) + 3  # match the problem sizes used in the loop below
+
+ for i in range(n_iter):
+ print("Iteration %s of %s" % (i, n_iter))
+
+ n_samples, n_features = 10 * i + 3, 10 * i + 3
+
+ X = np.random.randn(n_samples, n_features)
+ Y = np.random.randn(n_samples)
+
+ start = datetime.now()
+ ridge = linear_model.Ridge(alpha=1.0)
+ ridge.fit(X, Y)
+ time_ridge[i] = (datetime.now() - start).total_seconds()
+
+ start = datetime.now()
+ ols = linear_model.LinearRegression()
+ ols.fit(X, Y)
+ time_ols[i] = (datetime.now() - start).total_seconds()
+
+ start = datetime.now()
+ lasso = linear_model.LassoLars()
+ lasso.fit(X, Y)
+ time_lasso[i] = (datetime.now() - start).total_seconds()
+
+ plt.figure("scikit-learn GLM benchmark results")
+ plt.xlabel("Dimensions")
+ plt.ylabel("Time (s)")
+ plt.plot(dimensions, time_ridge, color="r")
+ plt.plot(dimensions, time_ols, color="g")
+ plt.plot(dimensions, time_lasso, color="b")
+
+ plt.legend(["Ridge", "OLS", "LassoLars"], loc="upper left")
+ plt.axis("tight")
+ plt.show()
diff --git a/auto_building_tools/benchmarks/bench_glmnet.py b/auto_building_tools/benchmarks/bench_glmnet.py
new file mode 100644
index 0000000..1aaad99
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_glmnet.py
@@ -0,0 +1,140 @@
+"""
+To run this benchmark, you'll need to have the following installed:
+
+ * glmnet-python
+ * scikit-learn (of course)
+
+This script runs two benchmarks.
+
+First, we fix the number of features and increase the number of training
+samples. We then plot the computation time as a function of the number of
+samples.
+
+In the second benchmark, we fix the number of training samples and increase
+the number of dimensions (features) of the training set. We then plot the
+computation time as a function of the number of dimensions.
+
+In both cases, only 10% of the features are informative.
+"""
+
+import gc
+from time import time
+
+import numpy as np
+
+from sklearn.datasets import make_regression
+
+alpha = 0.1
+# alpha = 0.01
+
+
+def rmse(a, b):
+ return np.sqrt(np.mean((a - b) ** 2))
+
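+# For reference, rmse(np.array([0.0, 0.0]), np.array([3.0, 4.0])) is
+# sqrt((9 + 16) / 2), i.e. about 3.536.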
+
+def bench(factory, X, Y, X_test, Y_test, ref_coef):
+ gc.collect()
+
+ # start time
+ tstart = time()
+ clf = factory(alpha=alpha).fit(X, Y)
+ delta = time() - tstart
+ # stop time
+
+ print("duration: %0.3fs" % delta)
+ print("rmse: %f" % rmse(Y_test, clf.predict(X_test)))
+ print("mean coef abs diff: %f" % abs(ref_coef - clf.coef_.ravel()).mean())
+ return delta
+
+
+if __name__ == "__main__":
+ # Delayed import of matplotlib.pyplot
+ import matplotlib.pyplot as plt
+ from glmnet.elastic_net import Lasso as GlmnetLasso
+
+ from sklearn.linear_model import Lasso as ScikitLasso
+
+ scikit_results = []
+ glmnet_results = []
+ n = 20
+ step = 500
+ n_features = 1000
+ n_informative = n_features // 10  # make_regression expects an integer
+ n_test_samples = 1000
+ for i in range(1, n + 1):
+ print("==================")
+ print("Iteration %s of %s" % (i, n))
+ print("==================")
+
+ X, Y, coef_ = make_regression(
+ n_samples=(i * step) + n_test_samples,
+ n_features=n_features,
+ noise=0.1,
+ n_informative=n_informative,
+ coef=True,
+ )
+
+ X_test = X[-n_test_samples:]
+ Y_test = Y[-n_test_samples:]
+ X = X[: (i * step)]
+ Y = Y[: (i * step)]
+
+ print("benchmarking scikit-learn: ")
+ scikit_results.append(bench(ScikitLasso, X, Y, X_test, Y_test, coef_))
+ print("benchmarking glmnet: ")
+ glmnet_results.append(bench(GlmnetLasso, X, Y, X_test, Y_test, coef_))
+
+ plt.clf()
+ xx = range(0, n * step, step)
+ plt.title("Lasso regression on sample dataset (%d features)" % n_features)
+ plt.plot(xx, scikit_results, "b-", label="scikit-learn")
+ plt.plot(xx, glmnet_results, "r-", label="glmnet")
+ plt.legend()
+ plt.xlabel("number of samples to classify")
+ plt.ylabel("Time (s)")
+ plt.show()
+
+ # now do a benchmark where the number of points is fixed
+ # and the variable is the number of features
+
+ scikit_results = []
+ glmnet_results = []
+ n = 20
+ step = 100
+ n_samples = 500
+
+ for i in range(1, n + 1):
+ print("==================")
+ print("Iteration %02d of %02d" % (i, n))
+ print("==================")
+ n_features = i * step
+ n_informative = n_features // 10  # make_regression expects an integer
+
+ X, Y, coef_ = make_regression(
+ n_samples=(i * step) + n_test_samples,
+ n_features=n_features,
+ noise=0.1,
+ n_informative=n_informative,
+ coef=True,
+ )
+
+ X_test = X[-n_test_samples:]
+ Y_test = Y[-n_test_samples:]
+ X = X[:n_samples]
+ Y = Y[:n_samples]
+
+ print("benchmarking scikit-learn: ")
+ scikit_results.append(bench(ScikitLasso, X, Y, X_test, Y_test, coef_))
+ print("benchmarking glmnet: ")
+ glmnet_results.append(bench(GlmnetLasso, X, Y, X_test, Y_test, coef_))
+
+ xx = np.arange(100, 100 + n * step, step)
+ plt.figure("scikit-learn vs. glmnet benchmark results")
+ plt.title("Regression in high dimensional spaces (%d samples)" % n_samples)
+ plt.plot(xx, scikit_results, "b-", label="scikit-learn")
+ plt.plot(xx, glmnet_results, "r-", label="glmnet")
+ plt.legend()
+ plt.xlabel("number of features")
+ plt.ylabel("Time (s)")
+ plt.axis("tight")
+ plt.show()
diff --git a/auto_building_tools/benchmarks/bench_hist_gradient_boosting.py b/auto_building_tools/benchmarks/bench_hist_gradient_boosting.py
new file mode 100644
index 0000000..c1dfffa
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_hist_gradient_boosting.py
@@ -0,0 +1,292 @@
+import argparse
+from time import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+from sklearn.datasets import make_classification, make_regression
+from sklearn.ensemble import (
+ HistGradientBoostingClassifier,
+ HistGradientBoostingRegressor,
+)
+from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
+from sklearn.model_selection import train_test_split
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--n-leaf-nodes", type=int, default=31)
+parser.add_argument("--n-trees", type=int, default=10)
+parser.add_argument(
+ "--lightgbm", action="store_true", default=False, help="also plot lightgbm"
+)
+parser.add_argument(
+ "--xgboost", action="store_true", default=False, help="also plot xgboost"
+)
+parser.add_argument(
+ "--catboost", action="store_true", default=False, help="also plot catboost"
+)
+parser.add_argument("--learning-rate", type=float, default=0.1)
+parser.add_argument(
+ "--problem",
+ type=str,
+ default="classification",
+ choices=["classification", "regression"],
+)
+parser.add_argument("--loss", type=str, default="default")
+parser.add_argument("--missing-fraction", type=float, default=0)
+parser.add_argument("--n-classes", type=int, default=2)
+parser.add_argument("--n-samples-max", type=int, default=int(1e6))
+parser.add_argument("--n-features", type=int, default=20)
+parser.add_argument("--max-bins", type=int, default=255)
+parser.add_argument(
+ "--random-sample-weights",
+ action="store_true",
+ default=False,
+ help="generate and use random sample weights",
+)
+args = parser.parse_args()
+
+n_leaf_nodes = args.n_leaf_nodes
+n_trees = args.n_trees
+lr = args.learning_rate
+max_bins = args.max_bins
+
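+# Example invocation (illustrative; the flags are defined by the parser above):
+#   python bench_hist_gradient_boosting.py --problem regression --lightgbm --n-samples-max 100000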
+
+def get_estimator_and_data():
+ if args.problem == "classification":
+ X, y = make_classification(
+ args.n_samples_max * 2,
+ n_features=args.n_features,
+ n_classes=args.n_classes,
+ n_clusters_per_class=1,
+ n_informative=args.n_classes,
+ random_state=0,
+ )
+ return X, y, HistGradientBoostingClassifier
+ elif args.problem == "regression":
+ X, y = make_regression(
+ args.n_samples_max * 2, n_features=args.n_features, random_state=0
+ )
+ return X, y, HistGradientBoostingRegressor
+
+
+X, y, Estimator = get_estimator_and_data()
+if args.missing_fraction:
+ mask = np.random.binomial(1, args.missing_fraction, size=X.shape).astype(bool)
+ X[mask] = np.nan
+
+if args.random_sample_weights:
+ sample_weight = np.random.rand(len(X)) * 10
+else:
+ sample_weight = None
+
+if sample_weight is not None:
+ (X_train_, X_test_, y_train_, y_test_, sample_weight_train_, _) = train_test_split(
+ X, y, sample_weight, test_size=0.5, random_state=0
+ )
+else:
+ X_train_, X_test_, y_train_, y_test_ = train_test_split(
+ X, y, test_size=0.5, random_state=0
+ )
+ sample_weight_train_ = None
+
+
+def one_run(n_samples):
+ X_train = X_train_[:n_samples]
+ X_test = X_test_[:n_samples]
+ y_train = y_train_[:n_samples]
+ y_test = y_test_[:n_samples]
+ if sample_weight is not None:
+ sample_weight_train = sample_weight_train_[:n_samples]
+ else:
+ sample_weight_train = None
+ assert X_train.shape[0] == n_samples
+ assert X_test.shape[0] == n_samples
+ print("Data size: %d samples train, %d samples test." % (n_samples, n_samples))
+ print("Fitting a sklearn model...")
+ tic = time()
+ est = Estimator(
+ learning_rate=lr,
+ max_iter=n_trees,
+ max_bins=max_bins,
+ max_leaf_nodes=n_leaf_nodes,
+ early_stopping=False,
+ random_state=0,
+ verbose=0,
+ )
+ loss = args.loss
+ if args.problem == "classification":
+ if loss == "default":
+ loss = "log_loss"
+ else:
+ # regression
+ if loss == "default":
+ loss = "squared_error"
+ est.set_params(loss=loss)
+ est.fit(X_train, y_train, sample_weight=sample_weight_train)
+ sklearn_fit_duration = time() - tic
+ tic = time()
+ sklearn_score = est.score(X_test, y_test)
+ sklearn_score_duration = time() - tic
+ print("score: {:.4f}".format(sklearn_score))
+ print("fit duration: {:.3f}s,".format(sklearn_fit_duration))
+ print("score duration: {:.3f}s,".format(sklearn_score_duration))
+
+ lightgbm_score = None
+ lightgbm_fit_duration = None
+ lightgbm_score_duration = None
+ if args.lightgbm:
+ print("Fitting a LightGBM model...")
+ lightgbm_est = get_equivalent_estimator(
+ est, lib="lightgbm", n_classes=args.n_classes
+ )
+
+ tic = time()
+ lightgbm_est.fit(X_train, y_train, sample_weight=sample_weight_train)
+ lightgbm_fit_duration = time() - tic
+ tic = time()
+ lightgbm_score = lightgbm_est.score(X_test, y_test)
+ lightgbm_score_duration = time() - tic
+ print("score: {:.4f}".format(lightgbm_score))
+ print("fit duration: {:.3f}s,".format(lightgbm_fit_duration))
+ print("score duration: {:.3f}s,".format(lightgbm_score_duration))
+
+ xgb_score = None
+ xgb_fit_duration = None
+ xgb_score_duration = None
+ if args.xgboost:
+ print("Fitting an XGBoost model...")
+ xgb_est = get_equivalent_estimator(est, lib="xgboost", n_classes=args.n_classes)
+
+ tic = time()
+ xgb_est.fit(X_train, y_train, sample_weight=sample_weight_train)
+ xgb_fit_duration = time() - tic
+ tic = time()
+ xgb_score = xgb_est.score(X_test, y_test)
+ xgb_score_duration = time() - tic
+ print("score: {:.4f}".format(xgb_score))
+ print("fit duration: {:.3f}s,".format(xgb_fit_duration))
+ print("score duration: {:.3f}s,".format(xgb_score_duration))
+
+ cat_score = None
+ cat_fit_duration = None
+ cat_score_duration = None
+ if args.catboost:
+ print("Fitting a CatBoost model...")
+ cat_est = get_equivalent_estimator(
+ est, lib="catboost", n_classes=args.n_classes
+ )
+
+ tic = time()
+ cat_est.fit(X_train, y_train, sample_weight=sample_weight_train)
+ cat_fit_duration = time() - tic
+ tic = time()
+ cat_score = cat_est.score(X_test, y_test)
+ cat_score_duration = time() - tic
+ print("score: {:.4f}".format(cat_score))
+ print("fit duration: {:.3f}s,".format(cat_fit_duration))
+ print("score duration: {:.3f}s,".format(cat_score_duration))
+
+ return (
+ sklearn_score,
+ sklearn_fit_duration,
+ sklearn_score_duration,
+ lightgbm_score,
+ lightgbm_fit_duration,
+ lightgbm_score_duration,
+ xgb_score,
+ xgb_fit_duration,
+ xgb_score_duration,
+ cat_score,
+ cat_fit_duration,
+ cat_score_duration,
+ )
+
+
+n_samples_list = [1000, 10000, 100000, 500000, 1000000, 5000000, 10000000]
+n_samples_list = [
+ n_samples for n_samples in n_samples_list if n_samples <= args.n_samples_max
+]
+
+sklearn_scores = []
+sklearn_fit_durations = []
+sklearn_score_durations = []
+lightgbm_scores = []
+lightgbm_fit_durations = []
+lightgbm_score_durations = []
+xgb_scores = []
+xgb_fit_durations = []
+xgb_score_durations = []
+cat_scores = []
+cat_fit_durations = []
+cat_score_durations = []
+
+for n_samples in n_samples_list:
+ (
+ sklearn_score,
+ sklearn_fit_duration,
+ sklearn_score_duration,
+ lightgbm_score,
+ lightgbm_fit_duration,
+ lightgbm_score_duration,
+ xgb_score,
+ xgb_fit_duration,
+ xgb_score_duration,
+ cat_score,
+ cat_fit_duration,
+ cat_score_duration,
+ ) = one_run(n_samples)
+
+ for scores, score in (
+ (sklearn_scores, sklearn_score),
+ (sklearn_fit_durations, sklearn_fit_duration),
+ (sklearn_score_durations, sklearn_score_duration),
+ (lightgbm_scores, lightgbm_score),
+ (lightgbm_fit_durations, lightgbm_fit_duration),
+ (lightgbm_score_durations, lightgbm_score_duration),
+ (xgb_scores, xgb_score),
+ (xgb_fit_durations, xgb_fit_duration),
+ (xgb_score_durations, xgb_score_duration),
+ (cat_scores, cat_score),
+ (cat_fit_durations, cat_fit_duration),
+ (cat_score_durations, cat_score_duration),
+ ):
+ scores.append(score)
+
+fig, axs = plt.subplots(3, sharex=True)
+
+axs[0].plot(n_samples_list, sklearn_scores, label="sklearn")
+axs[1].plot(n_samples_list, sklearn_fit_durations, label="sklearn")
+axs[2].plot(n_samples_list, sklearn_score_durations, label="sklearn")
+
+if args.lightgbm:
+ axs[0].plot(n_samples_list, lightgbm_scores, label="lightgbm")
+ axs[1].plot(n_samples_list, lightgbm_fit_durations, label="lightgbm")
+ axs[2].plot(n_samples_list, lightgbm_score_durations, label="lightgbm")
+
+if args.xgboost:
+ axs[0].plot(n_samples_list, xgb_scores, label="XGBoost")
+ axs[1].plot(n_samples_list, xgb_fit_durations, label="XGBoost")
+ axs[2].plot(n_samples_list, xgb_score_durations, label="XGBoost")
+
+if args.catboost:
+ axs[0].plot(n_samples_list, cat_scores, label="CatBoost")
+ axs[1].plot(n_samples_list, cat_fit_durations, label="CatBoost")
+ axs[2].plot(n_samples_list, cat_score_durations, label="CatBoost")
+
+for ax in axs:
+ ax.set_xscale("log")
+ ax.legend(loc="best")
+ ax.set_xlabel("n_samples")
+
+axs[0].set_title("scores")
+axs[1].set_title("fit duration (s)")
+axs[2].set_title("score duration (s)")
+
+title = args.problem
+if args.problem == "classification":
+ title += " n_classes = {}".format(args.n_classes)
+fig.suptitle(title)
+
+
+plt.tight_layout()
+plt.show()
diff --git a/auto_building_tools/benchmarks/bench_hist_gradient_boosting_adult.py b/auto_building_tools/benchmarks/bench_hist_gradient_boosting_adult.py
new file mode 100644
index 0000000..97c762e
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_hist_gradient_boosting_adult.py
@@ -0,0 +1,100 @@
+import argparse
+from time import time
+
+import numpy as np
+import pandas as pd
+
+from sklearn.compose import make_column_selector, make_column_transformer
+from sklearn.datasets import fetch_openml
+from sklearn.ensemble import HistGradientBoostingClassifier
+from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
+from sklearn.metrics import accuracy_score, roc_auc_score
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import OrdinalEncoder
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--n-leaf-nodes", type=int, default=31)
+parser.add_argument("--n-trees", type=int, default=100)
+parser.add_argument("--lightgbm", action="store_true", default=False)
+parser.add_argument("--learning-rate", type=float, default=0.1)
+parser.add_argument("--max-bins", type=int, default=255)
+parser.add_argument("--no-predict", action="store_true", default=False)
+parser.add_argument("--verbose", action="store_true", default=False)
+args = parser.parse_args()
+
+n_leaf_nodes = args.n_leaf_nodes
+n_trees = args.n_trees
+lr = args.learning_rate
+max_bins = args.max_bins
+verbose = args.verbose
+
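+# Example invocation (illustrative):
+#   python bench_hist_gradient_boosting_adult.py --n-trees 200 --lightgbm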
+
+def fit(est, data_train, target_train, libname, **fit_params):
+ print(f"Fitting a {libname} model...")
+ tic = time()
+ est.fit(data_train, target_train, **fit_params)
+ toc = time()
+ print(f"fitted in {toc - tic:.3f}s")
+
+
+def predict(est, data_test, target_test):
+ if args.no_predict:
+ return
+ tic = time()
+ predicted_test = est.predict(data_test)
+ predicted_proba_test = est.predict_proba(data_test)
+ toc = time()
+ roc_auc = roc_auc_score(target_test, predicted_proba_test[:, 1])
+ acc = accuracy_score(target_test, predicted_test)
+ print(f"predicted in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}")
+
+
+data = fetch_openml(data_id=179, as_frame=True) # adult dataset
+X, y = data.data, data.target
+
+# Ordinal encode the categories to use the native support available in HGBDT
+cat_columns = make_column_selector(dtype_include="category")(X)
+preprocessing = make_column_transformer(
+ (OrdinalEncoder(), cat_columns),
+ remainder="passthrough",
+ verbose_feature_names_out=False,
+)
+X = pd.DataFrame(
+ preprocessing.fit_transform(X),
+ columns=preprocessing.get_feature_names_out(),
+)
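+# Note: make_column_transformer outputs the ordinal-encoded categorical columns
+# first and the passthrough numerical columns last; the is_categorical mask
+# built below relies on this ordering.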
+
+n_classes = len(np.unique(y))
+n_features = X.shape[1]
+n_categorical_features = len(cat_columns)
+n_numerical_features = n_features - n_categorical_features
+print(f"Number of features: {n_features}")
+print(f"Number of categorical features: {n_categorical_features}")
+print(f"Number of numerical features: {n_numerical_features}")
+
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
+
+is_categorical = [True] * n_categorical_features + [False] * n_numerical_features
+est = HistGradientBoostingClassifier(
+ loss="log_loss",
+ learning_rate=lr,
+ max_iter=n_trees,
+ max_bins=max_bins,
+ max_leaf_nodes=n_leaf_nodes,
+ categorical_features=is_categorical,
+ early_stopping=False,
+ random_state=0,
+ verbose=verbose,
+)
+
+fit(est, X_train, y_train, "sklearn")
+predict(est, X_test, y_test)
+
+if args.lightgbm:
+ est = get_equivalent_estimator(est, lib="lightgbm", n_classes=n_classes)
+ est.set_params(max_cat_to_onehot=1)  # don't use one-hot encoding
+ categorical_features = [
+ f_idx for (f_idx, is_cat) in enumerate(is_categorical) if is_cat
+ ]
+ fit(est, X_train, y_train, "lightgbm", categorical_feature=categorical_features)
+ predict(est, X_test, y_test)
diff --git a/auto_building_tools/benchmarks/bench_hist_gradient_boosting_categorical_only.py b/auto_building_tools/benchmarks/bench_hist_gradient_boosting_categorical_only.py
new file mode 100644
index 0000000..1085bbc
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_hist_gradient_boosting_categorical_only.py
@@ -0,0 +1,79 @@
+import argparse
+from time import time
+
+from sklearn.datasets import make_classification
+from sklearn.ensemble import HistGradientBoostingClassifier
+from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
+from sklearn.preprocessing import KBinsDiscretizer
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--n-leaf-nodes", type=int, default=31)
+parser.add_argument("--n-trees", type=int, default=100)
+parser.add_argument("--n-features", type=int, default=20)
+parser.add_argument("--n-cats", type=int, default=20)
+parser.add_argument("--n-samples", type=int, default=10_000)
+parser.add_argument("--lightgbm", action="store_true", default=False)
+parser.add_argument("--learning-rate", type=float, default=0.1)
+parser.add_argument("--max-bins", type=int, default=255)
+parser.add_argument("--no-predict", action="store_true", default=False)
+parser.add_argument("--verbose", action="store_true", default=False)
+args = parser.parse_args()
+
+n_leaf_nodes = args.n_leaf_nodes
+n_features = args.n_features
+n_categories = args.n_cats
+n_samples = args.n_samples
+n_trees = args.n_trees
+lr = args.learning_rate
+max_bins = args.max_bins
+verbose = args.verbose
+
+
+def fit(est, data_train, target_train, libname, **fit_params):
+ print(f"Fitting a {libname} model...")
+ tic = time()
+ est.fit(data_train, target_train, **fit_params)
+ toc = time()
+ print(f"fitted in {toc - tic:.3f}s")
+
+
+def predict(est, data_test):
+ # We don't report accuracy or ROC because the dataset doesn't really make
+ # sense: we treat ordered features as un-ordered categories.
+ if args.no_predict:
+ return
+ tic = time()
+ est.predict(data_test)
+ toc = time()
+ print(f"predicted in {toc - tic:.3f}s")
+
+
+X, y = make_classification(n_samples=n_samples, n_features=n_features, random_state=0)
+
+X = KBinsDiscretizer(n_bins=n_categories, encode="ordinal").fit_transform(X)
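+# KBinsDiscretizer with encode="ordinal" maps each feature to integer bin ids in
+# [0, n_categories - 1]; those ids are then treated as unordered categories.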
+
+print(f"Number of features: {n_features}")
+print(f"Number of samples: {n_samples}")
+
+is_categorical = [True] * n_features
+est = HistGradientBoostingClassifier(
+ loss="log_loss",
+ learning_rate=lr,
+ max_iter=n_trees,
+ max_bins=max_bins,
+ max_leaf_nodes=n_leaf_nodes,
+ categorical_features=is_categorical,
+ early_stopping=False,
+ random_state=0,
+ verbose=verbose,
+)
+
+fit(est, X, y, "sklearn")
+predict(est, X)
+
+if args.lightgbm:
+ est = get_equivalent_estimator(est, lib="lightgbm", n_classes=2)
+ est.set_params(max_cat_to_onehot=1)  # don't use one-hot encoding
+ categorical_features = list(range(n_features))
+ fit(est, X, y, "lightgbm", categorical_feature=categorical_features)
+ predict(est, X)
diff --git a/auto_building_tools/benchmarks/bench_hist_gradient_boosting_higgsboson.py b/auto_building_tools/benchmarks/bench_hist_gradient_boosting_higgsboson.py
new file mode 100644
index 0000000..20057c5
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_hist_gradient_boosting_higgsboson.py
@@ -0,0 +1,127 @@
+import argparse
+import os
+from gzip import GzipFile
+from time import time
+from urllib.request import urlretrieve
+
+import numpy as np
+import pandas as pd
+from joblib import Memory
+
+from sklearn.ensemble import HistGradientBoostingClassifier
+from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
+from sklearn.metrics import accuracy_score, roc_auc_score
+from sklearn.model_selection import train_test_split
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--n-leaf-nodes", type=int, default=31)
+parser.add_argument("--n-trees", type=int, default=10)
+parser.add_argument("--lightgbm", action="store_true", default=False)
+parser.add_argument("--xgboost", action="store_true", default=False)
+parser.add_argument("--catboost", action="store_true", default=False)
+parser.add_argument("--learning-rate", type=float, default=1.0)
+parser.add_argument("--subsample", type=int, default=None)
+parser.add_argument("--max-bins", type=int, default=255)
+parser.add_argument("--no-predict", action="store_true", default=False)
+parser.add_argument("--cache-loc", type=str, default="/tmp")
+parser.add_argument("--no-interactions", type=bool, default=False)
+parser.add_argument("--max-features", type=float, default=1.0)
+args = parser.parse_args()
+
+HERE = os.path.dirname(__file__)
+URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz"
+m = Memory(location=args.cache_loc, mmap_mode="r")
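+# joblib.Memory caches the result of the decorated load_data() under --cache-loc,
+# so the 2.6 GB CSV is downloaded and parsed only once across runs.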
+
+n_leaf_nodes = args.n_leaf_nodes
+n_trees = args.n_trees
+subsample = args.subsample
+lr = args.learning_rate
+max_bins = args.max_bins
+max_features = args.max_features
+
+
+@m.cache
+def load_data():
+ filename = os.path.join(HERE, URL.rsplit("/", 1)[-1])
+ if not os.path.exists(filename):
+ print(f"Downloading {URL} to {filename} (2.6 GB)...")
+ urlretrieve(URL, filename)
+ print("done.")
+
+ print(f"Parsing {filename}...")
+ tic = time()
+ with GzipFile(filename) as f:
+ df = pd.read_csv(f, header=None, dtype=np.float32)
+ toc = time()
+ print(f"Loaded {df.values.nbytes / 1e9:0.3f} GB in {toc - tic:0.3f}s")
+ return df
+
+
+def fit(est, data_train, target_train, libname):
+ print(f"Fitting a {libname} model...")
+ tic = time()
+ est.fit(data_train, target_train)
+ toc = time()
+ print(f"fitted in {toc - tic:.3f}s")
+
+
+def predict(est, data_test, target_test):
+ if args.no_predict:
+ return
+ tic = time()
+ predicted_test = est.predict(data_test)
+ predicted_proba_test = est.predict_proba(data_test)
+ toc = time()
+ roc_auc = roc_auc_score(target_test, predicted_proba_test[:, 1])
+ acc = accuracy_score(target_test, predicted_test)
+ print(f"predicted in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}")
+
+
+df = load_data()
+target = df.values[:, 0]
+data = np.ascontiguousarray(df.values[:, 1:])
+data_train, data_test, target_train, target_test = train_test_split(
+ data, target, test_size=0.2, random_state=0
+)
+n_classes = len(np.unique(target))
+
+if subsample is not None:
+ data_train, target_train = data_train[:subsample], target_train[:subsample]
+
+n_samples, n_features = data_train.shape
+print(f"Training set with {n_samples} records with {n_features} features.")
+
+if args.no_interactions:
+ interaction_cst = [[i] for i in range(n_features)]
+else:
+ interaction_cst = None
+
+est = HistGradientBoostingClassifier(
+ loss="log_loss",
+ learning_rate=lr,
+ max_iter=n_trees,
+ max_bins=max_bins,
+ max_leaf_nodes=n_leaf_nodes,
+ early_stopping=False,
+ random_state=0,
+ verbose=1,
+ interaction_cst=interaction_cst,
+ max_features=max_features,
+)
+fit(est, data_train, target_train, "sklearn")
+predict(est, data_test, target_test)
+
+if args.lightgbm:
+ est = get_equivalent_estimator(est, lib="lightgbm", n_classes=n_classes)
+ fit(est, data_train, target_train, "lightgbm")
+ predict(est, data_test, target_test)
+
+if args.xgboost:
+ est = get_equivalent_estimator(est, lib="xgboost", n_classes=n_classes)
+ fit(est, data_train, target_train, "xgboost")
+ predict(est, data_test, target_test)
+
+if args.catboost:
+ est = get_equivalent_estimator(est, lib="catboost", n_classes=n_classes)
+ fit(est, data_train, target_train, "catboost")
+ predict(est, data_test, target_test)
diff --git a/auto_building_tools/benchmarks/bench_hist_gradient_boosting_threading.py b/auto_building_tools/benchmarks/bench_hist_gradient_boosting_threading.py
new file mode 100644
index 0000000..9acf65b
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_hist_gradient_boosting_threading.py
@@ -0,0 +1,347 @@
+import argparse
+import os
+from pprint import pprint
+from time import time
+
+import numpy as np
+from threadpoolctl import threadpool_limits
+
+import sklearn
+from sklearn.datasets import make_classification, make_regression
+from sklearn.ensemble import (
+ HistGradientBoostingClassifier,
+ HistGradientBoostingRegressor,
+)
+from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
+from sklearn.model_selection import train_test_split
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--n-leaf-nodes", type=int, default=31)
+parser.add_argument("--n-trees", type=int, default=10)
+parser.add_argument(
+ "--lightgbm", action="store_true", default=False, help="also benchmark lightgbm"
+)
+parser.add_argument(
+ "--xgboost", action="store_true", default=False, help="also benchmark xgboost"
+)
+parser.add_argument(
+ "--catboost", action="store_true", default=False, help="also benchmark catboost"
+)
+parser.add_argument("--learning-rate", type=float, default=0.1)
+parser.add_argument(
+ "--problem",
+ type=str,
+ default="classification",
+ choices=["classification", "regression"],
+)
+parser.add_argument("--loss", type=str, default="default")
+parser.add_argument("--missing-fraction", type=float, default=0)
+parser.add_argument("--n-classes", type=int, default=2)
+parser.add_argument("--n-samples", type=int, default=int(1e6))
+parser.add_argument("--n-features", type=int, default=100)
+parser.add_argument("--max-bins", type=int, default=255)
+
+parser.add_argument("--print-params", action="store_true", default=False)
+parser.add_argument(
+ "--random-sample-weights",
+ action="store_true",
+ default=False,
+ help="generate and use random sample weights",
+)
+parser.add_argument(
+ "--plot", action="store_true", default=False, help="show a plot results"
+)
+parser.add_argument(
+ "--plot-filename", default=None, help="filename to save the figure to disk"
+)
+args = parser.parse_args()
+
+n_samples = args.n_samples
+n_leaf_nodes = args.n_leaf_nodes
+n_trees = args.n_trees
+lr = args.learning_rate
+max_bins = args.max_bins
+
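+# Example invocation (illustrative):
+#   python bench_hist_gradient_boosting_threading.py --lightgbm --plot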
+
+print("Data size: %d samples train, %d samples test." % (n_samples, n_samples))
+print(f"n_features: {args.n_features}")
+
+
+def get_estimator_and_data():
+ if args.problem == "classification":
+ X, y = make_classification(
+ args.n_samples * 2,
+ n_features=args.n_features,
+ n_classes=args.n_classes,
+ n_clusters_per_class=1,
+ n_informative=args.n_features // 2,
+ random_state=0,
+ )
+ return X, y, HistGradientBoostingClassifier
+ elif args.problem == "regression":
+ X, y = make_regression(
+ args.n_samples * 2, n_features=args.n_features, random_state=0
+ )
+ return X, y, HistGradientBoostingRegressor
+
+
+X, y, Estimator = get_estimator_and_data()
+if args.missing_fraction:
+ mask = np.random.binomial(1, args.missing_fraction, size=X.shape).astype(bool)
+ X[mask] = np.nan
+
+if args.random_sample_weights:
+ sample_weight = np.random.rand(len(X)) * 10
+else:
+ sample_weight = None
+
+if sample_weight is not None:
+ (X_train_, X_test_, y_train_, y_test_, sample_weight_train_, _) = train_test_split(
+ X, y, sample_weight, test_size=0.5, random_state=0
+ )
+else:
+ X_train_, X_test_, y_train_, y_test_ = train_test_split(
+ X, y, test_size=0.5, random_state=0
+ )
+ sample_weight_train_ = None
+
+
+sklearn_est = Estimator(
+ learning_rate=lr,
+ max_iter=n_trees,
+ max_bins=max_bins,
+ max_leaf_nodes=n_leaf_nodes,
+ early_stopping=False,
+ random_state=0,
+ verbose=0,
+)
+loss = args.loss
+if args.problem == "classification":
+ if loss == "default":
+ # loss='auto' does not work with get_equivalent_estimator()
+ loss = "log_loss"
+else:
+ # regression
+ if loss == "default":
+ loss = "squared_error"
+sklearn_est.set_params(loss=loss)
+
+
+if args.print_params:
+ print("scikit-learn")
+ pprint(sklearn_est.get_params())
+
+ for libname in ["lightgbm", "xgboost", "catboost"]:
+ if getattr(args, libname):
+ print(libname)
+ est = get_equivalent_estimator(
+ sklearn_est, lib=libname, n_classes=args.n_classes
+ )
+ pprint(est.get_params())
+
+
+def one_run(n_threads, n_samples):
+ X_train = X_train_[:n_samples]
+ X_test = X_test_[:n_samples]
+ y_train = y_train_[:n_samples]
+ y_test = y_test_[:n_samples]
+ if sample_weight is not None:
+ sample_weight_train = sample_weight_train_[:n_samples]
+ else:
+ sample_weight_train = None
+ assert X_train.shape[0] == n_samples
+ assert X_test.shape[0] == n_samples
+ print("Fitting a sklearn model...")
+ tic = time()
+ est = sklearn.base.clone(sklearn_est)
+
+ with threadpool_limits(n_threads, user_api="openmp"):
+ est.fit(X_train, y_train, sample_weight=sample_weight_train)
+ sklearn_fit_duration = time() - tic
+ tic = time()
+ sklearn_score = est.score(X_test, y_test)
+ sklearn_score_duration = time() - tic
+ print("score: {:.4f}".format(sklearn_score))
+ print("fit duration: {:.3f}s,".format(sklearn_fit_duration))
+ print("score duration: {:.3f}s,".format(sklearn_score_duration))
+
+ lightgbm_score = None
+ lightgbm_fit_duration = None
+ lightgbm_score_duration = None
+ if args.lightgbm:
+ print("Fitting a LightGBM model...")
+ lightgbm_est = get_equivalent_estimator(
+ est, lib="lightgbm", n_classes=args.n_classes
+ )
+ lightgbm_est.set_params(num_threads=n_threads)
+
+ tic = time()
+ lightgbm_est.fit(X_train, y_train, sample_weight=sample_weight_train)
+ lightgbm_fit_duration = time() - tic
+ tic = time()
+ lightgbm_score = lightgbm_est.score(X_test, y_test)
+ lightgbm_score_duration = time() - tic
+ print("score: {:.4f}".format(lightgbm_score))
+ print("fit duration: {:.3f}s,".format(lightgbm_fit_duration))
+ print("score duration: {:.3f}s,".format(lightgbm_score_duration))
+
+ xgb_score = None
+ xgb_fit_duration = None
+ xgb_score_duration = None
+ if args.xgboost:
+ print("Fitting an XGBoost model...")
+ xgb_est = get_equivalent_estimator(est, lib="xgboost", n_classes=args.n_classes)
+ xgb_est.set_params(nthread=n_threads)
+
+ tic = time()
+ xgb_est.fit(X_train, y_train, sample_weight=sample_weight_train)
+ xgb_fit_duration = time() - tic
+ tic = time()
+ xgb_score = xgb_est.score(X_test, y_test)
+ xgb_score_duration = time() - tic
+ print("score: {:.4f}".format(xgb_score))
+ print("fit duration: {:.3f}s,".format(xgb_fit_duration))
+ print("score duration: {:.3f}s,".format(xgb_score_duration))
+
+ cat_score = None
+ cat_fit_duration = None
+ cat_score_duration = None
+ if args.catboost:
+ print("Fitting a CatBoost model...")
+ cat_est = get_equivalent_estimator(
+ est, lib="catboost", n_classes=args.n_classes
+ )
+ cat_est.set_params(thread_count=n_threads)
+
+ tic = time()
+ cat_est.fit(X_train, y_train, sample_weight=sample_weight_train)
+ cat_fit_duration = time() - tic
+ tic = time()
+ cat_score = cat_est.score(X_test, y_test)
+ cat_score_duration = time() - tic
+ print("score: {:.4f}".format(cat_score))
+ print("fit duration: {:.3f}s,".format(cat_fit_duration))
+ print("score duration: {:.3f}s,".format(cat_score_duration))
+
+ return (
+ sklearn_score,
+ sklearn_fit_duration,
+ sklearn_score_duration,
+ lightgbm_score,
+ lightgbm_fit_duration,
+ lightgbm_score_duration,
+ xgb_score,
+ xgb_fit_duration,
+ xgb_score_duration,
+ cat_score,
+ cat_fit_duration,
+ cat_score_duration,
+ )
+
+
+max_threads = os.cpu_count()
+n_threads_list = [2**i for i in range(8) if (2**i) < max_threads]
+n_threads_list.append(max_threads)
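+# e.g. on a 16-core machine this yields n_threads_list = [1, 2, 4, 8, 16]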
+
+sklearn_scores = []
+sklearn_fit_durations = []
+sklearn_score_durations = []
+lightgbm_scores = []
+lightgbm_fit_durations = []
+lightgbm_score_durations = []
+xgb_scores = []
+xgb_fit_durations = []
+xgb_score_durations = []
+cat_scores = []
+cat_fit_durations = []
+cat_score_durations = []
+
+for n_threads in n_threads_list:
+ print(f"n_threads: {n_threads}")
+ (
+ sklearn_score,
+ sklearn_fit_duration,
+ sklearn_score_duration,
+ lightgbm_score,
+ lightgbm_fit_duration,
+ lightgbm_score_duration,
+ xgb_score,
+ xgb_fit_duration,
+ xgb_score_duration,
+ cat_score,
+ cat_fit_duration,
+ cat_score_duration,
+ ) = one_run(n_threads, n_samples)
+
+ for scores, score in (
+ (sklearn_scores, sklearn_score),
+ (sklearn_fit_durations, sklearn_fit_duration),
+ (sklearn_score_durations, sklearn_score_duration),
+ (lightgbm_scores, lightgbm_score),
+ (lightgbm_fit_durations, lightgbm_fit_duration),
+ (lightgbm_score_durations, lightgbm_score_duration),
+ (xgb_scores, xgb_score),
+ (xgb_fit_durations, xgb_fit_duration),
+ (xgb_score_durations, xgb_score_duration),
+ (cat_scores, cat_score),
+ (cat_fit_durations, cat_fit_duration),
+ (cat_score_durations, cat_score_duration),
+ ):
+ scores.append(score)
+
+
+if args.plot or args.plot_filename:
+ import matplotlib
+ import matplotlib.pyplot as plt
+
+ fig, axs = plt.subplots(2, figsize=(12, 12))
+
+ label = f"sklearn {sklearn.__version__}"
+ axs[0].plot(n_threads_list, sklearn_fit_durations, label=label)
+ axs[1].plot(n_threads_list, sklearn_score_durations, label=label)
+
+ if args.lightgbm:
+ import lightgbm
+
+ label = f"LightGBM {lightgbm.__version__}"
+ axs[0].plot(n_threads_list, lightgbm_fit_durations, label=label)
+ axs[1].plot(n_threads_list, lightgbm_score_durations, label=label)
+
+ if args.xgboost:
+ import xgboost
+
+ label = f"XGBoost {xgboost.__version__}"
+ axs[0].plot(n_threads_list, xgb_fit_durations, label=label)
+ axs[1].plot(n_threads_list, xgb_score_durations, label=label)
+
+ if args.catboost:
+ import catboost
+
+ label = f"CatBoost {catboost.__version__}"
+ axs[0].plot(n_threads_list, cat_fit_durations, label=label)
+ axs[1].plot(n_threads_list, cat_score_durations, label=label)
+
+ for ax in axs:
+ ax.set_xscale("log")
+ ax.set_xlabel("n_threads")
+ ax.set_ylabel("duration (s)")
+ ax.set_ylim(0, None)
+ ax.set_xticks(n_threads_list)
+ ax.get_xaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
+ ax.legend(loc="best")
+
+ axs[0].set_title("fit duration (s)")
+ axs[1].set_title("score duration (s)")
+
+ title = args.problem
+ if args.problem == "classification":
+ title += " n_classes = {}".format(args.n_classes)
+ fig.suptitle(title)
+
+ plt.tight_layout()
+
+ if args.plot_filename:
+ plt.savefig(args.plot_filename)
+
+ if args.plot:
+ plt.show()
diff --git a/auto_building_tools/benchmarks/bench_isolation_forest.py b/auto_building_tools/benchmarks/bench_isolation_forest.py
new file mode 100644
index 0000000..7439119
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_isolation_forest.py
@@ -0,0 +1,164 @@
+"""
+==========================================
+IsolationForest benchmark
+==========================================
+A test of IsolationForest on classical anomaly detection datasets.
+
+The benchmark is run as follows:
+1. The dataset is randomly split into a training set and a test set, both
+assumed to contain outliers.
+2. Isolation Forest is trained on the training set.
+3. The ROC curve is computed on the test set using the knowledge of the labels.
+
+Note that the smtp dataset contains a very small proportion of outliers.
+Therefore, depending on the seed of the random number generator, randomly
+splitting the data set might lead to a test set containing no outliers. In this
+case a warning is raised when computing the ROC curve.
+"""
+
+from time import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml
+from sklearn.ensemble import IsolationForest
+from sklearn.metrics import auc, roc_curve
+from sklearn.preprocessing import LabelBinarizer
+from sklearn.utils import shuffle as sh
+
+print(__doc__)
+
+
+def print_outlier_ratio(y):
+ """
+ Helper function to show the distinct value counts of the elements in the target.
+ Useful indicator for the datasets used in bench_isolation_forest.py.
+ """
+ uniq, cnt = np.unique(y, return_counts=True)
+ print("----- Target count values: ")
+ for u, c in zip(uniq, cnt):
+ print("------ %s -> %d occurrences" % (str(u), c))
+ print("----- Outlier ratio: %.5f" % (np.min(cnt) / len(y)))
+
+
+random_state = 1
+fig_roc, ax_roc = plt.subplots(1, 1, figsize=(8, 5))
+
+# Set this to true for plotting score histograms for each dataset:
+with_decision_function_histograms = False
+
+# datasets available = ['http', 'smtp', 'SA', 'SF', 'shuttle', 'forestcover']
+datasets = ["http", "smtp", "SA", "SF", "shuttle", "forestcover"]
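+# Note: each dataset is downloaded on first use by the sklearn.datasets fetchers
+# and cached locally (by default under ~/scikit_learn_data).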
+
+# Loop over all datasets for fitting and scoring the estimator:
+for dat in datasets:
+ # Loading and vectorizing the data:
+ print("====== %s ======" % dat)
+ print("--- Fetching data...")
+ if dat in ["http", "smtp", "SF", "SA"]:
+ dataset = fetch_kddcup99(
+ subset=dat, shuffle=True, percent10=True, random_state=random_state
+ )
+ X = dataset.data
+ y = dataset.target
+
+ if dat == "shuttle":
+ dataset = fetch_openml("shuttle", as_frame=False)
+ X = dataset.data
+ y = dataset.target.astype(np.int64)
+ X, y = sh(X, y, random_state=random_state)
+ # we remove data with label 4
+ # normal data are then those of class 1
+ s = y != 4
+ X = X[s, :]
+ y = y[s]
+ y = (y != 1).astype(int)
+ print("----- ")
+
+ if dat == "forestcover":
+ dataset = fetch_covtype(shuffle=True, random_state=random_state)
+ X = dataset.data
+ y = dataset.target
+ # normal data are those with attribute 2
+ # abnormal those with attribute 4
+ s = (y == 2) + (y == 4)
+ X = X[s, :]
+ y = y[s]
+ y = (y != 2).astype(int)
+ print_outlier_ratio(y)
+
+ print("--- Vectorizing data...")
+
+ if dat == "SF":
+ lb = LabelBinarizer()
+ x1 = lb.fit_transform(X[:, 1].astype(str))
+ X = np.c_[X[:, :1], x1, X[:, 2:]]
+ y = (y != b"normal.").astype(int)
+ print_outlier_ratio(y)
+
+ if dat == "SA":
+ lb = LabelBinarizer()
+ x1 = lb.fit_transform(X[:, 1].astype(str))
+ x2 = lb.fit_transform(X[:, 2].astype(str))
+ x3 = lb.fit_transform(X[:, 3].astype(str))
+ X = np.c_[X[:, :1], x1, x2, x3, X[:, 4:]]
+ y = (y != b"normal.").astype(int)
+ print_outlier_ratio(y)
+
+ if dat in ("http", "smtp"):
+ y = (y != b"normal.").astype(int)
+ print_outlier_ratio(y)
+
+ n_samples, n_features = X.shape
+ n_samples_train = n_samples // 2
+
+ X = X.astype(float)
+ X_train = X[:n_samples_train, :]
+ X_test = X[n_samples_train:, :]
+ y_train = y[:n_samples_train]
+ y_test = y[n_samples_train:]
+
+ print("--- Fitting the IsolationForest estimator...")
+ model = IsolationForest(n_jobs=-1, random_state=random_state)
+ tstart = time()
+ model.fit(X_train)
+ fit_time = time() - tstart
+ tstart = time()
+
+ scoring = -model.decision_function(X_test) # the lower, the more abnormal
+
+ print("--- Preparing the plot elements...")
+ if with_decision_function_histograms:
+ fig, ax = plt.subplots(3, sharex=True, sharey=True)
+ bins = np.linspace(-0.5, 0.5, 200)
+ ax[0].hist(scoring, bins, color="black")
+ ax[0].set_title("Decision function for %s dataset" % dat)
+ ax[1].hist(scoring[y_test == 0], bins, color="b", label="normal data")
+ ax[1].legend(loc="lower right")
+ ax[2].hist(scoring[y_test == 1], bins, color="r", label="outliers")
+ ax[2].legend(loc="lower right")
+
+ # Show ROC Curves
+ predict_time = time() - tstart
+ fpr, tpr, thresholds = roc_curve(y_test, scoring)
+ auc_score = auc(fpr, tpr)
+ label = "%s (AUC: %0.3f, train_time= %0.2fs, test_time= %0.2fs)" % (
+ dat,
+ auc_score,
+ fit_time,
+ predict_time,
+ )
+ # Print AUC score and train/test time:
+ print(label)
+ ax_roc.plot(fpr, tpr, lw=1, label=label)
+
+
+ax_roc.set_xlim([-0.05, 1.05])
+ax_roc.set_ylim([-0.05, 1.05])
+ax_roc.set_xlabel("False Positive Rate")
+ax_roc.set_ylabel("True Positive Rate")
+ax_roc.set_title("Receiver operating characteristic (ROC) curves")
+ax_roc.legend(loc="lower right")
+fig_roc.tight_layout()
+plt.show()
diff --git a/auto_building_tools/benchmarks/bench_isolation_forest_predict.py b/auto_building_tools/benchmarks/bench_isolation_forest_predict.py
new file mode 100644
index 0000000..f16e65c
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_isolation_forest_predict.py
@@ -0,0 +1,213 @@
+"""
+==========================================
+IsolationForest prediction benchmark
+==========================================
+A test of IsolationForest on classical anomaly detection datasets.
+
+The benchmark is run as follows:
+1. The dataset is randomly split into a training set and a test set, both
+assumed to contain outliers.
+2. Isolation Forest is trained on the training set fixed at 1000 samples.
+3. The test samples are scored using the trained model at:
+ - 1000, 10000, 50000 samples
+ - 10, 100, 1000 features
+ - 0.01, 0.1, 0.5 contamination
+ - 1, 2, 3, 4 n_jobs
+
+We compare the prediction time at the very end.
+
+Here are instructions for running this benchmark to compare runtime against the main branch:
+
+1. Build and run on a branch or main, e.g. for a branch named `pr`:
+
+```bash
+python bench_isolation_forest_predict.py bench ~/bench_results pr
+```
+
+2. Plotting to compare two branches `pr` and `main`:
+
+```bash
+python bench_isolation_forest_predict.py plot ~/bench_results pr main results_image.png
+```
+"""
+
+import argparse
+from collections import defaultdict
+from pathlib import Path
+from time import time
+
+import numpy as np
+import pandas as pd
+from joblib import parallel_config
+
+from sklearn.ensemble import IsolationForest
+
+print(__doc__)
+
+
+def get_data(
+ n_samples_train, n_samples_test, n_features, contamination=0.1, random_state=0
+):
+ """Function based on code from: https://scikit-learn.org/stable/
+ auto_examples/ensemble/plot_isolation_forest.html#sphx-glr-auto-
+ examples-ensemble-plot-isolation-forest-py
+ """
+ rng = np.random.RandomState(random_state)
+
+ X = 0.3 * rng.randn(n_samples_train, n_features)
+ X_train = np.r_[X + 2, X - 2]
+
+ X = 0.3 * rng.randn(n_samples_test, n_features)
+ X_test = np.r_[X + 2, X - 2]
+
+ n_outliers = int(np.floor(contamination * n_samples_test))
+ X_outliers = rng.uniform(low=-4, high=4, size=(n_outliers, n_features))
+
+ outlier_idx = rng.choice(np.arange(0, n_samples_test), n_outliers, replace=False)
+ X_test[outlier_idx, :] = X_outliers
+
+ return X_train, X_test
+
+
+def plot(args):
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+
+ bench_results = Path(args.bench_results)
+ pr_name = args.pr_name
+ main_name = args.main_name
+ image_path = args.image_path
+
+ results_path = Path(bench_results)
+ pr_path = results_path / f"{pr_name}.csv"
+ main_path = results_path / f"{main_name}.csv"
+ image_path = results_path / image_path
+
+ df_pr = pd.read_csv(pr_path).assign(branch=pr_name)
+ df_main = pd.read_csv(main_path).assign(branch=main_name)
+
+ # Merge the two datasets on the common columns
+ merged_data = pd.merge(
+ df_pr,
+ df_main,
+ on=["n_samples_test", "n_jobs"],
+ suffixes=("_pr", "_main"),
+ )
+
+ # Set up the plotting grid
+ sns.set(style="whitegrid", context="notebook", font_scale=1.5)
+
+ # Create a figure with subplots
+ fig, axes = plt.subplots(1, 2, figsize=(18, 6), sharex=True, sharey=True)
+
+ # Plot predict time as a function of n_samples_test with different n_jobs
+ print(merged_data["n_jobs"].unique())
+ ax = axes[0]
+ sns.lineplot(
+ data=merged_data,
+ x="n_samples_test",
+ y="predict_time_pr",
+ hue="n_jobs",
+ style="n_jobs",
+ markers="o",
+ ax=ax,
+ legend="full",
+ )
+ ax.set_title(f"Predict Time vs. n_samples_test - {pr_name} branch")
+ ax.set_ylabel("Predict Time (Seconds)")
+ ax.set_xlabel("n_samples_test")
+
+ ax = axes[1]
+ sns.lineplot(
+ data=merged_data,
+ x="n_samples_test",
+ y="predict_time_main",
+ hue="n_jobs",
+ style="n_jobs",
+ markers="X",
+ dashes=True,
+ ax=ax,
+ legend=None,
+ )
+ ax.set_title(f"Predict Time vs. n_samples_test - {main_name} branch")
+ ax.set_ylabel("Predict Time")
+ ax.set_xlabel("n_samples_test")
+
+ # Adjust layout and display the plots
+ plt.tight_layout()
+ fig.savefig(image_path, bbox_inches="tight")
+ print(f"Saved image to {image_path}")
+
+
+def bench(args):
+ results_dir = Path(args.bench_results)
+ branch = args.branch
+ random_state = 1
+
+ results = defaultdict(list)
+
+ # Loop over all datasets for fitting and scoring the estimator:
+ n_samples_train = 1000
+ for n_samples_test in [
+ 1000,
+ 10000,
+ 50000,
+ ]:
+ for n_features in [10, 100, 1000]:
+ for contamination in [0.01, 0.1, 0.5]:
+ for n_jobs in [1, 2, 3, 4]:
+ X_train, X_test = get_data(
+ n_samples_train,
+ n_samples_test,
+ n_features,
+ contamination,
+ random_state,
+ )
+
+ print("--- Fitting the IsolationForest estimator...")
+ model = IsolationForest(n_jobs=-1, random_state=random_state)
+ tstart = time()
+ model.fit(X_train)
+ fit_time = time() - tstart
+
+ # clearcache
+ for _ in range(1000):
+ 1 + 1
+ with parallel_config("threading", n_jobs=n_jobs):
+ tstart = time()
+ model.decision_function(X_test) # the lower, the more abnormal
+ predict_time = time() - tstart
+
+ results["predict_time"].append(predict_time)
+ results["fit_time"].append(fit_time)
+ results["n_samples_train"].append(n_samples_train)
+ results["n_samples_test"].append(n_samples_test)
+ results["n_features"].append(n_features)
+ results["contamination"].append(contamination)
+ results["n_jobs"].append(n_jobs)
+
+ df = pd.DataFrame(results)
+ df.to_csv(results_dir / f"{branch}.csv", index=False)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+
+ # parse arguments for benchmarking
+ subparsers = parser.add_subparsers()
+ bench_parser = subparsers.add_parser("bench")
+ bench_parser.add_argument("bench_results")
+ bench_parser.add_argument("branch")
+ bench_parser.set_defaults(func=bench)
+
+ # parse arguments for plotting
+ plot_parser = subparsers.add_parser("plot")
+ plot_parser.add_argument("bench_results")
+ plot_parser.add_argument("pr_name")
+ plot_parser.add_argument("main_name")
+ plot_parser.add_argument("image_path")
+ plot_parser.set_defaults(func=plot)
+
+ # enable the parser and run the relevant function
+ args = parser.parse_args()
+ args.func(args)
diff --git a/auto_building_tools/benchmarks/bench_isotonic.py b/auto_building_tools/benchmarks/bench_isotonic.py
new file mode 100644
index 0000000..be2ff65
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_isotonic.py
@@ -0,0 +1,111 @@
+"""
+Benchmarks of isotonic regression performance.
+
+We generate a synthetic dataset of size 10^n, for n in [min, max], and
+examine the time taken to run isotonic regression over the dataset.
+
+The timings are then output to stdout, or visualized on a log-log scale
+with matplotlib.
+
+This allows the scaling of the algorithm with the problem size to be
+visualized and understood.
+"""
+
+import argparse
+import gc
+from timeit import default_timer
+
+import matplotlib.pyplot as plt
+import numpy as np
+from scipy.special import expit
+
+from sklearn.isotonic import isotonic_regression
+
+
+def generate_perturbed_logarithm_dataset(size):
+ return np.random.randint(-50, 50, size=size) + 50.0 * np.log(1 + np.arange(size))
+
+
+def generate_logistic_dataset(size):
+ X = np.sort(np.random.normal(size=size))
+ return np.random.random(size=size) < expit(X)
+
+
+def generate_pathological_dataset(size):
+ # Triggers O(n^2) complexity on the original implementation.
+ return np.r_[
+ np.arange(size), np.arange(-(size - 1), size), np.arange(-(size - 1), 1)
+ ]
+
+
+DATASET_GENERATORS = {
+ "perturbed_logarithm": generate_perturbed_logarithm_dataset,
+ "logistic": generate_logistic_dataset,
+ "pathological": generate_pathological_dataset,
+}
+
+
+def bench_isotonic_regression(Y):
+ """
+ Runs a single iteration of isotonic regression on the input data,
+ and reports the total time taken (in seconds).
+ """
+ gc.collect()
+
+ tstart = default_timer()
+ isotonic_regression(Y)
+ return default_timer() - tstart
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Isotonic Regression benchmark tool")
+ parser.add_argument("--seed", type=int, help="RNG seed")
+ parser.add_argument(
+ "--iterations",
+ type=int,
+ required=True,
+ help="Number of iterations to average timings over for each problem size",
+ )
+ parser.add_argument(
+ "--log_min_problem_size",
+ type=int,
+ required=True,
+ help="Base 10 logarithm of the minimum problem size",
+ )
+ parser.add_argument(
+ "--log_max_problem_size",
+ type=int,
+ required=True,
+ help="Base 10 logarithm of the maximum problem size",
+ )
+ parser.add_argument(
+ "--show_plot", action="store_true", help="Plot timing output with matplotlib"
+ )
+ parser.add_argument("--dataset", choices=DATASET_GENERATORS.keys(), required=True)
+
+ args = parser.parse_args()
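+ # Example invocation (illustrative):
+ #   python bench_isotonic.py --dataset logistic --iterations 10 \
+ #       --log_min_problem_size 2 --log_max_problem_size 6 --show_plot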
+
+ np.random.seed(args.seed)
+
+ timings = []
+ for exponent in range(args.log_min_problem_size, args.log_max_problem_size):
+ n = 10**exponent
+ Y = DATASET_GENERATORS[args.dataset](n)
+ time_per_iteration = [
+ bench_isotonic_regression(Y) for i in range(args.iterations)
+ ]
+ timing = (n, np.mean(time_per_iteration))
+ timings.append(timing)
+
+ # If we're not plotting, dump the timing to stdout
+ if not args.show_plot:
+ print(n, np.mean(time_per_iteration))
+
+ if args.show_plot:
+ plt.plot(*zip(*timings))
+ plt.title("Average time taken running isotonic regression")
+ plt.xlabel("Number of observations")
+ plt.ylabel("Time (s)")
+ plt.axis("tight")
+ plt.loglog()
+ plt.show()
diff --git a/auto_building_tools/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py b/auto_building_tools/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py
new file mode 100644
index 0000000..a468f7b
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py
@@ -0,0 +1,177 @@
+"""
+=============================================================
+Kernel PCA Solvers comparison benchmark: time vs n_components
+=============================================================
+
+This benchmark shows that the approximate solvers provided in Kernel PCA can
+help significantly improve its execution speed when an approximate solution
+(small `n_components`) is acceptable. In many real-world datasets, a few
+hundred principal components are sufficient to capture the underlying
+distribution.
+
+Description:
+------------
+A fixed number of training (default: 2000) and test (default: 1000) samples
+with 2 features is generated using the `make_circles` helper method.
+
+KernelPCA models are trained on the training set with an increasing number of
+principal components, between 1 and `max_n_compo` (default: 1999), with
+`n_compo_grid_size` positions (default: 10). For each value of `n_components`
+to try, KernelPCA models are trained for the various possible `eigen_solver`
+values. The execution times are displayed in a plot at the end of the
+experiment.
+
+What you can observe:
+---------------------
+When the number of requested principal components is small, the dense solver
+takes more time to complete, while the randomized method returns similar
+results with shorter execution times.
+
+Going further:
+--------------
+You can adjust `max_n_compo` and `n_compo_grid_size` if you wish to explore a
+different range of values for `n_components`.
+
+You can also set `arpack_all=True` to activate the arpack solver for large
+numbers of components (this takes more time).
+"""
+
+import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+from numpy.testing import assert_array_almost_equal
+
+from sklearn.datasets import make_circles
+from sklearn.decomposition import KernelPCA
+
+print(__doc__)
+
+
+# 1- Design the Experiment
+# ------------------------
+n_train, n_test = 2000, 1000 # the sample sizes to use
+max_n_compo = 1999 # max n_components to try
+n_compo_grid_size = 10 # nb of positions in the grid to try
+# generate the grid
+n_compo_range = [
+ np.round(np.exp((x / (n_compo_grid_size - 1)) * np.log(max_n_compo)))
+ for x in range(0, n_compo_grid_size)
+]
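+# n_compo_range is an approximately geometric grid of n_components values
+# spanning 1 .. max_n_compo.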
+
+n_iter = 3 # the number of times each experiment will be repeated
+arpack_all = False # set to True if you wish to run arpack for all n_compo
+
+
+# 2- Generate random data
+# -----------------------
+n_features = 2
+X, y = make_circles(
+ n_samples=(n_train + n_test), factor=0.3, noise=0.05, random_state=0
+)
+X_train, X_test = X[:n_train, :], X[n_train:, :]
+
+
+# 3- Benchmark
+# ------------
+# init
+ref_time = np.empty((len(n_compo_range), n_iter)) * np.nan
+a_time = np.empty((len(n_compo_range), n_iter)) * np.nan
+r_time = np.empty((len(n_compo_range), n_iter)) * np.nan
+# loop
+for j, n_components in enumerate(n_compo_range):
+ n_components = int(n_components)
+ print("Performing kPCA with n_components = %i" % n_components)
+
+ # A- reference (dense)
+ print(" - dense solver")
+ for i in range(n_iter):
+ start_time = time.perf_counter()
+ ref_pred = (
+ KernelPCA(n_components, eigen_solver="dense").fit(X_train).transform(X_test)
+ )
+ ref_time[j, i] = time.perf_counter() - start_time
+
+ # B- arpack (for small number of components only, too slow otherwise)
+ if arpack_all or n_components < 100:
+ print(" - arpack solver")
+ for i in range(n_iter):
+ start_time = time.perf_counter()
+ a_pred = (
+ KernelPCA(n_components, eigen_solver="arpack")
+ .fit(X_train)
+ .transform(X_test)
+ )
+ a_time[j, i] = time.perf_counter() - start_time
+ # check that the result is still correct despite the approx
+ assert_array_almost_equal(np.abs(a_pred), np.abs(ref_pred))
+
+ # C- randomized
+ print(" - randomized solver")
+ for i in range(n_iter):
+ start_time = time.perf_counter()
+ r_pred = (
+ KernelPCA(n_components, eigen_solver="randomized")
+ .fit(X_train)
+ .transform(X_test)
+ )
+ r_time[j, i] = time.perf_counter() - start_time
+ # check that the result is still correct despite the approximation
+ assert_array_almost_equal(np.abs(r_pred), np.abs(ref_pred))
+
+# Compute statistics for the 3 methods
+avg_ref_time = ref_time.mean(axis=1)
+std_ref_time = ref_time.std(axis=1)
+avg_a_time = a_time.mean(axis=1)
+std_a_time = a_time.std(axis=1)
+avg_r_time = r_time.mean(axis=1)
+std_r_time = r_time.std(axis=1)
+
+
+# 4- Plots
+# --------
+fig, ax = plt.subplots(figsize=(12, 8))
+
+# Display 1 plot with error bars per method
+ax.errorbar(
+ n_compo_range,
+ avg_ref_time,
+ yerr=std_ref_time,
+ marker="x",
+ linestyle="",
+ color="r",
+ label="full",
+)
+ax.errorbar(
+ n_compo_range,
+ avg_a_time,
+ yerr=std_a_time,
+ marker="x",
+ linestyle="",
+ color="g",
+ label="arpack",
+)
+ax.errorbar(
+ n_compo_range,
+ avg_r_time,
+ yerr=std_r_time,
+ marker="x",
+ linestyle="",
+ color="b",
+ label="randomized",
+)
+ax.legend(loc="upper left")
+
+# customize axes
+ax.set_xscale("log")
+ax.set_xlim(1, max(n_compo_range) * 1.1)
+ax.set_ylabel("Execution time (s)")
+ax.set_xlabel("n_components")
+
+ax.set_title(
+ "kPCA Execution time comparison on %i samples with %i "
+ "features, according to the choice of `eigen_solver`"
+ "" % (n_train, n_features)
+)
+
+plt.show()
diff --git a/auto_building_tools/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py b/auto_building_tools/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py
new file mode 100644
index 0000000..cae74c6
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py
@@ -0,0 +1,183 @@
+"""
+==========================================================
+Kernel PCA Solvers comparison benchmark: time vs n_samples
+==========================================================
+
+This benchmark shows that the approximate solvers provided in Kernel PCA can
+help significantly improve its execution speed when an approximate solution
+(small `n_components`) is acceptable. In many real-world datasets the number of
+samples is very large, but a few hundred principal components are sufficient to
+capture the underlying distribution.
+
+Description:
+------------
+An increasing number of examples is used to train a KernelPCA, between
+`min_n_samples` (default: 101) and `max_n_samples` (default: 4000) with
+`n_samples_grid_size` positions (default: 4). Samples have 2 features, and are
+generated using `make_circles`. For each training sample size, KernelPCA models
+are trained for the various possible `eigen_solver` values. All of them are
+trained to obtain `n_components` principal components (default: 100). The
+execution times are displayed in a plot at the end of the experiment.
+
+What you can observe:
+---------------------
+When the number of samples provided gets large, the dense solver takes a lot
+of time to complete, while the randomized method returns similar results in
+much shorter execution times.
+
+Going further:
+--------------
+You can increase `max_n_samples` and `n_samples_grid_size` if you wish to
+explore a wider range of values for `n_samples`.
+
+You can also set `include_arpack=True` to add this other solver in the
+experiments (much slower).
+
+Finally you can have a look at the second example of this series, "Kernel PCA
+Solvers comparison benchmark: time vs n_components", where this time the number
+of examples is fixed, and the desired number of components varies.
+"""
+
+# Author: Sylvain MARIE, Schneider Electric
+
+import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+from numpy.testing import assert_array_almost_equal
+
+from sklearn.datasets import make_circles
+from sklearn.decomposition import KernelPCA
+
+print(__doc__)
+
+
+# 1- Design the Experiment
+# ------------------------
+min_n_samples, max_n_samples = 101, 4000 # min and max n_samples to try
+n_samples_grid_size = 4 # nb of positions in the grid to try
+# generate the grid
+n_samples_range = [
+ min_n_samples
+ + np.floor((x / (n_samples_grid_size - 1)) * (max_n_samples - min_n_samples))
+ for x in range(0, n_samples_grid_size)
+]
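+# (unlike the companion "time vs n_components" benchmark, this sample-size grid is
+# linear: evenly spaced between `min_n_samples` and `max_n_samples`)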
+
+n_components = 100 # the number of principal components we want to use
+n_iter = 3 # the number of times each experiment will be repeated
+include_arpack = False # set this to True to include arpack solver (slower)
+
+
+# 2- Generate random data
+# -----------------------
+n_features = 2
+X, y = make_circles(n_samples=max_n_samples, factor=0.3, noise=0.05, random_state=0)
+
+
+# 3- Benchmark
+# ------------
+# init
+ref_time = np.empty((len(n_samples_range), n_iter)) * np.nan
+a_time = np.empty((len(n_samples_range), n_iter)) * np.nan
+r_time = np.empty((len(n_samples_range), n_iter)) * np.nan
+
+# loop
+for j, n_samples in enumerate(n_samples_range):
+ n_samples = int(n_samples)
+ print("Performing kPCA with n_samples = %i" % n_samples)
+
+ X_train = X[:n_samples, :]
+ X_test = X_train
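+    # (the training set is reused as the test set: the benchmark only measures
+    # fit + transform time, not generalization)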
+
+ # A- reference (dense)
+ print(" - dense")
+ for i in range(n_iter):
+ start_time = time.perf_counter()
+ ref_pred = (
+ KernelPCA(n_components, eigen_solver="dense").fit(X_train).transform(X_test)
+ )
+ ref_time[j, i] = time.perf_counter() - start_time
+
+ # B- arpack
+ if include_arpack:
+ print(" - arpack")
+ for i in range(n_iter):
+ start_time = time.perf_counter()
+ a_pred = (
+ KernelPCA(n_components, eigen_solver="arpack")
+ .fit(X_train)
+ .transform(X_test)
+ )
+ a_time[j, i] = time.perf_counter() - start_time
+ # check that the result is still correct despite the approx
+ assert_array_almost_equal(np.abs(a_pred), np.abs(ref_pred))
+
+ # C- randomized
+ print(" - randomized")
+ for i in range(n_iter):
+ start_time = time.perf_counter()
+ r_pred = (
+ KernelPCA(n_components, eigen_solver="randomized")
+ .fit(X_train)
+ .transform(X_test)
+ )
+ r_time[j, i] = time.perf_counter() - start_time
+ # check that the result is still correct despite the approximation
+ assert_array_almost_equal(np.abs(r_pred), np.abs(ref_pred))
+
+# Compute statistics for the 3 methods
+avg_ref_time = ref_time.mean(axis=1)
+std_ref_time = ref_time.std(axis=1)
+avg_a_time = a_time.mean(axis=1)
+std_a_time = a_time.std(axis=1)
+avg_r_time = r_time.mean(axis=1)
+std_r_time = r_time.std(axis=1)
+
+
+# 4- Plots
+# --------
+fig, ax = plt.subplots(figsize=(12, 8))
+
+# Display 1 plot with error bars per method
+ax.errorbar(
+ n_samples_range,
+ avg_ref_time,
+ yerr=std_ref_time,
+ marker="x",
+ linestyle="",
+ color="r",
+ label="full",
+)
+if include_arpack:
+ ax.errorbar(
+ n_samples_range,
+ avg_a_time,
+ yerr=std_a_time,
+ marker="x",
+ linestyle="",
+ color="g",
+ label="arpack",
+ )
+ax.errorbar(
+ n_samples_range,
+ avg_r_time,
+ yerr=std_r_time,
+ marker="x",
+ linestyle="",
+ color="b",
+ label="randomized",
+)
+ax.legend(loc="upper left")
+
+# customize axes
+ax.set_xlim(min(n_samples_range) * 0.9, max(n_samples_range) * 1.1)
+ax.set_ylabel("Execution time (s)")
+ax.set_xlabel("n_samples")
+
+ax.set_title(
+ "Execution time comparison of kPCA with %i components on samples "
+ "with %i features, according to the choice of `eigen_solver`"
+ "" % (n_components, n_features)
+)
+
+plt.show()
diff --git a/auto_building_tools/benchmarks/bench_lasso.py b/auto_building_tools/benchmarks/bench_lasso.py
new file mode 100644
index 0000000..9bae570
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_lasso.py
@@ -0,0 +1,99 @@
+"""
+Benchmarks of Lasso vs LassoLars
+
+First, we fix a training set and increase the number of
+samples. Then we plot the computation time as function of
+the number of samples.
+
+In the second benchmark, we increase the number of dimensions of the
+training set. Then we plot the computation time as function of
+the number of dimensions.
+
+In both cases, only 10% of the features are informative.
+"""
+
+import gc
+from time import time
+
+import numpy as np
+
+from sklearn.datasets import make_regression
+
+
+def compute_bench(alpha, n_samples, n_features, precompute):
+ lasso_results = []
+ lars_lasso_results = []
+
+ it = 0
+
+ for ns in n_samples:
+ for nf in n_features:
+ it += 1
+ print("==================")
+ print("Iteration %s of %s" % (it, max(len(n_samples), len(n_features))))
+ print("==================")
+ n_informative = nf // 10
+ X, Y, coef_ = make_regression(
+ n_samples=ns,
+ n_features=nf,
+ n_informative=n_informative,
+ noise=0.1,
+ coef=True,
+ )
+
+ X /= np.sqrt(np.sum(X**2, axis=0)) # Normalize data
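+            # (unit L2 norm columns keep the `alpha` penalty comparable across
+            # features and across problem sizes)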
+
+ gc.collect()
+ print("- benchmarking Lasso")
+ clf = Lasso(alpha=alpha, fit_intercept=False, precompute=precompute)
+ tstart = time()
+ clf.fit(X, Y)
+ lasso_results.append(time() - tstart)
+
+ gc.collect()
+ print("- benchmarking LassoLars")
+ clf = LassoLars(alpha=alpha, fit_intercept=False, precompute=precompute)
+ tstart = time()
+ clf.fit(X, Y)
+ lars_lasso_results.append(time() - tstart)
+
+ return lasso_results, lars_lasso_results
+
+
+if __name__ == "__main__":
+ import matplotlib.pyplot as plt
+
+ from sklearn.linear_model import Lasso, LassoLars
+
+ alpha = 0.01 # regularization parameter
+
+ n_features = 10
+ list_n_samples = np.linspace(100, 1000000, 5).astype(int)
+ lasso_results, lars_lasso_results = compute_bench(
+ alpha, list_n_samples, [n_features], precompute=True
+ )
+
+ plt.figure("scikit-learn LASSO benchmark results")
+ plt.subplot(211)
+ plt.plot(list_n_samples, lasso_results, "b-", label="Lasso")
+ plt.plot(list_n_samples, lars_lasso_results, "r-", label="LassoLars")
+ plt.title("precomputed Gram matrix, %d features, alpha=%s" % (n_features, alpha))
+ plt.legend(loc="upper left")
+ plt.xlabel("number of samples")
+ plt.ylabel("Time (s)")
+ plt.axis("tight")
+
+ n_samples = 2000
+ list_n_features = np.linspace(500, 3000, 5).astype(int)
+ lasso_results, lars_lasso_results = compute_bench(
+ alpha, [n_samples], list_n_features, precompute=False
+ )
+ plt.subplot(212)
+ plt.plot(list_n_features, lasso_results, "b-", label="Lasso")
+ plt.plot(list_n_features, lars_lasso_results, "r-", label="LassoLars")
+ plt.title("%d samples, alpha=%s" % (n_samples, alpha))
+ plt.legend(loc="upper left")
+ plt.xlabel("number of features")
+ plt.ylabel("Time (s)")
+ plt.axis("tight")
+ plt.show()
diff --git a/auto_building_tools/benchmarks/bench_lof.py b/auto_building_tools/benchmarks/bench_lof.py
new file mode 100644
index 0000000..2c9732f
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_lof.py
@@ -0,0 +1,113 @@
+"""
+============================
+LocalOutlierFactor benchmark
+============================
+
+A test of LocalOutlierFactor on classical anomaly detection datasets.
+
+Note that LocalOutlierFactor is not meant to predict on a test set and its
+performance is assessed in an outlier detection context:
+1. The model is trained on the whole dataset which is assumed to contain
+outliers.
+2. The ROC curve is computed on the same dataset using the knowledge of the
+labels.
+In this context there is no need to shuffle the dataset because the model
+is trained and tested on the whole dataset. The randomness of this benchmark
+is only caused by the random selection of anomalies in the SA dataset.
+
+"""
+
+from time import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml
+from sklearn.metrics import auc, roc_curve
+from sklearn.neighbors import LocalOutlierFactor
+from sklearn.preprocessing import LabelBinarizer
+
+print(__doc__)
+
+random_state = 2 # to control the random selection of anomalies in SA
+
+# datasets available: ['http', 'smtp', 'SA', 'SF', 'shuttle', 'forestcover']
+datasets = ["http", "smtp", "SA", "SF", "shuttle", "forestcover"]
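+# (each dataset is relabelled below into a binary normal vs outlier target before
+# computing the ROC curve)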
+
+plt.figure()
+for dataset_name in datasets:
+ # loading and vectorization
+ print("loading data")
+ if dataset_name in ["http", "smtp", "SA", "SF"]:
+ dataset = fetch_kddcup99(
+ subset=dataset_name, percent10=True, random_state=random_state
+ )
+ X = dataset.data
+ y = dataset.target
+
+ if dataset_name == "shuttle":
+ dataset = fetch_openml("shuttle", as_frame=False)
+ X = dataset.data
+ y = dataset.target.astype(np.int64)
+ # we remove data with label 4
+ # normal data are then those of class 1
+ s = y != 4
+ X = X[s, :]
+ y = y[s]
+ y = (y != 1).astype(int)
+
+ if dataset_name == "forestcover":
+ dataset = fetch_covtype()
+ X = dataset.data
+ y = dataset.target
+ # normal data are those with attribute 2
+ # abnormal those with attribute 4
+ s = (y == 2) + (y == 4)
+ X = X[s, :]
+ y = y[s]
+ y = (y != 2).astype(int)
+
+ print("vectorizing data")
+
+ if dataset_name == "SF":
+ lb = LabelBinarizer()
+ x1 = lb.fit_transform(X[:, 1].astype(str))
+ X = np.c_[X[:, :1], x1, X[:, 2:]]
+ y = (y != b"normal.").astype(int)
+
+ if dataset_name == "SA":
+ lb = LabelBinarizer()
+ x1 = lb.fit_transform(X[:, 1].astype(str))
+ x2 = lb.fit_transform(X[:, 2].astype(str))
+ x3 = lb.fit_transform(X[:, 3].astype(str))
+ X = np.c_[X[:, :1], x1, x2, x3, X[:, 4:]]
+ y = (y != b"normal.").astype(int)
+
+ if dataset_name == "http" or dataset_name == "smtp":
+ y = (y != b"normal.").astype(int)
+
+ X = X.astype(float)
+
+ print("LocalOutlierFactor processing...")
+ model = LocalOutlierFactor(n_neighbors=20)
+ tstart = time()
+ model.fit(X)
+ fit_time = time() - tstart
+ scoring = -model.negative_outlier_factor_ # the lower, the more normal
+ fpr, tpr, thresholds = roc_curve(y, scoring)
+ AUC = auc(fpr, tpr)
+ plt.plot(
+ fpr,
+ tpr,
+ lw=1,
+ label="ROC for %s (area = %0.3f, train-time: %0.2fs)"
+ % (dataset_name, AUC, fit_time),
+ )
+
+plt.xlim([-0.05, 1.05])
+plt.ylim([-0.05, 1.05])
+plt.xlabel("False Positive Rate")
+plt.ylabel("True Positive Rate")
+plt.title("Receiver operating characteristic")
+plt.legend(loc="lower right")
+plt.show()
diff --git a/auto_building_tools/benchmarks/bench_mnist.py b/auto_building_tools/benchmarks/bench_mnist.py
new file mode 100644
index 0000000..5745a6d
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_mnist.py
@@ -0,0 +1,229 @@
+"""
+=======================
+MNIST dataset benchmark
+=======================
+
+Benchmark on the MNIST dataset. The dataset comprises 70,000 samples
+and 784 features. Here, we consider the task of predicting
+the 10 classes (digits from 0 to 9) from their raw images. In contrast to the
+covertype dataset, the feature space is homogeneous.
+
+Example of output:
+    [..]
+
+    Classification performance:
+    ===========================
+    Classifier               train-time   test-time   error-rate
+    ------------------------------------------------------------
+    MLP-adam                     53.46s       0.11s       0.0224
+    Nystroem-SVM                112.97s       0.92s       0.0228
+    MultilayerPerceptron         24.33s       0.14s       0.0287
+    ExtraTrees                   42.99s       0.57s       0.0294
+    RandomForest                 42.70s       0.49s       0.0318
+    SampledRBF-SVM              135.81s       0.56s       0.0486
+    LogisticRegression-SAG       16.67s       0.06s       0.0824
+    CART                         20.69s       0.02s       0.1219
+    dummy                         0.00s       0.01s       0.8973
+"""
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
+import argparse
+import os
+from time import time
+
+import numpy as np
+from joblib import Memory
+
+from sklearn.datasets import fetch_openml, get_data_home
+from sklearn.dummy import DummyClassifier
+from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
+from sklearn.kernel_approximation import Nystroem, RBFSampler
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import zero_one_loss
+from sklearn.neural_network import MLPClassifier
+from sklearn.pipeline import make_pipeline
+from sklearn.svm import LinearSVC
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.utils import check_array
+
+# Memoize the data extraction and memory map the resulting
+# train / test splits in readonly mode
+memory = Memory(os.path.join(get_data_home(), "mnist_benchmark_data"), mmap_mode="r")
+
+
+@memory.cache
+def load_data(dtype=np.float32, order="F"):
+ """Load the data, then cache and memmap the train/test split"""
+ ######################################################################
+ # Load dataset
+ print("Loading dataset...")
+ data = fetch_openml("mnist_784", as_frame=True)
+ X = check_array(data["data"], dtype=dtype, order=order)
+ y = data["target"]
+
+ # Normalize features
+ X = X / 255
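+    # (raw pixel intensities are in [0, 255], so this rescales them to [0, 1])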
+
+ # Create train-test split (as [Joachims, 2006])
+ print("Creating train-test split...")
+ n_train = 60000
+ X_train = X[:n_train]
+ y_train = y[:n_train]
+ X_test = X[n_train:]
+ y_test = y[n_train:]
+
+ return X_train, X_test, y_train, y_test
+
+
+ESTIMATORS = {
+ "dummy": DummyClassifier(),
+ "CART": DecisionTreeClassifier(),
+ "ExtraTrees": ExtraTreesClassifier(),
+ "RandomForest": RandomForestClassifier(),
+ "Nystroem-SVM": make_pipeline(
+ Nystroem(gamma=0.015, n_components=1000), LinearSVC(C=100)
+ ),
+ "SampledRBF-SVM": make_pipeline(
+ RBFSampler(gamma=0.015, n_components=1000), LinearSVC(C=100)
+ ),
+ "LogisticRegression-SAG": LogisticRegression(solver="sag", tol=1e-1, C=1e4),
+ "LogisticRegression-SAGA": LogisticRegression(solver="saga", tol=1e-1, C=1e4),
+ "MultilayerPerceptron": MLPClassifier(
+ hidden_layer_sizes=(100, 100),
+ max_iter=400,
+ alpha=1e-4,
+ solver="sgd",
+ learning_rate_init=0.2,
+ momentum=0.9,
+ verbose=1,
+ tol=1e-4,
+ random_state=1,
+ ),
+ "MLP-adam": MLPClassifier(
+ hidden_layer_sizes=(100, 100),
+ max_iter=400,
+ alpha=1e-4,
+ solver="adam",
+ learning_rate_init=0.001,
+ verbose=1,
+ tol=1e-4,
+ random_state=1,
+ ),
+}
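+# Note: the `random_state` and, when supported, `n_jobs` parameters of the selected
+# estimators are overridden at runtime from the command-line arguments parsed below.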
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--classifiers",
+ nargs="+",
+ choices=ESTIMATORS,
+ type=str,
+ default=["ExtraTrees", "Nystroem-SVM"],
+ help="list of classifiers to benchmark.",
+ )
+ parser.add_argument(
+ "--n-jobs",
+ nargs="?",
+ default=1,
+ type=int,
+ help=(
+ "Number of concurrently running workers for "
+ "models that support parallelism."
+ ),
+ )
+ parser.add_argument(
+ "--order",
+ nargs="?",
+ default="C",
+ type=str,
+ choices=["F", "C"],
+        help="Choose between Fortran- and C-ordered data",
+ )
+ parser.add_argument(
+ "--random-seed",
+ nargs="?",
+ default=0,
+ type=int,
+ help="Common seed used by random number generator.",
+ )
+ args = vars(parser.parse_args())
+
+ print(__doc__)
+
+ X_train, X_test, y_train, y_test = load_data(order=args["order"])
+
+ print("")
+ print("Dataset statistics:")
+ print("===================")
+ print("%s %d" % ("number of features:".ljust(25), X_train.shape[1]))
+ print("%s %d" % ("number of classes:".ljust(25), np.unique(y_train).size))
+ print("%s %s" % ("data type:".ljust(25), X_train.dtype))
+ print(
+ "%s %d (size=%dMB)"
+ % (
+ "number of train samples:".ljust(25),
+ X_train.shape[0],
+ int(X_train.nbytes / 1e6),
+ )
+ )
+ print(
+ "%s %d (size=%dMB)"
+ % (
+ "number of test samples:".ljust(25),
+ X_test.shape[0],
+ int(X_test.nbytes / 1e6),
+ )
+ )
+
+ print()
+ print("Training Classifiers")
+ print("====================")
+ error, train_time, test_time = {}, {}, {}
+ for name in sorted(args["classifiers"]):
+ print("Training %s ... " % name, end="")
+ estimator = ESTIMATORS[name]
+ estimator_params = estimator.get_params()
+
+ estimator.set_params(
+ **{
+ p: args["random_seed"]
+ for p in estimator_params
+ if p.endswith("random_state")
+ }
+ )
+
+ if "n_jobs" in estimator_params:
+ estimator.set_params(n_jobs=args["n_jobs"])
+
+ time_start = time()
+ estimator.fit(X_train, y_train)
+ train_time[name] = time() - time_start
+
+ time_start = time()
+ y_pred = estimator.predict(X_test)
+ test_time[name] = time() - time_start
+
+ error[name] = zero_one_loss(y_test, y_pred)
+
+ print("done")
+
+ print()
+ print("Classification performance:")
+ print("===========================")
+ print(
+ "{0: <24} {1: >10} {2: >11} {3: >12}".format(
+ "Classifier ", "train-time", "test-time", "error-rate"
+ )
+ )
+ print("-" * 60)
+ for name in sorted(args["classifiers"], key=error.get):
+ print(
+ "{0: <23} {1: >10.2f}s {2: >10.2f}s {3: >12.4f}".format(
+ name, train_time[name], test_time[name], error[name]
+ )
+ )
+
+ print()
diff --git a/auto_building_tools/benchmarks/bench_multilabel_metrics.py b/auto_building_tools/benchmarks/bench_multilabel_metrics.py
new file mode 100644
index 0000000..1b8449a
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_multilabel_metrics.py
@@ -0,0 +1,227 @@
+#!/usr/bin/env python
+"""
+A comparison of multilabel target formats and metrics over them
+"""
+
+import argparse
+import itertools
+import sys
+from functools import partial
+from timeit import timeit
+
+import matplotlib.pyplot as plt
+import numpy as np
+import scipy.sparse as sp
+
+from sklearn.datasets import make_multilabel_classification
+from sklearn.metrics import (
+ accuracy_score,
+ f1_score,
+ hamming_loss,
+    jaccard_score,
+)
+from sklearn.utils._testing import ignore_warnings
+
+METRICS = {
+ "f1": partial(f1_score, average="micro"),
+ "f1-by-sample": partial(f1_score, average="samples"),
+ "accuracy": accuracy_score,
+ "hamming": hamming_loss,
+ "jaccard": jaccard_similarity_score,
+}
+
+FORMATS = {
+ "sequences": lambda y: [list(np.flatnonzero(s)) for s in y],
+ "dense": lambda y: y,
+ "csr": sp.csr_matrix,
+ "csc": sp.csc_matrix,
+}
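+# ("sequences" turns each row of the label indicator matrix into a list of label
+# indices; "dense", "csr" and "csc" keep it as a dense or sparse indicator matrix)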
+
+
+@ignore_warnings
+def benchmark(
+ metrics=tuple(v for k, v in sorted(METRICS.items())),
+ formats=tuple(v for k, v in sorted(FORMATS.items())),
+ samples=1000,
+ classes=4,
+ density=0.2,
+ n_times=5,
+):
+ """Times metric calculations for a number of inputs
+
+ Parameters
+ ----------
+ metrics : array-like of callables (1d or 0d)
+ The metric functions to time.
+
+ formats : array-like of callables (1d or 0d)
+ These may transform a dense indicator matrix into multilabel
+ representation.
+
+ samples : array-like of ints (1d or 0d)
+ The number of samples to generate as input.
+
+ classes : array-like of ints (1d or 0d)
+ The number of classes in the input.
+
+ density : array-like of ints (1d or 0d)
+ The density of positive labels in the input.
+
+ n_times : int
+ Time calling the metric n_times times.
+
+ Returns
+ -------
+ array of floats shaped like (metrics, formats, samples, classes, density)
+ Time in seconds.
+ """
+ metrics = np.atleast_1d(metrics)
+ samples = np.atleast_1d(samples)
+ classes = np.atleast_1d(classes)
+ density = np.atleast_1d(density)
+ formats = np.atleast_1d(formats)
+ out = np.zeros(
+ (len(metrics), len(formats), len(samples), len(classes), len(density)),
+ dtype=float,
+ )
+ it = itertools.product(samples, classes, density)
+ for i, (s, c, d) in enumerate(it):
+ _, y_true = make_multilabel_classification(
+ n_samples=s, n_features=1, n_classes=c, n_labels=d * c, random_state=42
+ )
+ _, y_pred = make_multilabel_classification(
+ n_samples=s, n_features=1, n_classes=c, n_labels=d * c, random_state=84
+ )
+ for j, f in enumerate(formats):
+ f_true = f(y_true)
+ f_pred = f(y_pred)
+ for k, metric in enumerate(metrics):
+ t = timeit(partial(metric, f_true, f_pred), number=n_times)
+
+ out[k, j].flat[i] = t
+ return out
+
+
+def _tabulate(results, metrics, formats):
+ """Prints results by metric and format
+
+ Uses the last ([-1]) value of other fields
+ """
+ column_width = max(max(len(k) for k in formats) + 1, 8)
+ first_width = max(len(k) for k in metrics)
+ head_fmt = "{:<{fw}s}" + "{:>{cw}s}" * len(formats)
+ row_fmt = "{:<{fw}s}" + "{:>{cw}.3f}" * len(formats)
+ print(head_fmt.format("Metric", *formats, cw=column_width, fw=first_width))
+ for metric, row in zip(metrics, results[:, :, -1, -1, -1]):
+ print(row_fmt.format(metric, *row, cw=column_width, fw=first_width))
+
+
+def _plot(
+ results,
+ metrics,
+ formats,
+ title,
+ x_ticks,
+ x_label,
+ format_markers=("x", "|", "o", "+"),
+ metric_colors=("c", "m", "y", "k", "g", "r", "b"),
+):
+ """
+ Plot the results by metric, format and some other variable given by
+ x_label
+ """
+ fig = plt.figure("scikit-learn multilabel metrics benchmarks")
+ plt.title(title)
+ ax = fig.add_subplot(111)
+ for i, metric in enumerate(metrics):
+ for j, format in enumerate(formats):
+ ax.plot(
+ x_ticks,
+ results[i, j].flat,
+ label="{}, {}".format(metric, format),
+ marker=format_markers[j],
+ color=metric_colors[i % len(metric_colors)],
+ )
+ ax.set_xlabel(x_label)
+ ax.set_ylabel("Time (s)")
+ ax.legend()
+ plt.show()
+
+
+if __name__ == "__main__":
+ ap = argparse.ArgumentParser()
+ ap.add_argument(
+ "metrics",
+ nargs="*",
+ default=sorted(METRICS),
+ help="Specifies metrics to benchmark, defaults to all. Choices are: {}".format(
+ sorted(METRICS)
+ ),
+ )
+ ap.add_argument(
+ "--formats",
+ nargs="+",
+ choices=sorted(FORMATS),
+ help="Specifies multilabel formats to benchmark (defaults to all).",
+ )
+ ap.add_argument(
+ "--samples", type=int, default=1000, help="The number of samples to generate"
+ )
+ ap.add_argument("--classes", type=int, default=10, help="The number of classes")
+ ap.add_argument(
+ "--density",
+ type=float,
+ default=0.2,
+ help="The average density of labels per sample",
+ )
+ ap.add_argument(
+ "--plot",
+ choices=["classes", "density", "samples"],
+ default=None,
+ help=(
+ "Plot time with respect to this parameter varying up to the specified value"
+ ),
+ )
+ ap.add_argument(
+ "--n-steps", default=10, type=int, help="Plot this many points for each metric"
+ )
+ ap.add_argument(
+ "--n-times", default=5, type=int, help="Time performance over n_times trials"
+ )
+ args = ap.parse_args()
+
+ if args.plot is not None:
+ max_val = getattr(args, args.plot)
+ if args.plot in ("classes", "samples"):
+ min_val = 2
+ else:
+ min_val = 0
+ steps = np.linspace(min_val, max_val, num=args.n_steps + 1)[1:]
+ if args.plot in ("classes", "samples"):
+ steps = np.unique(np.round(steps).astype(int))
+ setattr(args, args.plot, steps)
+
+ if args.metrics is None:
+ args.metrics = sorted(METRICS)
+ if args.formats is None:
+ args.formats = sorted(FORMATS)
+
+ results = benchmark(
+ [METRICS[k] for k in args.metrics],
+ [FORMATS[k] for k in args.formats],
+ args.samples,
+ args.classes,
+ args.density,
+ args.n_times,
+ )
+
+ _tabulate(results, args.metrics, args.formats)
+
+ if args.plot is not None:
+ print("Displaying plot", file=sys.stderr)
+ title = "Multilabel metrics with %s" % ", ".join(
+ "{0}={1}".format(field, getattr(args, field))
+ for field in ["samples", "classes", "density"]
+ if args.plot != field
+ )
+ _plot(results, args.metrics, args.formats, title, steps, args.plot)
diff --git a/auto_building_tools/benchmarks/bench_online_ocsvm.py b/auto_building_tools/benchmarks/bench_online_ocsvm.py
new file mode 100644
index 0000000..9f92150
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_online_ocsvm.py
@@ -0,0 +1,294 @@
+"""
+=====================================
+SGDOneClassSVM benchmark
+=====================================
+This benchmark compares the :class:`SGDOneClassSVM` with :class:`OneClassSVM`.
+The former is an online One-Class SVM implemented with a Stochastic Gradient
+Descent (SGD). The latter is based on the LibSVM implementation. The
+complexity of :class:`SGDOneClassSVM` is linear in the number of samples
+whereas the one of :class:`OneClassSVM` is at best quadratic in the number of
+samples. We here compare the performance in terms of AUC and training time on
+classical anomaly detection datasets.
+
+The :class:`OneClassSVM` is applied with a Gaussian kernel and we therefore
+use a kernel approximation prior to the application of :class:`SGDOneClassSVM`.
+"""
+
+from time import time
+
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+from scipy.interpolate import interp1d
+
+from sklearn.datasets import fetch_covtype, fetch_kddcup99
+from sklearn.kernel_approximation import Nystroem
+from sklearn.linear_model import SGDOneClassSVM
+from sklearn.metrics import auc, roc_curve
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import LabelBinarizer, StandardScaler
+from sklearn.svm import OneClassSVM
+from sklearn.utils import shuffle
+
+font = {"weight": "normal", "size": 15}
+
+matplotlib.rc("font", **font)
+
+print(__doc__)
+
+
+def print_outlier_ratio(y):
+ """
+    Helper function to show the distinct value counts of the elements in the target.
+ Useful indicator for the datasets used in bench_isolation_forest.py.
+ """
+ uniq, cnt = np.unique(y, return_counts=True)
+ print("----- Target count values: ")
+ for u, c in zip(uniq, cnt):
+ print("------ %s -> %d occurrences" % (str(u), c))
+ print("----- Outlier ratio: %.5f" % (np.min(cnt) / len(y)))
+
+
+# for roc curve computation
+n_axis = 1000
+x_axis = np.linspace(0, 1, n_axis)
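+# (each ROC curve is interpolated onto this common FPR grid so that curves obtained
+# with different random states can be averaged point-wise)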
+
+datasets = ["http", "smtp", "SA", "SF", "forestcover"]
+
+novelty_detection = False # if False, training set polluted by outliers
+
+random_states = [42]
+nu = 0.05
+
+results_libsvm = np.empty((len(datasets), n_axis + 5))
+results_online = np.empty((len(datasets), n_axis + 5))
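+# each row stores [fit_time, predict_time, auc, n_samples_train, n_features]
+# followed by the n_axis averaged TPR values of the ROC curve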
+
+for dat, dataset_name in enumerate(datasets):
+ print(dataset_name)
+
+ # Loading datasets
+ if dataset_name in ["http", "smtp", "SA", "SF"]:
+ dataset = fetch_kddcup99(
+ subset=dataset_name, shuffle=False, percent10=False, random_state=88
+ )
+ X = dataset.data
+ y = dataset.target
+
+ if dataset_name == "forestcover":
+ dataset = fetch_covtype(shuffle=False)
+ X = dataset.data
+ y = dataset.target
+ # normal data are those with attribute 2
+ # abnormal those with attribute 4
+ s = (y == 2) + (y == 4)
+ X = X[s, :]
+ y = y[s]
+ y = (y != 2).astype(int)
+
+ # Vectorizing data
+ if dataset_name == "SF":
+ # Casting type of X (object) as string is needed for string categorical
+ # features to apply LabelBinarizer
+ lb = LabelBinarizer()
+ x1 = lb.fit_transform(X[:, 1].astype(str))
+ X = np.c_[X[:, :1], x1, X[:, 2:]]
+ y = (y != b"normal.").astype(int)
+
+ if dataset_name == "SA":
+ lb = LabelBinarizer()
+ # Casting type of X (object) as string is needed for string categorical
+ # features to apply LabelBinarizer
+ x1 = lb.fit_transform(X[:, 1].astype(str))
+ x2 = lb.fit_transform(X[:, 2].astype(str))
+ x3 = lb.fit_transform(X[:, 3].astype(str))
+ X = np.c_[X[:, :1], x1, x2, x3, X[:, 4:]]
+ y = (y != b"normal.").astype(int)
+
+ if dataset_name in ["http", "smtp"]:
+ y = (y != b"normal.").astype(int)
+
+ print_outlier_ratio(y)
+
+ n_samples, n_features = np.shape(X)
+ if dataset_name == "SA": # LibSVM too long with n_samples // 2
+ n_samples_train = n_samples // 20
+ else:
+ n_samples_train = n_samples // 2
+
+ n_samples_test = n_samples - n_samples_train
+ print("n_train: ", n_samples_train)
+ print("n_features: ", n_features)
+
+ tpr_libsvm = np.zeros(n_axis)
+ tpr_online = np.zeros(n_axis)
+ fit_time_libsvm = 0
+ fit_time_online = 0
+ predict_time_libsvm = 0
+ predict_time_online = 0
+
+ X = X.astype(float)
+
+ gamma = 1 / n_features # OCSVM default parameter
+
+ for random_state in random_states:
+ print("random state: %s" % random_state)
+
+ X, y = shuffle(X, y, random_state=random_state)
+ X_train = X[:n_samples_train]
+ X_test = X[n_samples_train:]
+ y_train = y[:n_samples_train]
+ y_test = y[n_samples_train:]
+
+ if novelty_detection:
+ X_train = X_train[y_train == 0]
+ y_train = y_train[y_train == 0]
+
+ std = StandardScaler()
+
+ print("----------- LibSVM OCSVM ------------")
+ ocsvm = OneClassSVM(kernel="rbf", gamma=gamma, nu=nu)
+ pipe_libsvm = make_pipeline(std, ocsvm)
+
+ tstart = time()
+ pipe_libsvm.fit(X_train)
+ fit_time_libsvm += time() - tstart
+
+ tstart = time()
+ # scoring such that the lower, the more normal
+ scoring = -pipe_libsvm.decision_function(X_test)
+ predict_time_libsvm += time() - tstart
+ fpr_libsvm_, tpr_libsvm_, _ = roc_curve(y_test, scoring)
+
+ f_libsvm = interp1d(fpr_libsvm_, tpr_libsvm_)
+ tpr_libsvm += f_libsvm(x_axis)
+
+ print("----------- Online OCSVM ------------")
+ nystroem = Nystroem(gamma=gamma, random_state=random_state)
+ online_ocsvm = SGDOneClassSVM(nu=nu, random_state=random_state)
+ pipe_online = make_pipeline(std, nystroem, online_ocsvm)
+
+ tstart = time()
+ pipe_online.fit(X_train)
+ fit_time_online += time() - tstart
+
+ tstart = time()
+ # scoring such that the lower, the more normal
+ scoring = -pipe_online.decision_function(X_test)
+ predict_time_online += time() - tstart
+ fpr_online_, tpr_online_, _ = roc_curve(y_test, scoring)
+
+ f_online = interp1d(fpr_online_, tpr_online_)
+ tpr_online += f_online(x_axis)
+
+ tpr_libsvm /= len(random_states)
+ tpr_libsvm[0] = 0.0
+ fit_time_libsvm /= len(random_states)
+ predict_time_libsvm /= len(random_states)
+ auc_libsvm = auc(x_axis, tpr_libsvm)
+
+ results_libsvm[dat] = [
+ fit_time_libsvm,
+ predict_time_libsvm,
+ auc_libsvm,
+ n_samples_train,
+ n_features,
+ ] + list(tpr_libsvm)
+
+ tpr_online /= len(random_states)
+ tpr_online[0] = 0.0
+ fit_time_online /= len(random_states)
+ predict_time_online /= len(random_states)
+ auc_online = auc(x_axis, tpr_online)
+
+ results_online[dat] = [
+ fit_time_online,
+ predict_time_online,
+ auc_online,
+ n_samples_train,
+ n_features,
+    ] + list(tpr_online)
+
+
+# -------- Plotting bar charts -------------
+fit_time_libsvm_all = results_libsvm[:, 0]
+predict_time_libsvm_all = results_libsvm[:, 1]
+auc_libsvm_all = results_libsvm[:, 2]
+n_train_all = results_libsvm[:, 3]
+n_features_all = results_libsvm[:, 4]
+
+fit_time_online_all = results_online[:, 0]
+predict_time_online_all = results_online[:, 1]
+auc_online_all = results_online[:, 2]
+
+
+width = 0.7
+ind = 2 * np.arange(len(datasets))
+x_tickslabels = [
+ (name + "\n" + r"$n={:,d}$" + "\n" + r"$d={:d}$").format(int(n), int(d))
+ for name, n, d in zip(datasets, n_train_all, n_features_all)
+]
+
+
+def autolabel_auc(rects, ax):
+ """Attach a text label above each bar displaying its height."""
+ for rect in rects:
+ height = rect.get_height()
+ ax.text(
+ rect.get_x() + rect.get_width() / 2.0,
+ 1.05 * height,
+ "%.3f" % height,
+ ha="center",
+ va="bottom",
+ )
+
+
+def autolabel_time(rects, ax):
+ """Attach a text label above each bar displaying its height."""
+ for rect in rects:
+ height = rect.get_height()
+ ax.text(
+ rect.get_x() + rect.get_width() / 2.0,
+ 1.05 * height,
+ "%.1f" % height,
+ ha="center",
+ va="bottom",
+ )
+
+
+fig, ax = plt.subplots(figsize=(15, 8))
+ax.set_ylabel("AUC")
+ax.set_ylim((0, 1.3))
+rect_libsvm = ax.bar(ind, auc_libsvm_all, width=width, color="r")
+rect_online = ax.bar(ind + width, auc_online_all, width=width, color="y")
+ax.legend((rect_libsvm[0], rect_online[0]), ("LibSVM", "Online SVM"))
+ax.set_xticks(ind + width / 2)
+ax.set_xticklabels(x_tickslabels)
+autolabel_auc(rect_libsvm, ax)
+autolabel_auc(rect_online, ax)
+plt.show()
+
+
+fig, ax = plt.subplots(figsize=(15, 8))
+ax.set_ylabel("Training time (sec) - Log scale")
+ax.set_yscale("log")
+rect_libsvm = ax.bar(ind, fit_time_libsvm_all, color="r", width=width)
+rect_online = ax.bar(ind + width, fit_time_online_all, color="y", width=width)
+ax.legend((rect_libsvm[0], rect_online[0]), ("LibSVM", "Online SVM"))
+ax.set_xticks(ind + width / 2)
+ax.set_xticklabels(x_tickslabels)
+autolabel_time(rect_libsvm, ax)
+autolabel_time(rect_online, ax)
+plt.show()
+
+
+fig, ax = plt.subplots(figsize=(15, 8))
+ax.set_ylabel("Testing time (sec) - Log scale")
+ax.set_yscale("log")
+rect_libsvm = ax.bar(ind, predict_time_libsvm_all, color="r", width=width)
+rect_online = ax.bar(ind + width, predict_time_online_all, color="y", width=width)
+ax.legend((rect_libsvm[0], rect_online[0]), ("LibSVM", "Online SVM"))
+ax.set_xticks(ind + width / 2)
+ax.set_xticklabels(x_tickslabels)
+autolabel_time(rect_libsvm, ax)
+autolabel_time(rect_online, ax)
+plt.show()
diff --git a/auto_building_tools/benchmarks/bench_pca_solvers.py b/auto_building_tools/benchmarks/bench_pca_solvers.py
new file mode 100644
index 0000000..337af3a
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_pca_solvers.py
@@ -0,0 +1,165 @@
+# %%
+#
+# This benchmark compares the speed of PCA solvers on datasets of different
+# sizes in order to determine the best solver to select by default via the
+# "auto" heuristic.
+#
+# Note: we do not control for the accuracy of the solvers: we assume that all
+# solvers yield transformed data with similar explained variance. This
+# assumption is generally true, except for the randomized solver, which might
+# require more power iterations.
+#
+# We generate synthetic data with dimensions that are useful to plot:
+# - time vs n_samples for a fixed n_features, and
+# - time vs n_features for a fixed n_samples.
+import itertools
+from math import log10
+from time import perf_counter
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+from sklearn import config_context
+from sklearn.decomposition import PCA
+
+REF_DIMS = [100, 1000, 10_000]
+data_shapes = []
+for ref_dim in REF_DIMS:
+ data_shapes.extend([(ref_dim, 10**i) for i in range(1, 8 - int(log10(ref_dim)))])
+ data_shapes.extend(
+ [(ref_dim, 3 * 10**i) for i in range(1, 8 - int(log10(ref_dim)))]
+ )
+ data_shapes.extend([(10**i, ref_dim) for i in range(1, 8 - int(log10(ref_dim)))])
+ data_shapes.extend(
+ [(3 * 10**i, ref_dim) for i in range(1, 8 - int(log10(ref_dim)))]
+ )
+
+# Remove duplicates:
+data_shapes = sorted(set(data_shapes))
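+# The shapes above cover both tall (n_samples >> n_features) and wide
+# (n_features >> n_samples) matrices around each reference dimension.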
+
+print("Generating test datasets...")
+rng = np.random.default_rng(0)
+datasets = [rng.normal(size=shape) for shape in data_shapes]
+
+
+# %%
+def measure_one(data, n_components, solver, method_name="fit"):
+ print(
+ f"Benchmarking {solver=!r}, {n_components=}, {method_name=!r} on data with"
+ f" shape {data.shape}"
+ )
+ pca = PCA(n_components=n_components, svd_solver=solver, random_state=0)
+ timings = []
+ elapsed = 0
+ method = getattr(pca, method_name)
+ with config_context(assume_finite=True):
+ while elapsed < 0.5:
+ tic = perf_counter()
+ method(data)
+ duration = perf_counter() - tic
+ timings.append(duration)
+ elapsed += duration
+ return np.median(timings)
+
+
+SOLVERS = ["full", "covariance_eigh", "arpack", "randomized", "auto"]
+measurements = []
+for data, n_components, method_name in itertools.product(
+ datasets, [2, 50], ["fit", "fit_transform"]
+):
+ if n_components >= min(data.shape):
+ continue
+ for solver in SOLVERS:
+ if solver == "covariance_eigh" and data.shape[1] > 5000:
+ # Too much memory and too slow.
+ continue
+ if solver in ["arpack", "full"] and log10(data.size) > 7:
+ # Too slow, in particular for the full solver.
+ continue
+ time = measure_one(data, n_components, solver, method_name=method_name)
+ measurements.append(
+ {
+ "n_components": n_components,
+ "n_samples": data.shape[0],
+ "n_features": data.shape[1],
+ "time": time,
+ "solver": solver,
+ "method_name": method_name,
+ }
+ )
+measurements = pd.DataFrame(measurements)
+measurements.to_csv("bench_pca_solvers.csv", index=False)
+
+# %%
+all_method_names = measurements["method_name"].unique()
+all_n_components = measurements["n_components"].unique()
+
+for method_name in all_method_names:
+ fig, axes = plt.subplots(
+ figsize=(16, 16),
+ nrows=len(REF_DIMS),
+ ncols=len(all_n_components),
+ sharey=True,
+ constrained_layout=True,
+ )
+ fig.suptitle(f"Benchmarks for PCA.{method_name}, varying n_samples", fontsize=16)
+
+ for row_idx, ref_dim in enumerate(REF_DIMS):
+ for n_components, ax in zip(all_n_components, axes[row_idx]):
+ for solver in SOLVERS:
+ if solver == "auto":
+ style_kwargs = dict(linewidth=2, color="black", style="--")
+ else:
+ style_kwargs = dict(style="o-")
+ ax.set(
+ title=f"n_components={n_components}, n_features={ref_dim}",
+ ylabel="time (s)",
+ )
+ measurements.query(
+ "n_components == @n_components and n_features == @ref_dim"
+ " and solver == @solver and method_name == @method_name"
+ ).plot.line(
+ x="n_samples",
+ y="time",
+ label=solver,
+ logx=True,
+ logy=True,
+ ax=ax,
+ **style_kwargs,
+ )
+# %%
+for method_name in all_method_names:
+ fig, axes = plt.subplots(
+ figsize=(16, 16),
+ nrows=len(REF_DIMS),
+ ncols=len(all_n_components),
+ sharey=True,
+ )
+ fig.suptitle(f"Benchmarks for PCA.{method_name}, varying n_features", fontsize=16)
+
+ for row_idx, ref_dim in enumerate(REF_DIMS):
+ for n_components, ax in zip(all_n_components, axes[row_idx]):
+ for solver in SOLVERS:
+ if solver == "auto":
+ style_kwargs = dict(linewidth=2, color="black", style="--")
+ else:
+ style_kwargs = dict(style="o-")
+ ax.set(
+ title=f"n_components={n_components}, n_samples={ref_dim}",
+ ylabel="time (s)",
+ )
+ measurements.query(
+ "n_components == @n_components and n_samples == @ref_dim "
+ " and solver == @solver and method_name == @method_name"
+ ).plot.line(
+ x="n_features",
+ y="time",
+ label=solver,
+ logx=True,
+ logy=True,
+ ax=ax,
+ **style_kwargs,
+ )
+
+# %%
diff --git a/auto_building_tools/benchmarks/bench_plot_fastkmeans.py b/auto_building_tools/benchmarks/bench_plot_fastkmeans.py
new file mode 100644
index 0000000..1d420d1
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_plot_fastkmeans.py
@@ -0,0 +1,141 @@
+from collections import defaultdict
+from time import time
+
+import numpy as np
+from numpy import random as nr
+
+from sklearn.cluster import KMeans, MiniBatchKMeans
+
+
+def compute_bench(samples_range, features_range):
+ it = 0
+ results = defaultdict(lambda: [])
+ chunk = 100
+
+ max_it = len(samples_range) * len(features_range)
+ for n_samples in samples_range:
+ for n_features in features_range:
+ it += 1
+ print("==============================")
+ print("Iteration %03d of %03d" % (it, max_it))
+ print("==============================")
+ print()
+ data = nr.randint(-50, 51, (n_samples, n_features))
+
+ print("K-Means")
+ tstart = time()
+ kmeans = KMeans(init="k-means++", n_clusters=10).fit(data)
+
+ delta = time() - tstart
+ print("Speed: %0.3fs" % delta)
+ print("Inertia: %0.5f" % kmeans.inertia_)
+ print()
+
+ results["kmeans_speed"].append(delta)
+ results["kmeans_quality"].append(kmeans.inertia_)
+
+ print("Fast K-Means")
+ # let's prepare the data in small chunks
+ mbkmeans = MiniBatchKMeans(
+ init="k-means++", n_clusters=10, batch_size=chunk
+ )
+ tstart = time()
+ mbkmeans.fit(data)
+ delta = time() - tstart
+ print("Speed: %0.3fs" % delta)
+ print("Inertia: %f" % mbkmeans.inertia_)
+ print()
+ print()
+
+ results["MiniBatchKMeans Speed"].append(delta)
+ results["MiniBatchKMeans Quality"].append(mbkmeans.inertia_)
+
+ return results
+
+
+def compute_bench_2(chunks):
+ results = defaultdict(lambda: [])
+ n_features = 50000
+ means = np.array(
+ [
+ [1, 1],
+ [-1, -1],
+ [1, -1],
+ [-1, 1],
+ [0.5, 0.5],
+ [0.75, -0.5],
+ [-1, 0.75],
+ [1, 0],
+ ]
+ )
+ X = np.empty((0, 2))
+ for i in range(8):
+ X = np.r_[X, means[i] + 0.8 * np.random.randn(n_features, 2)]
+ max_it = len(chunks)
+ it = 0
+ for chunk in chunks:
+ it += 1
+ print("==============================")
+ print("Iteration %03d of %03d" % (it, max_it))
+ print("==============================")
+ print()
+
+ print("Fast K-Means")
+ tstart = time()
+ mbkmeans = MiniBatchKMeans(init="k-means++", n_clusters=8, batch_size=chunk)
+
+ mbkmeans.fit(X)
+ delta = time() - tstart
+ print("Speed: %0.3fs" % delta)
+ print("Inertia: %0.3fs" % mbkmeans.inertia_)
+ print()
+
+ results["MiniBatchKMeans Speed"].append(delta)
+ results["MiniBatchKMeans Quality"].append(mbkmeans.inertia_)
+
+ return results
+
+
+if __name__ == "__main__":
+ from mpl_toolkits.mplot3d import axes3d # noqa register the 3d projection
+ import matplotlib.pyplot as plt
+
+ samples_range = np.linspace(50, 150, 5).astype(int)
+ features_range = np.linspace(150, 50000, 5).astype(int)
+ chunks = np.linspace(500, 10000, 15).astype(int)
+
+ results = compute_bench(samples_range, features_range)
+ results_2 = compute_bench_2(chunks)
+
+    max_time = max(
+        max(t) for (label, t) in results.items() if "speed" in label.lower()
+    )
+    max_inertia = max(
+        max(t) for (label, t) in results.items() if "speed" not in label.lower()
+    )
+
+ fig = plt.figure("scikit-learn K-Means benchmark results")
+ for c, (label, timings) in zip("brcy", sorted(results.items())):
+        if "speed" in label.lower():
+ ax = fig.add_subplot(2, 2, 1, projection="3d")
+ ax.set_zlim3d(0.0, max_time * 1.1)
+ else:
+ ax = fig.add_subplot(2, 2, 2, projection="3d")
+ ax.set_zlim3d(0.0, max_inertia * 1.1)
+
+ X, Y = np.meshgrid(samples_range, features_range)
+ Z = np.asarray(timings).reshape(samples_range.shape[0], features_range.shape[0])
+ ax.plot_surface(X, Y, Z.T, cstride=1, rstride=1, color=c, alpha=0.5)
+ ax.set_xlabel("n_samples")
+ ax.set_ylabel("n_features")
+
+ i = 0
+ for c, (label, timings) in zip("br", sorted(results_2.items())):
+ i += 1
+ ax = fig.add_subplot(2, 2, i + 2)
+ y = np.asarray(timings)
+ ax.plot(chunks, y, color=c, alpha=0.8)
+ ax.set_xlabel("Chunks")
+ ax.set_ylabel(label)
+
+ plt.show()
diff --git a/auto_building_tools/benchmarks/bench_plot_hierarchical.py b/auto_building_tools/benchmarks/bench_plot_hierarchical.py
new file mode 100644
index 0000000..861a0ea
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_plot_hierarchical.py
@@ -0,0 +1,77 @@
+from collections import defaultdict
+from time import time
+
+import numpy as np
+from numpy import random as nr
+
+from sklearn.cluster import AgglomerativeClustering
+
+
+def compute_bench(samples_range, features_range):
+ it = 0
+ results = defaultdict(lambda: [])
+
+ max_it = len(samples_range) * len(features_range)
+ for n_samples in samples_range:
+ for n_features in features_range:
+ it += 1
+ print("==============================")
+ print("Iteration %03d of %03d" % (it, max_it))
+ print("n_samples %05d; n_features %02d" % (n_samples, n_features))
+ print("==============================")
+ print()
+ data = nr.randint(-50, 51, (n_samples, n_features))
+
+ for linkage in ("single", "average", "complete", "ward"):
+ print(linkage.capitalize())
+ tstart = time()
+ AgglomerativeClustering(linkage=linkage, n_clusters=10).fit(data)
+
+ delta = time() - tstart
+ print("Speed: %0.3fs" % delta)
+ print()
+
+ results[linkage].append(delta)
+
+ return results
+
+
+if __name__ == "__main__":
+ import matplotlib.pyplot as plt
+
+ samples_range = np.linspace(1000, 15000, 8).astype(int)
+ features_range = np.array([2, 10, 20, 50])
+
+ results = compute_bench(samples_range, features_range)
+
+ max_time = max([max(i) for i in [t for (label, t) in results.items()]])
+
+ colors = plt.get_cmap("tab10")(np.linspace(0, 1, 10))[:4]
+ lines = {linkage: None for linkage in results.keys()}
+ fig, axs = plt.subplots(2, 2, sharex=True, sharey=True)
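+    # (one subplot per n_features value, with one curve per linkage criterion)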
+ fig.suptitle("Scikit-learn agglomerative clustering benchmark results", fontsize=16)
+ for c, (label, timings) in zip(colors, sorted(results.items())):
+ timing_by_samples = np.asarray(timings).reshape(
+ samples_range.shape[0], features_range.shape[0]
+ )
+
+ for n in range(timing_by_samples.shape[1]):
+ ax = axs.flatten()[n]
+ (lines[label],) = ax.plot(
+ samples_range, timing_by_samples[:, n], color=c, label=label
+ )
+ ax.set_title("n_features = %d" % features_range[n])
+ if n >= 2:
+ ax.set_xlabel("n_samples")
+ if n % 2 == 0:
+ ax.set_ylabel("time (s)")
+
+ fig.subplots_adjust(right=0.8)
+ fig.legend(
+ [lines[link] for link in sorted(results.keys())],
+ sorted(results.keys()),
+ loc="center right",
+ fontsize=8,
+ )
+
+ plt.show()
diff --git a/auto_building_tools/benchmarks/bench_plot_incremental_pca.py b/auto_building_tools/benchmarks/bench_plot_incremental_pca.py
new file mode 100644
index 0000000..49b87c8
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_plot_incremental_pca.py
@@ -0,0 +1,159 @@
+"""
+========================
+IncrementalPCA benchmark
+========================
+
+Benchmarks for IncrementalPCA
+
+"""
+
+import gc
+from collections import defaultdict
+from time import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+from sklearn.datasets import fetch_lfw_people
+from sklearn.decomposition import PCA, IncrementalPCA
+
+
+def plot_results(X, y, label):
+ plt.plot(X, y, label=label, marker="o")
+
+
+def benchmark(estimator, data):
+ gc.collect()
+ print("Benching %s" % estimator)
+ t0 = time()
+ estimator.fit(data)
+ training_time = time() - t0
+ data_t = estimator.transform(data)
+ data_r = estimator.inverse_transform(data_t)
+ reconstruction_error = np.mean(np.abs(data - data_r))
+ return {"time": training_time, "error": reconstruction_error}
+
+
+def plot_feature_times(all_times, batch_size, all_components, data):
+ plt.figure()
+ plot_results(all_components, all_times["pca"], label="PCA")
+ plot_results(
+ all_components, all_times["ipca"], label="IncrementalPCA, bsize=%i" % batch_size
+ )
+ plt.legend(loc="upper left")
+ plt.suptitle(
+ "Algorithm runtime vs. n_components\n LFW, size %i x %i"
+ % data.shape
+ )
+ plt.xlabel("Number of components (out of max %i)" % data.shape[1])
+ plt.ylabel("Time (seconds)")
+
+
+def plot_feature_errors(all_errors, batch_size, all_components, data):
+ plt.figure()
+ plot_results(all_components, all_errors["pca"], label="PCA")
+ plot_results(
+ all_components,
+ all_errors["ipca"],
+ label="IncrementalPCA, bsize=%i" % batch_size,
+ )
+ plt.legend(loc="lower left")
+ plt.suptitle("Algorithm error vs. n_components\nLFW, size %i x %i" % data.shape)
+ plt.xlabel("Number of components (out of max %i)" % data.shape[1])
+ plt.ylabel("Mean absolute error")
+
+
+def plot_batch_times(all_times, n_features, all_batch_sizes, data):
+ plt.figure()
+ plot_results(all_batch_sizes, all_times["pca"], label="PCA")
+ plot_results(all_batch_sizes, all_times["ipca"], label="IncrementalPCA")
+ plt.legend(loc="lower left")
+ plt.suptitle(
+ "Algorithm runtime vs. batch_size for n_components %i\n LFW,"
+ " size %i x %i" % (n_features, data.shape[0], data.shape[1])
+ )
+ plt.xlabel("Batch size")
+ plt.ylabel("Time (seconds)")
+
+
+def plot_batch_errors(all_errors, n_features, all_batch_sizes, data):
+ plt.figure()
+ plot_results(all_batch_sizes, all_errors["pca"], label="PCA")
+ plot_results(all_batch_sizes, all_errors["ipca"], label="IncrementalPCA")
+ plt.legend(loc="lower left")
+ plt.suptitle(
+ "Algorithm error vs. batch_size for n_components %i\n LFW,"
+ " size %i x %i" % (n_features, data.shape[0], data.shape[1])
+ )
+ plt.xlabel("Batch size")
+ plt.ylabel("Mean absolute error")
+
+
+def fixed_batch_size_comparison(data):
+ all_features = [
+ i.astype(int) for i in np.linspace(data.shape[1] // 10, data.shape[1], num=5)
+ ]
+ batch_size = 1000
+ # Compare runtimes and error for fixed batch size
+ all_times = defaultdict(list)
+ all_errors = defaultdict(list)
+ for n_components in all_features:
+ pca = PCA(n_components=n_components)
+ ipca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
+ results_dict = {
+ k: benchmark(est, data) for k, est in [("pca", pca), ("ipca", ipca)]
+ }
+
+ for k in sorted(results_dict.keys()):
+ all_times[k].append(results_dict[k]["time"])
+ all_errors[k].append(results_dict[k]["error"])
+
+ plot_feature_times(all_times, batch_size, all_features, data)
+ plot_feature_errors(all_errors, batch_size, all_features, data)
+
+
+def variable_batch_size_comparison(data):
+ batch_sizes = [
+ i.astype(int) for i in np.linspace(data.shape[0] // 10, data.shape[0], num=10)
+ ]
+
+ for n_components in [
+ i.astype(int) for i in np.linspace(data.shape[1] // 10, data.shape[1], num=4)
+ ]:
+ all_times = defaultdict(list)
+ all_errors = defaultdict(list)
+ pca = PCA(n_components=n_components)
+ rpca = PCA(
+ n_components=n_components, svd_solver="randomized", random_state=1999
+ )
+ results_dict = {
+ k: benchmark(est, data) for k, est in [("pca", pca), ("rpca", rpca)]
+ }
+
+ # Create flat baselines to compare the variation over batch size
+ all_times["pca"].extend([results_dict["pca"]["time"]] * len(batch_sizes))
+ all_errors["pca"].extend([results_dict["pca"]["error"]] * len(batch_sizes))
+ all_times["rpca"].extend([results_dict["rpca"]["time"]] * len(batch_sizes))
+ all_errors["rpca"].extend([results_dict["rpca"]["error"]] * len(batch_sizes))
+ for batch_size in batch_sizes:
+ ipca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
+ results_dict = {k: benchmark(est, data) for k, est in [("ipca", ipca)]}
+ all_times["ipca"].append(results_dict["ipca"]["time"])
+ all_errors["ipca"].append(results_dict["ipca"]["error"])
+
+ plot_batch_times(all_times, n_components, batch_sizes, data)
+ plot_batch_errors(all_errors, n_components, batch_sizes, data)
+
+
+faces = fetch_lfw_people(resize=0.2, min_faces_per_person=5)
+# limit the dataset to the first 5000 samples (we don't care whose faces they are!)
+X = faces.data[:5000]
+n_samples, h, w = faces.images.shape
+n_features = X.shape[1]
+
+X -= X.mean(axis=0)
+X /= X.std(axis=0)
+
+fixed_batch_size_comparison(X)
+variable_batch_size_comparison(X)
+plt.show()
diff --git a/auto_building_tools/benchmarks/bench_plot_lasso_path.py b/auto_building_tools/benchmarks/bench_plot_lasso_path.py
new file mode 100644
index 0000000..3b46e44
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_plot_lasso_path.py
@@ -0,0 +1,113 @@
+"""Benchmarks of Lasso regularization path computation using Lars and CD
+
+The input data is mostly low rank but has a fat infinite tail.
+"""
+
+import gc
+import sys
+from collections import defaultdict
+from time import time
+
+import numpy as np
+
+from sklearn.datasets import make_regression
+from sklearn.linear_model import lars_path, lars_path_gram, lasso_path
+
+
+def compute_bench(samples_range, features_range):
+ it = 0
+
+ results = defaultdict(lambda: [])
+
+ max_it = len(samples_range) * len(features_range)
+ for n_samples in samples_range:
+ for n_features in features_range:
+ it += 1
+ print("====================")
+ print("Iteration %03d of %03d" % (it, max_it))
+ print("====================")
+ dataset_kwargs = {
+ "n_samples": n_samples,
+ "n_features": n_features,
+ "n_informative": n_features // 10,
+ "effective_rank": min(n_samples, n_features) / 10,
+ # 'effective_rank': None,
+ "bias": 0.0,
+ }
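+            # (`effective_rank` makes the design matrix mostly low rank, the
+            # setting described in the module docstring)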
+ print("n_samples: %d" % n_samples)
+ print("n_features: %d" % n_features)
+ X, y = make_regression(**dataset_kwargs)
+
+ gc.collect()
+ print("benchmarking lars_path (with Gram):", end="")
+ sys.stdout.flush()
+ tstart = time()
+ G = np.dot(X.T, X) # precomputed Gram matrix
+ Xy = np.dot(X.T, y)
+ lars_path_gram(Xy=Xy, Gram=G, n_samples=y.size, method="lasso")
+ delta = time() - tstart
+ print("%0.3fs" % delta)
+ results["lars_path (with Gram)"].append(delta)
+
+ gc.collect()
+ print("benchmarking lars_path (without Gram):", end="")
+ sys.stdout.flush()
+ tstart = time()
+ lars_path(X, y, method="lasso")
+ delta = time() - tstart
+ print("%0.3fs" % delta)
+ results["lars_path (without Gram)"].append(delta)
+
+ gc.collect()
+ print("benchmarking lasso_path (with Gram):", end="")
+ sys.stdout.flush()
+ tstart = time()
+ lasso_path(X, y, precompute=True)
+ delta = time() - tstart
+ print("%0.3fs" % delta)
+ results["lasso_path (with Gram)"].append(delta)
+
+ gc.collect()
+ print("benchmarking lasso_path (without Gram):", end="")
+ sys.stdout.flush()
+ tstart = time()
+ lasso_path(X, y, precompute=False)
+ delta = time() - tstart
+ print("%0.3fs" % delta)
+ results["lasso_path (without Gram)"].append(delta)
+
+ return results
+
+
+if __name__ == "__main__":
+ from mpl_toolkits.mplot3d import axes3d # noqa register the 3d projection
+ import matplotlib.pyplot as plt
+
+ samples_range = np.linspace(10, 2000, 5).astype(int)
+ features_range = np.linspace(10, 2000, 5).astype(int)
+ results = compute_bench(samples_range, features_range)
+
+ max_time = max(max(t) for t in results.values())
+
+ fig = plt.figure("scikit-learn Lasso path benchmark results")
+ i = 1
+ for c, (label, timings) in zip("bcry", sorted(results.items())):
+ ax = fig.add_subplot(2, 2, i, projection="3d")
+ X, Y = np.meshgrid(samples_range, features_range)
+ Z = np.asarray(timings).reshape(samples_range.shape[0], features_range.shape[0])
+
+ # plot the actual surface
+ ax.plot_surface(X, Y, Z.T, cstride=1, rstride=1, color=c, alpha=0.8)
+
+ # dummy point plot to stick the legend to since surface plot do not
+ # support legends (yet?)
+ # ax.plot([1], [1], [1], color=c, label=label)
+
+ ax.set_xlabel("n_samples")
+ ax.set_ylabel("n_features")
+ ax.set_zlabel("Time (s)")
+ ax.set_zlim3d(0.0, max_time * 1.1)
+ ax.set_title(label)
+ # ax.legend()
+ i += 1
+ plt.show()
diff --git a/auto_building_tools/benchmarks/bench_plot_neighbors.py b/auto_building_tools/benchmarks/bench_plot_neighbors.py
new file mode 100644
index 0000000..2cedb19
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_plot_neighbors.py
@@ -0,0 +1,191 @@
+"""
+Plot the scaling of the nearest neighbors algorithms with k, D, and N
+"""
+
+from time import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib import ticker
+
+from sklearn import datasets, neighbors
+
+
+def get_data(N, D, dataset="dense"):
+ if dataset == "dense":
+ np.random.seed(0)
+ return np.random.random((N, D))
+ elif dataset == "digits":
+ X, _ = datasets.load_digits(return_X_y=True)
+ i = np.argsort(X[0])[::-1]
+ X = X[:, i]
+ return X[:N, :D]
+ else:
+ raise ValueError("invalid dataset: %s" % dataset)
+
+
+def barplot_neighbors(
+ Nrange=2 ** np.arange(1, 11),
+ Drange=2 ** np.arange(7),
+ krange=2 ** np.arange(10),
+ N=1000,
+ D=64,
+ k=5,
+ leaf_size=30,
+ dataset="digits",
+):
+ algorithms = ("kd_tree", "brute", "ball_tree")
+ fiducial_values = {"N": N, "D": D, "k": k}
+
+ # ------------------------------------------------------------
+ # varying N
+ N_results_build = {alg: np.zeros(len(Nrange)) for alg in algorithms}
+ N_results_query = {alg: np.zeros(len(Nrange)) for alg in algorithms}
+
+ for i, NN in enumerate(Nrange):
+ print("N = %i (%i out of %i)" % (NN, i + 1, len(Nrange)))
+ X = get_data(NN, D, dataset)
+ for algorithm in algorithms:
+ nbrs = neighbors.NearestNeighbors(
+ n_neighbors=min(NN, k), algorithm=algorithm, leaf_size=leaf_size
+ )
+ t0 = time()
+ nbrs.fit(X)
+ t1 = time()
+ nbrs.kneighbors(X)
+ t2 = time()
+
+ N_results_build[algorithm][i] = t1 - t0
+ N_results_query[algorithm][i] = t2 - t1
+
+ # ------------------------------------------------------------
+ # varying D
+ D_results_build = {alg: np.zeros(len(Drange)) for alg in algorithms}
+ D_results_query = {alg: np.zeros(len(Drange)) for alg in algorithms}
+
+ for i, DD in enumerate(Drange):
+ print("D = %i (%i out of %i)" % (DD, i + 1, len(Drange)))
+ X = get_data(N, DD, dataset)
+ for algorithm in algorithms:
+ nbrs = neighbors.NearestNeighbors(
+ n_neighbors=k, algorithm=algorithm, leaf_size=leaf_size
+ )
+ t0 = time()
+ nbrs.fit(X)
+ t1 = time()
+ nbrs.kneighbors(X)
+ t2 = time()
+
+ D_results_build[algorithm][i] = t1 - t0
+ D_results_query[algorithm][i] = t2 - t1
+
+ # ------------------------------------------------------------
+ # varying k
+ k_results_build = {alg: np.zeros(len(krange)) for alg in algorithms}
+ k_results_query = {alg: np.zeros(len(krange)) for alg in algorithms}
+
+ X = get_data(N, D, dataset) # use the fiducial dimensionality when varying k
+
+ for i, kk in enumerate(krange):
+ print("k = %i (%i out of %i)" % (kk, i + 1, len(krange)))
+ for algorithm in algorithms:
+ nbrs = neighbors.NearestNeighbors(
+ n_neighbors=kk, algorithm=algorithm, leaf_size=leaf_size
+ )
+ t0 = time()
+ nbrs.fit(X)
+ t1 = time()
+ nbrs.kneighbors(X)
+ t2 = time()
+
+ k_results_build[algorithm][i] = t1 - t0
+ k_results_query[algorithm][i] = t2 - t1
+
+ plt.figure(figsize=(8, 11))
+
+ for sbplt, vals, quantity, build_time, query_time in [
+ (311, Nrange, "N", N_results_build, N_results_query),
+ (312, Drange, "D", D_results_build, D_results_query),
+ (313, krange, "k", k_results_build, k_results_query),
+ ]:
+ ax = plt.subplot(sbplt, yscale="log")
+ plt.grid(True)
+
+ tick_vals = []
+ tick_labels = []
+
+ bottom = 10 ** np.min(
+ [min(np.floor(np.log10(build_time[alg]))) for alg in algorithms]
+ )
+
+ for i, alg in enumerate(algorithms):
+ xvals = 0.1 + i * (1 + len(vals)) + np.arange(len(vals))
+ width = 0.8
+
+ c_bar = plt.bar(xvals, build_time[alg] - bottom, width, bottom, color="r")
+ q_bar = plt.bar(xvals, query_time[alg], width, build_time[alg], color="b")
+
+ tick_vals += list(xvals + 0.5 * width)
+ tick_labels += ["%i" % val for val in vals]
+
+ plt.text(
+ (i + 0.02) / len(algorithms),
+ 0.98,
+ alg,
+ transform=ax.transAxes,
+ ha="left",
+ va="top",
+ bbox=dict(facecolor="w", edgecolor="w", alpha=0.5),
+ )
+
+ plt.ylabel("Time (s)")
+
+ ax.xaxis.set_major_locator(ticker.FixedLocator(tick_vals))
+ ax.xaxis.set_major_formatter(ticker.FixedFormatter(tick_labels))
+
+ for label in ax.get_xticklabels():
+ label.set_rotation(-90)
+ label.set_fontsize(10)
+
+ title_string = "Varying %s" % quantity
+
+ descr_string = ""
+
+ for s in "NDk":
+ if s == quantity:
+ pass
+ else:
+ descr_string += "%s = %i, " % (s, fiducial_values[s])
+
+ descr_string = descr_string[:-2]
+
+ plt.text(
+ 1.01,
+ 0.5,
+ title_string,
+ transform=ax.transAxes,
+ rotation=-90,
+ ha="left",
+ va="center",
+ fontsize=20,
+ )
+
+ plt.text(
+ 0.99,
+ 0.5,
+ descr_string,
+ transform=ax.transAxes,
+ rotation=-90,
+ ha="right",
+ va="center",
+ )
+
+ plt.gcf().suptitle("%s data set" % dataset.capitalize(), fontsize=16)
+
+ plt.figlegend((c_bar, q_bar), ("construction", "N-point query"), "upper right")
+
+
+if __name__ == "__main__":
+ barplot_neighbors(dataset="digits")
+ barplot_neighbors(dataset="dense")
+ plt.show()
diff --git a/auto_building_tools/benchmarks/bench_plot_nmf.py b/auto_building_tools/benchmarks/bench_plot_nmf.py
new file mode 100644
index 0000000..76d1a6d
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_plot_nmf.py
@@ -0,0 +1,472 @@
+"""
+Benchmarks of Non-Negative Matrix Factorization
+"""
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
+import numbers
+import sys
+import warnings
+from time import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas
+from joblib import Memory
+
+from sklearn.decomposition import NMF
+from sklearn.decomposition._nmf import _beta_divergence, _check_init, _initialize_nmf
+from sklearn.exceptions import ConvergenceWarning
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.utils import check_array
+from sklearn.utils._testing import ignore_warnings
+from sklearn.utils.extmath import safe_sparse_dot, squared_norm
+from sklearn.utils.validation import check_is_fitted, check_non_negative
+
+mem = Memory(location=".", verbose=0)
+
+###################
+# Start of _PGNMF #
+###################
+# This class implements a projected gradient solver for the NMF.
+# The projected gradient solver was removed from scikit-learn in version 0.19,
+# and a simplified copy is used here for comparison purpose only.
+# It is not tested, and it may change or disappear without notice.
+
+
+def _norm(x):
+ """Dot product-based Euclidean norm implementation
+ See: https://fa.bianp.net/blog/2011/computing-the-vector-norm/
+ """
+ return np.sqrt(squared_norm(x))
+
+
+def _nls_subproblem(
+ X, W, H, tol, max_iter, alpha=0.0, l1_ratio=0.0, sigma=0.01, beta=0.1
+):
+ """Non-negative least square solver
+ Solves a non-negative least squares subproblem using the projected
+ gradient descent algorithm.
+ Parameters
+ ----------
+ X : array-like, shape (n_samples, n_features)
+ Constant matrix.
+ W : array-like, shape (n_samples, n_components)
+ Constant matrix.
+ H : array-like, shape (n_components, n_features)
+ Initial guess for the solution.
+ tol : float
+ Tolerance of the stopping condition.
+ max_iter : int
+ Maximum number of iterations before timing out.
+ alpha : double, default: 0.
+ Constant that multiplies the regularization terms. Set it to zero to
+ have no regularization.
+ l1_ratio : double, default: 0.
+ The regularization mixing parameter, with 0 <= l1_ratio <= 1.
+ For l1_ratio = 0 the penalty is an L2 penalty.
+ For l1_ratio = 1 it is an L1 penalty.
+ For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.
+ sigma : float
+ Constant used in the sufficient decrease condition checked by the line
+ search. Smaller values lead to a looser sufficient decrease condition,
+ thus reducing the time taken by the line search, but potentially
+ increasing the number of iterations of the projected gradient
+ procedure. 0.01 is a commonly used value in the optimization
+ literature.
+ beta : float
+ Factor by which the step size is decreased (resp. increased) until
+ (resp. as long as) the sufficient decrease condition is satisfied.
+ Larger values allow to find a better step size but lead to longer line
+ search. 0.1 is a commonly used value in the optimization literature.
+ Returns
+ -------
+ H : array-like, shape (n_components, n_features)
+ Solution to the non-negative least squares problem.
+ grad : array-like, shape (n_components, n_features)
+ The gradient.
+ n_iter : int
+ The number of iterations done by the algorithm.
+ References
+ ----------
+ C.-J. Lin. Projected gradient methods for non-negative matrix
+ factorization. Neural Computation, 19(2007), 2756-2779.
+ https://www.csie.ntu.edu.tw/~cjlin/nmf/
+ """
+ WtX = safe_sparse_dot(W.T, X)
+ WtW = np.dot(W.T, W)
+
+ # values justified in the paper (alpha is renamed gamma)
+ gamma = 1
+ for n_iter in range(1, max_iter + 1):
+ grad = np.dot(WtW, H) - WtX
+ if alpha > 0 and l1_ratio == 1.0:
+ grad += alpha
+ elif alpha > 0:
+ grad += alpha * (l1_ratio + (1 - l1_ratio) * H)
+
+ # The following multiplication with a boolean array is more than twice
+ # as fast as indexing into grad.
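+ # This is the projected gradient norm from Lin (2007): only entries where
+ # H > 0 (free variables) or grad < 0 (the constraint can still be relaxed)
+ # count towards the stopping criterion.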
+ if _norm(grad * np.logical_or(grad < 0, H > 0)) < tol:
+ break
+
+ Hp = H
+
+ for inner_iter in range(20):
+ # Gradient step.
+ Hn = H - gamma * grad
+ # Projection step.
+ Hn *= Hn > 0
+ d = Hn - H
+ gradd = np.dot(grad.ravel(), d.ravel())
+ dQd = np.dot(np.dot(WtW, d).ravel(), d.ravel())
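+ # Sufficient decrease test from Lin (2007): the objective is quadratic in H,
+ # so the exact change along d is <grad, d> + 0.5 * <d, WtW d>; requiring it
+ # to be at most sigma * <grad, d> rearranges to the condition below.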
+ suff_decr = (1 - sigma) * gradd + 0.5 * dQd < 0
+ if inner_iter == 0:
+ decr_gamma = not suff_decr
+
+ if decr_gamma:
+ if suff_decr:
+ H = Hn
+ break
+ else:
+ gamma *= beta
+ elif not suff_decr or (Hp == Hn).all():
+ H = Hp
+ break
+ else:
+ gamma /= beta
+ Hp = Hn
+
+ if n_iter == max_iter:
+ warnings.warn("Iteration limit reached in nls subproblem.", ConvergenceWarning)
+
+ return H, grad, n_iter
+
+
+def _fit_projected_gradient(X, W, H, tol, max_iter, nls_max_iter, alpha, l1_ratio):
+ gradW = np.dot(W, np.dot(H, H.T)) - safe_sparse_dot(X, H.T, dense_output=True)
+ gradH = np.dot(np.dot(W.T, W), H) - safe_sparse_dot(W.T, X, dense_output=True)
+
+ init_grad = squared_norm(gradW) + squared_norm(gradH.T)
+ # max(0.001, tol) to force alternating minimizations of W and H
+ tolW = max(0.001, tol) * np.sqrt(init_grad)
+ tolH = tolW
+
+ for n_iter in range(1, max_iter + 1):
+ # stopping condition as discussed in paper
+ proj_grad_W = squared_norm(gradW * np.logical_or(gradW < 0, W > 0))
+ proj_grad_H = squared_norm(gradH * np.logical_or(gradH < 0, H > 0))
+
+ if (proj_grad_W + proj_grad_H) / init_grad < tol**2:
+ break
+
+ # update W
+ Wt, gradWt, iterW = _nls_subproblem(
+ X.T, H.T, W.T, tolW, nls_max_iter, alpha=alpha, l1_ratio=l1_ratio
+ )
+ W, gradW = Wt.T, gradWt.T
+
+ if iterW == 1:
+ tolW = 0.1 * tolW
+
+ # update H
+ H, gradH, iterH = _nls_subproblem(
+ X, W, H, tolH, nls_max_iter, alpha=alpha, l1_ratio=l1_ratio
+ )
+ if iterH == 1:
+ tolH = 0.1 * tolH
+
+ H[H == 0] = 0 # fix up negative zeros
+
+ if n_iter == max_iter:
+ Wt, _, _ = _nls_subproblem(
+ X.T, H.T, W.T, tolW, nls_max_iter, alpha=alpha, l1_ratio=l1_ratio
+ )
+ W = Wt.T
+
+ return W, H, n_iter
+
+
+class _PGNMF(NMF):
+ """Non-Negative Matrix Factorization (NMF) with projected gradient solver.
+
+ This class is private and for comparison purpose only.
+ It may change or disappear without notice.
+
+ """
+
+ def __init__(
+ self,
+ n_components=None,
+ solver="pg",
+ init=None,
+ tol=1e-4,
+ max_iter=200,
+ random_state=None,
+ alpha=0.0,
+ l1_ratio=0.0,
+ nls_max_iter=10,
+ ):
+ super().__init__(
+ n_components=n_components,
+ init=init,
+ solver=solver,
+ tol=tol,
+ max_iter=max_iter,
+ random_state=random_state,
+ alpha_W=alpha,
+ alpha_H=alpha,
+ l1_ratio=l1_ratio,
+ )
+ self.nls_max_iter = nls_max_iter
+
+ def fit(self, X, y=None, **params):
+ self.fit_transform(X, **params)
+ return self
+
+ def transform(self, X):
+ check_is_fitted(self)
+ H = self.components_
+ W, _, self.n_iter_ = self._fit_transform(X, H=H, update_H=False)
+ return W
+
+ def inverse_transform(self, W):
+ check_is_fitted(self)
+ return np.dot(W, self.components_)
+
+ def fit_transform(self, X, y=None, W=None, H=None):
+ W, H, self.n_iter_ = self._fit_transform(X, W=W, H=H, update_H=True)
+ self.components_ = H
+ return W
+
+ def _fit_transform(self, X, y=None, W=None, H=None, update_H=True):
+ X = check_array(X, accept_sparse=("csr", "csc"))
+ check_non_negative(X, "NMF (input X)")
+
+ n_samples, n_features = X.shape
+ n_components = self.n_components
+ if n_components is None:
+ n_components = n_features
+
+ if not isinstance(n_components, numbers.Integral) or n_components <= 0:
+ raise ValueError(
+ "Number of components must be a positive integer; got (n_components=%r)"
+ % n_components
+ )
+ if not isinstance(self.max_iter, numbers.Integral) or self.max_iter < 0:
+ raise ValueError(
+ "Maximum number of iterations must be a positive "
+ "integer; got (max_iter=%r)" % self.max_iter
+ )
+ if not isinstance(self.tol, numbers.Number) or self.tol < 0:
+ raise ValueError(
+ "Tolerance for stopping criteria must be positive; got (tol=%r)"
+ % self.tol
+ )
+
+ # check W and H, or initialize them
+ if self.init == "custom" and update_H:
+ _check_init(H, (n_components, n_features), "NMF (input H)")
+ _check_init(W, (n_samples, n_components), "NMF (input W)")
+ elif not update_H:
+ _check_init(H, (n_components, n_features), "NMF (input H)")
+ W = np.zeros((n_samples, n_components))
+ else:
+ W, H = _initialize_nmf(
+ X, n_components, init=self.init, random_state=self.random_state
+ )
+
+ if update_H: # fit_transform
+ W, H, n_iter = _fit_projected_gradient(
+ X,
+ W,
+ H,
+ self.tol,
+ self.max_iter,
+ self.nls_max_iter,
+ self.alpha,
+ self.l1_ratio,
+ )
+ else: # transform
+ Wt, _, n_iter = _nls_subproblem(
+ X.T,
+ H.T,
+ W.T,
+ self.tol,
+ self.nls_max_iter,
+ alpha=self.alpha,
+ l1_ratio=self.l1_ratio,
+ )
+ W = Wt.T
+
+ if n_iter == self.max_iter and self.tol > 0:
+ warnings.warn(
+ "Maximum number of iteration %d reached. Increase it"
+ " to improve convergence." % self.max_iter,
+ ConvergenceWarning,
+ )
+
+ return W, H, n_iter
+
+
+#################
+# End of _PGNMF #
+#################
+
+
+def plot_results(results_df, plot_name):
+ if results_df is None:
+ return None
+
+ plt.figure(figsize=(16, 6))
+ colors = "bgr"
+ markers = "ovs"
+ ax = plt.subplot(1, 3, 1)
+ for i, init in enumerate(np.unique(results_df["init"])):
+ plt.subplot(1, 3, i + 1, sharex=ax, sharey=ax)
+ for j, method in enumerate(np.unique(results_df["method"])):
+ mask = np.logical_and(
+ results_df["init"] == init, results_df["method"] == method
+ )
+ selected_items = results_df[mask]
+
+ plt.plot(
+ selected_items["time"],
+ selected_items["loss"],
+ color=colors[j % len(colors)],
+ ls="-",
+ marker=markers[j % len(markers)],
+ label=method,
+ )
+
+ plt.legend(loc=0, fontsize="x-small")
+ plt.xlabel("Time (s)")
+ plt.ylabel("loss")
+ plt.title("%s" % init)
+ plt.suptitle(plot_name, fontsize=16)
+
+
+@ignore_warnings(category=ConvergenceWarning)
+# Use joblib to cache the results.
+# X_shape is passed as an argument to avoid hashing X itself.
+@mem.cache(ignore=["X", "W0", "H0"])
+def bench_one(
+ name, X, W0, H0, X_shape, clf_type, clf_params, init, n_components, random_state
+):
+ W = W0.copy()
+ H = H0.copy()
+
+ clf = clf_type(**clf_params)
+ st = time()
+ W = clf.fit_transform(X, W=W, H=H)
+ end = time()
+ H = clf.components_
+
+ this_loss = _beta_divergence(X, W, H, 2.0, True)
+ duration = end - st
+ return this_loss, duration
+
+
+def run_bench(X, clfs, plot_name, n_components, tol, alpha, l1_ratio):
+ start = time()
+ results = []
+ for name, clf_type, iter_range, clf_params in clfs:
+ print("Training %s:" % name)
+ for rs, init in enumerate(("nndsvd", "nndsvdar", "random")):
+ print(" %s %s: " % (init, " " * (8 - len(init))), end="")
+ W, H = _initialize_nmf(X, n_components, init, 1e-6, rs)
+
+ for max_iter in iter_range:
+ clf_params["alpha"] = alpha
+ clf_params["l1_ratio"] = l1_ratio
+ clf_params["max_iter"] = max_iter
+ clf_params["tol"] = tol
+ clf_params["random_state"] = rs
+ clf_params["init"] = "custom"
+ clf_params["n_components"] = n_components
+
+ this_loss, duration = bench_one(
+ name, X, W, H, X.shape, clf_type, clf_params, init, n_components, rs
+ )
+
+ init_name = "init='%s'" % init
+ results.append((name, this_loss, duration, init_name))
+ # print("loss: %.6f, time: %.3f sec" % (this_loss, duration))
+ print(".", end="")
+ sys.stdout.flush()
+ print(" ")
+
+ # Use a pandas DataFrame to organize the results
+ results_df = pandas.DataFrame(results, columns="method loss time init".split())
+ print("Total time = %0.3f sec\n" % (time() - start))
+
+ # plot the results
+ plot_results(results_df, plot_name)
+ return results_df
+
+
+def load_20news():
+ print("Loading 20 newsgroups dataset")
+ print("-----------------------------")
+ from sklearn.datasets import fetch_20newsgroups
+
+ dataset = fetch_20newsgroups(
+ shuffle=True, random_state=1, remove=("headers", "footers", "quotes")
+ )
+ vectorizer = TfidfVectorizer(max_df=0.95, min_df=2, stop_words="english")
+ tfidf = vectorizer.fit_transform(dataset.data)
+ return tfidf
+
+
+def load_faces():
+ print("Loading Olivetti face dataset")
+ print("-----------------------------")
+ from sklearn.datasets import fetch_olivetti_faces
+
+ faces = fetch_olivetti_faces(shuffle=True)
+ return faces.data
+
+
+def build_clfs(cd_iters, pg_iters, mu_iters):
+ clfs = [
+ ("Coordinate Descent", NMF, cd_iters, {"solver": "cd"}),
+ ("Projected Gradient", _PGNMF, pg_iters, {"solver": "pg"}),
+ ("Multiplicative Update", NMF, mu_iters, {"solver": "mu"}),
+ ]
+ return clfs
+
+
+if __name__ == "__main__":
+ alpha = 0.0
+ l1_ratio = 0.5
+ n_components = 10
+ tol = 1e-15
+
+ # first benchmark on 20 newsgroup dataset: sparse, shape(11314, 39116)
+ plot_name = "20 Newsgroups sparse dataset"
+ cd_iters = np.arange(1, 30)
+ pg_iters = np.arange(1, 6)
+ mu_iters = np.arange(1, 30)
+ clfs = build_clfs(cd_iters, pg_iters, mu_iters)
+ X_20news = load_20news()
+ run_bench(X_20news, clfs, plot_name, n_components, tol, alpha, l1_ratio)
+
+ # second benchmark on Olivetti faces dataset: dense, shape(400, 4096)
+ plot_name = "Olivetti Faces dense dataset"
+ cd_iters = np.arange(1, 30)
+ pg_iters = np.arange(1, 12)
+ mu_iters = np.arange(1, 30)
+ clfs = build_clfs(cd_iters, pg_iters, mu_iters)
+ X_faces = load_faces()
+ run_bench(
+ X_faces,
+ clfs,
+ plot_name,
+ n_components,
+ tol,
+ alpha,
+ l1_ratio,
+ )
+
+ plt.show()
diff --git a/auto_building_tools/benchmarks/bench_plot_omp_lars.py b/auto_building_tools/benchmarks/bench_plot_omp_lars.py
new file mode 100644
index 0000000..8a4bc9b
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_plot_omp_lars.py
@@ -0,0 +1,120 @@
+"""Benchmarks of orthogonal matching pursuit (:ref:`OMP`) versus least angle
+regression (:ref:`least_angle_regression`)
+
+The input data is mostly low rank but has a fat, heavy tail.
+"""
+
+import gc
+import sys
+from time import time
+
+import numpy as np
+
+from sklearn.datasets import make_sparse_coded_signal
+from sklearn.linear_model import lars_path, lars_path_gram, orthogonal_mp
+
+
+def compute_bench(samples_range, features_range):
+ it = 0
+
+ results = dict()
+ lars = np.empty((len(features_range), len(samples_range)))
+ lars_gram = lars.copy()
+ omp = lars.copy()
+ omp_gram = lars.copy()
+
+ max_it = len(samples_range) * len(features_range)
+ for i_s, n_samples in enumerate(samples_range):
+ for i_f, n_features in enumerate(features_range):
+ it += 1
+ n_informative = n_features // 10
+ print("====================")
+ print("Iteration %03d of %03d" % (it, max_it))
+ print("====================")
+ # dataset_kwargs = {
+ # 'n_train_samples': n_samples,
+ # 'n_test_samples': 2,
+ # 'n_features': n_features,
+ # 'n_informative': n_informative,
+ # 'effective_rank': min(n_samples, n_features) / 10,
+ # #'effective_rank': None,
+ # 'bias': 0.0,
+ # }
+ dataset_kwargs = {
+ "n_samples": 1,
+ "n_components": n_features,
+ "n_features": n_samples,
+ "n_nonzero_coefs": n_informative,
+ "random_state": 0,
+ }
+ print("n_samples: %d" % n_samples)
+ print("n_features: %d" % n_features)
+ y, X, _ = make_sparse_coded_signal(**dataset_kwargs)
+ X = np.asfortranarray(X.T)
+
+ gc.collect()
+ print("benchmarking lars_path (with Gram):", end="")
+ sys.stdout.flush()
+ tstart = time()
+ G = np.dot(X.T, X) # precomputed Gram matrix
+ Xy = np.dot(X.T, y)
+ lars_path_gram(Xy=Xy, Gram=G, n_samples=y.size, max_iter=n_informative)
+ delta = time() - tstart
+ print("%0.3fs" % delta)
+ lars_gram[i_f, i_s] = delta
+
+ gc.collect()
+ print("benchmarking lars_path (without Gram):", end="")
+ sys.stdout.flush()
+ tstart = time()
+ lars_path(X, y, Gram=None, max_iter=n_informative)
+ delta = time() - tstart
+ print("%0.3fs" % delta)
+ lars[i_f, i_s] = delta
+
+ gc.collect()
+ print("benchmarking orthogonal_mp (with Gram):", end="")
+ sys.stdout.flush()
+ tstart = time()
+ orthogonal_mp(X, y, precompute=True, n_nonzero_coefs=n_informative)
+ delta = time() - tstart
+ print("%0.3fs" % delta)
+ omp_gram[i_f, i_s] = delta
+
+ gc.collect()
+ print("benchmarking orthogonal_mp (without Gram):", end="")
+ sys.stdout.flush()
+ tstart = time()
+ orthogonal_mp(X, y, precompute=False, n_nonzero_coefs=n_informative)
+ delta = time() - tstart
+ print("%0.3fs" % delta)
+ omp[i_f, i_s] = delta
+
+ results["time(LARS) / time(OMP)\n (w/ Gram)"] = lars_gram / omp_gram
+ results["time(LARS) / time(OMP)\n (w/o Gram)"] = lars / omp
+ return results
+
+
+if __name__ == "__main__":
+ samples_range = np.linspace(1000, 5000, 5).astype(int)
+ features_range = np.linspace(1000, 5000, 5).astype(int)
+ results = compute_bench(samples_range, features_range)
+ max_time = max(np.max(t) for t in results.values())
+
+ import matplotlib.pyplot as plt
+
+ fig = plt.figure("scikit-learn OMP vs. LARS benchmark results")
+ for i, (label, timings) in enumerate(sorted(results.items())):
+ ax = fig.add_subplot(1, 2, i + 1)
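+ # Center the color scale on a ratio of 1 so regions where LARS is faster
+ # and regions where OMP is faster are displayed symmetrically.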
+ vmax = max(1 - timings.min(), -1 + timings.max())
+ plt.matshow(timings, fignum=False, vmin=1 - vmax, vmax=1 + vmax)
+ ax.set_xticklabels([""] + [str(each) for each in samples_range])
+ ax.set_yticklabels([""] + [str(each) for each in features_range])
+ plt.xlabel("n_samples")
+ plt.ylabel("n_features")
+ plt.title(label)
+
+ plt.subplots_adjust(0.1, 0.08, 0.96, 0.98, 0.4, 0.63)
+ ax = plt.axes([0.1, 0.08, 0.8, 0.06])
+ plt.colorbar(cax=ax, orientation="horizontal")
+ plt.show()
diff --git a/auto_building_tools/benchmarks/bench_plot_parallel_pairwise.py b/auto_building_tools/benchmarks/bench_plot_parallel_pairwise.py
new file mode 100644
index 0000000..5b7cf81
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_plot_parallel_pairwise.py
@@ -0,0 +1,48 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
+import time
+
+import matplotlib.pyplot as plt
+
+from sklearn.metrics.pairwise import pairwise_distances, pairwise_kernels
+from sklearn.utils import check_random_state
+
+
+def plot(func):
+ random_state = check_random_state(0)
+ one_core = []
+ multi_core = []
+ sample_sizes = range(1000, 6000, 1000)
+
+ for n_samples in sample_sizes:
+ X = random_state.rand(n_samples, 300)
+
+ start = time.time()
+ func(X, n_jobs=1)
+ one_core.append(time.time() - start)
+
+ start = time.time()
+ func(X, n_jobs=-1)
+ multi_core.append(time.time() - start)
+
+ plt.figure("scikit-learn parallel %s benchmark results" % func.__name__)
+ plt.plot(sample_sizes, one_core, label="one core")
+ plt.plot(sample_sizes, multi_core, label="multi core")
+ plt.xlabel("n_samples")
+ plt.ylabel("Time (s)")
+ plt.title("Parallel %s" % func.__name__)
+ plt.legend()
+
+
+def euclidean_distances(X, n_jobs):
+ return pairwise_distances(X, metric="euclidean", n_jobs=n_jobs)
+
+
+def rbf_kernels(X, n_jobs):
+ return pairwise_kernels(X, metric="rbf", n_jobs=n_jobs, gamma=0.1)
+
+
+plot(euclidean_distances)
+plot(rbf_kernels)
+plt.show()
diff --git a/auto_building_tools/benchmarks/bench_plot_polynomial_kernel_approximation.py b/auto_building_tools/benchmarks/bench_plot_polynomial_kernel_approximation.py
new file mode 100644
index 0000000..1e23e0a
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_plot_polynomial_kernel_approximation.py
@@ -0,0 +1,176 @@
+"""
+========================================================================
+Benchmark for explicit feature map approximation of polynomial kernels
+========================================================================
+
+An example illustrating the approximation of the feature map
+of a homogeneous polynomial kernel.
+
+.. currentmodule:: sklearn.kernel_approximation
+
+It shows how to use :class:`PolynomialCountSketch` and :class:`Nystroem` to
+approximate the feature map of a polynomial kernel for
+classification with an SVM on the digits dataset. Results using a linear
+SVM in the original space, a linear SVM using the approximate mappings
+and a kernelized SVM are compared.
+
+The first plot shows the classification accuracy of Nystroem [2] and
+PolynomialCountSketch [1] as the output dimension (n_components) grows.
+It also shows the accuracy of a linear SVM and a polynomial kernel SVM
+on the same data.
+
+The second plot explores the scalability of PolynomialCountSketch
+and Nystroem. For a sufficiently large output dimension,
+PolynomialCountSketch should be faster, as it is O(n(d + k log k))
+while Nystroem is O(n(dk + k^2)). In addition, Nystroem requires
+a time-consuming training phase, while training is almost immediate
+for PolynomialCountSketch, whose training phase boils down to
+initializing some random variables (because it is data-independent).
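+
+As a rough, illustrative order-of-magnitude comparison (not a measured result):
+with d = 64 input features and k = 1,000 output components, d + k log2(k) is
+about 1e4 operations per sample for PolynomialCountSketch, while dk + k^2 is
+about 1e6 for Nystroem.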
+
+[1] Pham, N., & Pagh, R. (2013, August). Fast and scalable polynomial
+kernels via explicit feature maps. In Proceedings of the 19th ACM SIGKDD
+international conference on Knowledge discovery and data mining (pp. 239-247)
+(https://chbrown.github.io/kdd-2013-usb/kdd/p239.pdf)
+
+[2] Charikar, M., Chen, K., & Farach-Colton, M. (2002, July). Finding frequent
+items in data streams. In International Colloquium on Automata, Languages, and
+Programming (pp. 693-703). Springer, Berlin, Heidelberg.
+(https://people.cs.rutgers.edu/~farach/pubs/FrequentStream.pdf)
+
+"""
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
+# Will use this for timing results
+from time import time
+
+# Some common libraries
+import matplotlib.pyplot as plt
+import numpy as np
+
+from sklearn.datasets import load_digits
+from sklearn.kernel_approximation import Nystroem, PolynomialCountSketch
+from sklearn.model_selection import train_test_split
+from sklearn.pipeline import Pipeline
+
+# Import SVM classifiers and feature map approximation algorithms
+from sklearn.svm import SVC, LinearSVC
+
+# Split data in train and test sets
+X, y = load_digits()["data"], load_digits()["target"]
+X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7)
+
+# Set the range of n_components for our experiments
+out_dims = range(20, 400, 20)
+
+# Evaluate Linear SVM
+lsvm = LinearSVC().fit(X_train, y_train)
+lsvm_score = 100 * lsvm.score(X_test, y_test)
+
+# Evaluate kernelized SVM
+ksvm = SVC(kernel="poly", degree=2, gamma=1.0).fit(X_train, y_train)
+ksvm_score = 100 * ksvm.score(X_test, y_test)
+
+# Evaluate PolynomialCountSketch + LinearSVM
+ps_svm_scores = []
+n_runs = 5
+
+# To compensate for the stochasticity of the method, we average over n_runs runs
+for k in out_dims:
+ score_avg = 0
+ for _ in range(n_runs):
+ ps_svm = Pipeline(
+ [
+ ("PS", PolynomialCountSketch(degree=2, n_components=k)),
+ ("SVM", LinearSVC()),
+ ]
+ )
+ score_avg += ps_svm.fit(X_train, y_train).score(X_test, y_test)
+ ps_svm_scores.append(100 * score_avg / n_runs)
+
+# Evaluate Nystroem + LinearSVM
+ny_svm_scores = []
+n_runs = 5
+
+for k in out_dims:
+ score_avg = 0
+ for _ in range(n_runs):
+ ny_svm = Pipeline(
+ [
+ (
+ "NY",
+ Nystroem(
+ kernel="poly", gamma=1.0, degree=2, coef0=0, n_components=k
+ ),
+ ),
+ ("SVM", LinearSVC()),
+ ]
+ )
+ score_avg += ny_svm.fit(X_train, y_train).score(X_test, y_test)
+ ny_svm_scores.append(100 * score_avg / n_runs)
+
+# Show results
+fig, ax = plt.subplots(figsize=(6, 4))
+ax.set_title("Accuracy results")
+ax.plot(out_dims, ps_svm_scores, label="PolynomialCountSketch + linear SVM", c="orange")
+ax.plot(out_dims, ny_svm_scores, label="Nystroem + linear SVM", c="blue")
+ax.plot(
+ [out_dims[0], out_dims[-1]],
+ [lsvm_score, lsvm_score],
+ label="Linear SVM",
+ c="black",
+ dashes=[2, 2],
+)
+ax.plot(
+ [out_dims[0], out_dims[-1]],
+ [ksvm_score, ksvm_score],
+ label="Poly-kernel SVM",
+ c="red",
+ dashes=[2, 2],
+)
+ax.legend()
+ax.set_xlabel("N_components for PolynomialCountSketch and Nystroem")
+ax.set_ylabel("Accuracy (%)")
+ax.set_xlim([out_dims[0], out_dims[-1]])
+fig.tight_layout()
+
+# Now let's evaluate the scalability of PolynomialCountSketch vs Nystroem.
+# First we generate some fake data with a lot of samples.
+
+fakeData = np.random.randn(10000, 100)
+fakeDataY = np.random.randint(0, high=10, size=(10000))
+
+out_dims = range(500, 6000, 500)
+
+# Evaluate scalability of PolynomialCountSketch as n_components grows
+ps_svm_times = []
+for k in out_dims:
+ ps = PolynomialCountSketch(degree=2, n_components=k)
+
+ start = time()
+ ps.fit_transform(fakeData, None)
+ ps_svm_times.append(time() - start)
+
+# Evaluate scalability of Nystroem as n_components grows
+# This can take a while due to the inefficient training phase
+ny_svm_times = []
+for k in out_dims:
+ ny = Nystroem(kernel="poly", gamma=1.0, degree=2, coef0=0, n_components=k)
+
+ start = time()
+ ny.fit_transform(fakeData, None)
+ ny_svm_times.append(time() - start)
+
+# Show results
+fig, ax = plt.subplots(figsize=(6, 4))
+ax.set_title("Scalability results")
+ax.plot(out_dims, ps_svm_times, label="PolynomialCountSketch", c="orange")
+ax.plot(out_dims, ny_svm_times, label="Nystroem", c="blue")
+ax.legend()
+ax.set_xlabel("N_components for PolynomialCountSketch and Nystroem")
+ax.set_ylabel("fit_transform time \n(s/10.000 samples)")
+ax.set_xlim([out_dims[0], out_dims[-1]])
+fig.tight_layout()
+plt.show()
diff --git a/auto_building_tools/benchmarks/bench_plot_randomized_svd.py b/auto_building_tools/benchmarks/bench_plot_randomized_svd.py
new file mode 100644
index 0000000..6bb5618
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_plot_randomized_svd.py
@@ -0,0 +1,528 @@
+"""
+Benchmarks on the power iterations phase in randomized SVD.
+
+We test on various synthetic and real datasets the effect of increasing
+the number of power iterations in terms of quality of approximation
+and running time. A number greater than 0 should help with noisy matrices,
+which are characterized by a slow spectral decay.
+
+We test several policies for normalizing the power iterations. Normalization
+is crucial to avoid numerical issues.
+
+The quality of the approximation is measured by the spectral norm discrepancy
+between the original input matrix and the reconstructed one (by multiplying
+the randomized_svd's outputs). The spectral norm is equal to the
+largest singular value of a matrix. (3) justifies this choice. However, one can
+notice in these experiments that Frobenius and spectral norms behave
+very similarly in a qualitative sense. Therefore, we suggest running these
+benchmarks with `enable_spectral_norm = False`, as the Frobenius norm is MUCH
+faster to compute.
+
+The benchmarks follow.
+
+(a) plot: time vs norm, varying number of power iterations
+ data: many datasets
+ goal: compare normalization policies and study how the number of power
+ iterations affect time and norm
+
+(b) plot: n_iter vs norm, varying rank of data and number of components for
+ randomized_SVD
+ data: low-rank matrices on which we control the rank
+ goal: study whether the rank of the matrix and the number of components
+ extracted by randomized SVD affect "the optimal" number of power iterations
+
+(c) plot: time vs norm, varying datasets
+ data: many datasets
+ goal: compare default configurations
+
+We compare the following algorithms:
+- randomized_svd(..., power_iteration_normalizer='none')
+- randomized_svd(..., power_iteration_normalizer='LU')
+- randomized_svd(..., power_iteration_normalizer='QR')
+- randomized_svd(..., power_iteration_normalizer='auto')
+- fbpca.pca() from https://github.com/facebook/fbpca (if installed)
+
+Conclusion
+----------
+- n_iter=2 appears to be a good default value
+- power_iteration_normalizer='none' is OK if n_iter is small, otherwise LU
+ gives similar errors to QR but is cheaper. That's what 'auto' implements.
+
+References
+----------
+(1) :arxiv:`"Finding structure with randomness:
+ Stochastic algorithms for constructing approximate matrix decompositions."
+ <0909.4061>`
+ Halko, et al., (2009)
+
+(2) A randomized algorithm for the decomposition of matrices
+ Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert
+
+(3) An implementation of a randomized algorithm for principal component
+ analysis
+ A. Szlam et al. 2014
+"""
+
+# Author: Giorgio Patrini
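+
+# A minimal sketch (example parameters, not the benchmark settings) of the call
+# that `svd_timing` below times:
+#
+#     U, s, Vt = randomized_svd(X, n_components=50, n_iter=2,
+#                               power_iteration_normalizer="auto", random_state=0)
+#
+# When fbpca is installed, fbpca.pca(X, 50, raw=True, n_iter=2) plays the
+# same role.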
+
+import gc
+import os.path
+import pickle
+from collections import defaultdict
+from time import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+import scipy as sp
+
+from sklearn.datasets import (
+ fetch_20newsgroups_vectorized,
+ fetch_lfw_people,
+ fetch_olivetti_faces,
+ fetch_openml,
+ fetch_rcv1,
+ make_low_rank_matrix,
+ make_sparse_uncorrelated,
+)
+from sklearn.utils import gen_batches
+from sklearn.utils._arpack import _init_arpack_v0
+from sklearn.utils.extmath import randomized_svd
+from sklearn.utils.validation import check_random_state
+
+try:
+ import fbpca
+
+ fbpca_available = True
+except ImportError:
+ fbpca_available = False
+
+# If this is enabled, tests are much slower and will crash with the large data
+enable_spectral_norm = False
+
+# TODO: compute approximate spectral norms with the power method as in
+# Estimating the largest eigenvalues by the power and Lanczos methods with
+# a random start, Jacek Kuczynski and Henryk Wozniakowski, SIAM Journal on
+# Matrix Analysis and Applications, 13 (4): 1094-1122, 1992.
+# This approximation is a very fast estimate of the spectral norm, but depends
+# on starting random vectors.
+
+# Determine when to switch to batch computation for matrix norms,
+# in case the reconstructed (dense) matrix is too large
+MAX_MEMORY = int(4e9)
+
+# The following datasets can be downloaded manually from:
+# CIFAR 10: https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
+# SVHN: http://ufldl.stanford.edu/housenumbers/train_32x32.mat
+CIFAR_FOLDER = "./cifar-10-batches-py/"
+SVHN_FOLDER = "./SVHN/"
+
+datasets = [
+ "low rank matrix",
+ "lfw_people",
+ "olivetti_faces",
+ "20newsgroups",
+ "mnist_784",
+ "CIFAR",
+ "a3a",
+ "SVHN",
+ "uncorrelated matrix",
+]
+
+big_sparse_datasets = ["big sparse matrix", "rcv1"]
+
+
+def unpickle(file_name):
+ with open(file_name, "rb") as fo:
+ return pickle.load(fo, encoding="latin1")["data"]
+
+
+def handle_missing_dataset(file_folder):
+ if not os.path.isdir(file_folder):
+ print("%s file folder not found. Test skipped." % file_folder)
+ return 0
+
+
+def get_data(dataset_name):
+ print("Getting dataset: %s" % dataset_name)
+
+ if dataset_name == "lfw_people":
+ X = fetch_lfw_people().data
+ elif dataset_name == "20newsgroups":
+ X = fetch_20newsgroups_vectorized().data[:, :100000]
+ elif dataset_name == "olivetti_faces":
+ X = fetch_olivetti_faces().data
+ elif dataset_name == "rcv1":
+ X = fetch_rcv1().data
+ elif dataset_name == "CIFAR":
+ if handle_missing_dataset(CIFAR_FOLDER) == 0:
+ return
+ X1 = [unpickle("%sdata_batch_%d" % (CIFAR_FOLDER, i + 1)) for i in range(5)]
+ X = np.vstack(X1)
+ del X1
+ elif dataset_name == "SVHN":
+ if handle_missing_dataset(SVHN_FOLDER) == 0:
+ return
+ X1 = sp.io.loadmat("%strain_32x32.mat" % SVHN_FOLDER)["X"]
+ X2 = [X1[:, :, :, i].reshape(32 * 32 * 3) for i in range(X1.shape[3])]
+ X = np.vstack(X2)
+ del X1
+ del X2
+ elif dataset_name == "low rank matrix":
+ X = make_low_rank_matrix(
+ n_samples=500,
+ n_features=int(1e4),
+ effective_rank=100,
+ tail_strength=0.5,
+ random_state=random_state,
+ )
+ elif dataset_name == "uncorrelated matrix":
+ X, _ = make_sparse_uncorrelated(
+ n_samples=500, n_features=10000, random_state=random_state
+ )
+ elif dataset_name == "big sparse matrix":
+ sparsity = int(1e6)
+ size = int(1e6)
+ small_size = int(1e4)
+ data = np.random.normal(0, 1, int(sparsity / 10))
+ data = np.repeat(data, 10)
+ row = np.random.uniform(0, small_size, sparsity)
+ col = np.random.uniform(0, small_size, sparsity)
+ X = sp.sparse.csr_matrix((data, (row, col)), shape=(size, small_size))
+ del data
+ del row
+ del col
+ else:
+ X = fetch_openml(dataset_name).data
+ return X
+
+
+def plot_time_vs_s(time, norm, point_labels, title):
+ plt.figure()
+ colors = ["g", "b", "y"]
+ for i, l in enumerate(sorted(norm.keys())):
+ if l != "fbpca":
+ plt.plot(time[l], norm[l], label=l, marker="o", c=colors.pop())
+ else:
+ plt.plot(time[l], norm[l], label=l, marker="^", c="red")
+
+ for label, x, y in zip(point_labels, list(time[l]), list(norm[l])):
+ plt.annotate(
+ label,
+ xy=(x, y),
+ xytext=(0, -20),
+ textcoords="offset points",
+ ha="right",
+ va="bottom",
+ )
+ plt.legend(loc="upper right")
+ plt.suptitle(title)
+ plt.ylabel("norm discrepancy")
+ plt.xlabel("running time [s]")
+
+
+def scatter_time_vs_s(time, norm, point_labels, title):
+ plt.figure()
+ size = 100
+ for i, l in enumerate(sorted(norm.keys())):
+ if l != "fbpca":
+ plt.scatter(time[l], norm[l], label=l, marker="o", c="b", s=size)
+ for label, x, y in zip(point_labels, list(time[l]), list(norm[l])):
+ plt.annotate(
+ label,
+ xy=(x, y),
+ xytext=(0, -80),
+ textcoords="offset points",
+ ha="right",
+ arrowprops=dict(arrowstyle="->", connectionstyle="arc3"),
+ va="bottom",
+ size=11,
+ rotation=90,
+ )
+ else:
+ plt.scatter(time[l], norm[l], label=l, marker="^", c="red", s=size)
+ for label, x, y in zip(point_labels, list(time[l]), list(norm[l])):
+ plt.annotate(
+ label,
+ xy=(x, y),
+ xytext=(0, 30),
+ textcoords="offset points",
+ ha="right",
+ arrowprops=dict(arrowstyle="->", connectionstyle="arc3"),
+ va="bottom",
+ size=11,
+ rotation=90,
+ )
+
+ plt.legend(loc="best")
+ plt.suptitle(title)
+ plt.ylabel("norm discrepancy")
+ plt.xlabel("running time [s]")
+
+
+def plot_power_iter_vs_s(power_iter, s, title):
+ plt.figure()
+ for l in sorted(s.keys()):
+ plt.plot(power_iter, s[l], label=l, marker="o")
+ plt.legend(loc="lower right", prop={"size": 10})
+ plt.suptitle(title)
+ plt.ylabel("norm discrepancy")
+ plt.xlabel("n_iter")
+
+
+def svd_timing(
+ X, n_comps, n_iter, n_oversamples, power_iteration_normalizer="auto", method=None
+):
+ """
+ Measure time for decomposition
+ """
+ print("... running SVD ...")
+ if method != "fbpca":
+ gc.collect()
+ t0 = time()
+ U, mu, V = randomized_svd(
+ X,
+ n_comps,
+ n_oversamples=n_oversamples,
+ n_iter=n_iter,
+ power_iteration_normalizer=power_iteration_normalizer,
+ random_state=random_state,
+ transpose=False,
+ )
+ call_time = time() - t0
+ else:
+ gc.collect()
+ t0 = time()
+ # There is a different convention for l here
+ U, mu, V = fbpca.pca(
+ X, n_comps, raw=True, n_iter=n_iter, l=n_oversamples + n_comps
+ )
+ call_time = time() - t0
+
+ return U, mu, V, call_time
+
+
+def norm_diff(A, norm=2, msg=True, random_state=None):
+ """
+ Compute the norm diff with the original matrix, when randomized
+ SVD is called with *params.
+
+ norm: 2 => spectral; 'fro' => Frobenius
+ """
+
+ if msg:
+ print("... computing %s norm ..." % norm)
+ if norm == 2:
+ # s = sp.linalg.norm(A, ord=2) # slow
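+ # The spectral norm is the largest singular value, so a truncated SVD with
+ # k=1 (ARPACK, with a deterministic start vector v0) is much cheaper than a
+ # full dense norm computation.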
+ v0 = _init_arpack_v0(min(A.shape), random_state)
+ value = sp.sparse.linalg.svds(A, k=1, return_singular_vectors=False, v0=v0)
+ else:
+ if sp.sparse.issparse(A):
+ value = sp.sparse.linalg.norm(A, ord=norm)
+ else:
+ value = sp.linalg.norm(A, ord=norm)
+ return value
+
+
+def scalable_frobenius_norm_discrepancy(X, U, s, V):
+ if not sp.sparse.issparse(X) or (
+ X.shape[0] * X.shape[1] * X.dtype.itemsize < MAX_MEMORY
+ ):
+ # if the input is not sparse or sparse but not too big,
+ # U.dot(np.diag(s).dot(V)) will fit in RAM
+ A = X - U.dot(np.diag(s).dot(V))
+ return norm_diff(A, norm="fro")
+
+ print("... computing fro norm by batches...")
+ batch_size = 1000
+ Vhat = np.diag(s).dot(V)
+ cum_norm = 0.0
+ for batch in gen_batches(X.shape[0], batch_size):
+ M = X[batch, :] - U[batch, :].dot(Vhat)
+ # Frobenius norms of row blocks combine through their squares:
+ # ||A||_F^2 = sum_i ||A_i||_F^2, so accumulate the squared block norms.
+ cum_norm += norm_diff(M, norm="fro", msg=False) ** 2
+ return np.sqrt(cum_norm)
+
+
+def bench_a(X, dataset_name, power_iter, n_oversamples, n_comps):
+ all_time = defaultdict(list)
+ if enable_spectral_norm:
+ all_spectral = defaultdict(list)
+ X_spectral_norm = norm_diff(X, norm=2, msg=False, random_state=0)
+ all_frobenius = defaultdict(list)
+ X_fro_norm = norm_diff(X, norm="fro", msg=False)
+
+ for pi in power_iter:
+ for pm in ["none", "LU", "QR"]:
+ print("n_iter = %d on sklearn - %s" % (pi, pm))
+ U, s, V, time = svd_timing(
+ X,
+ n_comps,
+ n_iter=pi,
+ power_iteration_normalizer=pm,
+ n_oversamples=n_oversamples,
+ )
+ label = "sklearn - %s" % pm
+ all_time[label].append(time)
+ if enable_spectral_norm:
+ A = U.dot(np.diag(s).dot(V))
+ all_spectral[label].append(
+ norm_diff(X - A, norm=2, random_state=0) / X_spectral_norm
+ )
+ f = scalable_frobenius_norm_discrepancy(X, U, s, V)
+ all_frobenius[label].append(f / X_fro_norm)
+
+ if fbpca_available:
+ print("n_iter = %d on fbca" % (pi))
+ U, s, V, time = svd_timing(
+ X,
+ n_comps,
+ n_iter=pi,
+ power_iteration_normalizer=pm,
+ n_oversamples=n_oversamples,
+ method="fbpca",
+ )
+ label = "fbpca"
+ all_time[label].append(time)
+ if enable_spectral_norm:
+ A = U.dot(np.diag(s).dot(V))
+ all_spectral[label].append(
+ norm_diff(X - A, norm=2, random_state=0) / X_spectral_norm
+ )
+ f = scalable_frobenius_norm_discrepancy(X, U, s, V)
+ all_frobenius[label].append(f / X_fro_norm)
+
+ if enable_spectral_norm:
+ title = "%s: spectral norm diff vs running time" % (dataset_name)
+ plot_time_vs_s(all_time, all_spectral, power_iter, title)
+ title = "%s: Frobenius norm diff vs running time" % (dataset_name)
+ plot_time_vs_s(all_time, all_frobenius, power_iter, title)
+
+
+def bench_b(power_list):
+ n_samples, n_features = 1000, 10000
+ data_params = {
+ "n_samples": n_samples,
+ "n_features": n_features,
+ "tail_strength": 0.7,
+ "random_state": random_state,
+ }
+ dataset_name = "low rank matrix %d x %d" % (n_samples, n_features)
+ ranks = [10, 50, 100]
+
+ if enable_spectral_norm:
+ all_spectral = defaultdict(list)
+ all_frobenius = defaultdict(list)
+ for rank in ranks:
+ X = make_low_rank_matrix(effective_rank=rank, **data_params)
+ if enable_spectral_norm:
+ X_spectral_norm = norm_diff(X, norm=2, msg=False, random_state=0)
+ X_fro_norm = norm_diff(X, norm="fro", msg=False)
+
+ for n_comp in [int(rank / 2), rank, rank * 2]:
+ label = "rank=%d, n_comp=%d" % (rank, n_comp)
+ print(label)
+ for pi in power_list:
+ U, s, V, _ = svd_timing(
+ X,
+ n_comp,
+ n_iter=pi,
+ n_oversamples=2,
+ power_iteration_normalizer="LU",
+ )
+ if enable_spectral_norm:
+ A = U.dot(np.diag(s).dot(V))
+ all_spectral[label].append(
+ norm_diff(X - A, norm=2, random_state=0) / X_spectral_norm
+ )
+ f = scalable_frobenius_norm_discrepancy(X, U, s, V)
+ all_frobenius[label].append(f / X_fro_norm)
+
+ if enable_spectral_norm:
+ title = "%s: spectral norm diff vs n power iteration" % (dataset_name)
+ plot_power_iter_vs_s(power_list, all_spectral, title)
+ title = "%s: Frobenius norm diff vs n power iteration" % (dataset_name)
+ plot_power_iter_vs_s(power_list, all_frobenius, title)
+
+
+def bench_c(datasets, n_comps):
+ all_time = defaultdict(list)
+ if enable_spectral_norm:
+ all_spectral = defaultdict(list)
+ all_frobenius = defaultdict(list)
+
+ for dataset_name in datasets:
+ X = get_data(dataset_name)
+ if X is None:
+ continue
+
+ if enable_spectral_norm:
+ X_spectral_norm = norm_diff(X, norm=2, msg=False, random_state=0)
+ X_fro_norm = norm_diff(X, norm="fro", msg=False)
+ n_comps = np.minimum(n_comps, np.min(X.shape))
+
+ label = "sklearn"
+ print("%s %d x %d - %s" % (dataset_name, X.shape[0], X.shape[1], label))
+ U, s, V, time = svd_timing(X, n_comps, n_iter=2, n_oversamples=10, method=label)
+
+ all_time[label].append(time)
+ if enable_spectral_norm:
+ A = U.dot(np.diag(s).dot(V))
+ all_spectral[label].append(
+ norm_diff(X - A, norm=2, random_state=0) / X_spectral_norm
+ )
+ f = scalable_frobenius_norm_discrepancy(X, U, s, V)
+ all_frobenius[label].append(f / X_fro_norm)
+
+ if fbpca_available:
+ label = "fbpca"
+ print("%s %d x %d - %s" % (dataset_name, X.shape[0], X.shape[1], label))
+ U, s, V, time = svd_timing(
+ X, n_comps, n_iter=2, n_oversamples=2, method=label
+ )
+ all_time[label].append(time)
+ if enable_spectral_norm:
+ A = U.dot(np.diag(s).dot(V))
+ all_spectral[label].append(
+ norm_diff(X - A, norm=2, random_state=0) / X_spectral_norm
+ )
+ f = scalable_frobenius_norm_discrepancy(X, U, s, V)
+ all_frobenius[label].append(f / X_fro_norm)
+
+ if len(all_time) == 0:
+ raise ValueError("No tests ran. Aborting.")
+
+ if enable_spectral_norm:
+ title = "normalized spectral norm diff vs running time"
+ scatter_time_vs_s(all_time, all_spectral, datasets, title)
+ title = "normalized Frobenius norm diff vs running time"
+ scatter_time_vs_s(all_time, all_frobenius, datasets, title)
+
+
+if __name__ == "__main__":
+ random_state = check_random_state(1234)
+
+ power_iter = np.arange(0, 6)
+ n_comps = 50
+
+ for dataset_name in datasets:
+ X = get_data(dataset_name)
+ if X is None:
+ continue
+ print(
+ " >>>>>> Benching sklearn and fbpca on %s %d x %d"
+ % (dataset_name, X.shape[0], X.shape[1])
+ )
+ bench_a(
+ X,
+ dataset_name,
+ power_iter,
+ n_oversamples=2,
+ n_comps=np.minimum(n_comps, np.min(X.shape)),
+ )
+
+ print(" >>>>>> Benching on simulated low rank matrix with variable rank")
+ bench_b(power_iter)
+
+ print(" >>>>>> Benching sklearn and fbpca default configurations")
+ bench_c(datasets + big_sparse_datasets, n_comps)
+
+ plt.show()
diff --git a/auto_building_tools/benchmarks/bench_plot_svd.py b/auto_building_tools/benchmarks/bench_plot_svd.py
new file mode 100644
index 0000000..ed99d1c
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_plot_svd.py
@@ -0,0 +1,80 @@
+"""Benchmarks of Singular Value Decomposition (Exact and Approximate)
+
+The data is mostly low rank but has a fat, heavy tail.
+"""
+
+import gc
+from collections import defaultdict
+from time import time
+
+import numpy as np
+from scipy.linalg import svd
+
+from sklearn.datasets import make_low_rank_matrix
+from sklearn.utils.extmath import randomized_svd
+
+
+def compute_bench(samples_range, features_range, n_iter=3, rank=50):
+ it = 0
+
+ results = defaultdict(lambda: [])
+
+ max_it = len(samples_range) * len(features_range)
+ for n_samples in samples_range:
+ for n_features in features_range:
+ it += 1
+ print("====================")
+ print("Iteration %03d of %03d" % (it, max_it))
+ print("====================")
+ X = make_low_rank_matrix(
+ n_samples, n_features, effective_rank=rank, tail_strength=0.2
+ )
+
+ gc.collect()
+ print("benchmarking scipy svd: ")
+ tstart = time()
+ svd(X, full_matrices=False)
+ results["scipy svd"].append(time() - tstart)
+
+ gc.collect()
+ print("benchmarking scikit-learn randomized_svd: n_iter=0")
+ tstart = time()
+ randomized_svd(X, rank, n_iter=0)
+ results["scikit-learn randomized_svd (n_iter=0)"].append(time() - tstart)
+
+ gc.collect()
+ print("benchmarking scikit-learn randomized_svd: n_iter=%d " % n_iter)
+ tstart = time()
+ randomized_svd(X, rank, n_iter=n_iter)
+ results["scikit-learn randomized_svd (n_iter=%d)" % n_iter].append(
+ time() - tstart
+ )
+
+ return results
+
+
+if __name__ == "__main__":
+ from mpl_toolkits.mplot3d import axes3d # noqa register the 3d projection
+ import matplotlib.pyplot as plt
+
+ samples_range = np.linspace(2, 1000, 4).astype(int)
+ features_range = np.linspace(2, 1000, 4).astype(int)
+ results = compute_bench(samples_range, features_range)
+
+ label = "scikit-learn singular value decomposition benchmark results"
+ fig = plt.figure(label)
+ # fig.gca(projection=...) is no longer supported by recent Matplotlib
+ ax = fig.add_subplot(projection="3d")
+ for c, (label, timings) in zip("rbg", sorted(results.items())):
+ X, Y = np.meshgrid(samples_range, features_range)
+ Z = np.asarray(timings).reshape(samples_range.shape[0], features_range.shape[0])
+ # plot the actual surface
+ ax.plot_surface(X, Y, Z, rstride=8, cstride=8, alpha=0.3, color=c)
+ # dummy point plot to stick the legend to since surface plots do not
+ # support legends (yet?)
+ ax.plot([1], [1], [1], color=c, label=label)
+
+ ax.set_xlabel("n_samples")
+ ax.set_ylabel("n_features")
+ ax.set_zlabel("Time (s)")
+ ax.legend()
+ plt.show()
diff --git a/auto_building_tools/benchmarks/bench_plot_ward.py b/auto_building_tools/benchmarks/bench_plot_ward.py
new file mode 100644
index 0000000..fe5cee2
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_plot_ward.py
@@ -0,0 +1,48 @@
+"""
+Benchmark scikit-learn's Ward implementation against SciPy's
+"""
+
+import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+from scipy.cluster import hierarchy
+
+from sklearn.cluster import AgglomerativeClustering
+
+ward = AgglomerativeClustering(n_clusters=3, linkage="ward")
+
+n_samples = np.logspace(0.5, 3, 9)
+n_features = np.logspace(1, 3.5, 7)
+N_samples, N_features = np.meshgrid(n_samples, n_features)
+scikits_time = np.zeros(N_samples.shape)
+scipy_time = np.zeros(N_samples.shape)
+
+for i, n in enumerate(n_samples):
+ for j, p in enumerate(n_features):
+ # n and p come from np.logspace and are floats; NumPy needs integer sizes
+ X = np.random.normal(size=(int(n), int(p)))
+ t0 = time.time()
+ ward.fit(X)
+ scikits_time[j, i] = time.time() - t0
+ t0 = time.time()
+ hierarchy.ward(X)
+ scipy_time[j, i] = time.time() - t0
+
+ratio = scikits_time / scipy_time
+
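+# log(ratio) > 0 means scikit-learn is slower than SciPy for that
+# (n_samples, n_features) cell; the contour drawn at ratio == 1 marks the
+# break-even frontier.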
+plt.figure("scikit-learn Ward's method benchmark results")
+plt.imshow(np.log(ratio), aspect="auto", origin="lower")
+plt.colorbar()
+plt.contour(
+ ratio,
+ levels=[
+ 1,
+ ],
+ colors="k",
+)
+plt.yticks(range(len(n_features)), n_features.astype(int))
+plt.ylabel("N features")
+plt.xticks(range(len(n_samples)), n_samples.astype(int))
+plt.xlabel("N samples")
+plt.title("Scikit's time, in units of scipy time (log)")
+plt.show()
diff --git a/auto_building_tools/benchmarks/bench_random_projections.py b/auto_building_tools/benchmarks/bench_random_projections.py
new file mode 100644
index 0000000..6551de6
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_random_projections.py
@@ -0,0 +1,304 @@
+"""
+===========================
+Random projection benchmark
+===========================
+
+Benchmarks for random projections.
+
+"""
+
+import collections
+import gc
+import optparse
+import sys
+from datetime import datetime
+
+import numpy as np
+import scipy.sparse as sp
+
+from sklearn import clone
+from sklearn.random_projection import (
+ GaussianRandomProjection,
+ SparseRandomProjection,
+ johnson_lindenstrauss_min_dim,
+)
+
+
+def type_auto_or_float(val):
+ if val == "auto":
+ return "auto"
+ else:
+ return float(val)
+
+
+def type_auto_or_int(val):
+ if val == "auto":
+ return "auto"
+ else:
+ return int(val)
+
+
+def compute_time(t_start, delta):
+ mu_second = 0.0 + 10**6 # number of microseconds in a second
+
+ return delta.seconds + delta.microseconds / mu_second
+
+
+def bench_scikit_transformer(X, transformer):
+ gc.collect()
+
+ clf = clone(transformer)
+
+ # start time
+ t_start = datetime.now()
+ clf.fit(X)
+ delta = datetime.now() - t_start
+ # stop time
+ time_to_fit = compute_time(t_start, delta)
+
+ # start time
+ t_start = datetime.now()
+ clf.transform(X)
+ delta = datetime.now() - t_start
+ # stop time
+ time_to_transform = compute_time(t_start, delta)
+
+ return time_to_fit, time_to_transform
+
+
+# Make some random data with uniformly located non-zero entries and
+# Gaussian-distributed values
+def make_sparse_random_data(n_samples, n_features, n_nonzeros, random_state=None):
+ rng = np.random.RandomState(random_state)
+ data_coo = sp.coo_matrix(
+ (
+ rng.randn(n_nonzeros),
+ (
+ rng.randint(n_samples, size=n_nonzeros),
+ rng.randint(n_features, size=n_nonzeros),
+ ),
+ ),
+ shape=(n_samples, n_features),
+ )
+ return data_coo.toarray(), data_coo.tocsr()
+
+
+def print_row(clf_type, time_fit, time_transform):
+ print(
+ "%s | %s | %s"
+ % (
+ clf_type.ljust(30),
+ ("%.4fs" % time_fit).center(12),
+ ("%.4fs" % time_transform).center(12),
+ )
+ )
+
+
+if __name__ == "__main__":
+ ###########################################################################
+ # Option parser
+ ###########################################################################
+ op = optparse.OptionParser()
+ op.add_option(
+ "--n-times",
+ dest="n_times",
+ default=5,
+ type=int,
+ help="Benchmark results are average over n_times experiments",
+ )
+
+ op.add_option(
+ "--n-features",
+ dest="n_features",
+ default=10**4,
+ type=int,
+ help="Number of features in the benchmarks",
+ )
+
+ op.add_option(
+ "--n-components",
+ dest="n_components",
+ default="auto",
+ help="Size of the random subspace. ('auto' or int > 0)",
+ )
+
+ op.add_option(
+ "--ratio-nonzeros",
+ dest="ratio_nonzeros",
+ default=10**-3,
+ type=float,
+ help="Number of features in the benchmarks",
+ )
+
+ op.add_option(
+ "--n-samples",
+ dest="n_samples",
+ default=500,
+ type=int,
+ help="Number of samples in the benchmarks",
+ )
+
+ op.add_option(
+ "--random-seed",
+ dest="random_seed",
+ default=13,
+ type=int,
+ help="Seed used by the random number generators.",
+ )
+
+ op.add_option(
+ "--density",
+ dest="density",
+ default=1 / 3,
+ help=(
+ "Density used by the sparse random projection. ('auto' or float (0.0, 1.0]"
+ ),
+ )
+
+ op.add_option(
+ "--eps",
+ dest="eps",
+ default=0.5,
+ type=float,
+ help="See the documentation of the underlying transformers.",
+ )
+
+ op.add_option(
+ "--transformers",
+ dest="selected_transformers",
+ default="GaussianRandomProjection,SparseRandomProjection",
+ type=str,
+ help=(
+ "Comma-separated list of transformer to benchmark. "
+ "Default: %default. Available: "
+ "GaussianRandomProjection,SparseRandomProjection"
+ ),
+ )
+
+ op.add_option(
+ "--dense",
+ dest="dense",
+ default=False,
+ action="store_true",
+ help="Set input space as a dense matrix.",
+ )
+
+ (opts, args) = op.parse_args()
+ if len(args) > 0:
+ op.error("this script takes no arguments.")
+ sys.exit(1)
+ opts.n_components = type_auto_or_int(opts.n_components)
+ opts.density = type_auto_or_float(opts.density)
+ selected_transformers = opts.selected_transformers.split(",")
+
+ ###########################################################################
+ # Generate dataset
+ ###########################################################################
+ n_nonzeros = int(opts.ratio_nonzeros * opts.n_features)
+
+ print("Dataset statistics")
+ print("===========================")
+ print("n_samples \t= %s" % opts.n_samples)
+ print("n_features \t= %s" % opts.n_features)
+ if opts.n_components == "auto":
+ print(
+ "n_components \t= %s (auto)"
+ % johnson_lindenstrauss_min_dim(n_samples=opts.n_samples, eps=opts.eps)
+ )
+ else:
+ print("n_components \t= %s" % opts.n_components)
+ print("n_elements \t= %s" % (opts.n_features * opts.n_samples))
+ print("n_nonzeros \t= %s per feature" % n_nonzeros)
+ print("ratio_nonzeros \t= %s" % opts.ratio_nonzeros)
+ print("")
+
+ ###########################################################################
+ # Set transformer input
+ ###########################################################################
+ transformers = {}
+
+ ###########################################################################
+ # Set GaussianRandomProjection input
+ gaussian_matrix_params = {
+ "n_components": opts.n_components,
+ "random_state": opts.random_seed,
+ }
+ transformers["GaussianRandomProjection"] = GaussianRandomProjection(
+ **gaussian_matrix_params
+ )
+
+ ###########################################################################
+ # Set SparseRandomProjection input
+ sparse_matrix_params = {
+ "n_components": opts.n_components,
+ "random_state": opts.random_seed,
+ "density": opts.density,
+ "eps": opts.eps,
+ }
+
+ transformers["SparseRandomProjection"] = SparseRandomProjection(
+ **sparse_matrix_params
+ )
+
+ ###########################################################################
+ # Perform benchmark
+ ###########################################################################
+ time_fit = collections.defaultdict(list)
+ time_transform = collections.defaultdict(list)
+
+ print("Benchmarks")
+ print("===========================")
+ print("Generate dataset benchmarks... ", end="")
+ X_dense, X_sparse = make_sparse_random_data(
+ opts.n_samples, opts.n_features, n_nonzeros, random_state=opts.random_seed
+ )
+ X = X_dense if opts.dense else X_sparse
+ print("done")
+
+ for name in selected_transformers:
+ print("Perform benchmarks for %s..." % name)
+
+ for iteration in range(opts.n_times):
+ print("\titer %s..." % iteration, end="")
+ # benchmark on the dense or sparse input selected via --dense
+ time_to_fit, time_to_transform = bench_scikit_transformer(
+ X, transformers[name]
+ )
+ time_fit[name].append(time_to_fit)
+ time_transform[name].append(time_to_transform)
+ print("done")
+
+ print("")
+
+ ###########################################################################
+ # Print results
+ ###########################################################################
+ print("Script arguments")
+ print("===========================")
+ arguments = vars(opts)
+ print(
+ "%s \t | %s "
+ % (
+ "Arguments".ljust(16),
+ "Value".center(12),
+ )
+ )
+ print(25 * "-" + ("|" + "-" * 14) * 1)
+ for key, value in arguments.items():
+ print("%s \t | %s " % (str(key).ljust(16), str(value).strip().center(12)))
+ print("")
+
+ print("Transformer performance:")
+ print("===========================")
+ print("Results are averaged over %s repetition(s)." % opts.n_times)
+ print("")
+ print(
+ "%s | %s | %s"
+ % ("Transformer".ljust(30), "fit".center(12), "transform".center(12))
+ )
+ print(31 * "-" + ("|" + "-" * 14) * 2)
+
+ for name in sorted(selected_transformers):
+ print_row(name, np.mean(time_fit[name]), np.mean(time_transform[name]))
+
+ print("")
+ print("")
diff --git a/auto_building_tools/benchmarks/bench_rcv1_logreg_convergence.py b/auto_building_tools/benchmarks/bench_rcv1_logreg_convergence.py
new file mode 100644
index 0000000..27e7307
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_rcv1_logreg_convergence.py
@@ -0,0 +1,311 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
+import gc
+import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+from joblib import Memory
+
+from sklearn.datasets import fetch_rcv1
+from sklearn.linear_model import LogisticRegression, SGDClassifier
+from sklearn.linear_model._sag import get_auto_step_size
+
+try:
+ import lightning.classification as lightning_clf
+except ImportError:
+ lightning_clf = None
+
+m = Memory(location=".", verbose=0)
+
+
+# compute logistic loss
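+# i.e. mean_i log(1 + exp(-y_i * (X_i . w + intercept))) + w.w / (2 * C * n_samples)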
+def get_loss(w, intercept, myX, myy, C):
+ n_samples = myX.shape[0]
+ w = w.ravel()
+ p = np.mean(np.log(1.0 + np.exp(-myy * (myX.dot(w) + intercept))))
+ print("%f + %f" % (p, w.dot(w) / 2.0 / C / n_samples))
+ p += w.dot(w) / 2.0 / C / n_samples
+ return p
+
+
+# We use joblib to cache individual fits. Note that we do not pass the dataset
+# as argument as the hashing would be too slow, so we assume that the dataset
+# never changes.
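+# bench_one therefore reads X, y, X_test and y_test from module-level globals
+# defined further down in this script.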
+@m.cache()
+def bench_one(name, clf_type, clf_params, n_iter):
+ clf = clf_type(**clf_params)
+ try:
+ clf.set_params(max_iter=n_iter, random_state=42)
+ except Exception:
+ clf.set_params(n_iter=n_iter, random_state=42)
+
+ st = time.time()
+ clf.fit(X, y)
+ end = time.time()
+
+ try:
+ C = 1.0 / clf.alpha / n_samples
+ except Exception:
+ C = clf.C
+
+ try:
+ intercept = clf.intercept_
+ except Exception:
+ intercept = 0.0
+
+ train_loss = get_loss(clf.coef_, intercept, X, y, C)
+ train_score = clf.score(X, y)
+ test_score = clf.score(X_test, y_test)
+ duration = end - st
+
+ return train_loss, train_score, test_score, duration
+
+
+def bench(clfs):
+ for (
+ name,
+ clf,
+ iter_range,
+ train_losses,
+ train_scores,
+ test_scores,
+ durations,
+ ) in clfs:
+ print("training %s" % name)
+ clf_type = type(clf)
+ clf_params = clf.get_params()
+
+ for n_iter in iter_range:
+ gc.collect()
+
+ train_loss, train_score, test_score, duration = bench_one(
+ name, clf_type, clf_params, n_iter
+ )
+
+ train_losses.append(train_loss)
+ train_scores.append(train_score)
+ test_scores.append(test_score)
+ durations.append(duration)
+ print("classifier: %s" % name)
+ print("train_loss: %.8f" % train_loss)
+ print("train_score: %.8f" % train_score)
+ print("test_score: %.8f" % test_score)
+ print("time for fit: %.8f seconds" % duration)
+ print("")
+
+ print("")
+ return clfs
+
+
+def plot_train_losses(clfs):
+ plt.figure()
+ for name, _, _, train_losses, _, _, durations in clfs:
+ plt.plot(durations, train_losses, "-o", label=name)
+ plt.legend(loc=0)
+ plt.xlabel("seconds")
+ plt.ylabel("train loss")
+
+
+def plot_train_scores(clfs):
+ plt.figure()
+ for name, _, _, _, train_scores, _, durations in clfs:
+ plt.plot(durations, train_scores, "-o", label=name)
+ plt.legend(loc=0)
+ plt.xlabel("seconds")
+ plt.ylabel("train score")
+ plt.ylim((0.92, 0.96))
+
+
+def plot_test_scores(clfs):
+ plt.figure()
+ for name, _, _, _, _, test_scores, durations in clfs:
+ plt.plot(durations, test_scores, "-o", label=name)
+ plt.legend(loc=0)
+ plt.xlabel("seconds")
+ plt.ylabel("test score")
+ plt.ylim((0.92, 0.96))
+
+
+def plot_dloss(clfs):
+ plt.figure()
+ pobj_final = []
+ for name, _, _, train_losses, _, _, durations in clfs:
+ pobj_final.append(train_losses[-1])
+
+ indices = np.argsort(pobj_final)
+ pobj_best = pobj_final[indices[0]]
+
+ for name, _, _, train_losses, _, _, durations in clfs:
+ log_pobj = np.log(abs(np.array(train_losses) - pobj_best)) / np.log(10)
+
+ plt.plot(durations, log_pobj, "-o", label=name)
+ plt.legend(loc=0)
+ plt.xlabel("seconds")
+ plt.ylabel("log(best - train_loss)")
+
+
+def get_max_squared_sum(X):
+ """Get the maximum row-wise sum of squares"""
+ return np.sum(X**2, axis=1).max()
+
+
+rcv1 = fetch_rcv1()
+X = rcv1.data
+n_samples, n_features = X.shape
+
+# consider the binary classification problem 'CCAT' vs the rest
+ccat_idx = rcv1.target_names.tolist().index("CCAT")
+y = rcv1.target.tocsc()[:, ccat_idx].toarray().ravel().astype(np.float64)
+y[y == 0] = -1
+
+# parameters
+C = 1.0
+fit_intercept = True
+tol = 1.0e-14
+
+# max_iter range
+sgd_iter_range = list(range(1, 121, 10))
+newton_iter_range = list(range(1, 25, 3))
+lbfgs_iter_range = list(range(1, 242, 12))
+liblinear_iter_range = list(range(1, 37, 3))
+liblinear_dual_iter_range = list(range(1, 85, 6))
+sag_iter_range = list(range(1, 37, 3))
+
+clfs = [
+ (
+ "LR-liblinear",
+ LogisticRegression(
+ C=C,
+ tol=tol,
+ solver="liblinear",
+ fit_intercept=fit_intercept,
+ intercept_scaling=1,
+ ),
+ liblinear_iter_range,
+ [],
+ [],
+ [],
+ [],
+ ),
+ (
+ "LR-liblinear-dual",
+ LogisticRegression(
+ C=C,
+ tol=tol,
+ dual=True,
+ solver="liblinear",
+ fit_intercept=fit_intercept,
+ intercept_scaling=1,
+ ),
+ liblinear_dual_iter_range,
+ [],
+ [],
+ [],
+ [],
+ ),
+ (
+ "LR-SAG",
+ LogisticRegression(C=C, tol=tol, solver="sag", fit_intercept=fit_intercept),
+ sag_iter_range,
+ [],
+ [],
+ [],
+ [],
+ ),
+ (
+ "LR-newton-cg",
+ LogisticRegression(
+ C=C, tol=tol, solver="newton-cg", fit_intercept=fit_intercept
+ ),
+ newton_iter_range,
+ [],
+ [],
+ [],
+ [],
+ ),
+ (
+ "LR-lbfgs",
+ LogisticRegression(C=C, tol=tol, solver="lbfgs", fit_intercept=fit_intercept),
+ lbfgs_iter_range,
+ [],
+ [],
+ [],
+ [],
+ ),
+ (
+ "SGD",
+ SGDClassifier(
+ alpha=1.0 / C / n_samples,
+ penalty="l2",
+ loss="log_loss",
+ fit_intercept=fit_intercept,
+ verbose=0,
+ ),
+ sgd_iter_range,
+ [],
+ [],
+ [],
+ [],
+ ),
+]
+
+
+if lightning_clf is not None and not fit_intercept:
+ alpha = 1.0 / C / n_samples
+ # compute the same step_size as in LR-SAG
+ max_squared_sum = get_max_squared_sum(X)
+ step_size = get_auto_step_size(max_squared_sum, alpha, "log", fit_intercept)
+
+ clfs.append(
+ (
+ "Lightning-SVRG",
+ lightning_clf.SVRGClassifier(
+ alpha=alpha, eta=step_size, tol=tol, loss="log"
+ ),
+ sag_iter_range,
+ [],
+ [],
+ [],
+ [],
+ )
+ )
+ clfs.append(
+ (
+ "Lightning-SAG",
+ lightning_clf.SAGClassifier(
+ alpha=alpha, eta=step_size, tol=tol, loss="log"
+ ),
+ sag_iter_range,
+ [],
+ [],
+ [],
+ [],
+ )
+ )
+
+ # We keep only 200 features, to have a dense dataset,
+ # and compare to lightning SAG, which seems incorrect in the sparse case.
+ X_csc = X.tocsc()
+ nnz_in_each_features = X_csc.indptr[1:] - X_csc.indptr[:-1]
+ X = X_csc[:, np.argsort(nnz_in_each_features)[-200:]]
+ X = X.toarray()
+ print("dataset: %.3f MB" % (X.nbytes / 1e6))
+
+
+# Split training and testing. Switch train and test subset compared to
+# LYRL2004 split, to have a larger training dataset.
+n = 23149
+X_test = X[:n, :]
+y_test = y[:n]
+X = X[n:, :]
+y = y[n:]
+
+clfs = bench(clfs)
+
+plot_train_scores(clfs)
+plot_test_scores(clfs)
+plot_train_losses(clfs)
+plot_dloss(clfs)
+plt.show()
diff --git a/auto_building_tools/benchmarks/bench_saga.py b/auto_building_tools/benchmarks/bench_saga.py
new file mode 100644
index 0000000..97d4ba7
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_saga.py
@@ -0,0 +1,386 @@
+"""Author: Arthur Mensch, Nelle Varoquaux
+
+Benchmarks of sklearn SAGA vs lightning SAGA vs Liblinear. Shows the gain
+in using multinomial logistic regression in term of learning time.
+"""
+
+import json
+import os
+import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+from sklearn.datasets import (
+ fetch_20newsgroups_vectorized,
+ fetch_rcv1,
+ load_digits,
+ load_iris,
+)
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import log_loss
+from sklearn.model_selection import train_test_split
+from sklearn.multiclass import OneVsRestClassifier
+from sklearn.preprocessing import LabelBinarizer, LabelEncoder
+from sklearn.utils.extmath import safe_sparse_dot, softmax
+from sklearn.utils.parallel import Parallel, delayed
+
+
+def fit_single(
+ solver,
+ X,
+ y,
+ penalty="l2",
+ single_target=True,
+ C=1,
+ max_iter=10,
+ skip_slow=False,
+ dtype=np.float64,
+):
+ if skip_slow and solver == "lightning" and penalty == "l1":
+ print("skip_slowping l1 logistic regression with solver lightning.")
+ return
+
+ print(
+ "Solving %s logistic regression with penalty %s, solver %s."
+ % ("binary" if single_target else "multinomial", penalty, solver)
+ )
+
+ if solver == "lightning":
+ from lightning.classification import SAGAClassifier
+
+ if single_target or solver not in ["sag", "saga"]:
+ multi_class = "ovr"
+ else:
+ multi_class = "multinomial"
+ X = X.astype(dtype)
+ y = y.astype(dtype)
+ X_train, X_test, y_train, y_test = train_test_split(
+ X, y, random_state=42, stratify=y
+ )
+ n_samples = X_train.shape[0]
+ n_classes = np.unique(y_train).shape[0]
+ test_scores = [1]
+ train_scores = [1]
+ accuracies = [1 / n_classes]
+ times = [0]
+
+ if penalty == "l2":
+ alpha = 1.0 / (C * n_samples)
+ beta = 0
+ lightning_penalty = None
+ else:
+ alpha = 0.0
+ beta = 1.0 / (C * n_samples)
+ lightning_penalty = "l1"
+
+ for this_max_iter in range(1, max_iter + 1, 2):
+ print(
+ "[%s, %s, %s] Max iter: %s"
+ % (
+ "binary" if single_target else "multinomial",
+ penalty,
+ solver,
+ this_max_iter,
+ )
+ )
+ if solver == "lightning":
+ lr = SAGAClassifier(
+ loss="log",
+ alpha=alpha,
+ beta=beta,
+ penalty=lightning_penalty,
+ tol=-1,
+ max_iter=this_max_iter,
+ )
+ else:
+ lr = LogisticRegression(
+ solver=solver,
+ C=C,
+ penalty=penalty,
+ fit_intercept=False,
+ tol=0,
+ max_iter=this_max_iter,
+ random_state=42,
+ )
+ if multi_class == "ovr":
+ lr = OneVsRestClassifier(lr)
+
+ # Touch the data so the CPU cache is in a comparable state before each fit
+ X_train.max()
+ t0 = time.time()
+
+ lr.fit(X_train, y_train)
+ train_time = time.time() - t0
+
+ scores = []
+ for X, y in [(X_train, y_train), (X_test, y_test)]:
+ try:
+ y_pred = lr.predict_proba(X)
+ except NotImplementedError:
+ # Lightning predict_proba is not implemented for n_classes > 2
+ y_pred = _predict_proba(lr, X)
+ if isinstance(lr, OneVsRestClassifier):
+ coef = np.concatenate([est.coef_ for est in lr.estimators_])
+ else:
+ coef = lr.coef_
+ score = log_loss(y, y_pred, normalize=False) / n_samples
+ score += 0.5 * alpha * np.sum(coef**2) + beta * np.sum(np.abs(coef))
+ scores.append(score)
+ train_score, test_score = tuple(scores)
+
+ y_pred = lr.predict(X_test)
+ accuracy = np.sum(y_pred == y_test) / y_test.shape[0]
+ test_scores.append(test_score)
+ train_scores.append(train_score)
+ accuracies.append(accuracy)
+ times.append(train_time)
+ return lr, times, train_scores, test_scores, accuracies
+
+
+def _predict_proba(lr, X):
+ """Predict proba for lightning for n_classes >=3."""
+ pred = safe_sparse_dot(X, lr.coef_.T)
+ if hasattr(lr, "intercept_"):
+ pred += lr.intercept_
+ return softmax(pred)
+
+
+def exp(
+ solvers,
+ penalty,
+ single_target,
+ n_samples=30000,
+ max_iter=20,
+ dataset="rcv1",
+ n_jobs=1,
+ skip_slow=False,
+):
+ dtypes_mapping = {
+ "float64": np.float64,
+ "float32": np.float32,
+ }
+
+ if dataset == "rcv1":
+ rcv1 = fetch_rcv1()
+
+ lbin = LabelBinarizer()
+ lbin.fit(rcv1.target_names)
+
+ X = rcv1.data
+ y = rcv1.target
+ y = lbin.inverse_transform(y)
+ le = LabelEncoder()
+ y = le.fit_transform(y)
+ if single_target:
+ y_n = y.copy()
+ y_n[y > 16] = 1
+ y_n[y <= 16] = 0
+ y = y_n
+
+ elif dataset == "digits":
+ X, y = load_digits(return_X_y=True)
+ if single_target:
+ y_n = y.copy()
+ y_n[y < 5] = 1
+ y_n[y >= 5] = 0
+ y = y_n
+ elif dataset == "iris":
+ iris = load_iris()
+ X, y = iris.data, iris.target
+ elif dataset == "20newspaper":
+ ng = fetch_20newsgroups_vectorized()
+ X = ng.data
+ y = ng.target
+ if single_target:
+ y_n = y.copy()
+ y_n[y > 4] = 1
+ y_n[y <= 16] = 0
+ y = y_n
+
+ X = X[:n_samples]
+ y = y[:n_samples]
+
+ out = Parallel(n_jobs=n_jobs, mmap_mode=None)(
+ delayed(fit_single)(
+ solver,
+ X,
+ y,
+ penalty=penalty,
+ single_target=single_target,
+ dtype=dtype,
+ C=1,
+ max_iter=max_iter,
+ skip_slow=skip_slow,
+ )
+ for solver in solvers
+ for dtype in dtypes_mapping.values()
+ )
+
+ res = []
+ idx = 0
+ for dtype_name in dtypes_mapping.keys():
+ for solver in solvers:
+ if not (skip_slow and solver == "lightning" and penalty == "l1"):
+ lr, times, train_scores, test_scores, accuracies = out[idx]
+ this_res = dict(
+ solver=solver,
+ penalty=penalty,
+ dtype=dtype_name,
+ single_target=single_target,
+ times=times,
+ train_scores=train_scores,
+ test_scores=test_scores,
+ accuracies=accuracies,
+ )
+ res.append(this_res)
+ idx += 1
+
+ with open("bench_saga.json", "w+") as f:
+ json.dump(res, f)
+
+
+def plot(outname=None):
+ import pandas as pd
+
+ with open("bench_saga.json", "r") as f:
+ f = json.load(f)
+ res = pd.DataFrame(f)
+ res.set_index(["single_target"], inplace=True)
+
+ grouped = res.groupby(level=["single_target"])
+
+ colors = {"saga": "C0", "liblinear": "C1", "lightning": "C2"}
+ linestyles = {"float32": "--", "float64": "-"}
+ alpha = {"float64": 0.5, "float32": 1}
+
+ for idx, group in grouped:
+ single_target = idx
+ fig, axes = plt.subplots(figsize=(12, 4), ncols=4)
+ ax = axes[0]
+
+ for scores, times, solver, dtype in zip(
+ group["train_scores"], group["times"], group["solver"], group["dtype"]
+ ):
+ ax.plot(
+ times,
+ scores,
+ label="%s - %s" % (solver, dtype),
+ color=colors[solver],
+ alpha=alpha[dtype],
+ marker=".",
+ linestyle=linestyles[dtype],
+ )
+ ax.axvline(
+ times[-1],
+ color=colors[solver],
+ alpha=alpha[dtype],
+ linestyle=linestyles[dtype],
+ )
+ ax.set_xlabel("Time (s)")
+ ax.set_ylabel("Training objective (relative to min)")
+ ax.set_yscale("log")
+
+ ax = axes[1]
+
+ for scores, times, solver, dtype in zip(
+ group["test_scores"], group["times"], group["solver"], group["dtype"]
+ ):
+ ax.plot(
+ times,
+ scores,
+ label=solver,
+ color=colors[solver],
+ linestyle=linestyles[dtype],
+ marker=".",
+ alpha=alpha[dtype],
+ )
+ ax.axvline(
+ times[-1],
+ color=colors[solver],
+ alpha=alpha[dtype],
+ linestyle=linestyles[dtype],
+ )
+
+ ax.set_xlabel("Time (s)")
+ ax.set_ylabel("Test objective (relative to min)")
+ ax.set_yscale("log")
+
+ ax = axes[2]
+ for accuracy, times, solver, dtype in zip(
+ group["accuracies"], group["times"], group["solver"], group["dtype"]
+ ):
+ ax.plot(
+ times,
+ accuracy,
+ label="%s - %s" % (solver, dtype),
+ alpha=alpha[dtype],
+ marker=".",
+ color=colors[solver],
+ linestyle=linestyles[dtype],
+ )
+ ax.axvline(
+ times[-1],
+ color=colors[solver],
+ alpha=alpha[dtype],
+ linestyle=linestyles[dtype],
+ )
+
+ ax.set_xlabel("Time (s)")
+ ax.set_ylabel("Test accuracy")
+ ax.legend()
+ name = "single_target" if single_target else "multi_target"
+ name += "_%s" % penalty
+ plt.suptitle(name)
+ if outname is None:
+ outname = name + ".png"
+ fig.tight_layout()
+ fig.subplots_adjust(top=0.9)
+
+ ax = axes[3]
+ for scores, times, solver, dtype in zip(
+ group["train_scores"], group["times"], group["solver"], group["dtype"]
+ ):
+ ax.plot(
+ np.arange(len(scores)),
+ scores,
+ label="%s - %s" % (solver, dtype),
+ marker=".",
+ alpha=alpha[dtype],
+ color=colors[solver],
+ linestyle=linestyles[dtype],
+ )
+
+ ax.set_yscale("log")
+ ax.set_xlabel("# iterations")
+ ax.set_ylabel("Objective function")
+ ax.legend()
+
+ plt.savefig(outname)
+
+
+if __name__ == "__main__":
+ solvers = ["saga", "liblinear", "lightning"]
+ penalties = ["l1", "l2"]
+ n_samples = [100000, 300000, 500000, 800000, None]
+ single_target = True
+ for penalty in penalties:
+ for n_sample in n_samples:
+ exp(
+ solvers,
+ penalty,
+ single_target,
+ n_samples=n_sample,
+ n_jobs=1,
+ dataset="rcv1",
+ max_iter=10,
+ )
+ if n_sample is not None:
+ outname = "figures/saga_%s_%d.png" % (penalty, n_sample)
+ else:
+ outname = "figures/saga_%s_all.png" % (penalty,)
+ try:
+ os.makedirs("figures")
+ except OSError:
+ pass
+ plot(outname)
diff --git a/auto_building_tools/benchmarks/bench_sample_without_replacement.py b/auto_building_tools/benchmarks/bench_sample_without_replacement.py
new file mode 100644
index 0000000..39cf1a1
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_sample_without_replacement.py
@@ -0,0 +1,233 @@
+"""
+Benchmarks for sampling integers without replacement.
+
+"""
+
+import gc
+import operator
+import optparse
+import random
+import sys
+from datetime import datetime
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+from sklearn.utils.random import sample_without_replacement
+
+
+def compute_time(t_start, delta):
+ mu_second = 0.0 + 10**6 # number of microseconds in a second
+
+ return delta.seconds + delta.microseconds / mu_second
+
+
+def bench_sample(sampling, n_population, n_samples):
+ gc.collect()
+ # start time
+ t_start = datetime.now()
+ sampling(n_population, n_samples)
+ delta = datetime.now() - t_start
+ # stop time
+ time = compute_time(t_start, delta)
+ return time
+
+
+if __name__ == "__main__":
+ ###########################################################################
+ # Option parser
+ ###########################################################################
+ op = optparse.OptionParser()
+ op.add_option(
+ "--n-times",
+ dest="n_times",
+ default=5,
+ type=int,
+ help="Benchmark results are average over n_times experiments",
+ )
+
+ op.add_option(
+ "--n-population",
+ dest="n_population",
+ default=100000,
+ type=int,
+ help="Size of the population to sample from.",
+ )
+
+ op.add_option(
+ "--n-step",
+ dest="n_steps",
+ default=5,
+ type=int,
+ help="Number of step interval between 0 and n_population.",
+ )
+
+ default_algorithms = (
+ "custom-tracking-selection,custom-auto,"
+ "custom-reservoir-sampling,custom-pool,"
+ "python-core-sample,numpy-permutation"
+ )
+
+ op.add_option(
+ "--algorithm",
+ dest="selected_algorithm",
+ default=default_algorithms,
+ type=str,
+ help=(
+ "Comma-separated list of transformer to benchmark. "
+ "Default: %default. \nAvailable: %default"
+ ),
+ )
+
+ # op.add_option("--random-seed",
+ # dest="random_seed", default=13, type=int,
+ # help="Seed used by the random number generators.")
+
+ (opts, args) = op.parse_args()
+ if len(args) > 0:
+ op.error("this script takes no arguments.")
+ sys.exit(1)
+
+ selected_algorithm = opts.selected_algorithm.split(",")
+ for key in selected_algorithm:
+ if key not in default_algorithms.split(","):
+ raise ValueError(
+ 'Unknown sampling algorithm "%s" not in (%s).'
+ % (key, default_algorithms)
+ )
+
+ ###########################################################################
+ # List sampling algorithm
+ ###########################################################################
+ # We assume that sampling algorithm has the following signature:
+ # sample(n_population, n_sample)
+ #
+ sampling_algorithm = {}
+
+ ###########################################################################
+ # Set Python core input
+ sampling_algorithm["python-core-sample"] = (
+ lambda n_population, n_sample: random.sample(range(n_population), n_sample)
+ )
+
+ ###########################################################################
+ # Set custom automatic method selection
+ sampling_algorithm["custom-auto"] = (
+ lambda n_population, n_samples, random_state=None: sample_without_replacement(
+ n_population, n_samples, method="auto", random_state=random_state
+ )
+ )
+
+ ###########################################################################
+ # Set custom tracking based method
+ sampling_algorithm["custom-tracking-selection"] = (
+ lambda n_population, n_samples, random_state=None: sample_without_replacement(
+ n_population,
+ n_samples,
+ method="tracking_selection",
+ random_state=random_state,
+ )
+ )
+
+ ###########################################################################
+ # Set custom reservoir based method
+ sampling_algorithm["custom-reservoir-sampling"] = (
+ lambda n_population, n_samples, random_state=None: sample_without_replacement(
+ n_population,
+ n_samples,
+ method="reservoir_sampling",
+ random_state=random_state,
+ )
+ )
+
+ ###########################################################################
+ # Set custom pool based method
+ sampling_algorithm["custom-pool"] = (
+ lambda n_population, n_samples, random_state=None: sample_without_replacement(
+ n_population, n_samples, method="pool", random_state=random_state
+ )
+ )
+
+ ###########################################################################
+ # Numpy permutation based
+ sampling_algorithm["numpy-permutation"] = (
+ lambda n_population, n_sample: np.random.permutation(n_population)[:n_sample]
+ )
+
+ ###########################################################################
+ # Remove unspecified algorithm
+ sampling_algorithm = {
+ key: value
+ for key, value in sampling_algorithm.items()
+ if key in selected_algorithm
+ }
+
+ ###########################################################################
+ # Perform benchmark
+ ###########################################################################
+ time = {}
+ n_samples = np.linspace(start=0, stop=opts.n_population, num=opts.n_steps).astype(
+ int
+ )
+
+ ratio = n_samples / opts.n_population
+
+ print("Benchmarks")
+ print("===========================")
+
+ for name in sorted(sampling_algorithm):
+ print("Perform benchmarks for %s..." % name, end="")
+ time[name] = np.zeros(shape=(opts.n_steps, opts.n_times))
+
+ for step in range(opts.n_steps):
+ for it in range(opts.n_times):
+ time[name][step, it] = bench_sample(
+ sampling_algorithm[name], opts.n_population, n_samples[step]
+ )
+
+ print("done")
+
+ print("Averaging results...", end="")
+ for name in sampling_algorithm:
+ time[name] = np.mean(time[name], axis=1)
+ print("done\n")
+
+ # Print results
+ ###########################################################################
+ print("Script arguments")
+ print("===========================")
+ arguments = vars(opts)
+ print(
+ "%s \t | %s "
+ % (
+ "Arguments".ljust(16),
+ "Value".center(12),
+ )
+ )
+ print(25 * "-" + ("|" + "-" * 14) * 1)
+ for key, value in arguments.items():
+ print("%s \t | %s " % (str(key).ljust(16), str(value).strip().center(12)))
+ print("")
+
+ print("Sampling algorithm performance:")
+ print("===============================")
+ print("Results are averaged over %s repetition(s)." % opts.n_times)
+ print("")
+
+ fig = plt.figure("scikit-learn sample w/o replacement benchmark results")
+ fig.suptitle("n_population = %s, n_times = %s" % (opts.n_population, opts.n_times))
+ ax = fig.add_subplot(111)
+ for name in sampling_algorithm:
+ ax.plot(ratio, time[name], label=name)
+
+ ax.set_xlabel("ratio of n_sample / n_population")
+ ax.set_ylabel("Time (s)")
+ ax.legend()
+
+ # Sort legend labels
+ handles, labels = ax.get_legend_handles_labels()
+ hl = sorted(zip(handles, labels), key=operator.itemgetter(1))
+ handles2, labels2 = zip(*hl)
+ ax.legend(handles2, labels2, loc=0)
+
+ plt.show()
diff --git a/auto_building_tools/benchmarks/bench_sgd_regression.py b/auto_building_tools/benchmarks/bench_sgd_regression.py
new file mode 100644
index 0000000..bd00615
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_sgd_regression.py
@@ -0,0 +1,151 @@
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
+"""
+Benchmark for SGD regression
+
+Compares SGD regression against coordinate descent and Ridge
+on synthetic data.
+"""
+
+import gc
+from time import time
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+from sklearn.datasets import make_regression
+from sklearn.linear_model import ElasticNet, Ridge, SGDRegressor
+from sklearn.metrics import mean_squared_error
+
+print(__doc__)
+
+if __name__ == "__main__":
+ list_n_samples = np.linspace(100, 10000, 5).astype(int)
+ list_n_features = [10, 100, 1000]
+ n_test = 1000
+ max_iter = 1000
+ noise = 0.1
+ alpha = 0.01
+ sgd_results = np.zeros((len(list_n_samples), len(list_n_features), 2))
+ elnet_results = np.zeros((len(list_n_samples), len(list_n_features), 2))
+ ridge_results = np.zeros((len(list_n_samples), len(list_n_features), 2))
+ asgd_results = np.zeros((len(list_n_samples), len(list_n_features), 2))
+ for i, n_train in enumerate(list_n_samples):
+ for j, n_features in enumerate(list_n_features):
+ X, y, coef = make_regression(
+ n_samples=n_train + n_test,
+ n_features=n_features,
+ noise=noise,
+ coef=True,
+ )
+
+ X_train = X[:n_train]
+ y_train = y[:n_train]
+ X_test = X[n_train:]
+ y_test = y[n_train:]
+
+ print("=======================")
+ print("Round %d %d" % (i, j))
+ print("n_features:", n_features)
+ print("n_samples:", n_train)
+
+ # Shuffle data
+ idx = np.arange(n_train)
+ np.random.seed(13)
+ np.random.shuffle(idx)
+ X_train = X_train[idx]
+ y_train = y_train[idx]
+
+ std = X_train.std(axis=0)
+ mean = X_train.mean(axis=0)
+ X_train = (X_train - mean) / std
+ X_test = (X_test - mean) / std
+
+ std = y_train.std(axis=0)
+ mean = y_train.mean(axis=0)
+ y_train = (y_train - mean) / std
+ y_test = (y_test - mean) / std
+
+ gc.collect()
+ print("- benchmarking ElasticNet")
+ clf = ElasticNet(alpha=alpha, l1_ratio=0.5, fit_intercept=False)
+ tstart = time()
+ clf.fit(X_train, y_train)
+ elnet_results[i, j, 0] = mean_squared_error(clf.predict(X_test), y_test)
+ elnet_results[i, j, 1] = time() - tstart
+
+ gc.collect()
+ print("- benchmarking SGD")
+ clf = SGDRegressor(
+ alpha=alpha / n_train,
+ fit_intercept=False,
+ max_iter=max_iter,
+ learning_rate="invscaling",
+ eta0=0.01,
+ power_t=0.25,
+ tol=1e-3,
+ )
+
+ tstart = time()
+ clf.fit(X_train, y_train)
+ sgd_results[i, j, 0] = mean_squared_error(clf.predict(X_test), y_test)
+ sgd_results[i, j, 1] = time() - tstart
+
+ gc.collect()
+ print("max_iter", max_iter)
+ print("- benchmarking A-SGD")
+ clf = SGDRegressor(
+ alpha=alpha / n_train,
+ fit_intercept=False,
+ max_iter=max_iter,
+ learning_rate="invscaling",
+ eta0=0.002,
+ power_t=0.05,
+ tol=1e-3,
+ average=(max_iter * n_train // 2),
+ )
+
+ tstart = time()
+ clf.fit(X_train, y_train)
+ asgd_results[i, j, 0] = mean_squared_error(clf.predict(X_test), y_test)
+ asgd_results[i, j, 1] = time() - tstart
+
+ gc.collect()
+ print("- benchmarking RidgeRegression")
+ clf = Ridge(alpha=alpha, fit_intercept=False)
+ tstart = time()
+ clf.fit(X_train, y_train)
+ ridge_results[i, j, 0] = mean_squared_error(clf.predict(X_test), y_test)
+ ridge_results[i, j, 1] = time() - tstart
+
+ # Plot results
+ i = 0
+ m = len(list_n_features)
+ plt.figure("scikit-learn SGD regression benchmark results", figsize=(5 * 2, 4 * m))
+ for j in range(m):
+ plt.subplot(m, 2, i + 1)
+ plt.plot(list_n_samples, np.sqrt(elnet_results[:, j, 0]), label="ElasticNet")
+ plt.plot(list_n_samples, np.sqrt(sgd_results[:, j, 0]), label="SGDRegressor")
+ plt.plot(list_n_samples, np.sqrt(asgd_results[:, j, 0]), label="A-SGDRegressor")
+ plt.plot(list_n_samples, np.sqrt(ridge_results[:, j, 0]), label="Ridge")
+ plt.legend(prop={"size": 10})
+ plt.xlabel("n_train")
+ plt.ylabel("RMSE")
+ plt.title("Test error - %d features" % list_n_features[j])
+ i += 1
+
+ plt.subplot(m, 2, i + 1)
+ plt.plot(list_n_samples, np.sqrt(elnet_results[:, j, 1]), label="ElasticNet")
+ plt.plot(list_n_samples, np.sqrt(sgd_results[:, j, 1]), label="SGDRegressor")
+ plt.plot(list_n_samples, np.sqrt(asgd_results[:, j, 1]), label="A-SGDRegressor")
+ plt.plot(list_n_samples, np.sqrt(ridge_results[:, j, 1]), label="Ridge")
+ plt.legend(prop={"size": 10})
+ plt.xlabel("n_train")
+ plt.ylabel("Time [sec]")
+ plt.title("Training time - %d features" % list_n_features[j])
+ i += 1
+
+ plt.subplots_adjust(hspace=0.30)
+
+ plt.show()
diff --git a/auto_building_tools/benchmarks/bench_sparsify.py b/auto_building_tools/benchmarks/bench_sparsify.py
new file mode 100644
index 0000000..1832ca4
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_sparsify.py
@@ -0,0 +1,107 @@
+"""
+Benchmark SGD prediction time with dense/sparse coefficients.
+
+Invoke with
+-----------
+
+$ kernprof.py -l sparsity_benchmark.py
+$ python -m line_profiler sparsity_benchmark.py.lprof
+
+Typical output
+--------------
+
+input data sparsity: 0.050000
+true coef sparsity: 0.000100
+test data sparsity: 0.027400
+model sparsity: 0.000024
+r^2 on test data (dense model) : 0.233651
+r^2 on test data (sparse model) : 0.233651
+Wrote profile results to sparsity_benchmark.py.lprof
+Timer unit: 1e-06 s
+
+File: sparsity_benchmark.py
+Function: benchmark_dense_predict at line 51
+Total time: 0.532979 s
+
+Line # Hits Time Per Hit % Time Line Contents
+==============================================================
+ 51 @profile
+ 52 def benchmark_dense_predict():
+ 53 301 640 2.1 0.1 for _ in range(300):
+ 54 300 532339 1774.5 99.9 clf.predict(X_test)
+
+File: sparsity_benchmark.py
+Function: benchmark_sparse_predict at line 56
+Total time: 0.39274 s
+
+Line # Hits Time Per Hit % Time Line Contents
+==============================================================
+ 56 @profile
+ 57 def benchmark_sparse_predict():
+ 58 1 10854 10854.0 2.8 X_test_sparse = csr_matrix(X_test)
+ 59 301 477 1.6 0.1 for _ in range(300):
+ 60 300 381409 1271.4 97.1 clf.predict(X_test_sparse)
+"""
+
+import numpy as np
+from scipy.sparse import csr_matrix
+
+from sklearn.linear_model import SGDRegressor
+from sklearn.metrics import r2_score
+
+np.random.seed(42)
+
+
+def sparsity_ratio(X):
+ return np.count_nonzero(X) / float(n_samples * n_features)
+
+
+n_samples, n_features = 5000, 300
+X = np.random.randn(n_samples, n_features)
+inds = np.arange(n_samples)
+np.random.shuffle(inds)
+X[inds[int(n_features / 1.2) :]] = 0 # sparsify input
+print("input data sparsity: %f" % sparsity_ratio(X))
+coef = 3 * np.random.randn(n_features)
+inds = np.arange(n_features)
+np.random.shuffle(inds)
+coef[inds[n_features // 2 :]] = 0 # sparsify coef
+print("true coef sparsity: %f" % sparsity_ratio(coef))
+y = np.dot(X, coef)
+
+# add noise
+y += 0.01 * np.random.normal(size=n_samples)
+
+# Split data in train set and test set
+n_samples = X.shape[0]
+X_train, y_train = X[: n_samples // 2], y[: n_samples // 2]
+X_test, y_test = X[n_samples // 2 :], y[n_samples // 2 :]
+print("test data sparsity: %f" % sparsity_ratio(X_test))
+
+###############################################################################
+clf = SGDRegressor(penalty="l1", alpha=0.2, max_iter=2000, tol=None)
+clf.fit(X_train, y_train)
+print("model sparsity: %f" % sparsity_ratio(clf.coef_))
+
+
+def benchmark_dense_predict():
+ for _ in range(300):
+ clf.predict(X_test)
+
+
+def benchmark_sparse_predict():
+ X_test_sparse = csr_matrix(X_test)
+ for _ in range(300):
+ clf.predict(X_test_sparse)
+
+
+def score(y_test, y_pred, case):
+ r2 = r2_score(y_test, y_pred)
+ print("r^2 on test data (%s) : %f" % (case, r2))
+
+
+score(y_test, clf.predict(X_test), "dense model")
+benchmark_dense_predict()
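+# sparsify() converts clf.coef_ to a scipy.sparse matrix so that subsequent
+# predictions can exploit the sparsity of the coefficients.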
+clf.sparsify()
+score(y_test, clf.predict(X_test), "sparse model")
+benchmark_sparse_predict()
diff --git a/auto_building_tools/benchmarks/bench_text_vectorizers.py b/auto_building_tools/benchmarks/bench_text_vectorizers.py
new file mode 100644
index 0000000..2eab707
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_text_vectorizers.py
@@ -0,0 +1,74 @@
+"""
+
+To run this benchmark, you will need:
+
+ * scikit-learn
+ * pandas
+ * memory_profiler
+ * psutil (optional, but recommended)
+
+"""
+
+import itertools
+import timeit
+
+import numpy as np
+import pandas as pd
+from memory_profiler import memory_usage
+
+from sklearn.datasets import fetch_20newsgroups
+from sklearn.feature_extraction.text import (
+ CountVectorizer,
+ HashingVectorizer,
+ TfidfVectorizer,
+)
+
+n_repeat = 3
+
+
+def run_vectorizer(Vectorizer, X, **params):
+ def f():
+ vect = Vectorizer(**params)
+ vect.fit_transform(X)
+
+ return f
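+# run_vectorizer returns a zero-argument closure so that the same fit_transform
+# call can be handed both to timeit.repeat and to memory_usage below.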
+
+
+text = fetch_20newsgroups(subset="train").data[:1000]
+
+print("=" * 80 + "\n#" + " Text vectorizers benchmark" + "\n" + "=" * 80 + "\n")
+print("Using a subset of the 20 newsgroups dataset ({} documents).".format(len(text)))
+print("This benchmarks runs in ~1 min ...")
+
+res = []
+
+for Vectorizer, (analyzer, ngram_range) in itertools.product(
+ [CountVectorizer, TfidfVectorizer, HashingVectorizer],
+ [("word", (1, 1)), ("word", (1, 2)), ("char", (4, 4)), ("char_wb", (4, 4))],
+):
+ bench = {"vectorizer": Vectorizer.__name__}
+ params = {"analyzer": analyzer, "ngram_range": ngram_range}
+ bench.update(params)
+ dt = timeit.repeat(
+ run_vectorizer(Vectorizer, text, **params), number=1, repeat=n_repeat
+ )
+ bench["time"] = "{:.3f} (+-{:.3f})".format(np.mean(dt), np.std(dt))
+
+ mem_usage = memory_usage(run_vectorizer(Vectorizer, text, **params))
+
+ bench["memory"] = "{:.1f}".format(np.max(mem_usage))
+
+ res.append(bench)
+
+
+df = pd.DataFrame(res).set_index(["analyzer", "ngram_range", "vectorizer"])
+
+print("\n========== Run time performance (sec) ===========\n")
+print(
+ "Computing the mean and the standard deviation "
+ "of the run time over {} runs...\n".format(n_repeat)
+)
+print(df["time"].unstack(level=-1))
+
+print("\n=============== Memory usage (MB) ===============\n")
+print(df["memory"].unstack(level=-1))
diff --git a/auto_building_tools/benchmarks/bench_tree.py b/auto_building_tools/benchmarks/bench_tree.py
new file mode 100644
index 0000000..c522bcb
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_tree.py
@@ -0,0 +1,123 @@
+"""
+To run this, you'll need to have installed:
+
+ * scikit-learn
+
+This script runs two benchmarks.
+
+First, we fix a training set, increase the number of
+samples to classify and plot the number of classified samples as a
+function of time.
+
+In the second benchmark, we increase the number of dimensions of the
+training set, classify a sample and plot the time taken as a function
+of the number of dimensions.
+"""
+
+import gc
+from datetime import datetime
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+# to store the results
+scikit_classifier_results = []
+scikit_regressor_results = []
+
+mu_second = 0.0 + 10**6 # number of microseconds in a second
+
+
+def bench_scikit_tree_classifier(X, Y):
+ """Benchmark with scikit-learn decision tree classifier"""
+
+ from sklearn.tree import DecisionTreeClassifier
+
+ gc.collect()
+
+ # start time
+ tstart = datetime.now()
+ clf = DecisionTreeClassifier()
+ clf.fit(X, Y).predict(X)
+ delta = datetime.now() - tstart
+ # stop time
+
+ scikit_classifier_results.append(delta.seconds + delta.microseconds / mu_second)
+
+
+def bench_scikit_tree_regressor(X, Y):
+ """Benchmark with scikit-learn decision tree regressor"""
+
+ from sklearn.tree import DecisionTreeRegressor
+
+ gc.collect()
+
+ # start time
+ tstart = datetime.now()
+ clf = DecisionTreeRegressor()
+ clf.fit(X, Y).predict(X)
+ delta = datetime.now() - tstart
+ # stop time
+
+ scikit_regressor_results.append(delta.seconds + delta.microseconds / mu_second)
+
+
+if __name__ == "__main__":
+ print("============================================")
+ print("Warning: this is going to take a looong time")
+ print("============================================")
+
+ n = 10
+ step = 10000
+ n_samples = 10000
+ dim = 10
+ n_classes = 10
+ for i in range(n):
+ print("============================================")
+ print("Entering iteration %s of %s" % (i, n))
+ print("============================================")
+ n_samples += step
+ X = np.random.randn(n_samples, dim)
+ Y = np.random.randint(0, n_classes, (n_samples,))
+ bench_scikit_tree_classifier(X, Y)
+ Y = np.random.randn(n_samples)
+ bench_scikit_tree_regressor(X, Y)
+
+ xx = range(0, n * step, step)
+ plt.figure("scikit-learn tree benchmark results")
+ plt.subplot(211)
+ plt.title("Learning with varying number of samples")
+ plt.plot(xx, scikit_classifier_results, "g-", label="classification")
+ plt.plot(xx, scikit_regressor_results, "r-", label="regression")
+ plt.legend(loc="upper left")
+ plt.xlabel("number of samples")
+ plt.ylabel("Time (s)")
+
+ scikit_classifier_results = []
+ scikit_regressor_results = []
+ n = 10
+ step = 500
+ start_dim = 500
+ n_classes = 10
+
+ dim = start_dim
+ for i in range(0, n):
+ print("============================================")
+ print("Entering iteration %s of %s" % (i, n))
+ print("============================================")
+ dim += step
+ X = np.random.randn(100, dim)
+ Y = np.random.randint(0, n_classes, (100,))
+ bench_scikit_tree_classifier(X, Y)
+ Y = np.random.randn(100)
+ bench_scikit_tree_regressor(X, Y)
+
+ xx = np.arange(start_dim, start_dim + n * step, step)
+ plt.subplot(212)
+ plt.title("Learning in high dimensional spaces")
+ plt.plot(xx, scikit_classifier_results, "g-", label="classification")
+ plt.plot(xx, scikit_regressor_results, "r-", label="regression")
+ plt.legend(loc="upper left")
+ plt.xlabel("number of dimensions")
+ plt.ylabel("Time (s)")
+ plt.axis("tight")
+ plt.show()
diff --git a/auto_building_tools/benchmarks/bench_tsne_mnist.py b/auto_building_tools/benchmarks/bench_tsne_mnist.py
new file mode 100644
index 0000000..8649c7a
--- /dev/null
+++ b/auto_building_tools/benchmarks/bench_tsne_mnist.py
@@ -0,0 +1,208 @@
+"""
+=============================
+MNIST dataset T-SNE benchmark
+=============================
+
+"""
+
+# SPDX-License-Identifier: BSD-3-Clause
+
+import argparse
+import json
+import os
+import os.path as op
+from time import time
+
+import numpy as np
+from joblib import Memory
+
+from sklearn.datasets import fetch_openml
+from sklearn.decomposition import PCA
+from sklearn.manifold import TSNE
+from sklearn.neighbors import NearestNeighbors
+from sklearn.utils import check_array
+from sklearn.utils import shuffle as _shuffle
+from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
+
+LOG_DIR = "mnist_tsne_output"
+if not os.path.exists(LOG_DIR):
+ os.mkdir(LOG_DIR)
+
+
+memory = Memory(os.path.join(LOG_DIR, "mnist_tsne_benchmark_data"), mmap_mode="r")
+
+
+@memory.cache
+def load_data(dtype=np.float32, order="C", shuffle=True, seed=0):
+ """Load the data, then cache and memmap the train/test split"""
+ print("Loading dataset...")
+ data = fetch_openml("mnist_784", as_frame=True)
+
+ X = check_array(data["data"], dtype=dtype, order=order)
+ y = data["target"]
+
+ if shuffle:
+ X, y = _shuffle(X, y, random_state=seed)
+
+ # Normalize features
+ X /= 255
+ return X, y
+
+
+def nn_accuracy(X, X_embedded, k=1):
+ """Accuracy of the first nearest neighbor"""
+ knn = NearestNeighbors(n_neighbors=1, n_jobs=-1)
+ _, neighbors_X = knn.fit(X).kneighbors()
+ _, neighbors_X_embedded = knn.fit(X_embedded).kneighbors()
+ return np.mean(neighbors_X == neighbors_X_embedded)
+
+
+def tsne_fit_transform(model, data):
+ transformed = model.fit_transform(data)
+ return transformed, model.n_iter_
+
+
+def sanitize(filename):
+ return filename.replace("/", "-").replace(" ", "_")
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser("Benchmark for t-SNE")
+ parser.add_argument(
+ "--order", type=str, default="C", help="Order of the input data"
+ )
+ parser.add_argument("--perplexity", type=float, default=30)
+ parser.add_argument(
+ "--bhtsne",
+ action="store_true",
+ help=(
+ "if set and the reference bhtsne code is "
+ "correctly installed, run it in the benchmark."
+ ),
+ )
+ parser.add_argument(
+ "--all",
+ action="store_true",
+ help=(
+ "if set, run the benchmark with the whole MNIST."
+ "dataset. Note that it will take up to 1 hour."
+ ),
+ )
+ parser.add_argument(
+ "--profile",
+ action="store_true",
+ help="if set, run the benchmark with a memory profiler.",
+ )
+ parser.add_argument("--verbose", type=int, default=0)
+ parser.add_argument(
+ "--pca-components",
+ type=int,
+ default=50,
+ help="Number of principal components for preprocessing.",
+ )
+ args = parser.parse_args()
+
+ print("Used number of threads: {}".format(_openmp_effective_n_threads()))
+ X, y = load_data(order=args.order)
+
+ if args.pca_components > 0:
+ t0 = time()
+ X = PCA(n_components=args.pca_components).fit_transform(X)
+ print(
+ "PCA preprocessing down to {} dimensions took {:0.3f}s".format(
+ args.pca_components, time() - t0
+ )
+ )
+
+ methods = []
+
+ # Put TSNE in methods
+ tsne = TSNE(
+ n_components=2,
+ init="pca",
+ perplexity=args.perplexity,
+ verbose=args.verbose,
+ n_iter=1000,
+ )
+ methods.append(("sklearn TSNE", lambda data: tsne_fit_transform(tsne, data)))
+
+ if args.bhtsne:
+ try:
+ from bhtsne.bhtsne import run_bh_tsne
+ except ImportError as e:
+ raise ImportError(
+ """\
+If you want comparison with the reference implementation, build the
+binary from source (https://github.com/lvdmaaten/bhtsne) in the folder
+benchmarks/bhtsne and add an empty `__init__.py` file in the folder:
+
+$ git clone git@github.com:lvdmaaten/bhtsne.git
+$ cd bhtsne
+$ g++ sptree.cpp tsne.cpp tsne_main.cpp -o bh_tsne -O2
+$ touch __init__.py
+$ cd ..
+"""
+ ) from e
+
+ def bhtsne(X):
+ """Wrapper for the reference lvdmaaten/bhtsne implementation."""
+ # PCA preprocessing is done elsewhere in the benchmark script
+ n_iter = -1 # TODO find a way to report the number of iterations
+ return (
+ run_bh_tsne(
+ X,
+ use_pca=False,
+ perplexity=args.perplexity,
+ verbose=args.verbose > 0,
+ ),
+ n_iter,
+ )
+
+ methods.append(("lvdmaaten/bhtsne", bhtsne))
+
+ if args.profile:
+ try:
+ from memory_profiler import profile
+ except ImportError as e:
+ raise ImportError(
+ "To run the benchmark with `--profile`, you "
+ "need to install `memory_profiler`. Please "
+ "run `pip install memory_profiler`."
+ ) from e
+ methods = [(n, profile(m)) for n, m in methods]
+
+ data_size = [100, 500, 1000, 5000, 10000]
+ if args.all:
+ data_size.append(70000)
+
+ results = []
+ basename = os.path.basename(os.path.splitext(__file__)[0])
+ log_filename = os.path.join(LOG_DIR, basename + ".json")
+ for n in data_size:
+ X_train = X[:n]
+ y_train = y[:n]
+ n = X_train.shape[0]
+ for name, method in methods:
+ print("Fitting {} on {} samples...".format(name, n))
+ t0 = time()
+ np.save(
+ os.path.join(LOG_DIR, "mnist_{}_{}.npy".format("original", n)), X_train
+ )
+ np.save(
+ os.path.join(LOG_DIR, "mnist_{}_{}.npy".format("original_labels", n)),
+ y_train,
+ )
+ X_embedded, n_iter = method(X_train)
+ duration = time() - t0
+ precision_5 = nn_accuracy(X_train, X_embedded)
+ print(
+ "Fitting {} on {} samples took {:.3f}s in {:d} iterations, "
+ "nn accuracy: {:0.3f}".format(name, n, duration, n_iter, precision_5)
+ )
+ results.append(dict(method=name, duration=duration, n_samples=n))
+ with open(log_filename, "w", encoding="utf-8") as f:
+ json.dump(results, f)
+ method_name = sanitize(name)
+ np.save(
+ op.join(LOG_DIR, "mnist_{}_{}.npy".format(method_name, n)), X_embedded
+ )
diff --git a/auto_building_tools/benchmarks/plot_tsne_mnist.py b/auto_building_tools/benchmarks/plot_tsne_mnist.py
new file mode 100644
index 0000000..fff71ee
--- /dev/null
+++ b/auto_building_tools/benchmarks/plot_tsne_mnist.py
@@ -0,0 +1,33 @@
+import argparse
+import os.path as op
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+LOG_DIR = "mnist_tsne_output"
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser("Plot benchmark results for t-SNE")
+ parser.add_argument(
+ "--labels",
+ type=str,
+ default=op.join(LOG_DIR, "mnist_original_labels_10000.npy"),
+ help="1D integer numpy array for labels",
+ )
+ parser.add_argument(
+ "--embedding",
+ type=str,
+ default=op.join(LOG_DIR, "mnist_sklearn_TSNE_10000.npy"),
+ help="2D float numpy array for embedded data",
+ )
+ args = parser.parse_args()
+
+ X = np.load(args.embedding)
+ y = np.load(args.labels)
+
+ for i in np.unique(y):
+ mask = y == i
+ plt.scatter(X[mask, 0], X[mask, 1], alpha=0.2, label=int(i))
+ plt.legend(loc="best")
+ plt.show()
diff --git a/auto_building_tools/build_tools/Makefile b/auto_building_tools/build_tools/Makefile
new file mode 100644
index 0000000..544bbaf
--- /dev/null
+++ b/auto_building_tools/build_tools/Makefile
@@ -0,0 +1,4 @@
+# Makefile for maintenance tools
+
+authors:
+ python generate_authors_table.py
diff --git a/auto_building_tools/build_tools/azure/combine_coverage_reports.sh b/auto_building_tools/build_tools/azure/combine_coverage_reports.sh
new file mode 100644
index 0000000..c3b90fd
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/combine_coverage_reports.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+set -e
+
+# Defines the show_installed_libraries and activate_environment functions.
+source build_tools/shared.sh
+
+activate_environment
+
+# Combine all coverage files generated by subprocesses workers such
+# such as pytest-xdist and joblib/loky:
+pushd $TEST_DIR
+coverage combine --append
+coverage xml
+popd
+
+# Copy the combined coverage file to the root of the repository:
+cp $TEST_DIR/coverage.xml $BUILD_REPOSITORY_LOCALPATH
diff --git a/auto_building_tools/build_tools/azure/cpython_free_threaded_lock.txt b/auto_building_tools/build_tools/azure/cpython_free_threaded_lock.txt
new file mode 100644
index 0000000..91b5021
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/cpython_free_threaded_lock.txt
@@ -0,0 +1,35 @@
+#
+# This file is autogenerated by pip-compile with Python 3.13
+# by the following command:
+#
+# pip-compile --output-file=/scikit-learn/build_tools/azure/cpython_free_threaded_lock.txt /scikit-learn/build_tools/azure/cpython_free_threaded_requirements.txt
+#
+execnet==2.1.1
+ # via pytest-xdist
+iniconfig==2.0.0
+ # via pytest
+joblib==1.4.2
+ # via -r /scikit-learn/build_tools/azure/cpython_free_threaded_requirements.txt
+meson==1.4.1
+ # via meson-python
+meson-python==0.16.0
+ # via -r /scikit-learn/build_tools/azure/cpython_free_threaded_requirements.txt
+ninja==1.11.1.1
+ # via -r /scikit-learn/build_tools/azure/cpython_free_threaded_requirements.txt
+packaging==24.0
+ # via
+ # meson-python
+ # pyproject-metadata
+ # pytest
+pluggy==1.5.0
+ # via pytest
+pyproject-metadata==0.8.0
+ # via meson-python
+pytest==8.2.2
+ # via
+ # -r /scikit-learn/build_tools/azure/cpython_free_threaded_requirements.txt
+ # pytest-xdist
+pytest-xdist==3.6.1
+ # via -r /scikit-learn/build_tools/azure/cpython_free_threaded_requirements.txt
+threadpoolctl==3.5.0
+ # via -r /scikit-learn/build_tools/azure/cpython_free_threaded_requirements.txt
diff --git a/auto_building_tools/build_tools/azure/cpython_free_threaded_requirements.txt b/auto_building_tools/build_tools/azure/cpython_free_threaded_requirements.txt
new file mode 100644
index 0000000..bdcb169
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/cpython_free_threaded_requirements.txt
@@ -0,0 +1,14 @@
+# To generate cpython_free_threaded_lock.txt, use the following command:
+# docker run -v $PWD:/scikit-learn -it ubuntu bash -c 'export DEBIAN_FRONTEND=noninteractive; apt-get -yq update; apt-get install software-properties-common ccache -y; add-apt-repository --yes ppa:deadsnakes/nightly; apt-get update -y; apt-get install -y --no-install-recommends python3.13-dev python3.13-venv python3.13-nogil; python3.13t -m venv /venvs/myenv; source /venvs/myenv/bin/activate; pip install pip-tools; pip-compile /scikit-learn/build_tools/azure/cpython_free_threaded_requirements.txt -o /scikit-learn/build_tools/azure/cpython_free_threaded_lock.txt'
+
+# The reason is that you need python-3.13t to generate the pip lock file. For
+# pure Python wheels this does not really matter, but once there are Cython,
+# NumPy and SciPy releases with CPython 3.13 free-threaded wheels, we can add
+# them here, and it is important that the free-threaded wheels are the ones
+# picked up in the lock file.
+joblib
+threadpoolctl
+pytest
+pytest-xdist
+ninja
+meson-python
diff --git a/auto_building_tools/build_tools/azure/debian_atlas_32bit_lock.txt b/auto_building_tools/build_tools/azure/debian_atlas_32bit_lock.txt
new file mode 100644
index 0000000..6e40724
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/debian_atlas_32bit_lock.txt
@@ -0,0 +1,43 @@
+#
+# This file is autogenerated by pip-compile with Python 3.11
+# by the following command:
+#
+# pip-compile --output-file=build_tools/azure/debian_atlas_32bit_lock.txt build_tools/azure/debian_atlas_32bit_requirements.txt
+#
+attrs==24.2.0
+ # via pytest
+coverage==7.6.1
+ # via pytest-cov
+cython==3.0.10
+ # via -r build_tools/azure/debian_atlas_32bit_requirements.txt
+iniconfig==2.0.0
+ # via pytest
+joblib==1.2.0
+ # via -r build_tools/azure/debian_atlas_32bit_requirements.txt
+meson==1.5.1
+ # via meson-python
+meson-python==0.16.0
+ # via -r build_tools/azure/debian_atlas_32bit_requirements.txt
+ninja==1.11.1.1
+ # via -r build_tools/azure/debian_atlas_32bit_requirements.txt
+packaging==24.1
+ # via
+ # meson-python
+ # pyproject-metadata
+ # pytest
+pluggy==1.5.0
+ # via pytest
+py==1.11.0
+ # via pytest
+pyproject-metadata==0.8.0
+ # via meson-python
+pytest==7.1.2
+ # via
+ # -r build_tools/azure/debian_atlas_32bit_requirements.txt
+ # pytest-cov
+pytest-cov==2.9.0
+ # via -r build_tools/azure/debian_atlas_32bit_requirements.txt
+threadpoolctl==3.1.0
+ # via -r build_tools/azure/debian_atlas_32bit_requirements.txt
+tomli==2.0.1
+ # via pytest
diff --git a/auto_building_tools/build_tools/azure/debian_atlas_32bit_requirements.txt b/auto_building_tools/build_tools/azure/debian_atlas_32bit_requirements.txt
new file mode 100644
index 0000000..615193a
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/debian_atlas_32bit_requirements.txt
@@ -0,0 +1,10 @@
+# DO NOT EDIT: this file is generated from the specification found in the
+# following script to centralize the configuration for CI builds:
+# build_tools/update_environments_and_lock_files.py
+cython==3.0.10 # min
+joblib==1.2.0 # min
+threadpoolctl==3.1.0
+pytest==7.1.2 # min
+pytest-cov==2.9.0 # min
+ninja
+meson-python
diff --git a/auto_building_tools/build_tools/azure/get_commit_message.py b/auto_building_tools/build_tools/azure/get_commit_message.py
new file mode 100644
index 0000000..0b1246b
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/get_commit_message.py
@@ -0,0 +1,65 @@
+import argparse
+import os
+import subprocess
+
+
+def get_commit_message():
+ """Retrieve the commit message."""
+ build_source_version_message = os.environ["BUILD_SOURCEVERSIONMESSAGE"]
+
+ if os.environ["BUILD_REASON"] == "PullRequest":
+ # By default pull requests use refs/pull/PULL_ID/merge as the source branch,
+ # whose head commit has a "Merge ID into ID" message. The message of the
+ # latest PR commit is therefore taken from the first ID, i.e. the
+ # second-to-last commit on that branch.
+ commit_id = build_source_version_message.split()[1]
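+ # e.g. (illustrative) "Merge abc1234 into 0def567" gives commit_id "abc1234"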
+ git_cmd = ["git", "log", commit_id, "-1", "--pretty=%B"]
+ commit_message = subprocess.run(
+ git_cmd, capture_output=True, text=True
+ ).stdout.strip()
+ else:
+ commit_message = build_source_version_message
+
+ # Sanitize the commit message to avoid introducing a vulnerability: a PR
+ # submitter could include the "##vso" special marker in their commit
+ # message to attempt to obfuscate the injection of arbitrary commands in
+ # the Azure pipeline.
+ #
+ # This can be a problem if the PR reviewers do not pay close enough
+ # attention to the full commit message prior to clicking the merge button
+ # and as a result make the injected code run in a protected branch with
+ # elevated access to CI secrets. On a protected branch, Azure
+ # already sanitizes `BUILD_SOURCEVERSIONMESSAGE`, but the message
+ # will still be sanitized here out of precaution.
+ commit_message = commit_message.replace("##vso", "..vso")
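+ # e.g. (illustrative) "Fix bug ##vso[task.setvariable variable=x]y" becomes
+ # "Fix bug ..vso[task.setvariable variable=x]y", which Azure no longer
+ # interprets as a logging command.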
+
+ return commit_message
+
+
+def parsed_args():
+ parser = argparse.ArgumentParser(
+ description=(
+ "Show commit message that triggered the build in Azure DevOps pipeline"
+ )
+ )
+ parser.add_argument(
+ "--only-show-message",
+ action="store_true",
+ default=False,
+ help=(
+ "Only print commit message. Useful for direct use in scripts rather than"
+ " setting output variable of the Azure job"
+ ),
+ )
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parsed_args()
+ commit_message = get_commit_message()
+
+ if args.only_show_message:
+ print(commit_message)
+ else:
+ # set the environment variable to be propagated to other steps
+ print(f"##vso[task.setvariable variable=message;isOutput=true]{commit_message}")
+ print(f"commit message: {commit_message}") # helps debugging
diff --git a/auto_building_tools/build_tools/azure/get_selected_tests.py b/auto_building_tools/build_tools/azure/get_selected_tests.py
new file mode 100644
index 0000000..f453748
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/get_selected_tests.py
@@ -0,0 +1,34 @@
+from get_commit_message import get_commit_message
+
+
+def get_selected_tests():
+ """Parse the commit message to check if pytest should run only specific tests.
+
+ If so, selected tests will be run with SKLEARN_TESTS_GLOBAL_RANDOM_SEED="all".
+
+ The commit message must take the form:
+ [all random seeds]
+ <test_name_1>
+ <test_name_2>
+ ...
+ """
+ commit_message = get_commit_message()
+
+ if "[all random seeds]" in commit_message:
+ selected_tests = commit_message.split("[all random seeds]")[1].strip()
+ selected_tests = selected_tests.replace("\n", " or ")
+ else:
+ selected_tests = ""
+
+ return selected_tests
+
+
+if __name__ == "__main__":
+ # set the environment variable to be propagated to other steps
+ selected_tests = get_selected_tests()
+
+ if selected_tests:
+ print(f"##vso[task.setvariable variable=SELECTED_TESTS]'{selected_tests}'")
+ print(f"selected tests: {selected_tests}") # helps debugging
+ else:
+ print("no selected tests")
diff --git a/auto_building_tools/build_tools/azure/install.sh b/auto_building_tools/build_tools/azure/install.sh
new file mode 100644
index 0000000..398de49
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/install.sh
@@ -0,0 +1,154 @@
+#!/bin/bash
+
+set -e
+set -x
+
+# defines the get_dep and show_installed_libraries functions
+source build_tools/shared.sh
+
+UNAMESTR=`uname`
+CCACHE_LINKS_DIR="/tmp/ccache"
+
+setup_ccache() {
+ CCACHE_BIN=`which ccache || echo ""`
+ if [[ "${CCACHE_BIN}" == "" ]]; then
+ echo "ccache not found, skipping..."
+ elif [[ -d "${CCACHE_LINKS_DIR}" ]]; then
+ echo "ccache already configured, skipping..."
+ else
+ echo "Setting up ccache with CCACHE_DIR=${CCACHE_DIR}"
+ mkdir ${CCACHE_LINKS_DIR}
+ which ccache
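+ # Create compiler-named symlinks pointing at ccache and prepend them to PATH
+ # so that compiler invocations are transparently cached.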
+ for name in gcc g++ cc c++ clang clang++ i686-linux-gnu-gcc i686-linux-gnu-c++ x86_64-linux-gnu-gcc x86_64-linux-gnu-c++ x86_64-apple-darwin13.4.0-clang x86_64-apple-darwin13.4.0-clang++; do
+ ln -s ${CCACHE_BIN} "${CCACHE_LINKS_DIR}/${name}"
+ done
+ export PATH="${CCACHE_LINKS_DIR}:${PATH}"
+ ccache -M 256M
+ fi
+}
+
+pre_python_environment_install() {
+ if [[ "$DISTRIB" == "ubuntu" ]]; then
+ sudo apt-get update
+ sudo apt-get install python3-scipy python3-matplotlib \
+ libatlas3-base libatlas-base-dev python3-virtualenv ccache
+
+ elif [[ "$DISTRIB" == "debian-32" ]]; then
+ apt-get update
+ apt-get install -y python3-dev python3-numpy python3-scipy \
+ python3-matplotlib libatlas3-base libatlas-base-dev \
+ python3-virtualenv python3-pandas ccache git
+
+ # TODO for now we use CPython 3.13 from Ubuntu deadsnakes PPA. When CPython
+ # 3.13 is released (scheduled October 2024) we can use something more
+ # similar to other conda+pip based builds
+ elif [[ "$DISTRIB" == "pip-free-threaded" ]]; then
+ sudo apt-get -yq update
+ sudo apt-get install -yq ccache
+ sudo apt-get install -yq software-properties-common
+ sudo add-apt-repository --yes ppa:deadsnakes/nightly
+ sudo apt-get update -yq
+ sudo apt-get install -yq --no-install-recommends python3.13-dev python3.13-venv python3.13-nogil
+ fi
+}
+
+check_packages_dev_version() {
+ for package in $@; do
+ package_version=$(python -c "import $package; print($package.__version__)")
+ if [[ $package_version =~ ^[.0-9]+$ ]]; then
+ echo "$package is not a development version: $package_version"
+ exit 1
+ fi
+ done
+}
+
+python_environment_install_and_activate() {
+ if [[ "$DISTRIB" == "conda"* ]]; then
+ create_conda_environment_from_lock_file $VIRTUALENV $LOCK_FILE
+ source activate $VIRTUALENV
+
+ elif [[ "$DISTRIB" == "ubuntu" || "$DISTRIB" == "debian-32" ]]; then
+ python3 -m virtualenv --system-site-packages --python=python3 $VIRTUALENV
+ source $VIRTUALENV/bin/activate
+ pip install -r "${LOCK_FILE}"
+
+ elif [[ "$DISTRIB" == "pip-free-threaded" ]]; then
+ python3.13t -m venv $VIRTUALENV
+ source $VIRTUALENV/bin/activate
+ pip install -r "${LOCK_FILE}"
+ # TODO: pip>=24.1 is needed to find free-threaded wheels. This may be
+ # removed once the underlying Ubuntu image ships pip>=24.1.
+ pip install 'pip>=24.1'
+ # TODO: when there are CPython 3.13 free-threaded wheels for numpy,
+ # scipy and Cython, move them to
+ # build_tools/azure/cpython_free_threaded_requirements.txt. For now we
+ # install them from scientific-python-nightly-wheels.
+ dev_anaconda_url=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple
+ dev_packages="numpy scipy Cython"
+ pip install --pre --upgrade --timeout=60 --extra-index-url $dev_anaconda_url $dev_packages --only-binary :all:
+ fi
+
+ if [[ "$DISTRIB" == "conda-pip-scipy-dev" ]]; then
+ echo "Installing development dependency wheels"
+ dev_anaconda_url=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple
+ dev_packages="numpy scipy pandas Cython"
+ pip install --pre --upgrade --timeout=60 --extra-index-url $dev_anaconda_url $dev_packages --only-binary :all:
+
+ check_packages_dev_version $dev_packages
+
+ echo "Installing joblib from latest sources"
+ pip install https://github.com/joblib/joblib/archive/master.zip
+ echo "Installing pillow from latest sources"
+ pip install https://github.com/python-pillow/Pillow/archive/main.zip
+ fi
+}
+
+scikit_learn_install() {
+ setup_ccache
+ show_installed_libraries
+
+ if [[ "$UNAMESTR" == "Darwin" && "$SKLEARN_TEST_NO_OPENMP" == "true" ]]; then
+ # Without OpenMP, we use the system clang. Here we use /usr/bin/ar
+ # instead because llvm-ar errors out.
+ export AR=/usr/bin/ar
+ # Make sure omp.h is not present in the conda environment, so that an
+ # unprotected "cimport openmp" makes this build fail. At the time of
+ # writing (2023-01-13), on OSX, blas (mkl or openblas) brings in OpenMP,
+ # so the omp.h header ends up inside the conda environment.
+ find $CONDA_PREFIX -name omp.h -delete -print
+ fi
+
+ if [[ "$UNAMESTR" == "Linux" ]]; then
+ # FIXME: temporary fix to link against system libraries on linux
+ # https://github.com/scikit-learn/scikit-learn/issues/20640
+ export LDFLAGS="$LDFLAGS -Wl,--sysroot=/"
+ fi
+
+ if [[ "$PIP_BUILD_ISOLATION" == "true" ]]; then
+ # Check that pip can automatically build scikit-learn with the build
+ # dependencies specified in pyproject.toml using an isolated build
+ # environment:
+ pip install --verbose .
+ else
+ if [[ "$UNAMESTR" == "MINGW64"* ]]; then
+ # Needed on Windows CI to compile with the Visual Studio compiler;
+ # otherwise Meson detects a MINGW64 platform and uses the MINGW64
+ # toolchain.
+ ADDITIONAL_PIP_OPTIONS='-Csetup-args=--vsenv'
+ fi
+ # Use the pre-installed build dependencies and build directly in the
+ # current environment.
+ pip install --verbose --no-build-isolation --editable . $ADDITIONAL_PIP_OPTIONS
+ fi
+
+ ccache -s || echo "ccache not installed, skipping ccache statistics"
+}
+
+main() {
+ pre_python_environment_install
+ python_environment_install_and_activate
+ scikit_learn_install
+}
+
+main
diff --git a/auto_building_tools/build_tools/azure/install_pyodide.sh b/auto_building_tools/build_tools/azure/install_pyodide.sh
new file mode 100644
index 0000000..58d0348
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/install_pyodide.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+set -e
+
+git clone https://github.com/emscripten-core/emsdk.git
+cd emsdk
+./emsdk install $EMSCRIPTEN_VERSION
+./emsdk activate $EMSCRIPTEN_VERSION
+source emsdk_env.sh
+cd -
+
+pip install pyodide-build==$PYODIDE_VERSION pyodide-cli
+
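+# Build the package in the current checkout for the WebAssembly/Emscripten
+# target using the emsdk toolchain activated above; the wheel ends up in dist/.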
+pyodide build
+
+ls -ltrh dist
+
+# The Pyodide js library is needed by build_tools/azure/test_script_pyodide.sh
+# to run tests inside Pyodide
+npm install pyodide@$PYODIDE_VERSION
diff --git a/auto_building_tools/build_tools/azure/posix-all-parallel.yml b/auto_building_tools/build_tools/azure/posix-all-parallel.yml
new file mode 100644
index 0000000..45d2b45
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/posix-all-parallel.yml
@@ -0,0 +1,50 @@
+# This configuration allows a job based on `posix.yml` to have two modes:
+#
+# 1. When `[azure parallel]` *is not* in the commit message, then this job will
+# run first. If this job succeeds, then all dependent jobs can run.
+# 2. When `[azure parallel]` *is* in the commit message, then this job will
+# run with name `{{ parameters.name }}_Parallel` along with all other jobs.
+#
+# To enable this template, all dependent jobs should check whether this job
+# succeeded or was skipped by using:
+# dependsOn: in(dependencies[{{ parameters.name }}]['result'], 'Succeeded', 'Skipped')
+
+parameters:
+ name: ''
+ vmImage: ''
+ matrix: []
+ dependsOn: []
+ condition: ''
+ commitMessage: ''
+
+jobs:
+
+# When [azure parallel] *is not* in the commit message, this job will run
+# first.
+- template: posix.yml
+ parameters:
+ name: ${{ parameters.name }}
+ vmImage: ${{ parameters.vmImage }}
+ matrix: ${{ parameters.matrix }}
+ dependsOn: ${{ parameters.dependsOn }}
+ condition: |
+ and(
+ ${{ parameters.condition }},
+ not(contains(${{ parameters.commitMessage }}, '[azure parallel]'))
+ )
+
+# When [azure parallel] *is* in the commit message, this job and dependent
+# jobs will run in parallel. Implementation-wise, the job above is skipped and
+# this job, named ${{ parameters.name }}_Parallel, will run in parallel with
+# the other jobs.
+- template: posix.yml
+ parameters:
+ name: ${{ parameters.name }}_Parallel
+ vmImage: ${{ parameters.vmImage }}
+ matrix: ${{ parameters.matrix }}
+ dependsOn: ${{ parameters.dependsOn }}
+ condition: |
+ and(
+ ${{ parameters.condition }},
+ contains(${{ parameters.commitMessage }}, '[azure parallel]')
+ )
diff --git a/auto_building_tools/build_tools/azure/posix-docker.yml b/auto_building_tools/build_tools/azure/posix-docker.yml
new file mode 100644
index 0000000..b00ca66
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/posix-docker.yml
@@ -0,0 +1,133 @@
+parameters:
+ name: ''
+ vmImage: ''
+ matrix: []
+ dependsOn: []
+ condition: ne(variables['Build.Reason'], 'Schedule')
+
+jobs:
+- job: ${{ parameters.name }}
+ dependsOn: ${{ parameters.dependsOn }}
+ condition: ${{ parameters.condition }}
+ timeoutInMinutes: 120
+ pool:
+ vmImage: ${{ parameters.vmImage }}
+ variables:
+ VIRTUALENV: 'testvenv'
+ TEST_DIR: '$(Agent.WorkFolder)/tmp_folder'
+ JUNITXML: 'test-data.xml'
+ SKLEARN_SKIP_NETWORK_TESTS: '1'
+ PYTEST_XDIST_VERSION: 'latest'
+ COVERAGE: 'false'
+ # Set in azure-pipelines.yml
+ DISTRIB: ''
+ DOCKER_CONTAINER: ''
+ CREATE_ISSUE_ON_TRACKER: 'true'
+ CCACHE_DIR: $(Pipeline.Workspace)/ccache
+ CCACHE_COMPRESS: '1'
+ strategy:
+ matrix:
+ ${{ insert }}: ${{ parameters.matrix }}
+
+ steps:
+ - task: UsePythonVersion@0
+ inputs:
+ versionSpec: '3.9'
+ addToPath: false
+ name: pyTools
+ displayName: Select python version to run CI python scripts
+ - bash: $(pyTools.pythonLocation)/bin/python build_tools/azure/get_selected_tests.py
+ displayName: Check selected tests for all random seeds
+ condition: eq(variables['Build.Reason'], 'PullRequest')
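+ # Restore/save the ccache directory across builds: the restore key falls back
+ # to the latest cache for the same job name, while the full key (which
+ # includes the build number) ensures an updated cache is uploaded at the end
+ # of the build.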
+ - task: Cache@2
+ inputs:
+ key: '"ccache-v1" | "$(Agent.JobName)" | "$(Build.BuildNumber)"'
+ restoreKeys: |
+ "ccache-v1" | "$(Agent.JobName)"
+ path: $(CCACHE_DIR)
+ displayName: ccache
+ continueOnError: true
+ - script: >
+ mkdir -p $CCACHE_DIR
+ # The container is detached and sleeping, allowing later steps to run
+ # commands in the container. TEST_DIR is mapped so that the host can access
+ # the JUNITXML file.
+ - script: >
+ docker container run --rm
+ --volume $TEST_DIR:/temp_dir
+ --volume $BUILD_REPOSITORY_LOCALPATH:/repo_localpath
+ --volume $PWD:/io
+ --volume $CCACHE_DIR:/ccache
+ -w /io
+ --detach
+ --name skcontainer
+ -e BUILD_SOURCESDIRECTORY=/io
+ -e TEST_DIR=/temp_dir
+ -e CCACHE_DIR=/ccache
+ -e BUILD_REPOSITORY_LOCALPATH=/repo_localpath
+ -e COVERAGE
+ -e DISTRIB
+ -e LOCK_FILE
+ -e JUNITXML
+ -e VIRTUALENV
+ -e PYTEST_XDIST_VERSION
+ -e SKLEARN_SKIP_NETWORK_TESTS
+ -e SELECTED_TESTS
+ -e CCACHE_COMPRESS
+ -e BUILD_SOURCEVERSIONMESSAGE
+ -e BUILD_REASON
+ $DOCKER_CONTAINER
+ sleep 1000000
+ displayName: 'Start container'
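+ # All subsequent install/test steps run inside the detached container via
+ # `docker exec`, using the environment variables forwarded with -e above.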
+ - script: >
+ docker exec skcontainer ./build_tools/azure/install.sh
+ displayName: 'Install'
+ - script: >
+ docker exec skcontainer ./build_tools/azure/test_script.sh
+ displayName: 'Test Library'
+ - script: >
+ docker exec skcontainer ./build_tools/azure/combine_coverage_reports.sh
+ condition: and(succeeded(), eq(variables['COVERAGE'], 'true'),
+ eq(variables['SELECTED_TESTS'], ''))
+ displayName: 'Combine coverage'
+ - task: PublishTestResults@2
+ inputs:
+ testResultsFiles: '$(TEST_DIR)/$(JUNITXML)'
+ testRunTitle: ${{ format('{0}-$(Agent.JobName)', parameters.name) }}
+ displayName: 'Publish Test Results'
+ condition: succeededOrFailed()
+ - script: >
+ docker container stop skcontainer
+ displayName: 'Stop container'
+ condition: always()
+ - bash: |
+ set -ex
+ if [[ $(BOT_GITHUB_TOKEN) == "" ]]; then
+ echo "GitHub Token is not set. Issue tracker will not be updated."
+ exit
+ fi
+
+ LINK_TO_RUN="https://dev.azure.com/$BUILD_REPOSITORY_NAME/_build/results?buildId=$BUILD_BUILDID&view=logs&j=$SYSTEM_JOBID"
+ CI_NAME="$SYSTEM_JOBIDENTIFIER"
+ ISSUE_REPO="$BUILD_REPOSITORY_NAME"
+
+ $(pyTools.pythonLocation)/bin/pip install defusedxml PyGithub
+ $(pyTools.pythonLocation)/bin/python maint_tools/update_tracking_issue.py \
+ $(BOT_GITHUB_TOKEN) \
+ $CI_NAME \
+ $ISSUE_REPO \
+ $LINK_TO_RUN \
+ --junit-file $JUNIT_FILE \
+ --auto-close false
+ displayName: 'Update issue tracker'
+ env:
+ JUNIT_FILE: $(TEST_DIR)/$(JUNITXML)
+ condition: and(succeededOrFailed(), eq(variables['CREATE_ISSUE_ON_TRACKER'], 'true'),
+ eq(variables['Build.Reason'], 'Schedule'))
+ - bash: bash build_tools/azure/upload_codecov.sh
+ condition: and(succeeded(), eq(variables['COVERAGE'], 'true'),
+ eq(variables['SELECTED_TESTS'], ''))
+ displayName: 'Upload To Codecov'
+ retryCountOnTaskFailure: 5
+ env:
+ CODECOV_TOKEN: $(CODECOV_TOKEN)
diff --git a/auto_building_tools/build_tools/azure/posix.yml b/auto_building_tools/build_tools/azure/posix.yml
new file mode 100644
index 0000000..35e5165
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/posix.yml
@@ -0,0 +1,111 @@
+parameters:
+ name: ''
+ vmImage: ''
+ matrix: []
+ dependsOn: []
+ condition: ''
+
+jobs:
+- job: ${{ parameters.name }}
+ dependsOn: ${{ parameters.dependsOn }}
+ condition: ${{ parameters.condition }}
+ timeoutInMinutes: 120
+ pool:
+ vmImage: ${{ parameters.vmImage }}
+ variables:
+ TEST_DIR: '$(Agent.WorkFolder)/tmp_folder'
+ VIRTUALENV: 'testvenv'
+ JUNITXML: 'test-data.xml'
+ SKLEARN_SKIP_NETWORK_TESTS: '1'
+ CCACHE_DIR: $(Pipeline.Workspace)/ccache
+ CCACHE_COMPRESS: '1'
+ PYTEST_XDIST_VERSION: 'latest'
+ COVERAGE: 'true'
+ CREATE_ISSUE_ON_TRACKER: 'true'
+ strategy:
+ matrix:
+ ${{ insert }}: ${{ parameters.matrix }}
+
+ steps:
+ - task: UsePythonVersion@0
+ inputs:
+ versionSpec: '3.9'
+ addToPath: false
+ name: pyTools
+ displayName: Select python version to run CI python scripts
+ - bash: $(pyTools.pythonLocation)/bin/python build_tools/azure/get_selected_tests.py
+ displayName: Check selected tests for all random seeds
+ condition: eq(variables['Build.Reason'], 'PullRequest')
+ - bash: echo "##vso[task.prependpath]$CONDA/bin"
+ displayName: Add conda to PATH
+ condition: startsWith(variables['DISTRIB'], 'conda')
+ - bash: sudo chown -R $USER $CONDA
+ displayName: Take ownership of conda installation
+ condition: startsWith(variables['DISTRIB'], 'conda')
+ - task: Cache@2
+ inputs:
+ key: '"ccache-v1" | "$(Agent.JobName)" | "$(Build.BuildNumber)"'
+ restoreKeys: |
+ "ccache-v1" | "$(Agent.JobName)"
+ path: $(CCACHE_DIR)
+ displayName: ccache
+ continueOnError: true
+ - script: |
+ build_tools/azure/install.sh
+ displayName: 'Install'
+ - script: |
+ build_tools/azure/test_script.sh
+ displayName: 'Test Library'
+ - script: |
+ build_tools/azure/test_docs.sh
+ displayName: 'Test Docs'
+ condition: and(succeeded(), eq(variables['SELECTED_TESTS'], ''))
+ - script: |
+ build_tools/azure/test_pytest_soft_dependency.sh
+ displayName: 'Test Soft Dependency'
+ condition: and(succeeded(),
+ eq(variables['CHECK_PYTEST_SOFT_DEPENDENCY'], 'true'),
+ eq(variables['SELECTED_TESTS'], ''))
+ - script: |
+ build_tools/azure/combine_coverage_reports.sh
+ condition: and(succeeded(), eq(variables['COVERAGE'], 'true'),
+ eq(variables['SELECTED_TESTS'], ''))
+ displayName: 'Combine coverage'
+ - task: PublishTestResults@2
+ inputs:
+ testResultsFiles: '$(TEST_DIR)/$(JUNITXML)'
+ testRunTitle: ${{ format('{0}-$(Agent.JobName)', parameters.name) }}
+ displayName: 'Publish Test Results'
+ condition: succeededOrFailed()
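+ # On scheduled (nightly) builds, report failures to the GitHub tracking issue;
+ # the step below is skipped on regular CI runs and exits early when the
+ # BOT_GITHUB_TOKEN secret is not available.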
+ - bash: |
+ set -ex
+ if [[ $(BOT_GITHUB_TOKEN) == "" ]]; then
+ echo "GitHub Token is not set. Issue tracker will not be updated."
+ exit
+ fi
+
+ LINK_TO_RUN="https://dev.azure.com/$BUILD_REPOSITORY_NAME/_build/results?buildId=$BUILD_BUILDID&view=logs&j=$SYSTEM_JOBID"
+ CI_NAME="$SYSTEM_JOBIDENTIFIER"
+ ISSUE_REPO="$BUILD_REPOSITORY_NAME"
+
+ $(pyTools.pythonLocation)/bin/pip install defusedxml PyGithub
+ $(pyTools.pythonLocation)/bin/python maint_tools/update_tracking_issue.py \
+ $(BOT_GITHUB_TOKEN) \
+ $CI_NAME \
+ $ISSUE_REPO \
+ $LINK_TO_RUN \
+ --junit-file $JUNIT_FILE \
+ --auto-close false
+ displayName: 'Update issue tracker'
+ env:
+ JUNIT_FILE: $(TEST_DIR)/$(JUNITXML)
+ condition: and(succeededOrFailed(), eq(variables['CREATE_ISSUE_ON_TRACKER'], 'true'),
+ eq(variables['Build.Reason'], 'Schedule'))
+ - script: |
+ build_tools/azure/upload_codecov.sh
+ condition: and(succeeded(), eq(variables['COVERAGE'], 'true'),
+ eq(variables['SELECTED_TESTS'], ''))
+ displayName: 'Upload To Codecov'
+ retryCountOnTaskFailure: 5
+ env:
+ CODECOV_TOKEN: $(CODECOV_TOKEN)
diff --git a/auto_building_tools/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock b/auto_building_tools/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock
new file mode 100644
index 0000000..c72eceb
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock
@@ -0,0 +1,235 @@
+# Generated by conda-lock.
+# platform: linux-64
+# input_hash: 93ee312868bc5df4bdc9b2ef07f938f6a5922dfe2375c4963a7c63d19c5d87f6
+@EXPLICIT
+https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81
+https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.7.4-hbcca054_0.conda#23ab7665c5f63cfb9f1f6195256daac6
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_2.conda#cbbe59391138ea5ad3658c76912e147f
+https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-hf3520f5_7.conda#b80f2f396ca2c28b8c14c437a4ed1e74
+https://conda.anaconda.org/conda-forge/linux-64/mkl-include-2023.2.0-h84fe81f_50496.conda#7af9fd0b2d7219f4a4200a34561340f6
+https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.12-5_cp312.conda#0424ae29b104430108f5218a66db7260
+https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8
+https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29
+https://conda.anaconda.org/conda-forge/linux-64/libglvnd-1.7.0-ha4b6fd6_0.conda#e46b5ae31282252e0525713e34ffbe2b
+https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab
+https://conda.anaconda.org/conda-forge/linux-64/libegl-1.7.0-ha4b6fd6_0.conda#35e52d19547cb3265a09c49de146a5ae
+https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793
+https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.1.0-h77fa898_0.conda#ca0fad6a41ddaef54a153b78eccb5037
+https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.12-h4ab18f5_0.conda#7ed427f0871fd41cb1d9c17727c17589
+https://conda.anaconda.org/conda-forge/linux-64/aws-c-common-0.9.27-h4bc722e_0.conda#817119e8a21a45d325f65d0d54710052
+https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda#62ee74e96c5ebb0af99386de58cf9553
+https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.33.1-heb4867d_0.conda#0d3c60291342c0c025db231353376dfb
+https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3
+https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hd590300_1.conda#aec6c91c7371c26392a06708a73c70e5
+https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.21-h4bc722e_0.conda#36ce76665bf67f5aac36be7a0d21b7f3
+https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055
+https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d
+https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3
+https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-14.1.0-hc5f4f2c_0.conda#6456c2620c990cd8dde2428a27ba0bc5
+https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e
+https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8
+https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7
+https://conda.anaconda.org/conda-forge/linux-64/libpciaccess-0.18-hd590300_0.conda#48f4330bfcd959c3cfb704d424903c82
+https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.1.0-hc0a3c3a_0.conda#1cb187a157136398ddbaae90713e2498
+https://conda.anaconda.org/conda-forge/linux-64/libutf8proc-2.8.0-h166bdaf_0.tar.bz2#ede4266dc02e875fe1ea77b25dd43747
+https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b
+https://conda.anaconda.org/conda-forge/linux-64/libuv-1.48.0-hd590300_0.conda#7e8b914b1062dd4386e3de4d82a3ead6
+https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559
+https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc
+https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-h4ab18f5_1.conda#57d7dc60e9325e3de37ff8dffd18e814
+https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h59595ed_0.conda#fcea371545eda051b6deafb24889fc69
+https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.1-hb9d3cd8_3.conda#6c566a46baae794daf34775d41eb180a
+https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036
+https://conda.anaconda.org/conda-forge/linux-64/xorg-inputproto-2.3.2-h7f98852_1002.tar.bz2#bcd1b3396ec6960cbc1d2855a9e60b2b
+https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908
+https://conda.anaconda.org/conda-forge/linux-64/xorg-recordproto-1.14.2-h7f98852_1002.tar.bz2#2f835e6c386e73c6faaddfe9eda67e98
+https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h7f98852_1002.tar.bz2#06feff3d2634e3097ce2fe681474b534
+https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87
+https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15
+https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0
+https://conda.anaconda.org/conda-forge/linux-64/aws-c-cal-0.7.3-h8dac057_2.conda#577509458a061ddc9b089602ac6e1e98
+https://conda.anaconda.org/conda-forge/linux-64/aws-c-compression-0.2.19-haa50ccc_0.conda#00c38c49d0befb632f686cf67ee8c9f5
+https://conda.anaconda.org/conda-forge/linux-64/aws-c-sdkutils-0.1.19-h038f3f9_2.conda#6861cab6cddb5d713cb3db95c838d30f
+https://conda.anaconda.org/conda-forge/linux-64/aws-checksums-0.1.18-h038f3f9_10.conda#4bf9c8fcf2bb6793c55e5c5758b9b011
+https://conda.anaconda.org/conda-forge/linux-64/double-conversion-3.3.0-h59595ed_0.conda#c2f83a5ddadadcdb08fe05863295ee97
+https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61
+https://conda.anaconda.org/conda-forge/linux-64/gflags-2.2.2-he1b5a44_1004.tar.bz2#cddaf2c63ea4a5901cf09524c490ecdc
+https://conda.anaconda.org/conda-forge/linux-64/gmp-6.3.0-hac33072_2.conda#c94a5994ef49749880a8139cf9afcbe1
+https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c
+https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda#8b189310083baabfb622af68fd9d3ae3
+https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f
+https://conda.anaconda.org/conda-forge/linux-64/libabseil-20240116.2-cxx17_he02047a_1.conda#c48fc56ec03229f294176923c3265c05
+https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hd590300_1.conda#f07002e225d7a60a694d42a7bf5ff53f
+https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hd590300_1.conda#5fc11c6020d421960607d821310fcd4d
+https://conda.anaconda.org/conda-forge/linux-64/libcrc32c-1.1.2-h9c3ff4c_0.tar.bz2#c965a5aa0d5c1c37ffc62dff36e28400
+https://conda.anaconda.org/conda-forge/linux-64/libdrm-2.4.122-h4ab18f5_0.conda#bbfc4dbe5e97b385ef088f354d65e563
+https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1
+https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d
+https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-14.1.0-h69a702a_0.conda#f4ca84fbd6d06b0a052fb2d5b96dde41
+https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.58.0-h47da74e_1.conda#700ac6ea6d53d5510591c4344d5c989a
+https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae
+https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.46.0-hde9e2c9_0.conda#18aa975d2094c34aef978060ae7da7d8
+https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda#1f5a58e686b13bcfde88b93f547d23fe
+https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.16-hb9d3cd8_1.conda#3601598f0db0470af28985e3e7ad0158
+https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0
+https://conda.anaconda.org/conda-forge/linux-64/mysql-common-9.0.1-h70512c7_0.conda#c567b6fa201bc424e84f1e70f7a36095
+https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-h297d8ca_0.conda#3aa1c7e292afeff25a0091ddd7c69b72
+https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.44-hba22ea6_2.conda#df359c09c41cd186fffb93a2d87aa6f5
+https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123
+https://conda.anaconda.org/conda-forge/linux-64/qhull-2020.2-h434a139_5.conda#353823361b1d27eb3960efb076dfcaf6
+https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4
+https://conda.anaconda.org/conda-forge/linux-64/s2n-1.5.1-h3400bea_0.conda#bf136eb7f8e15fcf8915c1a04b0aec6f
+https://conda.anaconda.org/conda-forge/linux-64/sleef-3.6.1-h1b44611_3.conda#af4dbe128af0840dcaeb4d40eb27ab73
+https://conda.anaconda.org/conda-forge/linux-64/snappy-1.2.1-ha2e4443_0.conda#6b7dcc7349efd123d493d2dbe85a045f
+https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc
+https://conda.anaconda.org/conda-forge/linux-64/wayland-1.23.1-h3e06ad9_0.conda#0a732427643ae5e0486a727927791da1
+https://conda.anaconda.org/conda-forge/linux-64/xorg-fixesproto-5.0-h7f98852_1002.tar.bz2#65ad6e1eb4aed2b0611855aff05e04f6
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6
+https://conda.anaconda.org/conda-forge/linux-64/zlib-1.3.1-h4ab18f5_1.conda#9653f1bf3766164d0e65fa723cabbc54
+https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45
+https://conda.anaconda.org/conda-forge/linux-64/aws-c-io-0.14.18-hf5b9b93_6.conda#8fd43c2719355d795f5c7cef11f08ec0
+https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hd590300_1.conda#39f910d205726805a958da408ca194ba
+https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb
+https://conda.anaconda.org/conda-forge/linux-64/glog-0.7.1-hbabe93e_0.conda#ff862eebdfeb2fd048ae9dc92510baca
+https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368
+https://conda.anaconda.org/conda-forge/linux-64/libglib-2.80.3-h315aac3_2.conda#b0143a3e98136a680b728fdf9b42a258
+https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a
+https://conda.anaconda.org/conda-forge/linux-64/libprotobuf-4.25.3-h08a7969_0.conda#6945825cebd2aeb16af4c69d97c32c13
+https://conda.anaconda.org/conda-forge/linux-64/libre2-11-2023.09.01-h5a48ba9_2.conda#41c69fba59d495e8cf5ffda48a607e35
+https://conda.anaconda.org/conda-forge/linux-64/libthrift-0.20.0-hb90f79a_0.conda#9ce07c1750e779c9d4cc968047f78b0d
+https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h46a8edc_4.conda#a7e3a62981350e232e0e7345b5aea580
+https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.12.7-he7c6b58_4.conda#08a9265c637230c37cb1be4a6cad4536
+https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.8-hf5423f3_1.conda#8782406a10201b67bd6476ca70cf92a8
+https://conda.anaconda.org/conda-forge/linux-64/mpfr-4.2.1-h38ae2d0_2.conda#168e18a2bba4f8520e6c5e38982f5847
+https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-9.0.1-ha479ceb_0.conda#6fd406aef37faad86bd7f37a94fb6f8a
+https://conda.anaconda.org/conda-forge/linux-64/python-3.12.5-h2ad013b_0_cpython.conda#9c56c4df45f6571b13111d8df2448692
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.1-hb711507_2.conda#8637c3e5821654d0edf97e2b0404b443
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.1-hb711507_0.conda#ad748ccca349aec3e91743e08b5e2b50
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.10-hb711507_0.conda#0e0cbe0564d03a99afd5fd7b362feecd
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.2-hb711507_0.conda#608e0ef8256b81d04456e8d211eee3e8
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-hb711507_1.conda#4a6d410296d7e39f00bacdee7df046e9
+https://conda.anaconda.org/conda-forge/noarch/array-api-compat-1.8-pyhd8ed1ab_0.conda#1178a75b8f6f260ac4b4436979754278
+https://conda.anaconda.org/conda-forge/linux-64/aws-c-event-stream-0.4.3-h570d160_0.conda#1c121949295cac86798be8f369768d7c
+https://conda.anaconda.org/conda-forge/linux-64/aws-c-http-0.8.7-h1c59cda_5.conda#0fc88e5bb5f095bdf4129282411c50c9
+https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hd590300_1.conda#f27a24d46e3ea7b70a1f98e50c62508f
+https://conda.anaconda.org/conda-forge/linux-64/ccache-4.10.1-h065aff2_0.conda#d6b48c138e0c8170a6fe9c136e063540
+https://conda.anaconda.org/conda-forge/noarch/certifi-2024.7.4-pyhd8ed1ab_0.conda#24e7fd6ca65997938fff9e5ab6f653e4
+https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99
+https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441
+https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.11-py312hca68cad_0.conda#f824c60def49466ad5b9aed4eaa23c28
+https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d
+https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_0.conda#d02ae936e42063ca46af6cdad2dbd1e0
+https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46
+https://conda.anaconda.org/conda-forge/noarch/filelock-3.15.4-pyhd8ed1ab_0.conda#0e7e4388e9d5283e22b35a9443bdbcc9
+https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d
+https://conda.anaconda.org/conda-forge/noarch/fsspec-2024.6.1-pyhff2d567_0.conda#996bf792cdb8c0ac38ff54b9fde56841
+https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5
+https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py312h8572e83_1.conda#c1e71f2bc05d8e8e033aefac2c490d05
+https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5
+https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3
+https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.9.1-hdb1bdb2_0.conda#7da1d242ca3591e174a3c7d82230d3c0
+https://conda.anaconda.org/conda-forge/linux-64/libglx-1.7.0-ha4b6fd6_0.conda#b470cc353c5b852e0d830e8d5d23e952
+https://conda.anaconda.org/conda-forge/linux-64/libhwloc-2.11.1-default_hecaa2ac_1000.conda#f54aeebefb5c5ff84eca4fb05ca8aa3a
+https://conda.anaconda.org/conda-forge/linux-64/libllvm18-18.1.8-h8b73ec9_2.conda#2e25bb2f53e4a48873a936f8ef53e592
+https://conda.anaconda.org/conda-forge/linux-64/libpq-16.4-h482b261_0.conda#0f74c5581623f860e7baca042d9d7139
+https://conda.anaconda.org/conda-forge/linux-64/libxslt-1.1.39-h76b75d6_0.conda#e71f31f8cfb0a91439f2086fc8aa0461
+https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.5-py312h98912ed_0.conda#6ff0b9582da2d4a74a1f9ae1f9ce2af6
+https://conda.anaconda.org/conda-forge/linux-64/mpc-1.3.1-hfe3b2da_0.conda#289c71e83dc0daa7d4c81f04180778ca
+https://conda.anaconda.org/conda-forge/noarch/mpmath-1.3.0-pyhd8ed1ab_0.conda#dbf6e2d89137da32fa6670f3bffc024e
+https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19
+https://conda.anaconda.org/conda-forge/noarch/networkx-3.3-pyhd8ed1ab_1.conda#d335fd5704b46f4efb89a6774e81aef0
+https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138
+https://conda.anaconda.org/conda-forge/linux-64/orc-2.0.2-h669347b_0.conda#1e6c10f7d749a490612404efeb179eb8
+https://conda.anaconda.org/conda-forge/noarch/packaging-24.1-pyhd8ed1ab_0.conda#cbe1bb1f21567018ce595d9c2be0f0db
+https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf
+https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.4-pyhd8ed1ab_0.conda#4d91352a50949d049cf9714c8563d433
+https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d
+https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad
+https://conda.anaconda.org/conda-forge/linux-64/re2-2023.09.01-h7f4b329_2.conda#8f70e36268dea8eb666ef14c29bd3cda
+https://conda.anaconda.org/conda-forge/noarch/setuptools-72.2.0-pyhd8ed1ab_0.conda#1462aa8b243aad09ef5d0841c745eb89
+https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2
+https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd
+https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095
+https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96
+https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4.1-py312h9a8786e_0.conda#fd9c83fde763b494f07acee1404c280e
+https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.12.2-pyha770c72_0.conda#ebe6952715e1d5eb567eeebf25250fa7
+https://conda.anaconda.org/conda-forge/noarch/wheel-0.44.0-pyhd8ed1ab_0.conda#d44e3b085abcaef02983c6305b84b584
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-hb711507_2.conda#a0901183f08b6c7107aab109733a3c91
+https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.42-h4ab18f5_0.conda#b193af204da1bfb8c13882d131a14bd2
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxfixes-5.0.3-h7f98852_1004.tar.bz2#e9a21aa4d5e3e5f1aed71e8cefd46b6a
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530
+https://conda.anaconda.org/conda-forge/linux-64/aws-c-auth-0.7.25-h15d0e8c_6.conda#e0d292ba383ac09598c664186c0144cd
+https://conda.anaconda.org/conda-forge/linux-64/aws-c-mqtt-0.10.4-hc14a930_17.conda#f0e3f95a9f545d5975e8573f80cdb5fa
+https://conda.anaconda.org/conda-forge/linux-64/azure-core-cpp-1.13.0-h935415a_0.conda#debd1677c2fea41eb2233a260f48a298
+https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-hebfffa5_3.conda#fceaedf1cdbcb02df9699a0d9b005292
+https://conda.anaconda.org/conda-forge/linux-64/coverage-7.6.1-py312h41a817b_0.conda#4006636c39312dc42f8504475be3800f
+https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.53.1-py312h41a817b_0.conda#da921c56bcf69a8b97216ecec0cc4015
+https://conda.anaconda.org/conda-forge/linux-64/gmpy2-2.1.5-py312h1d5cde6_1.conda#27abd7664bc87595bd98b6306b8393d1
+https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.4-pyhd8ed1ab_0.conda#7b86ecb7d3557821c649b3c31e3eb9f2
+https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f
+https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp18.1-18.1.8-default_hf981a13_2.conda#b0f8c590aa86d9bee5987082f7f15bdf
+https://conda.anaconda.org/conda-forge/linux-64/libclang13-18.1.8-default_h9def88c_2.conda#ba2d12adbea9de311297f2b577f4bb86
+https://conda.anaconda.org/conda-forge/linux-64/libgl-1.7.0-ha4b6fd6_0.conda#3deca8c25851196c28d1c84dd4ae9149
+https://conda.anaconda.org/conda-forge/linux-64/libgrpc-1.62.2-h15f2491_0.conda#8dabe607748cb3d7002ad73cd06f1325
+https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.7.0-h2c5496b_1.conda#e2eaefa4de2b7237af7c907b8bbc760a
+https://conda.anaconda.org/conda-forge/noarch/meson-1.5.1-pyhd8ed1ab_1.conda#979087ee59bea1355f991a3b738af64e
+https://conda.anaconda.org/conda-forge/linux-64/pillow-10.4.0-py312h287a98d_0.conda#59ea71eed98aee0bebbbdd3b118167c7
+https://conda.anaconda.org/conda-forge/noarch/pip-24.2-pyhd8ed1ab_0.conda#6721aef6bfe5937abe70181545dd2c51
+https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47
+https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.2-pyhd8ed1ab_0.conda#e010a224b90f1f623a917c35addbb924
+https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c
+https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.12.0-h434a139_3.conda#c667c11d1e488a38220ede8a34441bff
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-cursor-0.1.4-h4ab18f5_2.conda#79e46d4a6ccecb7ee1912042958a8758
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxi-1.7.10-h4bc722e_1.conda#749baebe7e2ff3360630e069175e528b
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxxf86vm-1.1.5-h4bc722e_1.conda#0c90ad87101001080484b91bd9d2cdef
+https://conda.anaconda.org/conda-forge/linux-64/aws-c-s3-0.6.4-h558cea2_8.conda#af03e7b03e929396fb80ffac1a676c89
+https://conda.anaconda.org/conda-forge/linux-64/azure-identity-cpp-1.8.0-hd126650_2.conda#36df3cf05459de5d0a41c77c4329634b
+https://conda.anaconda.org/conda-forge/linux-64/azure-storage-common-cpp-12.7.0-h10ac4d7_1.conda#ab6d507ad16dbe2157920451d662e4a1
+https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-9.0.0-hda332d3_1.conda#76b32dcf243444aea9c6b804bcfa40b8
+https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-2.28.0-h26d7fe4_0.conda#2c51703b4d775f8943c08a361788131b
+https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547
+https://conda.anaconda.org/conda-forge/linux-64/mkl-2023.2.0-h84fe81f_50496.conda#81d4a1a57d618adf0152db973d93b2ad
+https://conda.anaconda.org/conda-forge/noarch/pytest-cov-5.0.0-pyhd8ed1ab_0.conda#c54c0107057d67ddf077751339ec2c63
+https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.6.1-pyhd8ed1ab_0.conda#b39568655c127a9c4a44d178ac99b6d0
+https://conda.anaconda.org/conda-forge/noarch/sympy-1.13.2-pypyh2585a3b_103.conda#7327125b427c98b81564f164c4a75d4c
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxtst-1.2.5-h4bc722e_0.conda#185159d666308204eca00295599b0a5c
+https://conda.anaconda.org/conda-forge/linux-64/aws-crt-cpp-0.27.6-h1966bd9_0.conda#30b59fa809914489974fe275a0fb7c7e
+https://conda.anaconda.org/conda-forge/linux-64/azure-storage-blobs-cpp-12.12.0-hd2e3451_0.conda#61f1c193452f0daa582f39634627ea33
+https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-20_linux64_mkl.conda#8bf521f6007b0b0eb91515a1165b5d85
+https://conda.anaconda.org/conda-forge/linux-64/libgoogle-cloud-storage-2.28.0-ha262f82_0.conda#9e7960f0b9ab3895ef73d92477c47dae
+https://conda.anaconda.org/conda-forge/linux-64/mkl-devel-2023.2.0-ha770c72_50496.conda#3b4c50e31ff098b18a450e4f5f860adf
+https://conda.anaconda.org/conda-forge/linux-64/qt6-main-6.7.2-hb12f9c5_5.conda#8c662388c2418f293266f5e7f50df7d7
+https://conda.anaconda.org/conda-forge/linux-64/aws-sdk-cpp-1.11.379-hf9693f6_5.conda#18a4bf7e8a65006b26ca53700fcf2362
+https://conda.anaconda.org/conda-forge/linux-64/azure-storage-files-datalake-cpp-12.11.0-h325d260_1.conda#11d926d1f4a75a1b03d1c053ca20424b
+https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-20_linux64_mkl.conda#7a2972758a03adc92d856072c71c9170
+https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-20_linux64_mkl.conda#4db0cd03efcdab535f6f066aca4cddbb
+https://conda.anaconda.org/conda-forge/linux-64/pyside6-6.7.2-py312hb5137db_2.conda#99889d0c042cc4dfb9a758619d487282
+https://conda.anaconda.org/conda-forge/linux-64/libarrow-17.0.0-h9d17f36_9_cpu.conda#bfae79329f50d5bd960e1ac289625096
+https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-20_linux64_mkl.conda#3dea5e9be386b963d7f4368966e238b3
+https://conda.anaconda.org/conda-forge/linux-64/libtorch-2.4.0-cpu_mkl_h0bb0d08_100.conda#6e7c6f99657f8da2610b45b3c98abf1c
+https://conda.anaconda.org/conda-forge/linux-64/numpy-2.1.0-py312h1103770_0.conda#9709027e8a51a3476db65a3c0cf806c2
+https://conda.anaconda.org/conda-forge/noarch/array-api-strict-2.0.1-pyhd8ed1ab_0.conda#2c00d29e0e276f2d32dfe20e698b8eeb
+https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-20_linux64_mkl.conda#079d50df2338a3d47522d7e84c3dfbf6
+https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.1-py312h8572e83_0.conda#12c6a831ef734f0b2dd4caff514cbb7f
+https://conda.anaconda.org/conda-forge/linux-64/libarrow-acero-17.0.0-h5888daf_9_cpu.conda#cace9fe91c532c67ff828937a633fb1c
+https://conda.anaconda.org/conda-forge/linux-64/libparquet-17.0.0-h39682fd_9_cpu.conda#0efe4b18e72f519298f57ff75a9adf07
+https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.2-py312h1d6d2e6_1.conda#ae00b61f3000d2284d1f2584d4dfafa8
+https://conda.anaconda.org/conda-forge/linux-64/polars-1.5.0-py312h7285250_0.conda#4756b2dda06b6c7bedb376677ffbca06
+https://conda.anaconda.org/conda-forge/linux-64/pyarrow-core-17.0.0-py312h9cafe31_1_cpu.conda#235827b9c93850cafdd2d5ab359893f9
+https://conda.anaconda.org/conda-forge/linux-64/pytorch-2.4.0-cpu_mkl_py312h3b258cc_100.conda#9090b9de6ee59871a619219dfc814ecd
+https://conda.anaconda.org/conda-forge/linux-64/scipy-1.14.1-py312h7d485d2_0.conda#7418a22e73008356d9aba99d93dfeeee
+https://conda.anaconda.org/conda-forge/linux-64/blas-2.120-mkl.conda#9444330235a4828878cbe9c897ba0aa3
+https://conda.anaconda.org/conda-forge/linux-64/libarrow-dataset-17.0.0-h5888daf_9_cpu.conda#4df21168065a9e21372a442783dfd547
+https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.9.2-py312h854627b_0.conda#a57b0ae7c0aac603839a4e83a3e997d6
+https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.2.1-py312h389efb2_0.conda#37038b979f8be9666d90a852879368fb
+https://conda.anaconda.org/conda-forge/linux-64/pytorch-cpu-2.4.0-cpu_mkl_py312h5e78504_100.conda#11757e62e5b4511d9fbd73706272ae0d
+https://conda.anaconda.org/conda-forge/linux-64/libarrow-substrait-17.0.0-hf54134d_9_cpu.conda#239401053cfbf93d24795b12dec89c56
+https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.9.2-py312h7900ff3_0.conda#44c07eccf73f549b8ea5c9aacfe3ad0a
+https://conda.anaconda.org/conda-forge/linux-64/pyarrow-17.0.0-py312h9cebb41_1.conda#7e8ddbd44fb99ba376b09c4e9e61e509
diff --git a/auto_building_tools/build_tools/azure/pylatest_conda_forge_mkl_linux-64_environment.yml b/auto_building_tools/build_tools/azure/pylatest_conda_forge_mkl_linux-64_environment.yml
new file mode 100644
index 0000000..12fbd17
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/pylatest_conda_forge_mkl_linux-64_environment.yml
@@ -0,0 +1,31 @@
+# DO NOT EDIT: this file is generated from the specification found in the
+# following script to centralize the configuration for CI builds:
+# build_tools/update_environments_and_lock_files.py
+channels:
+ - conda-forge
+dependencies:
+ - python
+ - numpy
+ - blas[build=mkl]
+ - scipy
+ - cython
+ - joblib
+ - threadpoolctl
+ - matplotlib
+ - pandas
+ - pyamg
+ - pytest
+ - pytest-xdist
+ - pillow
+ - pip
+ - ninja
+ - meson-python
+ - pytest-cov
+ - coverage
+ - ccache
+ - pytorch
+ - pytorch-cpu
+ - polars
+ - pyarrow
+ - array-api-compat
+ - array-api-strict
diff --git a/auto_building_tools/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock b/auto_building_tools/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock
new file mode 100644
index 0000000..97533f7
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock
@@ -0,0 +1,130 @@
+# Generated by conda-lock.
+# platform: osx-64
+# input_hash: e7c2bc2b07721ef735f30d3b1cf0b2a780b5bf5c138d9d18ad174611bfbd32bf
+@EXPLICIT
+https://conda.anaconda.org/conda-forge/osx-64/ca-certificates-2024.7.4-h8857fd0_0.conda#7df874a4b05b2d2b82826190170eaa0f
+https://conda.anaconda.org/conda-forge/osx-64/libbrotlicommon-1.1.0-h0dc2134_1.conda#9e6c31441c9aa24e41ace40d6151aab6
+https://conda.anaconda.org/conda-forge/osx-64/libexpat-2.6.2-h73e2aa4_0.conda#3d1d51c8f716d97c864d12f7af329526
+https://conda.anaconda.org/conda-forge/osx-64/libffi-3.4.2-h0d85af4_5.tar.bz2#ccb34fb14960ad8b125962d3d79b31a9
+https://conda.anaconda.org/conda-forge/noarch/libgfortran-devel_osx-64-12.3.0-h0b6f5ec_3.conda#39eeea5454333825d72202fae2d5e0b8
+https://conda.anaconda.org/conda-forge/osx-64/libiconv-1.17-hd75f5a5_2.conda#6c3628d047e151efba7cf08c5e54d1ca
+https://conda.anaconda.org/conda-forge/osx-64/libjpeg-turbo-3.0.0-h0dc2134_1.conda#72507f8e3961bc968af17435060b6dd6
+https://conda.anaconda.org/conda-forge/osx-64/libwebp-base-1.4.0-h10d778d_0.conda#b2c0047ea73819d992484faacbbe1c24
+https://conda.anaconda.org/conda-forge/osx-64/mkl-include-2023.2.0-h6bab518_50500.conda#835abb8ded5e26f23ea6996259c7972e
+https://conda.anaconda.org/conda-forge/osx-64/ncurses-6.5-h5846eda_0.conda#02a888433d165c99bf09784a7b14d900
+https://conda.anaconda.org/conda-forge/osx-64/pthread-stubs-0.4-hc929b4f_1001.tar.bz2#addd19059de62181cd11ae8f4ef26084
+https://conda.anaconda.org/conda-forge/osx-64/python_abi-3.12-5_cp312.conda#c34dd4920e0addf7cfcc725809f25d8e
+https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8
+https://conda.anaconda.org/conda-forge/osx-64/xorg-libxau-1.0.11-h0dc2134_0.conda#9566b4c29274125b0266d0177b5eb97b
+https://conda.anaconda.org/conda-forge/osx-64/xorg-libxdmcp-1.1.3-h35c211d_0.tar.bz2#86ac76d6bf1cbb9621943eb3bd9ae36e
+https://conda.anaconda.org/conda-forge/osx-64/xz-5.2.6-h775f41a_0.tar.bz2#a72f9d4ea13d55d745ff1ed594747f10
+https://conda.anaconda.org/conda-forge/osx-64/bzip2-1.0.8-hfdf4475_7.conda#7ed4301d437b59045be7e051a0308211
+https://conda.anaconda.org/conda-forge/osx-64/icu-75.1-h120a0e1_0.conda#d68d48a3060eb5abdc1cdc8e2a3a5966
+https://conda.anaconda.org/conda-forge/osx-64/libbrotlidec-1.1.0-h0dc2134_1.conda#9ee0bab91b2ca579e10353738be36063
+https://conda.anaconda.org/conda-forge/osx-64/libbrotlienc-1.1.0-h0dc2134_1.conda#8a421fe09c6187f0eb5e2338a8a8be6d
+https://conda.anaconda.org/conda-forge/osx-64/libcxx-18.1.8-heced48a_4.conda#7e13da1296840905452340fca10a625b
+https://conda.anaconda.org/conda-forge/osx-64/libdeflate-1.21-hfdf4475_0.conda#88409b23a5585c15d52de0073f3c9c61
+https://conda.anaconda.org/conda-forge/osx-64/libxcb-1.16-h00291cd_1.conda#c989b18131ab79fdc67e42473d53d545
+https://conda.anaconda.org/conda-forge/osx-64/libzlib-1.3.1-h87427d6_1.conda#b7575b5aa92108dcc9aaab0f05f2dbce
+https://conda.anaconda.org/conda-forge/osx-64/llvm-openmp-18.1.8-h15ab845_1.conda#ad0afa524866cc1c08b436865d0ae484
+https://conda.anaconda.org/conda-forge/osx-64/openssl-3.3.1-hd23fc13_3.conda#ad8c8c9556a701817bd1aca75a302e96
+https://conda.anaconda.org/conda-forge/osx-64/readline-8.2-h9e318b2_1.conda#f17f77f2acf4d344734bda76829ce14e
+https://conda.anaconda.org/conda-forge/osx-64/brotli-bin-1.1.0-h0dc2134_1.conda#ece565c215adcc47fc1db4e651ee094b
+https://conda.anaconda.org/conda-forge/osx-64/gmp-6.3.0-hf036a51_2.conda#427101d13f19c4974552a4e5b072eef1
+https://conda.anaconda.org/conda-forge/osx-64/isl-0.26-imath32_h2e86a7b_101.conda#d06222822a9144918333346f145b68c6
+https://conda.anaconda.org/conda-forge/osx-64/lerc-4.0.0-hb486fe8_0.tar.bz2#f9d6a4c82889d5ecedec1d90eb673c55
+https://conda.anaconda.org/conda-forge/osx-64/libgfortran5-13.2.0-h2873a65_3.conda#e4fb4d23ec2870ff3c40d10afe305aec
+https://conda.anaconda.org/conda-forge/osx-64/libpng-1.6.43-h92b6c6a_0.conda#65dcddb15965c9de2c0365cb14910532
+https://conda.anaconda.org/conda-forge/osx-64/libsqlite-3.46.0-h1b8f9f3_0.conda#5dadfbc1a567fe6e475df4ce3148be09
+https://conda.anaconda.org/conda-forge/osx-64/libxml2-2.12.7-heaf3512_4.conda#ea1be6ecfe814da889e882c8b6ead79d
+https://conda.anaconda.org/conda-forge/osx-64/ninja-1.12.1-h3c5361c_0.conda#a0ebabd021c8191aeb82793fe43cfdcb
+https://conda.anaconda.org/conda-forge/osx-64/qhull-2020.2-h3c5361c_5.conda#dd1ea9ff27c93db7c01a7b7656bd4ad4
+https://conda.anaconda.org/conda-forge/osx-64/sigtool-0.1.3-h88f4db0_0.tar.bz2#fbfb84b9de9a6939cb165c02c69b1865
+https://conda.anaconda.org/conda-forge/osx-64/tapi-1100.0.11-h9ce4665_0.tar.bz2#f9ff42ccf809a21ba6f8607f8de36108
+https://conda.anaconda.org/conda-forge/osx-64/tk-8.6.13-h1abcd95_1.conda#bf830ba5afc507c6232d4ef0fb1a882d
+https://conda.anaconda.org/conda-forge/osx-64/zlib-1.3.1-h87427d6_1.conda#3ac9ef8975965f9698dbedd2a4cc5894
+https://conda.anaconda.org/conda-forge/osx-64/zstd-1.5.6-h915ae27_0.conda#4cb2cd56f039b129bb0e491c1164167e
+https://conda.anaconda.org/conda-forge/osx-64/brotli-1.1.0-h0dc2134_1.conda#9272dd3b19c4e8212f8542cefd5c3d67
+https://conda.anaconda.org/conda-forge/osx-64/freetype-2.12.1-h60636b9_2.conda#25152fce119320c980e5470e64834b50
+https://conda.anaconda.org/conda-forge/osx-64/libgfortran-5.0.0-13_2_0_h97931a8_3.conda#0b6e23a012ee7a9a5f6b244f5a92c1d5
+https://conda.anaconda.org/conda-forge/osx-64/libhwloc-2.11.1-default_h456cccd_1000.conda#a14989f6bbea46e6ec4521a403f63ff2
+https://conda.anaconda.org/conda-forge/osx-64/libllvm16-16.0.6-hbedff68_3.conda#8fd56c0adc07a37f93bd44aa61a97c90
+https://conda.anaconda.org/conda-forge/osx-64/libtiff-4.6.0-h603087a_4.conda#362626a2aacb976ec89c91b99bfab30b
+https://conda.anaconda.org/conda-forge/osx-64/mpfr-4.2.1-hc80595b_2.conda#fc9b5179824146b67ad5a0b053b253ff
+https://conda.anaconda.org/conda-forge/osx-64/python-3.12.5-h37a9e06_0_cpython.conda#517cb4e16466f8d96ba2a72897d14c48
+https://conda.anaconda.org/conda-forge/noarch/certifi-2024.7.4-pyhd8ed1ab_0.conda#24e7fd6ca65997938fff9e5ab6f653e4
+https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99
+https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441
+https://conda.anaconda.org/conda-forge/osx-64/cython-3.0.11-py312h28f332c_0.conda#4ab9ee64007a1e4a79b38e4de31aa2fc
+https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_0.conda#d02ae936e42063ca46af6cdad2dbd1e0
+https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46
+https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5
+https://conda.anaconda.org/conda-forge/osx-64/kiwisolver-1.4.5-py312h49ebfd2_1.conda#21f174a5cfb5964069c374171a979157
+https://conda.anaconda.org/conda-forge/osx-64/lcms2-2.16-ha2f27b4_0.conda#1442db8f03517834843666c422238c9b
+https://conda.anaconda.org/conda-forge/osx-64/ld64_osx-64-711-h04ffbf3_3.conda#944906b249119ecff9139acf7d1f2574
+https://conda.anaconda.org/conda-forge/osx-64/libclang-cpp16-16.0.6-default_h0c94c6a_11.conda#c1f63f67baf9f11d5d96f65be03aa437
+https://conda.anaconda.org/conda-forge/osx-64/libhiredis-1.0.2-h2beb688_0.tar.bz2#524282b2c46c9dedf051b3bc2ae05494
+https://conda.anaconda.org/conda-forge/osx-64/llvm-tools-16.0.6-hbedff68_3.conda#e9356b0807462e8f84c1384a8da539a5
+https://conda.anaconda.org/conda-forge/osx-64/mpc-1.3.1-h81bd1dd_0.conda#c752c0eb6c250919559172c011e5f65b
+https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19
+https://conda.anaconda.org/conda-forge/osx-64/openjpeg-2.5.2-h7310d3a_0.conda#05a14cc9d725dd74995927968d6547e3
+https://conda.anaconda.org/conda-forge/noarch/packaging-24.1-pyhd8ed1ab_0.conda#cbe1bb1f21567018ce595d9c2be0f0db
+https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf
+https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.4-pyhd8ed1ab_0.conda#4d91352a50949d049cf9714c8563d433
+https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d
+https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad
+https://conda.anaconda.org/conda-forge/noarch/setuptools-72.2.0-pyhd8ed1ab_0.conda#1462aa8b243aad09ef5d0841c745eb89
+https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2
+https://conda.anaconda.org/conda-forge/osx-64/tbb-2021.12.0-h3c5361c_3.conda#b0cada4d5a4cf1cbf8598b86231b5958
+https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd
+https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095
+https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96
+https://conda.anaconda.org/conda-forge/osx-64/tornado-6.4.1-py312hbd25219_0.conda#5a40db69b327c71511248f8186965bd3
+https://conda.anaconda.org/conda-forge/noarch/wheel-0.44.0-pyhd8ed1ab_0.conda#d44e3b085abcaef02983c6305b84b584
+https://conda.anaconda.org/conda-forge/osx-64/ccache-4.10.1-hee5fd93_0.conda#09898bb80e196695cea9e07402cff215
+https://conda.anaconda.org/conda-forge/osx-64/cctools_osx-64-986-h303a5ab_3.conda#3fc65d01538ca026f662f2b13dacc35e
+https://conda.anaconda.org/conda-forge/osx-64/clang-16-16.0.6-default_h0c94c6a_11.conda#ba17dcbffdd79fc381eba4125d83fa03
+https://conda.anaconda.org/conda-forge/osx-64/coverage-7.6.1-py312hbd25219_0.conda#17ee8821c9b8cd8f7ae752f4a57fbf56
+https://conda.anaconda.org/conda-forge/osx-64/fonttools-4.53.1-py312hbd25219_0.conda#56b85d2b2f034ed31feaaa0b90c37b7f
+https://conda.anaconda.org/conda-forge/osx-64/gfortran_impl_osx-64-12.3.0-hc328e78_3.conda#b3d751dc7073bbfdfa9d863e39b9685d
+https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f
+https://conda.anaconda.org/conda-forge/osx-64/ld64-711-ha02d983_3.conda#c28c578f9791983a2a9dd480d120d562
+https://conda.anaconda.org/conda-forge/noarch/meson-1.5.1-pyhd8ed1ab_1.conda#979087ee59bea1355f991a3b738af64e
+https://conda.anaconda.org/conda-forge/osx-64/mkl-2023.2.0-h54c2260_50500.conda#0a342ccdc79e4fcd359245ac51941e7b
+https://conda.anaconda.org/conda-forge/osx-64/pillow-10.4.0-py312hbd70edc_0.conda#8d55e92fa6380ac8c245f253b096fefd
+https://conda.anaconda.org/conda-forge/noarch/pip-24.2-pyhd8ed1ab_0.conda#6721aef6bfe5937abe70181545dd2c51
+https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47
+https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.2-pyhd8ed1ab_0.conda#e010a224b90f1f623a917c35addbb924
+https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c
+https://conda.anaconda.org/conda-forge/osx-64/cctools-986-h40f6528_3.conda#9dd9cb9edfe3c3437c28e495a3b67517
+https://conda.anaconda.org/conda-forge/osx-64/clang-16.0.6-default_h179603d_11.conda#29c8b527d8b8fac52f5e2cf6abfcdc93
+https://conda.anaconda.org/conda-forge/osx-64/libblas-3.9.0-20_osx64_mkl.conda#160fdc97a51d66d51dc782fb67d35205
+https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547
+https://conda.anaconda.org/conda-forge/osx-64/mkl-devel-2023.2.0-h694c41f_50500.conda#1b4d0235ef253a1e19459351badf4f9f
+https://conda.anaconda.org/conda-forge/noarch/pytest-cov-5.0.0-pyhd8ed1ab_0.conda#c54c0107057d67ddf077751339ec2c63
+https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.6.1-pyhd8ed1ab_0.conda#b39568655c127a9c4a44d178ac99b6d0
+https://conda.anaconda.org/conda-forge/osx-64/clangxx-16.0.6-default_h179603d_11.conda#8c2055146f68eb4c3b0da893a8bed33c
+https://conda.anaconda.org/conda-forge/osx-64/libcblas-3.9.0-20_osx64_mkl.conda#51089a4865eb4aec2bc5c7468bd07f9f
+https://conda.anaconda.org/conda-forge/osx-64/liblapack-3.9.0-20_osx64_mkl.conda#58f08e12ad487fac4a08f90ff0b87aec
+https://conda.anaconda.org/conda-forge/noarch/compiler-rt_osx-64-16.0.6-ha38d28d_2.conda#7a46507edc35c6c8818db0adaf8d787f
+https://conda.anaconda.org/conda-forge/osx-64/liblapacke-3.9.0-20_osx64_mkl.conda#124ae8e384268a8da66f1d64114a1eda
+https://conda.anaconda.org/conda-forge/osx-64/numpy-2.1.0-py312h8813227_0.conda#437bc6e9dcd5612d123a9c99b2988040
+https://conda.anaconda.org/conda-forge/osx-64/blas-devel-3.9.0-20_osx64_mkl.conda#cc3260179093918b801e373c6e888e02
+https://conda.anaconda.org/conda-forge/osx-64/compiler-rt-16.0.6-ha38d28d_2.conda#3b9e8c5c63b8e86234f499490acd85c2
+https://conda.anaconda.org/conda-forge/osx-64/contourpy-1.2.1-py312h9230928_0.conda#079df34ce7c71259cfdd394645370891
+https://conda.anaconda.org/conda-forge/osx-64/pandas-2.2.2-py312h1171441_1.conda#240737937f1f046b0e03ecc11ac4ec98
+https://conda.anaconda.org/conda-forge/osx-64/scipy-1.14.1-py312he82a568_0.conda#dd3c55da62964fcadf27771e1928e67f
+https://conda.anaconda.org/conda-forge/osx-64/blas-2.120-mkl.conda#b041a7677a412f3d925d8208936cb1e2
+https://conda.anaconda.org/conda-forge/osx-64/clang_impl_osx-64-16.0.6-h8787910_19.conda#64155ef139280e8c181dad866dea2980
+https://conda.anaconda.org/conda-forge/osx-64/matplotlib-base-3.9.2-py312h0d5aeb7_0.conda#0c73a08429d20f15fa8b28083ec04cc9
+https://conda.anaconda.org/conda-forge/osx-64/pyamg-5.2.1-py312h44e70fa_0.conda#a7c77239f0135d30cbba0164922aa861
+https://conda.anaconda.org/conda-forge/osx-64/clang_osx-64-16.0.6-hb91bd55_19.conda#760ecbc6f4b6cecbe440b0080626286f
+https://conda.anaconda.org/conda-forge/osx-64/matplotlib-3.9.2-py312hb401068_0.conda#f468fd4f10632ff2500482118a3d4ace
+https://conda.anaconda.org/conda-forge/osx-64/c-compiler-1.7.0-h282daa2_1.conda#d27411cb82bc1b76b9f487da6ae97f1d
+https://conda.anaconda.org/conda-forge/osx-64/clangxx_impl_osx-64-16.0.6-h6d92fbe_19.conda#9ffa16e2bd7eb5b8b1a0d19185710cd3
+https://conda.anaconda.org/conda-forge/osx-64/gfortran_osx-64-12.3.0-h18f7dce_1.conda#436af2384c47aedb94af78a128e174f1
+https://conda.anaconda.org/conda-forge/osx-64/clangxx_osx-64-16.0.6-hb91bd55_19.conda#81d40fad4c14cc7a893f2e274647c7a4
+https://conda.anaconda.org/conda-forge/osx-64/gfortran-12.3.0-h2c809b3_1.conda#c48adbaa8944234b80ef287c37e329b0
+https://conda.anaconda.org/conda-forge/osx-64/cxx-compiler-1.7.0-h7728843_1.conda#e04cb15a20553b973dd068c2dc81d682
+https://conda.anaconda.org/conda-forge/osx-64/fortran-compiler-1.7.0-h6c2ab21_1.conda#48319058089f492d5059e04494b81ed9
+https://conda.anaconda.org/conda-forge/osx-64/compilers-1.7.0-h694c41f_1.conda#875e9b06186a41d55b96b9c1a52f15be
diff --git a/auto_building_tools/build_tools/azure/pylatest_conda_forge_mkl_osx-64_environment.yml b/auto_building_tools/build_tools/azure/pylatest_conda_forge_mkl_osx-64_environment.yml
new file mode 100644
index 0000000..ad177e4
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/pylatest_conda_forge_mkl_osx-64_environment.yml
@@ -0,0 +1,27 @@
+# DO NOT EDIT: this file is generated from the specification found in the
+# following script to centralize the configuration for CI builds:
+# build_tools/update_environments_and_lock_files.py
+channels:
+ - conda-forge
+dependencies:
+ - python
+ - numpy
+ - blas[build=mkl]
+ - scipy
+ - cython
+ - joblib
+ - threadpoolctl
+ - matplotlib
+ - pandas
+ - pyamg
+ - pytest
+ - pytest-xdist
+ - pillow
+ - pip
+ - ninja
+ - meson-python
+ - pytest-cov
+ - coverage
+ - ccache
+ - compilers
+ - llvm-openmp
diff --git a/auto_building_tools/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml b/auto_building_tools/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml
new file mode 100644
index 0000000..4752e10
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml
@@ -0,0 +1,28 @@
+# DO NOT EDIT: this file is generated from the specification found in the
+# following script to centralize the configuration for CI builds:
+# build_tools/update_environments_and_lock_files.py
+channels:
+ - defaults
+dependencies:
+ - python
+ - numpy
+ - blas[build=mkl]
+ - scipy<1.12
+ - joblib
+ - matplotlib
+ - pandas
+ - pyamg
+ - pytest
+ - pytest-xdist
+ - pillow
+ - pip
+ - ninja
+ - pytest-cov
+ - coverage
+ - ccache
+ - pip
+ - pip:
+ - cython
+ - threadpoolctl
+ - meson-python
+ - meson<1.5
diff --git a/auto_building_tools/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock b/auto_building_tools/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock
new file mode 100644
index 0000000..8a7c31b
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock
@@ -0,0 +1,86 @@
+# Generated by conda-lock.
+# platform: osx-64
+# input_hash: 5845cb6c37d0833b4f2f61e276bf4f86f98aa0e19d9c74bfbc8ddcc73d251139
+@EXPLICIT
+https://repo.anaconda.com/pkgs/main/osx-64/blas-1.0-mkl.conda#cb2c87e85ac8e0ceae776d26d4214c8a
+https://repo.anaconda.com/pkgs/main/osx-64/bzip2-1.0.8-h6c40b1e_6.conda#96224786021d0765ce05818fa3c59bdb
+https://repo.anaconda.com/pkgs/main/osx-64/ca-certificates-2024.7.2-hecd8cb5_0.conda#297cfad0c0eac53e5ac75674828eedd9
+https://repo.anaconda.com/pkgs/main/osx-64/jpeg-9e-h46256e1_3.conda#b1d9769eac428e11f5f922531a1da2e0
+https://repo.anaconda.com/pkgs/main/osx-64/libbrotlicommon-1.0.9-h6c40b1e_8.conda#8e86dfa34b08bc664b19e1499e5465b8
+https://repo.anaconda.com/pkgs/main/osx-64/libcxx-14.0.6-h9765a3e_0.conda#387757bb354ae9042370452cd0fb5627
+https://repo.anaconda.com/pkgs/main/osx-64/libdeflate-1.17-hb664fd8_1.conda#b6116b8db33ea6a5b5287dae70d4a913
+https://repo.anaconda.com/pkgs/main/osx-64/libffi-3.4.4-hecd8cb5_1.conda#eb7f09ada4d95f1a26f483f1009d9286
+https://repo.anaconda.com/pkgs/main/osx-64/libwebp-base-1.3.2-h6c40b1e_0.conda#d8fd9f599dd4e012694e69d119016442
+https://repo.anaconda.com/pkgs/main/osx-64/llvm-openmp-14.0.6-h0dcd299_0.conda#b5804d32b87dc61ca94561ade33d5f2d
+https://repo.anaconda.com/pkgs/main/osx-64/ncurses-6.4-hcec6c5f_0.conda#0214d1ee980e217fabc695f1e40662aa
+https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25
+https://repo.anaconda.com/pkgs/main/osx-64/xz-5.4.6-h6c40b1e_1.conda#b40d69768d28133d8be1843def4f82f5
+https://repo.anaconda.com/pkgs/main/osx-64/zlib-1.2.13-h4b97444_1.conda#38e35f7c817fac0973034bfce6706ec2
+https://repo.anaconda.com/pkgs/main/osx-64/ccache-3.7.9-hf120daa_0.conda#a01515a32e721c51d631283f991bc8ea
+https://repo.anaconda.com/pkgs/main/osx-64/expat-2.6.2-hcec6c5f_0.conda#c748234dd7e242784198ab038372cb0c
+https://repo.anaconda.com/pkgs/main/osx-64/intel-openmp-2023.1.0-ha357a0b_43548.conda#ba8a89ffe593eb88e4c01334753c40c3
+https://repo.anaconda.com/pkgs/main/osx-64/lerc-3.0-he9d5cce_0.conda#aec2c3dbef836849c9260f05be04f3db
+https://repo.anaconda.com/pkgs/main/osx-64/libbrotlidec-1.0.9-h6c40b1e_8.conda#6338cd7779e614fc16d835990e627e04
+https://repo.anaconda.com/pkgs/main/osx-64/libbrotlienc-1.0.9-h6c40b1e_8.conda#2af01a7b3fdbed47ebe5c452c34e5c5d
+https://repo.anaconda.com/pkgs/main/osx-64/libgfortran5-11.3.0-h9dfd629_28.conda#1fa1a27ee100b1918c3021dbfa3895a3
+https://repo.anaconda.com/pkgs/main/osx-64/libpng-1.6.39-h6c40b1e_0.conda#a3c824835f53ad27aeb86d2b55e47804
+https://repo.anaconda.com/pkgs/main/osx-64/lz4-c-1.9.4-hcec6c5f_1.conda#aee0efbb45220e1985533dbff48551f8
+https://repo.anaconda.com/pkgs/main/osx-64/ninja-base-1.10.2-haf03e11_5.conda#c857c13129710a61395270656905c4a2
+https://repo.anaconda.com/pkgs/main/osx-64/openssl-3.0.14-h46256e1_0.conda#d722280df65b3308e1b8b1b7777a3305
+https://repo.anaconda.com/pkgs/main/osx-64/readline-8.2-hca72f7f_0.conda#971667436260e523f6f7355fdfa238bf
+https://repo.anaconda.com/pkgs/main/osx-64/tbb-2021.8.0-ha357a0b_0.conda#fb48530a3eea681c11dafb95b3387c0f
+https://repo.anaconda.com/pkgs/main/osx-64/tk-8.6.14-h4d00af3_0.conda#a2c03940c2ae54614301ec82e6a98d75
+https://repo.anaconda.com/pkgs/main/osx-64/brotli-bin-1.0.9-h6c40b1e_8.conda#11053f9c6b8d8a8348d0c33450c23ce9
+https://repo.anaconda.com/pkgs/main/osx-64/freetype-2.12.1-hd8bbffd_0.conda#1f276af321375ee7fe8056843044fa76
+https://repo.anaconda.com/pkgs/main/osx-64/libgfortran-5.0.0-11_3_0_hecd8cb5_28.conda#2eb13b680803f1064e53873ae0aaafb3
+https://repo.anaconda.com/pkgs/main/osx-64/mkl-2023.1.0-h8e150cf_43560.conda#85d0f3431dd5c6ae44f8725fdd3d3e59
+https://repo.anaconda.com/pkgs/main/osx-64/sqlite-3.45.3-h6c40b1e_0.conda#2edf909b937b3aad48322c9cb2e8f1a0
+https://repo.anaconda.com/pkgs/main/osx-64/zstd-1.5.5-hc035e20_2.conda#c033bf68c12f8c71fd916f000f3dc118
+https://repo.anaconda.com/pkgs/main/osx-64/brotli-1.0.9-h6c40b1e_8.conda#10f89677a3898d0113dc354adf643df3
+https://repo.anaconda.com/pkgs/main/osx-64/libtiff-4.5.1-hcec6c5f_0.conda#e127a800ffd9d300ed7d5e1b026944ec
+https://repo.anaconda.com/pkgs/main/osx-64/python-3.12.4-hcd54a6c_1.conda#753d941593548d5e26518c51b3e3b10d
+https://repo.anaconda.com/pkgs/main/osx-64/coverage-7.2.2-py312h6c40b1e_0.conda#b6e4b9fba325047c07f3c9211ae91d1c
+https://repo.anaconda.com/pkgs/main/noarch/cycler-0.11.0-pyhd3eb1b0_0.conda#f5e365d2cdb66d547eb8c3ab93843aab
+https://repo.anaconda.com/pkgs/main/noarch/execnet-1.9.0-pyhd3eb1b0_0.conda#f895937671af67cebb8af617494b3513
+https://repo.anaconda.com/pkgs/main/noarch/iniconfig-1.1.1-pyhd3eb1b0_0.tar.bz2#e40edff2c5708f342cef43c7f280c507
+https://repo.anaconda.com/pkgs/main/osx-64/joblib-1.4.2-py312hecd8cb5_0.conda#8ab03dfa447b4e0bfa0bd3d25930f3b6
+https://repo.anaconda.com/pkgs/main/osx-64/kiwisolver-1.4.4-py312hcec6c5f_0.conda#2ba6561ddd1d05936fe74f5d118ce7dd
+https://repo.anaconda.com/pkgs/main/osx-64/lcms2-2.12-hf1fd2bf_0.conda#697aba7a3308226df7a93ccfeae16ffa
+https://repo.anaconda.com/pkgs/main/osx-64/mkl-service-2.4.0-py312h6c40b1e_1.conda#b1ef860be9043b35c5e8d9388b858514
+https://repo.anaconda.com/pkgs/main/osx-64/ninja-1.10.2-hecd8cb5_5.conda#a0043b325fb08db82477ae433668e684
+https://repo.anaconda.com/pkgs/main/osx-64/openjpeg-2.5.2-hbf2204d_0.conda#8463f11309271a93d615450382761470
+https://repo.anaconda.com/pkgs/main/osx-64/packaging-24.1-py312hecd8cb5_0.conda#6130dafc4d26d55e93ceab460d2a72b5
+https://repo.anaconda.com/pkgs/main/osx-64/pluggy-1.0.0-py312hecd8cb5_1.conda#647fada22f1697691fdee90b52c99bcb
+https://repo.anaconda.com/pkgs/main/osx-64/pyparsing-3.0.9-py312hecd8cb5_0.conda#d85cf2b81c6d9326a57a6418e14db258
+https://repo.anaconda.com/pkgs/main/noarch/python-tzdata-2023.3-pyhd3eb1b0_0.conda#479c037de0186d114b9911158427624e
+https://repo.anaconda.com/pkgs/main/osx-64/pytz-2024.1-py312hecd8cb5_0.conda#2b28ec0e0d07f5c0c701f75200b1e8b6
+https://repo.anaconda.com/pkgs/main/osx-64/setuptools-72.1.0-py312hecd8cb5_0.conda#dff219f3528a6e8ad235c48a29cd6dbe
+https://repo.anaconda.com/pkgs/main/noarch/six-1.16.0-pyhd3eb1b0_1.conda#34586824d411d36af2fa40e799c172d0
+https://repo.anaconda.com/pkgs/main/noarch/toml-0.10.2-pyhd3eb1b0_0.conda#cda05f5f6d8509529d1a2743288d197a
+https://repo.anaconda.com/pkgs/main/osx-64/tornado-6.4.1-py312h46256e1_0.conda#ff2efd781e1b1af38284aeda9d676d42
+https://repo.anaconda.com/pkgs/main/osx-64/unicodedata2-15.1.0-py312h6c40b1e_0.conda#65bd2cb787fc99662d9bb6e6520c5826
+https://repo.anaconda.com/pkgs/main/osx-64/wheel-0.43.0-py312hecd8cb5_0.conda#c0bdd5748b170523232e8ad1d667136c
+https://repo.anaconda.com/pkgs/main/osx-64/fonttools-4.51.0-py312h6c40b1e_0.conda#8f55fa86b73e8a7f4403503f9b7a9959
+https://repo.anaconda.com/pkgs/main/osx-64/numpy-base-1.26.4-py312h6f81483_0.conda#87f73efbf26ab2e2ea7c32481a71bd47
+https://repo.anaconda.com/pkgs/main/osx-64/pillow-10.4.0-py312h46256e1_0.conda#486a21e17faf0611e454c0e7faf0bcbc
+https://repo.anaconda.com/pkgs/main/osx-64/pip-24.2-py312hecd8cb5_0.conda#35119ef238299ccf29b25889fd466139
+https://repo.anaconda.com/pkgs/main/osx-64/pytest-7.4.4-py312hecd8cb5_0.conda#d4dda983900b045cd27ae836cad670de
+https://repo.anaconda.com/pkgs/main/osx-64/python-dateutil-2.9.0post0-py312hecd8cb5_2.conda#1047dde28f78127dd9f6121e882926dd
+https://repo.anaconda.com/pkgs/main/osx-64/pytest-cov-4.1.0-py312hecd8cb5_1.conda#a33a24eb20359f464938e75b2f57e23a
+https://repo.anaconda.com/pkgs/main/osx-64/pytest-xdist-3.5.0-py312hecd8cb5_0.conda#d1ecfb3691cceecb1f16bcfdf0b67bb5
+https://repo.anaconda.com/pkgs/main/osx-64/bottleneck-1.3.7-py312h32608ca_0.conda#f96a01eba5ea542cf9c7cc8d77447627
+https://repo.anaconda.com/pkgs/main/osx-64/contourpy-1.2.0-py312ha357a0b_0.conda#57d384ad07152375b40a6293f79e3f0c
+https://repo.anaconda.com/pkgs/main/osx-64/matplotlib-3.8.4-py312hecd8cb5_0.conda#6886c230c2ec2f47621b5cca4c7d493a
+https://repo.anaconda.com/pkgs/main/osx-64/matplotlib-base-3.8.4-py312h7f12edd_0.conda#a4eee14a4dcaa89b306ca33d2d479fa4
+https://repo.anaconda.com/pkgs/main/osx-64/mkl_fft-1.3.8-py312h6c40b1e_0.conda#d59d01b940493f2b6a84aac922fd0c76
+https://repo.anaconda.com/pkgs/main/osx-64/mkl_random-1.2.4-py312ha357a0b_0.conda#c1ea9c8eee79a5af3399f3c31be0e9c6
+https://repo.anaconda.com/pkgs/main/osx-64/numpy-1.26.4-py312hac873b0_0.conda#3150bac1e382156f82a153229e1ebd06
+https://repo.anaconda.com/pkgs/main/osx-64/numexpr-2.8.7-py312hac873b0_0.conda#6303ba071636ef57fddf69eb6f440ec1
+https://repo.anaconda.com/pkgs/main/osx-64/scipy-1.11.4-py312h81688c2_0.conda#7d57b4c21a9261f97fa511e0940c5d93
+https://repo.anaconda.com/pkgs/main/osx-64/pandas-2.2.2-py312h77d3abe_0.conda#463868c40d8ff98bec263f1fd57a8d97
+https://repo.anaconda.com/pkgs/main/osx-64/pyamg-4.2.3-py312h44cbcf4_0.conda#3bdc7be74087b3a5a83c520a74e1e8eb
+# pip cython @ https://files.pythonhosted.org/packages/58/50/fbb23239efe2183e4eaf76689270d6f5b3bbcf9be9ad1eb97cc34349e6fc/Cython-3.0.11-cp312-cp312-macosx_10_9_x86_64.whl#sha256=11996c40c32abf843ba652a6d53cb15944c88d91f91fc4e6f0028f5df8a8f8a1
+# pip meson @ https://files.pythonhosted.org/packages/1d/8d/b83d525907c00c5e22a9cae832bbd958310518ae6ad1dc7e01b69abbb117/meson-1.4.2.tar.gz#sha256=ea2546a26f4a171a741c1fd036f22c9c804d6198e3259f1df588e01f842dd69f
+# pip threadpoolctl @ https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl#sha256=56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467
+# pip pyproject-metadata @ https://files.pythonhosted.org/packages/aa/5f/bb5970d3d04173b46c9037109f7f05fc8904ff5be073ee49bb6ff00301bc/pyproject_metadata-0.8.0-py3-none-any.whl#sha256=ad858d448e1d3a1fb408ac5bac9ea7743e7a8bbb472f2693aaa334d2db42f526
+# pip meson-python @ https://files.pythonhosted.org/packages/91/c0/104cb6244c83fe6bc3886f144cc433db0c0c78efac5dc00e409a5a08c87d/meson_python-0.16.0-py3-none-any.whl#sha256=842dc9f5dc29e55fc769ff1b6fe328412fe6c870220fc321060a1d2d395e69e8
diff --git a/auto_building_tools/build_tools/azure/pylatest_pip_openblas_pandas_environment.yml b/auto_building_tools/build_tools/azure/pylatest_pip_openblas_pandas_environment.yml
new file mode 100644
index 0000000..2d9ca39
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/pylatest_pip_openblas_pandas_environment.yml
@@ -0,0 +1,31 @@
+# DO NOT EDIT: this file is generated from the specification found in the
+# following script to centralize the configuration for CI builds:
+# build_tools/update_environments_and_lock_files.py
+channels:
+ - defaults
+dependencies:
+ - python=3.11
+ - ccache
+ - pip
+ - pip:
+ - numpy
+ - scipy
+ - cython
+ - joblib
+ - threadpoolctl
+ - matplotlib
+ - pandas
+ - pyamg
+ - pytest
+ - pytest-xdist
+ - pillow
+ - ninja
+ - meson-python
+ - pytest-cov
+ - coverage
+ - sphinx
+ - numpydoc
+ - lightgbm
+ - scikit-image
+ - array-api-compat
+ - array-api-strict
diff --git a/auto_building_tools/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock b/auto_building_tools/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock
new file mode 100644
index 0000000..f1afa48
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock
@@ -0,0 +1,87 @@
+# Generated by conda-lock.
+# platform: linux-64
+# input_hash: 893e5f90e655d6606d6b7e308c1099125012b25c3444b5a4240d44b184531e00
+@EXPLICIT
+https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9
+https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2024.7.2-h06a4308_0.conda#5c6799c01e9be4c7ba294f6530b2d562
+https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b
+https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25
+https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b372c0eea9b60732fdae4b817a63c8cd
+https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda#57623d10a70e09e1d048c2b2b6f4e2dd
+https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda#71d281e9c2192cb3fa425655a8defb85
+https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda#a87728dabf3151fb9cfa990bd2eb0464
+https://repo.anaconda.com/pkgs/main/linux-64/bzip2-1.0.8-h5eee18b_6.conda#f21a3ff51c1b271977f53ce956a69297
+https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_1.conda#70646cc713f0c43926cfdcfe9b695fe0
+https://repo.anaconda.com/pkgs/main/linux-64/libuuid-1.41.5-h5eee18b_0.conda#4a6a2354414c9080327274aa514e5299
+https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda#5558eec6e2191741a92f832ea826251c
+https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.14-h5eee18b_0.conda#37b6dad6aa49000a4230a9f0cad172f6
+https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.6-h5eee18b_1.conda#1562802f843297ee776a50b9329597ed
+https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_1.conda#92e42d8310108b0a440fb2e60b2b2a25
+https://repo.anaconda.com/pkgs/main/linux-64/ccache-3.7.9-hfe4627d_0.conda#bef6fc681c273bb7bd0c67d1a591365e
+https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda#be42180685cce6e6b0329201d9f48efb
+https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.14-h39e8969_0.conda#78dbc5e3c69143ebc037fc5d5b22e597
+https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.45.3-h5eee18b_0.conda#acf93d6aceb74d6110e20b44cc45939e
+https://repo.anaconda.com/pkgs/main/linux-64/python-3.11.9-h955ad1f_0.conda#5668a8845dd35bbbc9663c8f217a2ab8
+https://repo.anaconda.com/pkgs/main/linux-64/setuptools-72.1.0-py311h06a4308_0.conda#58a35dba367429761d046074dcfa8b19
+https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.43.0-py311h06a4308_0.conda#ec915b5ff89bdbcea7ef943d9e296967
+https://repo.anaconda.com/pkgs/main/linux-64/pip-24.2-py311h06a4308_0.conda#eff3ec695130b6912d64997edbc0db16
+# pip alabaster @ https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl#sha256=fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b
+# pip array-api-compat @ https://files.pythonhosted.org/packages/0f/22/8228be1d3c6d4ffcf05cd89872ce65c1317b2af98d34b9d89b247d8d49cb/array_api_compat-1.8-py3-none-any.whl#sha256=140204454086264d37263bc4afe1182b428353e94e9edcc38d17b009863c982d
+# pip babel @ https://files.pythonhosted.org/packages/ed/20/bc79bc575ba2e2a7f70e8a1155618bb1301eaa5132a8271373a6903f73f8/babel-2.16.0-py3-none-any.whl#sha256=368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b
+# pip certifi @ https://files.pythonhosted.org/packages/1c/d5/c84e1a17bf61d4df64ca866a1c9a913874b4e9bdc131ec689a0ad013fb36/certifi-2024.7.4-py3-none-any.whl#sha256=c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90
+# pip charset-normalizer @ https://files.pythonhosted.org/packages/40/26/f35951c45070edc957ba40a5b1db3cf60a9dbb1b350c2d5bef03e01e61de/charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8
+# pip coverage @ https://files.pythonhosted.org/packages/14/6f/8351b465febb4dbc1ca9929505202db909c5a635c6fdf33e089bbc3d7d85/coverage-7.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=0c0420b573964c760df9e9e86d1a9a622d0d27f417e1a949a8a66dd7bcee7bc6
+# pip cycler @ https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl#sha256=85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30
+# pip cython @ https://files.pythonhosted.org/packages/93/03/e330b241ad8aa12bb9d98b58fb76d4eb7dcbe747479aab5c29fce937b9e7/Cython-3.0.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=3999fb52d3328a6a5e8c63122b0a8bd110dfcdb98dda585a3def1426b991cba7
+# pip docutils @ https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl#sha256=dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2
+# pip execnet @ https://files.pythonhosted.org/packages/43/09/2aea36ff60d16dd8879bdb2f5b3ee0ba8d08cbbdcdfe870e695ce3784385/execnet-2.1.1-py3-none-any.whl#sha256=26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc
+# pip fonttools @ https://files.pythonhosted.org/packages/a4/22/0a0ad59d9367997fd74a00ad2e88d10559122e09f105e94d34c155aecc0a/fonttools-4.53.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=bee32ea8765e859670c4447b0817514ca79054463b6b79784b08a8df3a4d78e3
+# pip idna @ https://files.pythonhosted.org/packages/22/7e/d71db821f177828df9dea8c42ac46473366f191be53080e552e628aad991/idna-3.8-py3-none-any.whl#sha256=050b4e5baadcd44d760cedbd2b8e639f2ff89bbc7a5730fcc662954303377aac
+# pip imagesize @ https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl#sha256=0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b
+# pip iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
+# pip joblib @ https://files.pythonhosted.org/packages/91/29/df4b9b42f2be0b623cbd5e2140cafcaa2bef0759a00b7b70104dcfe2fb51/joblib-1.4.2-py3-none-any.whl#sha256=06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6
+# pip kiwisolver @ https://files.pythonhosted.org/packages/17/ba/17a706b232308e65f57deeccae503c268292e6a091313f6ce833a23093ea/kiwisolver-1.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=040c1aebeda72197ef477a906782b5ab0d387642e93bda547336b8957c61022e
+# pip markupsafe @ https://files.pythonhosted.org/packages/97/18/c30da5e7a0e7f4603abfc6780574131221d9148f323752c2755d48abad30/MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5
+# pip meson @ https://files.pythonhosted.org/packages/b7/33/513a9ca4fd5892463abb38592105b78fd425214f7983033633e2e48cbd30/meson-1.5.1-py3-none-any.whl#sha256=5531e24e6cfd6000bf1c712793cf28dff032841370b1a3b941a894e4fde46e5a
+# pip networkx @ https://files.pythonhosted.org/packages/38/e9/5f72929373e1a0e8d142a130f3f97e6ff920070f87f91c4e13e40e0fba5a/networkx-3.3-py3-none-any.whl#sha256=28575580c6ebdaf4505b22c6256a2b9de86b316dc63ba9e93abde3d78dfdbcf2
+# pip ninja @ https://files.pythonhosted.org/packages/6d/92/8d7aebd4430ab5ff65df2bfee6d5745f95c004284db2d8ca76dcbfd9de47/ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl#sha256=84502ec98f02a037a169c4b0d5d86075eaf6afc55e1879003d6cab51ced2ea4b
+# pip numpy @ https://files.pythonhosted.org/packages/7b/93/831b4c5b4355210827b3de34f539297e1833c39a68c26a8b454d8cf9f5ed/numpy-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=f5ebbf9fbdabed208d4ecd2e1dfd2c0741af2f876e7ae522c2537d404ca895c3
+# pip packaging @ https://files.pythonhosted.org/packages/08/aa/cc0199a5f0ad350994d660967a8efb233fe0416e4639146c089643407ce6/packaging-24.1-py3-none-any.whl#sha256=5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124
+# pip pillow @ https://files.pythonhosted.org/packages/ba/e5/8c68ff608a4203085158cff5cc2a3c534ec384536d9438c405ed6370d080/pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl#sha256=76a911dfe51a36041f2e756b00f96ed84677cdeb75d25c767f296c1c1eda1319
+# pip pluggy @ https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl#sha256=44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669
+# pip pygments @ https://files.pythonhosted.org/packages/f7/3f/01c8b82017c199075f8f788d0d906b9ffbbc5a47dc9918a945e13d5a2bda/pygments-2.18.0-py3-none-any.whl#sha256=b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a
+# pip pyparsing @ https://files.pythonhosted.org/packages/e5/0c/0e3c05b1c87bb6a1c76d281b0f35e78d2d80ac91b5f8f524cebf77f51049/pyparsing-3.1.4-py3-none-any.whl#sha256=a6a7ee4235a3f944aa1fa2249307708f893fe5717dc603503c6c7969c070fb7c
+# pip pytz @ https://files.pythonhosted.org/packages/9c/3d/a121f284241f08268b21359bd425f7d4825cffc5ac5cd0e1b3d82ffd2b10/pytz-2024.1-py2.py3-none-any.whl#sha256=328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319
+# pip six @ https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl#sha256=8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254
+# pip snowballstemmer @ https://files.pythonhosted.org/packages/ed/dc/c02e01294f7265e63a7315fe086dd1df7dacb9f840a804da846b96d01b96/snowballstemmer-2.2.0-py2.py3-none-any.whl#sha256=c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a
+# pip sphinxcontrib-applehelp @ https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl#sha256=4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5
+# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl#sha256=aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2
+# pip sphinxcontrib-htmlhelp @ https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl#sha256=166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8
+# pip sphinxcontrib-jsmath @ https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl#sha256=2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178
+# pip sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl#sha256=b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb
+# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl#sha256=6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331
+# pip tabulate @ https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl#sha256=024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f
+# pip threadpoolctl @ https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl#sha256=56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467
+# pip tzdata @ https://files.pythonhosted.org/packages/65/58/f9c9e6be752e9fcb8b6a0ee9fb87e6e7a1f6bcab2cdc73f02bb7ba91ada0/tzdata-2024.1-py2.py3-none-any.whl#sha256=9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252
+# pip urllib3 @ https://files.pythonhosted.org/packages/ca/1c/89ffc63a9605b583d5df2be791a27bc1a42b7c32bab68d3c8f2f73a98cd4/urllib3-2.2.2-py3-none-any.whl#sha256=a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472
+# pip array-api-strict @ https://files.pythonhosted.org/packages/08/06/aba69bce257fd1cda0d1db616c12728af0f46878a5cc1923fcbb94201947/array_api_strict-2.0.1-py3-none-any.whl#sha256=f74cbf0d0c182fcb45c5ee7f28f9c7b77e6281610dfbbdd63be60b1a5a7872b3
+# pip contourpy @ https://files.pythonhosted.org/packages/ee/c0/9bd123d676eb61750e116a2cd915b06483fc406143cfc36c7f263f0f5368/contourpy-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=d4492d82b3bc7fbb7e3610747b159869468079fe149ec5c4d771fa1f614a14df
+# pip imageio @ https://files.pythonhosted.org/packages/1e/b7/02adac4e42a691008b5cfb31db98c190e1fc348d1521b9be4429f9454ed1/imageio-2.35.1-py3-none-any.whl#sha256=6eb2e5244e7a16b85c10b5c2fe0f7bf961b40fcb9f1a9fd1bd1d2c2f8fb3cd65
+# pip jinja2 @ https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl#sha256=bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d
+# pip lazy-loader @ https://files.pythonhosted.org/packages/83/60/d497a310bde3f01cb805196ac61b7ad6dc5dcf8dce66634dc34364b20b4f/lazy_loader-0.4-py3-none-any.whl#sha256=342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc
+# pip pyproject-metadata @ https://files.pythonhosted.org/packages/aa/5f/bb5970d3d04173b46c9037109f7f05fc8904ff5be073ee49bb6ff00301bc/pyproject_metadata-0.8.0-py3-none-any.whl#sha256=ad858d448e1d3a1fb408ac5bac9ea7743e7a8bbb472f2693aaa334d2db42f526
+# pip pytest @ https://files.pythonhosted.org/packages/0f/f9/cf155cf32ca7d6fa3601bc4c5dd19086af4b320b706919d48a4c79081cf9/pytest-8.3.2-py3-none-any.whl#sha256=4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5
+# pip python-dateutil @ https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl#sha256=a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427
+# pip requests @ https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl#sha256=70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6
+# pip scipy @ https://files.pythonhosted.org/packages/93/6b/701776d4bd6bdd9b629c387b5140f006185bd8ddea16788a44434376b98f/scipy-1.14.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=fef8c87f8abfb884dac04e97824b61299880c43f4ce675dd2cbeadd3c9b466d2
+# pip tifffile @ https://files.pythonhosted.org/packages/e1/82/e3d0b9720345f9057e736b305d22809e5b80143c76f2266e2a1bf57ad2cd/tifffile-2024.8.24-py3-none-any.whl#sha256=40faba20cb0af05c0eb500eda63244dd81500360e1518ff4548eb61ce3943099
+# pip lightgbm @ https://files.pythonhosted.org/packages/4e/19/1b928cad70a4e1a3e2c37d5417ca2182510f2451eaadb6c91cd9ec692cae/lightgbm-4.5.0-py3-none-manylinux_2_28_x86_64.whl#sha256=960a0e7c077de0ca3053f1325d3edfc92ea815acf5176adcacdea0f635aeef9b
+# pip matplotlib @ https://files.pythonhosted.org/packages/01/75/6c7ce560e95714a10fcbb3367d1304975a1a3e620f72af28921b796403f3/matplotlib-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=8912ef7c2362f7193b5819d17dae8629b34a95c58603d781329712ada83f9447
+# pip meson-python @ https://files.pythonhosted.org/packages/91/c0/104cb6244c83fe6bc3886f144cc433db0c0c78efac5dc00e409a5a08c87d/meson_python-0.16.0-py3-none-any.whl#sha256=842dc9f5dc29e55fc769ff1b6fe328412fe6c870220fc321060a1d2d395e69e8
+# pip pandas @ https://files.pythonhosted.org/packages/fc/a5/4d82be566f069d7a9a702dcdf6f9106df0e0b042e738043c0cc7ddd7e3f6/pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee
+# pip pyamg @ https://files.pythonhosted.org/packages/d3/e8/6898b3b791f369605012e896ed903b6626f3bd1208c6a647d7219c070209/pyamg-5.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=679a5904eac3a4880288c8c0e6a29f110a2627ea15a443a4e9d5997c7dc5fab6
+# pip pytest-cov @ https://files.pythonhosted.org/packages/78/3a/af5b4fa5961d9a1e6237b530eb87dd04aea6eb83da09d2a4073d81b54ccf/pytest_cov-5.0.0-py3-none-any.whl#sha256=4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652
+# pip pytest-xdist @ https://files.pythonhosted.org/packages/6d/82/1d96bf03ee4c0fdc3c0cbe61470070e659ca78dc0086fb88b66c185e2449/pytest_xdist-3.6.1-py3-none-any.whl#sha256=9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7
+# pip scikit-image @ https://files.pythonhosted.org/packages/ad/96/138484302b8ec9a69cdf65e8d4ab47a640a3b1a8ea3c437e1da3e1a5a6b8/scikit_image-0.24.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=fa27b3a0dbad807b966b8db2d78da734cb812ca4787f7fbb143764800ce2fa9c
+# pip sphinx @ https://files.pythonhosted.org/packages/4d/61/2ad169c6ff1226b46e50da0e44671592dbc6d840a52034a0193a99b28579/sphinx-8.0.2-py3-none-any.whl#sha256=56173572ae6c1b9a38911786e206a110c9749116745873feae4f9ce88e59391d
+# pip numpydoc @ https://files.pythonhosted.org/packages/6c/45/56d99ba9366476cd8548527667f01869279cedb9e66b28eb4dfb27701679/numpydoc-1.8.0-py3-none-any.whl#sha256=72024c7fd5e17375dec3608a27c03303e8ad00c81292667955c6fea7a3ccf541
diff --git a/auto_building_tools/build_tools/azure/pylatest_pip_scipy_dev_environment.yml b/auto_building_tools/build_tools/azure/pylatest_pip_scipy_dev_environment.yml
new file mode 100644
index 0000000..01709b7
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/pylatest_pip_scipy_dev_environment.yml
@@ -0,0 +1,22 @@
+# DO NOT EDIT: this file is generated from the specification found in the
+# following script to centralize the configuration for CI builds:
+# build_tools/update_environments_and_lock_files.py
+channels:
+ - defaults
+dependencies:
+ - python
+ - ccache
+ - pip
+ - pip:
+ - threadpoolctl
+ - pytest
+ - pytest-xdist
+ - pip
+ - ninja
+ - meson-python
+ - pytest-cov
+ - coverage
+ - pooch
+ - sphinx
+ - numpydoc
+ - python-dateutil
diff --git a/auto_building_tools/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock b/auto_building_tools/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock
new file mode 100644
index 0000000..dffc09c
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock
@@ -0,0 +1,67 @@
+# Generated by conda-lock.
+# platform: linux-64
+# input_hash: 8a4a203136d97ff3b2c8657fce2dd2228215bfbf9c1cfbe271e401f934bdf1a7
+@EXPLICIT
+https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9
+https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2024.7.2-h06a4308_0.conda#5c6799c01e9be4c7ba294f6530b2d562
+https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b
+https://repo.anaconda.com/pkgs/main/noarch/tzdata-2024a-h04d1e81_0.conda#452af53adae0a5b06eb5d05c707b2f25
+https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b372c0eea9b60732fdae4b817a63c8cd
+https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda#57623d10a70e09e1d048c2b2b6f4e2dd
+https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda#71d281e9c2192cb3fa425655a8defb85
+https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda#a87728dabf3151fb9cfa990bd2eb0464
+https://repo.anaconda.com/pkgs/main/linux-64/bzip2-1.0.8-h5eee18b_6.conda#f21a3ff51c1b271977f53ce956a69297
+https://repo.anaconda.com/pkgs/main/linux-64/expat-2.6.2-h6a678d5_0.conda#55049db2772dae035f6b8a95f72b5970
+https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_1.conda#70646cc713f0c43926cfdcfe9b695fe0
+https://repo.anaconda.com/pkgs/main/linux-64/libuuid-1.41.5-h5eee18b_0.conda#4a6a2354414c9080327274aa514e5299
+https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda#5558eec6e2191741a92f832ea826251c
+https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.14-h5eee18b_0.conda#37b6dad6aa49000a4230a9f0cad172f6
+https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.6-h5eee18b_1.conda#1562802f843297ee776a50b9329597ed
+https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_1.conda#92e42d8310108b0a440fb2e60b2b2a25
+https://repo.anaconda.com/pkgs/main/linux-64/ccache-3.7.9-hfe4627d_0.conda#bef6fc681c273bb7bd0c67d1a591365e
+https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda#be42180685cce6e6b0329201d9f48efb
+https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.14-h39e8969_0.conda#78dbc5e3c69143ebc037fc5d5b22e597
+https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.45.3-h5eee18b_0.conda#acf93d6aceb74d6110e20b44cc45939e
+https://repo.anaconda.com/pkgs/main/linux-64/python-3.12.4-h5148396_1.conda#7863dc035441267f7b617f080c933671
+https://repo.anaconda.com/pkgs/main/linux-64/setuptools-72.1.0-py312h06a4308_0.conda#bab64ac5186aa07014788baf1fbe3ca9
+https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.43.0-py312h06a4308_0.conda#18d5f3b68a175c72576876db4afc9e9e
+https://repo.anaconda.com/pkgs/main/linux-64/pip-24.2-py312h06a4308_0.conda#798cbea8112672434d0cd7551f8fc4b9
+# pip alabaster @ https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl#sha256=fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b
+# pip babel @ https://files.pythonhosted.org/packages/ed/20/bc79bc575ba2e2a7f70e8a1155618bb1301eaa5132a8271373a6903f73f8/babel-2.16.0-py3-none-any.whl#sha256=368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b
+# pip certifi @ https://files.pythonhosted.org/packages/1c/d5/c84e1a17bf61d4df64ca866a1c9a913874b4e9bdc131ec689a0ad013fb36/certifi-2024.7.4-py3-none-any.whl#sha256=c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90
+# pip charset-normalizer @ https://files.pythonhosted.org/packages/ee/fb/14d30eb4956408ee3ae09ad34299131fb383c47df355ddb428a7331cfa1e/charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b
+# pip coverage @ https://files.pythonhosted.org/packages/1f/0f/c890339dd605f3ebc269543247bdd43b703cce6825b5ed42ff5f2d6122c7/coverage-7.6.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=c44fee9975f04b33331cb8eb272827111efc8930cfd582e0320613263ca849ca
+# pip docutils @ https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl#sha256=dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2
+# pip execnet @ https://files.pythonhosted.org/packages/43/09/2aea36ff60d16dd8879bdb2f5b3ee0ba8d08cbbdcdfe870e695ce3784385/execnet-2.1.1-py3-none-any.whl#sha256=26dee51f1b80cebd6d0ca8e74dd8745419761d3bef34163928cbebbdc4749fdc
+# pip idna @ https://files.pythonhosted.org/packages/22/7e/d71db821f177828df9dea8c42ac46473366f191be53080e552e628aad991/idna-3.8-py3-none-any.whl#sha256=050b4e5baadcd44d760cedbd2b8e639f2ff89bbc7a5730fcc662954303377aac
+# pip imagesize @ https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl#sha256=0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b
+# pip iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374
+# pip markupsafe @ https://files.pythonhosted.org/packages/0a/0d/2454f072fae3b5a137c119abf15465d1771319dfe9e4acbb31722a0fff91/MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5
+# pip meson @ https://files.pythonhosted.org/packages/b7/33/513a9ca4fd5892463abb38592105b78fd425214f7983033633e2e48cbd30/meson-1.5.1-py3-none-any.whl#sha256=5531e24e6cfd6000bf1c712793cf28dff032841370b1a3b941a894e4fde46e5a
+# pip ninja @ https://files.pythonhosted.org/packages/6d/92/8d7aebd4430ab5ff65df2bfee6d5745f95c004284db2d8ca76dcbfd9de47/ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl#sha256=84502ec98f02a037a169c4b0d5d86075eaf6afc55e1879003d6cab51ced2ea4b
+# pip packaging @ https://files.pythonhosted.org/packages/08/aa/cc0199a5f0ad350994d660967a8efb233fe0416e4639146c089643407ce6/packaging-24.1-py3-none-any.whl#sha256=5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124
+# pip platformdirs @ https://files.pythonhosted.org/packages/68/13/2aa1f0e1364feb2c9ef45302f387ac0bd81484e9c9a4c5688a322fbdfd08/platformdirs-4.2.2-py3-none-any.whl#sha256=2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee
+# pip pluggy @ https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl#sha256=44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669
+# pip pygments @ https://files.pythonhosted.org/packages/f7/3f/01c8b82017c199075f8f788d0d906b9ffbbc5a47dc9918a945e13d5a2bda/pygments-2.18.0-py3-none-any.whl#sha256=b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a
+# pip six @ https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl#sha256=8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254
+# pip snowballstemmer @ https://files.pythonhosted.org/packages/ed/dc/c02e01294f7265e63a7315fe086dd1df7dacb9f840a804da846b96d01b96/snowballstemmer-2.2.0-py2.py3-none-any.whl#sha256=c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a
+# pip sphinxcontrib-applehelp @ https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl#sha256=4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5
+# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl#sha256=aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2
+# pip sphinxcontrib-htmlhelp @ https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl#sha256=166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8
+# pip sphinxcontrib-jsmath @ https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl#sha256=2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178
+# pip sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl#sha256=b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb
+# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl#sha256=6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331
+# pip tabulate @ https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl#sha256=024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f
+# pip threadpoolctl @ https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl#sha256=56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467
+# pip urllib3 @ https://files.pythonhosted.org/packages/ca/1c/89ffc63a9605b583d5df2be791a27bc1a42b7c32bab68d3c8f2f73a98cd4/urllib3-2.2.2-py3-none-any.whl#sha256=a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472
+# pip jinja2 @ https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl#sha256=bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d
+# pip pyproject-metadata @ https://files.pythonhosted.org/packages/aa/5f/bb5970d3d04173b46c9037109f7f05fc8904ff5be073ee49bb6ff00301bc/pyproject_metadata-0.8.0-py3-none-any.whl#sha256=ad858d448e1d3a1fb408ac5bac9ea7743e7a8bbb472f2693aaa334d2db42f526
+# pip pytest @ https://files.pythonhosted.org/packages/0f/f9/cf155cf32ca7d6fa3601bc4c5dd19086af4b320b706919d48a4c79081cf9/pytest-8.3.2-py3-none-any.whl#sha256=4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5
+# pip python-dateutil @ https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl#sha256=a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427
+# pip requests @ https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl#sha256=70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6
+# pip meson-python @ https://files.pythonhosted.org/packages/91/c0/104cb6244c83fe6bc3886f144cc433db0c0c78efac5dc00e409a5a08c87d/meson_python-0.16.0-py3-none-any.whl#sha256=842dc9f5dc29e55fc769ff1b6fe328412fe6c870220fc321060a1d2d395e69e8
+# pip pooch @ https://files.pythonhosted.org/packages/a8/87/77cc11c7a9ea9fd05503def69e3d18605852cd0d4b0d3b8f15bbeb3ef1d1/pooch-1.8.2-py3-none-any.whl#sha256=3529a57096f7198778a5ceefd5ac3ef0e4d06a6ddaf9fc2d609b806f25302c47
+# pip pytest-cov @ https://files.pythonhosted.org/packages/78/3a/af5b4fa5961d9a1e6237b530eb87dd04aea6eb83da09d2a4073d81b54ccf/pytest_cov-5.0.0-py3-none-any.whl#sha256=4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652
+# pip pytest-xdist @ https://files.pythonhosted.org/packages/6d/82/1d96bf03ee4c0fdc3c0cbe61470070e659ca78dc0086fb88b66c185e2449/pytest_xdist-3.6.1-py3-none-any.whl#sha256=9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7
+# pip sphinx @ https://files.pythonhosted.org/packages/4d/61/2ad169c6ff1226b46e50da0e44671592dbc6d840a52034a0193a99b28579/sphinx-8.0.2-py3-none-any.whl#sha256=56173572ae6c1b9a38911786e206a110c9749116745873feae4f9ce88e59391d
+# pip numpydoc @ https://files.pythonhosted.org/packages/6c/45/56d99ba9366476cd8548527667f01869279cedb9e66b28eb4dfb27701679/numpydoc-1.8.0-py3-none-any.whl#sha256=72024c7fd5e17375dec3608a27c03303e8ad00c81292667955c6fea7a3ccf541
diff --git a/auto_building_tools/build_tools/azure/pymin_conda_forge_mkl_environment.yml b/auto_building_tools/build_tools/azure/pymin_conda_forge_mkl_environment.yml
new file mode 100644
index 0000000..a219e4b
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/pymin_conda_forge_mkl_environment.yml
@@ -0,0 +1,24 @@
+# DO NOT EDIT: this file is generated from the specification found in the
+# following script to centralize the configuration for CI builds:
+# build_tools/update_environments_and_lock_files.py
+channels:
+ - conda-forge
+dependencies:
+ - python=3.9
+ - numpy
+ - blas[build=mkl]
+ - scipy
+ - cython
+ - joblib
+ - threadpoolctl
+ - matplotlib
+ - pytest
+ - pytest-xdist
+ - pillow
+ - pip
+ - ninja
+ - meson-python
+ - pytest-cov
+ - coverage
+ - wheel
+ - pip
diff --git a/auto_building_tools/build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock b/auto_building_tools/build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock
new file mode 100644
index 0000000..eb81eae
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/pymin_conda_forge_mkl_win-64_conda.lock
@@ -0,0 +1,125 @@
+# Generated by conda-lock.
+# platform: win-64
+# input_hash: ea607aaeb7b1d1f8a1f821a9f505b3601083a218ec4763e2d72d3d3d800e718c
+@EXPLICIT
+https://conda.anaconda.org/conda-forge/win-64/ca-certificates-2024.7.4-h56e8100_0.conda#9caa97c9504072cd060cf0a3142cc0ed
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_2.conda#cbbe59391138ea5ad3658c76912e147f
+https://conda.anaconda.org/conda-forge/win-64/intel-openmp-2024.2.1-h57928b3_1083.conda#2d89243bfb53652c182a7c73182cce4f
+https://conda.anaconda.org/conda-forge/win-64/libexpat-2.6.2-h63175ca_0.conda#bc592d03f62779511d392c175dcece64
+https://conda.anaconda.org/conda-forge/win-64/mkl-include-2024.1.0-h66d3029_694.conda#1f80971a50e69c1f7af15707619df49e
+https://conda.anaconda.org/conda-forge/win-64/msys2-conda-epoch-20160418-1.tar.bz2#b0309b72560df66f71a9d5e34a5efdfa
+https://conda.anaconda.org/conda-forge/win-64/python_abi-3.9-5_cp39.conda#86ba1bbcf9b259d1592201f3c345c810
+https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8
+https://conda.anaconda.org/conda-forge/win-64/ucrt-10.0.22621.0-h57928b3_0.tar.bz2#72608f6cd3e5898229c3ea16deb1ac43
+https://conda.anaconda.org/conda-forge/win-64/expat-2.6.2-h63175ca_0.conda#52f9dec6758ceb8ce0ea8af9fa13eb1a
+https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29
+https://conda.anaconda.org/conda-forge/win-64/m2w64-gmp-6.1.0-2.tar.bz2#53a1c73e1e3d185516d7e3af177596d9
+https://conda.anaconda.org/conda-forge/win-64/m2w64-libwinpthread-git-5.0.0.4634.697f757-2.tar.bz2#774130a326dee16f1ceb05cc687ee4f0
+https://conda.anaconda.org/conda-forge/win-64/vc14_runtime-14.40.33810-ha82c5b3_20.conda#e39cc4c34c53654ec939558993d9dc5b
+https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab
+https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libs-core-5.3.0-7.tar.bz2#4289d80fb4d272f1f3b56cfe87ac90bd
+https://conda.anaconda.org/conda-forge/win-64/vc-14.3-h8a93ad2_20.conda#8558f367e1d7700554f7cdb823c46faf
+https://conda.anaconda.org/conda-forge/win-64/vs2015_runtime-14.40.33810-h3bf8584_20.conda#c21f1b4a3a30bbc3ef35a50957578e0e
+https://conda.anaconda.org/conda-forge/win-64/bzip2-1.0.8-h2466b09_7.conda#276e7ffe9ffe39688abc665ef0f45596
+https://conda.anaconda.org/conda-forge/win-64/double-conversion-3.3.0-h63175ca_0.conda#1a8bc18b24014167b2184c5afbe6037e
+https://conda.anaconda.org/conda-forge/win-64/graphite2-1.3.13-h63175ca_1003.conda#3194499ee7d1a67404a87d0eefdd92c6
+https://conda.anaconda.org/conda-forge/win-64/icu-75.1-he0c23c2_0.conda#8579b6bb8d18be7c0b27fb08adeeeb40
+https://conda.anaconda.org/conda-forge/win-64/lerc-4.0.0-h63175ca_0.tar.bz2#1900cb3cab5055833cfddb0ba233b074
+https://conda.anaconda.org/conda-forge/win-64/libbrotlicommon-1.1.0-hcfcfb64_1.conda#f77f319fb82980166569e1280d5b2864
+https://conda.anaconda.org/conda-forge/win-64/libdeflate-1.21-h2466b09_0.conda#4ebe2206ebf4bf38f6084ad836110361
+https://conda.anaconda.org/conda-forge/win-64/libffi-3.4.2-h8ffe710_5.tar.bz2#2c96d1b6915b408893f9472569dee135
+https://conda.anaconda.org/conda-forge/win-64/libiconv-1.17-hcfcfb64_2.conda#e1eb10b1cca179f2baa3601e4efc8712
+https://conda.anaconda.org/conda-forge/win-64/libjpeg-turbo-3.0.0-hcfcfb64_1.conda#3f1b948619c45b1ca714d60c7389092c
+https://conda.anaconda.org/conda-forge/win-64/libsqlite-3.46.0-h2466b09_0.conda#951b0a3a463932e17414cd9f047fa03d
+https://conda.anaconda.org/conda-forge/win-64/libwebp-base-1.4.0-hcfcfb64_0.conda#abd61d0ab127ec5cd68f62c2969e6f34
+https://conda.anaconda.org/conda-forge/win-64/libzlib-1.3.1-h2466b09_1.conda#d4483ca8afc57ddf1f6dded53b36c17f
+https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libgfortran-5.3.0-6.tar.bz2#066552ac6b907ec6d72c0ddab29050dc
+https://conda.anaconda.org/conda-forge/win-64/ninja-1.12.1-hc790b64_0.conda#a557dde55343e03c68cd7e29e7f87279
+https://conda.anaconda.org/conda-forge/win-64/openssl-3.3.1-h2466b09_3.conda#c6ebd3a1a2b393e040ca71c9f9ef8d97
+https://conda.anaconda.org/conda-forge/win-64/pixman-0.43.4-h63175ca_0.conda#b98135614135d5f458b75ab9ebb9558c
+https://conda.anaconda.org/conda-forge/win-64/pthreads-win32-2.9.1-hfa6e2cd_3.tar.bz2#e2da8758d7d51ff6aa78a14dfb9dbed4
+https://conda.anaconda.org/conda-forge/win-64/qhull-2020.2-hc790b64_5.conda#854fbdff64b572b5c0b470f334d34c11
+https://conda.anaconda.org/conda-forge/win-64/tk-8.6.13-h5226925_1.conda#fc048363eb8f03cd1737600a5d08aafe
+https://conda.anaconda.org/conda-forge/win-64/xz-5.2.6-h8d14728_0.tar.bz2#515d77642eaa3639413c6b1bc3f94219
+https://conda.anaconda.org/conda-forge/win-64/krb5-1.21.3-hdf4eb48_0.conda#31aec030344e962fbd7dbbbbd68e60a9
+https://conda.anaconda.org/conda-forge/win-64/libbrotlidec-1.1.0-hcfcfb64_1.conda#19ce3e1dacc7912b3d6ff40690ba9ae0
+https://conda.anaconda.org/conda-forge/win-64/libbrotlienc-1.1.0-hcfcfb64_1.conda#71e890a0b361fd58743a13f77e1506b7
+https://conda.anaconda.org/conda-forge/win-64/libintl-0.22.5-h5728263_3.conda#2cf0cf76cc15d360dfa2f17fd6cf9772
+https://conda.anaconda.org/conda-forge/win-64/libpng-1.6.43-h19919ed_0.conda#77e398acc32617a0384553aea29e866b
+https://conda.anaconda.org/conda-forge/win-64/libxml2-2.12.7-h0f24e4e_4.conda#ed4d301f0d2149b34deb9c4fecafd836
+https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libs-5.3.0-7.tar.bz2#fe759119b8b3bfa720b8762c6fdc35de
+https://conda.anaconda.org/conda-forge/win-64/pcre2-10.44-h3d7b363_2.conda#a3a3baddcfb8c80db84bec3cb7746fb8
+https://conda.anaconda.org/conda-forge/win-64/python-3.9.19-h4de0772_0_cpython.conda#b6999bc275e0e6beae7b1c8ea0be1e85
+https://conda.anaconda.org/conda-forge/win-64/zlib-1.3.1-h2466b09_1.conda#f8e0a35bf6df768ad87ed7bbbc36ab04
+https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.6-h0ea2cb4_0.conda#9a17230f95733c04dc40a2b1e5491d74
+https://conda.anaconda.org/conda-forge/win-64/brotli-bin-1.1.0-hcfcfb64_1.conda#0105229d7c5fabaa840043a86c10ec64
+https://conda.anaconda.org/conda-forge/noarch/certifi-2024.7.4-pyhd8ed1ab_0.conda#24e7fd6ca65997938fff9e5ab6f653e4
+https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99
+https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441
+https://conda.anaconda.org/conda-forge/win-64/cython-3.0.11-py39ha51f57c_0.conda#d7dfdb0e5fa3cc89807fc77fe6173c4d
+https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_0.conda#d02ae936e42063ca46af6cdad2dbd1e0
+https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46
+https://conda.anaconda.org/conda-forge/win-64/freetype-2.12.1-hdaf720e_2.conda#3761b23693f768dc75a8fd0a73ca053f
+https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5
+https://conda.anaconda.org/conda-forge/win-64/kiwisolver-1.4.5-py39h1f6ef14_1.conda#4fc5bd0a7b535252028c647cc27d6c87
+https://conda.anaconda.org/conda-forge/win-64/libclang13-18.1.8-default_ha5278ca_2.conda#8185207d3f7e59474870cc79e4f9eaa5
+https://conda.anaconda.org/conda-forge/win-64/libglib-2.80.3-h7025463_2.conda#b60894793e7e4a555027bfb4e4ed1d54
+https://conda.anaconda.org/conda-forge/win-64/libhwloc-2.11.1-default_h8125262_1000.conda#933bad6e4658157f1aec9b171374fde2
+https://conda.anaconda.org/conda-forge/win-64/libtiff-4.6.0-hb151862_4.conda#7d35d9aa8f051d548116039f5813c8ec
+https://conda.anaconda.org/conda-forge/win-64/libxslt-1.1.39-h3df6e99_0.conda#279ee338c9b34871d578cb3c7aa68f70
+https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19
+https://conda.anaconda.org/conda-forge/noarch/packaging-24.1-pyhd8ed1ab_0.conda#cbe1bb1f21567018ce595d9c2be0f0db
+https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf
+https://conda.anaconda.org/conda-forge/win-64/pthread-stubs-0.4-hcd874cb_1001.tar.bz2#a1f820480193ea83582b13249a7e7bd9
+https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.4-pyhd8ed1ab_0.conda#4d91352a50949d049cf9714c8563d433
+https://conda.anaconda.org/conda-forge/noarch/setuptools-72.2.0-pyhd8ed1ab_0.conda#1462aa8b243aad09ef5d0841c745eb89
+https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2
+https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd
+https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095
+https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96
+https://conda.anaconda.org/conda-forge/win-64/tornado-6.4.1-py39ha55e580_0.conda#7d1e87f3036af858ce7e248489c3faec
+https://conda.anaconda.org/conda-forge/win-64/unicodedata2-15.1.0-py39ha55989b_0.conda#20ec896e8d97f2ff8be1124e624dc8f2
+https://conda.anaconda.org/conda-forge/noarch/wheel-0.44.0-pyhd8ed1ab_0.conda#d44e3b085abcaef02983c6305b84b584
+https://conda.anaconda.org/conda-forge/win-64/xorg-libxau-1.0.11-hcd874cb_0.conda#c46ba8712093cb0114404ae8a7582e1a
+https://conda.anaconda.org/conda-forge/win-64/xorg-libxdmcp-1.1.3-hcd874cb_0.tar.bz2#46878ebb6b9cbd8afcf8088d7ef00ece
+https://conda.anaconda.org/conda-forge/noarch/zipp-3.20.0-pyhd8ed1ab_0.conda#05b6bcb391b5be17374f7ad0aeedc479
+https://conda.anaconda.org/conda-forge/win-64/brotli-1.1.0-hcfcfb64_1.conda#f47f6db2528e38321fb00ae31674c133
+https://conda.anaconda.org/conda-forge/win-64/coverage-7.6.1-py39ha55e580_0.conda#a9c63313e61e510e8f8bca90794eee73
+https://conda.anaconda.org/conda-forge/win-64/fontconfig-2.14.2-hbde0cde_0.conda#08767992f1a4f1336a257af1241034bd
+https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.4-pyhd8ed1ab_0.conda#99aa3edd3f452d61c305a30e78140513
+https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f
+https://conda.anaconda.org/conda-forge/win-64/lcms2-2.16-h67d730c_0.conda#d3592435917b62a8becff3a60db674f6
+https://conda.anaconda.org/conda-forge/win-64/libxcb-1.16-h013a479_1.conda#f0b599acdc82d5bc7e3b105833e7c5c8
+https://conda.anaconda.org/conda-forge/noarch/meson-1.5.1-pyhd8ed1ab_1.conda#979087ee59bea1355f991a3b738af64e
+https://conda.anaconda.org/conda-forge/win-64/openjpeg-2.5.2-h3d672ee_0.conda#7e7099ad94ac3b599808950cec30ad4e
+https://conda.anaconda.org/conda-forge/noarch/pip-24.2-pyhd8ed1ab_0.conda#6721aef6bfe5937abe70181545dd2c51
+https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47
+https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.2-pyhd8ed1ab_0.conda#e010a224b90f1f623a917c35addbb924
+https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c
+https://conda.anaconda.org/conda-forge/win-64/tbb-2021.12.0-hc790b64_3.conda#a16e2a639e87c554abee5192ce6ee308
+https://conda.anaconda.org/conda-forge/win-64/cairo-1.18.0-h32b962e_3.conda#8f43723a4925c51e55c2d81725a97db4
+https://conda.anaconda.org/conda-forge/win-64/fonttools-4.53.1-py39ha55e580_0.conda#81bbae03542e491178a620a45ad0b474
+https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.4.4-pyhd8ed1ab_0.conda#c62e775953b6b65f2079c9ee2a62813c
+https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547
+https://conda.anaconda.org/conda-forge/win-64/mkl-2024.1.0-h66d3029_694.conda#a17423859d3fb912c8f2e9797603ddb6
+https://conda.anaconda.org/conda-forge/win-64/pillow-10.4.0-py39hfa8c767_0.conda#7b24bccfb14f05019c8a488d4ee084a8
+https://conda.anaconda.org/conda-forge/noarch/pytest-cov-5.0.0-pyhd8ed1ab_0.conda#c54c0107057d67ddf077751339ec2c63
+https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.6.1-pyhd8ed1ab_0.conda#b39568655c127a9c4a44d178ac99b6d0
+https://conda.anaconda.org/conda-forge/win-64/harfbuzz-9.0.0-h2bedf89_1.conda#254f119aaed2c0be271c1114ae18d09b
+https://conda.anaconda.org/conda-forge/win-64/libblas-3.9.0-23_win64_mkl.conda#693407a31c27e70c750b5ae153251d9a
+https://conda.anaconda.org/conda-forge/win-64/mkl-devel-2024.1.0-h57928b3_694.conda#cb1406a70154cdef203167c6a95f6351
+https://conda.anaconda.org/conda-forge/win-64/libcblas-3.9.0-23_win64_mkl.conda#7ffb5b336cefd2e6d1e00ac1f7c9f2c9
+https://conda.anaconda.org/conda-forge/win-64/liblapack-3.9.0-23_win64_mkl.conda#3580796ab7b7d68143f45d4d94d866b7
+https://conda.anaconda.org/conda-forge/win-64/qt6-main-6.7.2-hbb46ec1_5.conda#e14fa5fe2da0bf8cc30d06314ce6ce33
+https://conda.anaconda.org/conda-forge/win-64/liblapacke-3.9.0-23_win64_mkl.conda#f6e2619d4359c6806b97b3d405193741
+https://conda.anaconda.org/conda-forge/win-64/numpy-2.0.1-py39h60232e0_0.conda#abb4185f8ac60eeb9b450757197da7ac
+https://conda.anaconda.org/conda-forge/win-64/pyside6-6.7.2-py39h0285922_2.conda#12004e14d1835eca43c4207841c24e4f
+https://conda.anaconda.org/conda-forge/win-64/blas-devel-3.9.0-23_win64_mkl.conda#5fd0882b94fa827533f51cc8c2e04392
+https://conda.anaconda.org/conda-forge/win-64/contourpy-1.2.1-py39h1f6ef14_0.conda#03e25c6bae87f4f9595337255b44b0fb
+https://conda.anaconda.org/conda-forge/win-64/scipy-1.13.1-py39h1a10956_0.conda#9f8e571406af04d2f5fdcbecec704505
+https://conda.anaconda.org/conda-forge/win-64/blas-2.123-mkl.conda#0d089770a9bc073da806864c60a0a173
+https://conda.anaconda.org/conda-forge/win-64/matplotlib-base-3.9.2-py39h5376392_0.conda#bd0c448492ac46f8ba0d23dac3e2e9ff
+https://conda.anaconda.org/conda-forge/win-64/matplotlib-3.9.2-py39hcbf5309_0.conda#0405102feb5b62c7ba7f924346953192
diff --git a/auto_building_tools/build_tools/azure/pymin_conda_forge_openblas_min_dependencies_environment.yml b/auto_building_tools/build_tools/azure/pymin_conda_forge_openblas_min_dependencies_environment.yml
new file mode 100644
index 0000000..a1bda82
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/pymin_conda_forge_openblas_min_dependencies_environment.yml
@@ -0,0 +1,26 @@
+# DO NOT EDIT: this file is generated from the specification found in the
+# following script to centralize the configuration for CI builds:
+# build_tools/update_environments_and_lock_files.py
+channels:
+ - conda-forge
+dependencies:
+ - python=3.9
+ - numpy=1.19.5 # min
+ - blas[build=openblas]
+ - scipy=1.6.0 # min
+ - cython=3.0.10 # min
+ - joblib=1.2.0 # min
+ - threadpoolctl=3.1.0 # min
+ - matplotlib=3.3.4 # min
+ - pandas=1.1.5 # min
+ - pyamg
+ - pytest
+ - pytest-xdist
+ - pillow
+ - pip
+ - ninja
+ - meson-python=0.16.0 # min
+ - pytest-cov
+ - coverage
+ - ccache
+ - polars=0.20.30 # min
diff --git a/auto_building_tools/build_tools/azure/pymin_conda_forge_openblas_min_dependencies_linux-64_conda.lock b/auto_building_tools/build_tools/azure/pymin_conda_forge_openblas_min_dependencies_linux-64_conda.lock
new file mode 100644
index 0000000..6173ded
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/pymin_conda_forge_openblas_min_dependencies_linux-64_conda.lock
@@ -0,0 +1,173 @@
+# Generated by conda-lock.
+# platform: linux-64
+# input_hash: da804213459d72ef5fa344326a71a64386dfb5085c8e0b582527e8337cecca32
+@EXPLICIT
+https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81
+https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.7.4-hbcca054_0.conda#23ab7665c5f63cfb9f1f6195256daac6
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_2.conda#cbbe59391138ea5ad3658c76912e147f
+https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-hf3520f5_7.conda#b80f2f396ca2c28b8c14c437a4ed1e74
+https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-5_cp39.conda#40363a30db350596b5f225d0d5a33328
+https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8
+https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29
+https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab
+https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793
+https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.1.0-h77fa898_0.conda#ca0fad6a41ddaef54a153b78eccb5037
+https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.12-h4ab18f5_0.conda#7ed427f0871fd41cb1d9c17727c17589
+https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00
+https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda#62ee74e96c5ebb0af99386de58cf9553
+https://conda.anaconda.org/conda-forge/linux-64/gettext-tools-0.22.5-he02047a_3.conda#fcd2016d1d299f654f81021e27496818
+https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3
+https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51
+https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.21-h4bc722e_0.conda#36ce76665bf67f5aac36be7a0d21b7f3
+https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d
+https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3
+https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-0.22.5-he02047a_3.conda#efab66b82ec976930b96d62a976de8e7
+https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-14.1.0-hc5f4f2c_0.conda#6456c2620c990cd8dde2428a27ba0bc5
+https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e
+https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8
+https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7
+https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.5-h4ab18f5_0.conda#601bfb4b3c6f0b844443bb81a56651e0
+https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f
+https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.1.0-hc0a3c3a_0.conda#1cb187a157136398ddbaae90713e2498
+https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b
+https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559
+https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc
+https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-h4ab18f5_1.conda#57d7dc60e9325e3de37ff8dffd18e814
+https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h59595ed_0.conda#fcea371545eda051b6deafb24889fc69
+https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.1-hb9d3cd8_3.conda#6c566a46baae794daf34775d41eb180a
+https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036
+https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908
+https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h7f98852_1002.tar.bz2#06feff3d2634e3097ce2fe681474b534
+https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87
+https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0
+https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15
+https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0
+https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61
+https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c
+https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda#cc47e1facc155f91abd89b11e48e72ff
+https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f
+https://conda.anaconda.org/conda-forge/linux-64/libasprintf-0.22.5-he8f35ee_3.conda#4fab9799da9571266d05ca5503330655
+https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5
+https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1
+https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d
+https://conda.anaconda.org/conda-forge/linux-64/libgettextpo-devel-0.22.5-he02047a_3.conda#9aba7960731e6b4547b3a52f812ed801
+https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-14.1.0-h69a702a_0.conda#f4ca84fbd6d06b0a052fb2d5b96dde41
+https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae
+https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.46.0-hde9e2c9_0.conda#18aa975d2094c34aef978060ae7da7d8
+https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0
+https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.16-hb9d3cd8_1.conda#3601598f0db0470af28985e3e7ad0158
+https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0
+https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.32.6-h59595ed_0.conda#9160cdeb523a1b20cf8d2a0bf821f45d
+https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.3.0-h70512c7_5.conda#4b652e3e572cbb3f297e77c96313faea
+https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-h297d8ca_0.conda#3aa1c7e292afeff25a0091ddd7c69b72
+https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1
+https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.44-hba22ea6_2.conda#df359c09c41cd186fffb93a2d87aa6f5
+https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123
+https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4
+https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6
+https://conda.anaconda.org/conda-forge/linux-64/zlib-1.3.1-h4ab18f5_1.conda#9653f1bf3766164d0e65fa723cabbc54
+https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45
+https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb
+https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368
+https://conda.anaconda.org/conda-forge/linux-64/libasprintf-devel-0.22.5-he8f35ee_3.conda#1091193789bb830127ed067a9e01ac57
+https://conda.anaconda.org/conda-forge/linux-64/libglib-2.80.3-h315aac3_2.conda#b0143a3e98136a680b728fdf9b42a258
+https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a
+https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.25-pthreads_h413a1c8_0.conda#d172b34a443b95f86089e8229ddc9a17
+https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h46a8edc_4.conda#a7e3a62981350e232e0e7345b5aea580
+https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.12.7-h4c95cb1_3.conda#0ac9aff6010a7751961c8e4b863a40e7
+https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.8-hf5423f3_1.conda#8782406a10201b67bd6476ca70cf92a8
+https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.3.0-ha479ceb_5.conda#82776ee8145b9d1fd6546604de4b351d
+https://conda.anaconda.org/conda-forge/linux-64/nss-3.103-h593d115_0.conda#233bfe41968d6fb04eba9258bb5061ad
+https://conda.anaconda.org/conda-forge/linux-64/python-3.9.19-h0755675_0_cpython.conda#d9ee3647fbd9e8595b8df759b2bbefb8
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.1-hb711507_2.conda#8637c3e5821654d0edf97e2b0404b443
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.1-hb711507_0.conda#ad748ccca349aec3e91743e08b5e2b50
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.10-hb711507_0.conda#0e0cbe0564d03a99afd5fd7b362feecd
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.2-hb711507_0.conda#608e0ef8256b81d04456e8d211eee3e8
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-hb711507_1.conda#4a6d410296d7e39f00bacdee7df046e9
+https://conda.anaconda.org/conda-forge/linux-64/ccache-4.10.1-h065aff2_0.conda#d6b48c138e0c8170a6fe9c136e063540
+https://conda.anaconda.org/conda-forge/noarch/certifi-2024.7.4-pyhd8ed1ab_0.conda#24e7fd6ca65997938fff9e5ab6f653e4
+https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99
+https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441
+https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.10-py39h3d6467e_0.conda#76b5d215fb735a6dc43010ffbe78040e
+https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d
+https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_0.conda#d02ae936e42063ca46af6cdad2dbd1e0
+https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46
+https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d
+https://conda.anaconda.org/conda-forge/linux-64/gettext-0.22.5-he02047a_3.conda#c7f243bbaea97cd6ea1edd693270100e
+https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.80.3-h8fdd7da_2.conda#9958a1f8faba35260e6b68e3a7bc88d6
+https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5
+https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py39h7633fee_1.conda#c9f74d717e5a2847a9f8b779c54130f2
+https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5
+https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-20_linux64_openblas.conda#2b7bb4f7562c8cf334fc2e20c2d28abc
+https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3
+https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-hb3ce162_4.conda#8a35df3cbc0c8b12cc8af9473ae75eef
+https://conda.anaconda.org/conda-forge/linux-64/libllvm18-18.1.8-h8b73ec9_2.conda#2e25bb2f53e4a48873a936f8ef53e592
+https://conda.anaconda.org/conda-forge/linux-64/libpq-16.4-h482b261_0.conda#0f74c5581623f860e7baca042d9d7139
+https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.25-pthreads_h7a3da1a_0.conda#87661673941b5e702275fdf0fc095ad0
+https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138
+https://conda.anaconda.org/conda-forge/noarch/packaging-24.1-pyhd8ed1ab_0.conda#cbe1bb1f21567018ce595d9c2be0f0db
+https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf
+https://conda.anaconda.org/conda-forge/noarch/ply-3.11-pyhd8ed1ab_2.conda#18c6deb6f9602e32446398203c8f0e91
+https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.4-pyhd8ed1ab_0.conda#4d91352a50949d049cf9714c8563d433
+https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad
+https://conda.anaconda.org/conda-forge/linux-64/setuptools-59.8.0-py39hf3d152e_1.tar.bz2#4252d0c211566a9f65149ba7f6e87aa4
+https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2
+https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c
+https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095
+https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96
+https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4.1-py39hd3abc70_0.conda#c183e99f9320e5e2d0f9c43efcb3fb22
+https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.12.2-pyha770c72_0.conda#ebe6952715e1d5eb567eeebf25250fa7
+https://conda.anaconda.org/conda-forge/noarch/wheel-0.44.0-pyhd8ed1ab_0.conda#d44e3b085abcaef02983c6305b84b584
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-hb711507_2.conda#a0901183f08b6c7107aab109733a3c91
+https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.42-h4ab18f5_0.conda#b193af204da1bfb8c13882d131a14bd2
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530
+https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-hbb29018_2.conda#b6d90276c5aee9b4407dd94eb0cd40a8
+https://conda.anaconda.org/conda-forge/linux-64/coverage-7.6.1-py39hcd6043d_0.conda#daab0ee8e85e258281e2b2dd74ebe0bb
+https://conda.anaconda.org/conda-forge/linux-64/glib-2.80.3-h315aac3_2.conda#00e0da7e4fceb5449f3ddd2bf6b2c351
+https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb
+https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-20_linux64_openblas.conda#36d486d72ab64ffea932329a1d3729a3
+https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp15-15.0.7-default_h127d8a8_5.conda#d0a9633b53cdc319b8a1a532ae7822b8
+https://conda.anaconda.org/conda-forge/linux-64/libclang13-18.1.8-default_h9def88c_2.conda#ba2d12adbea9de311297f2b577f4bb86
+https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869
+https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.50-h4f305b6_0.conda#0d7ff1a8e69565ca3add6925e18e708f
+https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-20_linux64_openblas.conda#6fabc51f5e647d09cc010c40061557e0
+https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.7.0-h2c5496b_1.conda#e2eaefa4de2b7237af7c907b8bbc760a
+https://conda.anaconda.org/conda-forge/noarch/meson-1.5.1-pyhd8ed1ab_1.conda#979087ee59bea1355f991a3b738af64e
+https://conda.anaconda.org/conda-forge/linux-64/pillow-10.4.0-py39h16a7006_0.conda#d9a6b19174a6cf5185296b16f781951f
+https://conda.anaconda.org/conda-forge/noarch/pip-24.2-pyhd8ed1ab_0.conda#6721aef6bfe5937abe70181545dd2c51
+https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47
+https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.2-pyhd8ed1ab_0.conda#e010a224b90f1f623a917c35addbb924
+https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c
+https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.12-py39h3d6467e_0.conda#e667a3ab0df62c54e60e1843d2e6defb
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxxf86vm-1.1.5-h4bc722e_1.conda#0c90ad87101001080484b91bd9d2cdef
+https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.24.6-haf2f30d_0.conda#a15d7b21e4b7b82b87ba04c3b46c1317
+https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-9.0.0-hfac3d4d_0.conda#c7b47c64af53e8ecee01d101eeab2342
+https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.11.0-h4ab18f5_1.conda#14858a47d4cc995892e79f2b340682d7
+https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-20_linux64_openblas.conda#05c5862c7dc25e65ba6c471d96429dae
+https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hc60ed4a_1.conda#ef1910918dd895516a769ed36b5b3a4e
+https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547
+https://conda.anaconda.org/conda-forge/linux-64/numpy-1.19.5-py39hd249d9e_3.tar.bz2#0cf333996ebdeeba8d1c8c1c0ee9eff9
+https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py39h3d6467e_5.conda#93aff412f3e49fdb43361c0215cbd72d
+https://conda.anaconda.org/conda-forge/noarch/pytest-cov-5.0.0-pyhd8ed1ab_0.conda#c54c0107057d67ddf077751339ec2c63
+https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.6.1-pyhd8ed1ab_0.conda#b39568655c127a9c4a44d178ac99b6d0
+https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-20_linux64_openblas.conda#9932a1d4e9ecf2d35fb19475446e361e
+https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.24.6-hbaaba92_0.conda#b22ffc80ac9af846df60b2640c98fea4
+https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-255-h3516f8a_1.conda#3366af27f0b593544a6cd453c7932ac5
+https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.3.4-py39h2fa2bec_0.tar.bz2#9ec0b2186fab9121c54f4844f93ee5b7
+https://conda.anaconda.org/conda-forge/linux-64/pandas-1.1.5-py39hde0f152_0.tar.bz2#79fc4b5b3a865b90dd3701cecf1ad33c
+https://conda.anaconda.org/conda-forge/linux-64/polars-0.20.30-py39ha963410_0.conda#322084e8890afc27fcca6df7a528df25
+https://conda.anaconda.org/conda-forge/linux-64/scipy-1.6.0-py39hee8e79c_0.tar.bz2#3afcb78281836e61351a2924f3230060
+https://conda.anaconda.org/conda-forge/linux-64/blas-2.120-openblas.conda#c8f6916a81a340650078171b1d852574
+https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-17.0-hb77b528_0.conda#07f45f1be1c25345faddb8db0de8039b
+https://conda.anaconda.org/conda-forge/linux-64/pyamg-4.2.3-py39hac2352c_1.tar.bz2#6fb0628d6195d8b6caa2422d09296399
+https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h320f8da_24.conda#bec111b67cb8dc63277c6af65d214044
+https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py39h52134e7_5.conda#e1f148e57d071b09187719df86f513c1
+https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.3.4-py39hf3d152e_0.tar.bz2#cbaec993375a908bbe506dc7328d747c
diff --git a/auto_building_tools/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_environment.yml b/auto_building_tools/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_environment.yml
new file mode 100644
index 0000000..38737e7
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_environment.yml
@@ -0,0 +1,25 @@
+# DO NOT EDIT: this file is generated from the specification found in the
+# following script to centralize the configuration for CI builds:
+# build_tools/update_environments_and_lock_files.py
+channels:
+ - conda-forge
+dependencies:
+ - python=3.9
+ - numpy
+ - blas[build=openblas]
+ - scipy
+ - cython
+ - joblib
+ - threadpoolctl
+ - matplotlib
+ - pandas
+ - pyamg
+ - pytest
+ - pytest-xdist
+ - pillow
+ - pip
+ - ninja
+ - meson-python
+ - sphinx
+ - numpydoc
+ - ccache
diff --git a/auto_building_tools/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock b/auto_building_tools/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock
new file mode 100644
index 0000000..86af9ec
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/pymin_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock
@@ -0,0 +1,196 @@
+# Generated by conda-lock.
+# platform: linux-64
+# input_hash: 3974f9847d888a2fd37ba5fcfb76cb09bba4c9b84b6200932500fc94e3b0c4ae
+@EXPLICIT
+https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81
+https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.7.4-hbcca054_0.conda#23ab7665c5f63cfb9f1f6195256daac6
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb
+https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_2.conda#cbbe59391138ea5ad3658c76912e147f
+https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-hf3520f5_7.conda#b80f2f396ca2c28b8c14c437a4ed1e74
+https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-5_cp39.conda#40363a30db350596b5f225d0d5a33328
+https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h0c530f3_0.conda#161081fc7cec0bfda0d86d7cb595f8d8
+https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29
+https://conda.anaconda.org/conda-forge/linux-64/libglvnd-1.7.0-ha4b6fd6_0.conda#e46b5ae31282252e0525713e34ffbe2b
+https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab
+https://conda.anaconda.org/conda-forge/linux-64/libegl-1.7.0-ha4b6fd6_0.conda#35e52d19547cb3265a09c49de146a5ae
+https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793
+https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.1.0-h77fa898_0.conda#ca0fad6a41ddaef54a153b78eccb5037
+https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.12-h4ab18f5_0.conda#7ed427f0871fd41cb1d9c17727c17589
+https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda#62ee74e96c5ebb0af99386de58cf9553
+https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3
+https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hd590300_1.conda#aec6c91c7371c26392a06708a73c70e5
+https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.21-h4bc722e_0.conda#36ce76665bf67f5aac36be7a0d21b7f3
+https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.2-h59595ed_0.conda#e7ba12deb7020dd080c6c70e7b6f6a3d
+https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3
+https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-14.1.0-hc5f4f2c_0.conda#6456c2620c990cd8dde2428a27ba0bc5
+https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e
+https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8
+https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7
+https://conda.anaconda.org/conda-forge/linux-64/libpciaccess-0.18-hd590300_0.conda#48f4330bfcd959c3cfb704d424903c82
+https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.1.0-hc0a3c3a_0.conda#1cb187a157136398ddbaae90713e2498
+https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b
+https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559
+https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc
+https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-h4ab18f5_1.conda#57d7dc60e9325e3de37ff8dffd18e814
+https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h59595ed_0.conda#fcea371545eda051b6deafb24889fc69
+https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.1-hb9d3cd8_3.conda#6c566a46baae794daf34775d41eb180a
+https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036
+https://conda.anaconda.org/conda-forge/linux-64/xorg-inputproto-2.3.2-h7f98852_1002.tar.bz2#bcd1b3396ec6960cbc1d2855a9e60b2b
+https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.3-h7f98852_0.tar.bz2#be93aabceefa2fac576e971aef407908
+https://conda.anaconda.org/conda-forge/linux-64/xorg-recordproto-1.14.2-h7f98852_1002.tar.bz2#2f835e6c386e73c6faaddfe9eda67e98
+https://conda.anaconda.org/conda-forge/linux-64/xorg-renderproto-0.11.1-h7f98852_1002.tar.bz2#06feff3d2634e3097ce2fe681474b534
+https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_1003.conda#bce9f945da8ad2ae9b1d7165a64d0f87
+https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15
+https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0
+https://conda.anaconda.org/conda-forge/linux-64/double-conversion-3.3.0-h59595ed_0.conda#c2f83a5ddadadcdb08fe05863295ee97
+https://conda.anaconda.org/conda-forge/linux-64/expat-2.6.2-h59595ed_0.conda#53fb86322bdb89496d7579fe3f02fd61
+https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c
+https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda#8b189310083baabfb622af68fd9d3ae3
+https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f
+https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hd590300_1.conda#f07002e225d7a60a694d42a7bf5ff53f
+https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hd590300_1.conda#5fc11c6020d421960607d821310fcd4d
+https://conda.anaconda.org/conda-forge/linux-64/libdrm-2.4.122-h4ab18f5_0.conda#bbfc4dbe5e97b385ef088f354d65e563
+https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1
+https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-14.1.0-h69a702a_0.conda#f4ca84fbd6d06b0a052fb2d5b96dde41
+https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.43-h2797004_0.conda#009981dd9cfcaa4dbfa25ffaed86bcae
+https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.46.0-hde9e2c9_0.conda#18aa975d2094c34aef978060ae7da7d8
+https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.16-hb9d3cd8_1.conda#3601598f0db0470af28985e3e7ad0158
+https://conda.anaconda.org/conda-forge/linux-64/mysql-common-9.0.1-h70512c7_0.conda#c567b6fa201bc424e84f1e70f7a36095
+https://conda.anaconda.org/conda-forge/linux-64/ninja-1.12.1-h297d8ca_0.conda#3aa1c7e292afeff25a0091ddd7c69b72
+https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.44-hba22ea6_2.conda#df359c09c41cd186fffb93a2d87aa6f5
+https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123
+https://conda.anaconda.org/conda-forge/linux-64/qhull-2020.2-h434a139_5.conda#353823361b1d27eb3960efb076dfcaf6
+https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4
+https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc
+https://conda.anaconda.org/conda-forge/linux-64/wayland-1.23.1-h3e06ad9_0.conda#0a732427643ae5e0486a727927791da1
+https://conda.anaconda.org/conda-forge/linux-64/xorg-fixesproto-5.0-h7f98852_1002.tar.bz2#65ad6e1eb4aed2b0611855aff05e04f6
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6
+https://conda.anaconda.org/conda-forge/linux-64/zlib-1.3.1-h4ab18f5_1.conda#9653f1bf3766164d0e65fa723cabbc54
+https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45
+https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hd590300_1.conda#39f910d205726805a958da408ca194ba
+https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb
+https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368
+https://conda.anaconda.org/conda-forge/linux-64/libglib-2.80.3-h315aac3_2.conda#b0143a3e98136a680b728fdf9b42a258
+https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a
+https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.27-pthreads_hac2b453_1.conda#ae05ece66d3924ac3d48b4aa3fa96cec
+https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h46a8edc_4.conda#a7e3a62981350e232e0e7345b5aea580
+https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.12.7-he7c6b58_4.conda#08a9265c637230c37cb1be4a6cad4536
+https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-18.1.8-hf5423f3_1.conda#8782406a10201b67bd6476ca70cf92a8
+https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-9.0.1-ha479ceb_0.conda#6fd406aef37faad86bd7f37a94fb6f8a
+https://conda.anaconda.org/conda-forge/linux-64/python-3.9.19-h0755675_0_cpython.conda#d9ee3647fbd9e8595b8df759b2bbefb8
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.1-hb711507_2.conda#8637c3e5821654d0edf97e2b0404b443
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.1-hb711507_0.conda#ad748ccca349aec3e91743e08b5e2b50
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.10-hb711507_0.conda#0e0cbe0564d03a99afd5fd7b362feecd
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.2-hb711507_0.conda#608e0ef8256b81d04456e8d211eee3e8
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.9-hb711507_1.conda#4a6d410296d7e39f00bacdee7df046e9
+https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.16-pyhd8ed1ab_0.conda#def531a3ac77b7fb8c21d17bb5d0badb
+https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hd590300_1.conda#f27a24d46e3ea7b70a1f98e50c62508f
+https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py39h3d6467e_1.conda#c48418c8b35f1d59ae9ae1174812b40a
+https://conda.anaconda.org/conda-forge/linux-64/ccache-4.10.1-h065aff2_0.conda#d6b48c138e0c8170a6fe9c136e063540
+https://conda.anaconda.org/conda-forge/noarch/certifi-2024.7.4-pyhd8ed1ab_0.conda#24e7fd6ca65997938fff9e5ab6f653e4
+https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.3.2-pyhd8ed1ab_0.conda#7f4a9e3fcff3f6356ae99244a014da6a
+https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99
+https://conda.anaconda.org/conda-forge/noarch/cycler-0.12.1-pyhd8ed1ab_0.conda#5cd86562580f274031ede6aa6aa24441
+https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.11-py39h98e3656_0.conda#e3762ffb02c6490cf1b8d2c7af219eb5
+https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d
+https://conda.anaconda.org/conda-forge/noarch/docutils-0.21.2-pyhd8ed1ab_0.conda#e8cd5d629f65bdf0f3bb312cde14659e
+https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_0.conda#d02ae936e42063ca46af6cdad2dbd1e0
+https://conda.anaconda.org/conda-forge/noarch/execnet-2.1.1-pyhd8ed1ab_0.conda#15dda3cdbf330abfe9f555d22f66db46
+https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d
+https://conda.anaconda.org/conda-forge/noarch/hpack-4.0.0-pyh9f0ad1d_0.tar.bz2#914d6646c4dbb1fd3ff539830a12fd71
+https://conda.anaconda.org/conda-forge/noarch/hyperframe-6.0.1-pyhd8ed1ab_0.tar.bz2#9f765cbfab6870c8435b9eefecd7a1f4
+https://conda.anaconda.org/conda-forge/noarch/idna-3.8-pyhd8ed1ab_0.conda#99e164522f6bdf23c177c8d9ae63f975
+https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352
+https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5
+https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py39h7633fee_1.conda#c9f74d717e5a2847a9f8b779c54130f2
+https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5
+https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-23_linux64_openblas.conda#96c8450a40aa2b9733073a9460de972c
+https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3
+https://conda.anaconda.org/conda-forge/linux-64/libglx-1.7.0-ha4b6fd6_0.conda#b470cc353c5b852e0d830e8d5d23e952
+https://conda.anaconda.org/conda-forge/linux-64/libllvm18-18.1.8-h8b73ec9_2.conda#2e25bb2f53e4a48873a936f8ef53e592
+https://conda.anaconda.org/conda-forge/linux-64/libpq-16.4-h482b261_0.conda#0f74c5581623f860e7baca042d9d7139
+https://conda.anaconda.org/conda-forge/linux-64/libxslt-1.1.39-h76b75d6_0.conda#e71f31f8cfb0a91439f2086fc8aa0461
+https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.5-py39hd1e30aa_0.conda#9a9a22eb1f83c44953319ee3b027769f
+https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19
+https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.27-pthreads_h9eca1d5_1.conda#5633a1616bda33f8b815841eba4dbfb8
+https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.2-h488ebb8_0.conda#7f2e286780f072ed750df46dc2631138
+https://conda.anaconda.org/conda-forge/noarch/packaging-24.1-pyhd8ed1ab_0.conda#cbe1bb1f21567018ce595d9c2be0f0db
+https://conda.anaconda.org/conda-forge/noarch/pluggy-1.5.0-pyhd8ed1ab_0.conda#d3483c8fc2dc2cc3f5cf43e26d60cabf
+https://conda.anaconda.org/conda-forge/noarch/pycparser-2.22-pyhd8ed1ab_0.conda#844d9eb3b43095b031874477f7d70088
+https://conda.anaconda.org/conda-forge/noarch/pygments-2.18.0-pyhd8ed1ab_0.conda#b7f5c092b8f9800150d998a71b76d5a1
+https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.4-pyhd8ed1ab_0.conda#4d91352a50949d049cf9714c8563d433
+https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025
+https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2024.1-pyhd8ed1ab_0.conda#98206ea9954216ee7540f0c773f2104d
+https://conda.anaconda.org/conda-forge/noarch/pytz-2024.1-pyhd8ed1ab_0.conda#3eeeeb9e4827ace8c0c1419c85d590ad
+https://conda.anaconda.org/conda-forge/noarch/setuptools-72.2.0-pyhd8ed1ab_0.conda#1462aa8b243aad09ef5d0841c745eb89
+https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2
+https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2#4d22a9315e78c6827f806065957d566e
+https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda#da1d979339e2714c30a8e806a33ec087
+https://conda.anaconda.org/conda-forge/noarch/tabulate-0.9.0-pyhd8ed1ab_1.tar.bz2#4759805cce2d914c38472f70bf4d8bcb
+https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.5.0-pyhc1e730c_0.conda#df68d78237980a159bd7149f33c0e8fd
+https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96
+https://conda.anaconda.org/conda-forge/linux-64/tornado-6.4.1-py39hd3abc70_0.conda#c183e99f9320e5e2d0f9c43efcb3fb22
+https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.1.0-py39hd1e30aa_0.conda#1da984bbb6e765743e13388ba7b7b2c8
+https://conda.anaconda.org/conda-forge/noarch/wheel-0.44.0-pyhd8ed1ab_0.conda#d44e3b085abcaef02983c6305b84b584
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-hb711507_2.conda#a0901183f08b6c7107aab109733a3c91
+https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.42-h4ab18f5_0.conda#b193af204da1bfb8c13882d131a14bd2
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxfixes-5.0.3-h7f98852_1004.tar.bz2#e9a21aa4d5e3e5f1aed71e8cefd46b6a
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530
+https://conda.anaconda.org/conda-forge/noarch/zipp-3.20.0-pyhd8ed1ab_0.conda#05b6bcb391b5be17374f7ad0aeedc479
+https://conda.anaconda.org/conda-forge/noarch/babel-2.14.0-pyhd8ed1ab_0.conda#9669586875baeced8fc30c0826c3270e
+https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-hebfffa5_3.conda#fceaedf1cdbcb02df9699a0d9b005292
+https://conda.anaconda.org/conda-forge/linux-64/cffi-1.17.0-py39h49a4b6b_0.conda#278cc676a7e939cf2561ce4a5cfaa484
+https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.53.1-py39hcd6043d_0.conda#297804eca6ea16a835a869699095de1c
+https://conda.anaconda.org/conda-forge/noarch/h2-4.1.0-pyhd8ed1ab_0.tar.bz2#b748fbf7060927a6e82df7cb5ee8f097
+https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-8.4.0-pyha770c72_0.conda#6e3dbc422d3749ad72659243d6ac8b2b
+https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.4.4-pyhd8ed1ab_0.conda#99aa3edd3f452d61c305a30e78140513
+https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.4-pyhd8ed1ab_0.conda#7b86ecb7d3557821c649b3c31e3eb9f2
+https://conda.anaconda.org/conda-forge/noarch/joblib-1.4.2-pyhd8ed1ab_0.conda#25df261d4523d9f9783bcdb7208d872f
+https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-23_linux64_openblas.conda#eede29b40efa878cbe5bdcb767e97310
+https://conda.anaconda.org/conda-forge/linux-64/libclang-cpp18.1-18.1.8-default_hf981a13_2.conda#b0f8c590aa86d9bee5987082f7f15bdf
+https://conda.anaconda.org/conda-forge/linux-64/libclang13-18.1.8-default_h9def88c_2.conda#ba2d12adbea9de311297f2b577f4bb86
+https://conda.anaconda.org/conda-forge/linux-64/libgl-1.7.0-ha4b6fd6_0.conda#3deca8c25851196c28d1c84dd4ae9149
+https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-23_linux64_openblas.conda#2af0879961951987e464722fd00ec1e0
+https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.7.0-h2c5496b_1.conda#e2eaefa4de2b7237af7c907b8bbc760a
+https://conda.anaconda.org/conda-forge/noarch/meson-1.5.1-pyhd8ed1ab_1.conda#979087ee59bea1355f991a3b738af64e
+https://conda.anaconda.org/conda-forge/linux-64/pillow-10.4.0-py39h16a7006_0.conda#d9a6b19174a6cf5185296b16f781951f
+https://conda.anaconda.org/conda-forge/noarch/pip-24.2-pyhd8ed1ab_0.conda#6721aef6bfe5937abe70181545dd2c51
+https://conda.anaconda.org/conda-forge/noarch/pyproject-metadata-0.8.0-pyhd8ed1ab_0.conda#573fe09d7bd0cd4bcc210d8369b5ca47
+https://conda.anaconda.org/conda-forge/noarch/pytest-8.3.2-pyhd8ed1ab_0.conda#e010a224b90f1f623a917c35addbb924
+https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda#2cf4264fffb9e6eff6031c5b6884d61c
+https://conda.anaconda.org/conda-forge/linux-64/xcb-util-cursor-0.1.4-h4ab18f5_2.conda#79e46d4a6ccecb7ee1912042958a8758
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxi-1.7.10-h4bc722e_1.conda#749baebe7e2ff3360630e069175e528b
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxxf86vm-1.1.5-h4bc722e_1.conda#0c90ad87101001080484b91bd9d2cdef
+https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-9.0.0-hda332d3_1.conda#76b32dcf243444aea9c6b804bcfa40b8
+https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.4.4-pyhd8ed1ab_0.conda#c62e775953b6b65f2079c9ee2a62813c
+https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-23_linux64_openblas.conda#89d7bcdb1e9a72a73e36d8e29d2a2beb
+https://conda.anaconda.org/conda-forge/noarch/meson-python-0.16.0-pyh0c530f3_0.conda#e16f0dbf502da873be9f9adb0dc52547
+https://conda.anaconda.org/conda-forge/linux-64/numpy-2.0.1-py39h2fd3214_0.conda#2c69819400d3318cf74f831811ab066f
+https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-3.6.1-pyhd8ed1ab_0.conda#b39568655c127a9c4a44d178ac99b6d0
+https://conda.anaconda.org/conda-forge/linux-64/xorg-libxtst-1.2.5-h4bc722e_0.conda#185159d666308204eca00295599b0a5c
+https://conda.anaconda.org/conda-forge/linux-64/zstandard-0.23.0-py39h623c9ba_0.conda#a19d023682384c637cb356d270c276c0
+https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-23_linux64_openblas.conda#08b43a5c3d6cc13aeb69bd2cbc293196
+https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.2.1-py39h7633fee_0.conda#bdc188e59857d6efab332714e0d01d93
+https://conda.anaconda.org/conda-forge/linux-64/pandas-2.2.2-py39hfc16268_1.conda#8b23d2b425035a7468d17e6fe1d54124
+https://conda.anaconda.org/conda-forge/linux-64/qt6-main-6.7.2-hb12f9c5_5.conda#8c662388c2418f293266f5e7f50df7d7
+https://conda.anaconda.org/conda-forge/linux-64/scipy-1.13.1-py39haf93ffa_0.conda#492a2cd65862d16a4aaf535ae9ccb761
+https://conda.anaconda.org/conda-forge/noarch/urllib3-2.2.2-pyhd8ed1ab_1.conda#e804c43f58255e977093a2298e442bb8
+https://conda.anaconda.org/conda-forge/linux-64/blas-2.123-openblas.conda#7f4b3ea1cdd6e50dca2a226abda6e2d9
+https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.9.2-py39h0565ad7_0.conda#14917b240f18eba18576e81530360a0a
+https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.2.1-py39h85c637f_0.conda#0bfaf33b7ebdbadc77bf9a67e281c0b1
+https://conda.anaconda.org/conda-forge/linux-64/pyside6-6.7.2-py39h8242bd1_2.conda#e5c6995331893cf9fcaab45d11e343ff
+https://conda.anaconda.org/conda-forge/noarch/requests-2.32.3-pyhd8ed1ab_0.conda#5ede4753180c7a550a443c430dc8ab52
+https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.9.2-py39hf3d152e_0.conda#5f49ac6db4d60b2afbb6feb2a85beea7
+https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.8.0-pyhd8ed1ab_0.conda#0a5522bdd3983c52102e75d1307ad8c4
+https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-2.0.0-pyhd8ed1ab_0.conda#9075bd8c033f0257122300db914e49c9
+https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-2.0.0-pyhd8ed1ab_0.conda#b3bcc38c471ebb738854f52a36059b48
+https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.1.0-pyhd8ed1ab_0.conda#e25640d692c02e8acfff0372f547e940
+https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-2.0.0-pyhd8ed1ab_0.conda#d6e5ea5fe00164ac6c2dcc5d76a42192
+https://conda.anaconda.org/conda-forge/noarch/sphinx-7.4.7-pyhd8ed1ab_0.conda#c568e260463da2528ecfd7c5a0b41bbd
+https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.10-pyhd8ed1ab_0.conda#e507335cb4ca9cff4c3d0fa9cdab255e
diff --git a/auto_building_tools/build_tools/azure/pytest-pyodide.js b/auto_building_tools/build_tools/azure/pytest-pyodide.js
new file mode 100644
index 0000000..c195940
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/pytest-pyodide.js
@@ -0,0 +1,53 @@
+const { opendir } = require('node:fs/promises');
+const { loadPyodide } = require("pyodide");
+
+async function main() {
+ let exit_code = 0;
+ try {
+ global.pyodide = await loadPyodide();
+ let pyodide = global.pyodide;
+ const FS = pyodide.FS;
+ const NODEFS = FS.filesystems.NODEFS;
+
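+  // Mounting the current working directory via NODEFS makes the locally
+  // built wheels (expected under dist/) visible to Pyodide under /mnt.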
+ let mountDir = "/mnt";
+ pyodide.FS.mkdir(mountDir);
+ pyodide.FS.mount(pyodide.FS.filesystems.NODEFS, { root: "." }, mountDir);
+
+ await pyodide.loadPackage(["micropip"]);
+ await pyodide.runPythonAsync(`
+ import glob
+ import micropip
+
+ wheels = glob.glob('/mnt/dist/*.whl')
+ wheels = [f'emfs://{wheel}' for wheel in wheels]
+ print(f'installing wheels: {wheels}')
+ await micropip.install(wheels);
+
+ pkg_list = micropip.list()
+ print(pkg_list)
+ `);
+
+  // Pyodide is built without OpenMP, so we need to set an environment
+  // variable to skip the related test
+ await pyodide.runPythonAsync(`
+ import os
+ os.environ['SKLEARN_SKIP_OPENMP_TEST'] = 'true'
+ `);
+
+ await pyodide.runPythonAsync("import micropip; micropip.install('pytest')");
+ let pytest = pyodide.pyimport("pytest");
+ let args = process.argv.slice(2);
+ console.log('pytest args:', args);
+ exit_code = pytest.main(pyodide.toPy(args));
+ } catch (e) {
+ console.error(e);
+  // Arbitrary non-zero exit code; this branch has sometimes been reached
+  // instead of a Pyodide fatal error
+ exit_code = 66;
+
+ } finally {
+ process.exit(exit_code);
+ }
+}
+
+main();
diff --git a/auto_building_tools/build_tools/azure/test_docs.sh b/auto_building_tools/build_tools/azure/test_docs.sh
new file mode 100644
index 0000000..48ad276
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/test_docs.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+set -ex
+
+source build_tools/shared.sh
+activate_environment
+
+# XXX: for some unknown reason, python -m pytest fails here in the CI; this
+# cannot be reproduced locally and is not worth spending time on.
+pytest $(find doc -name '*.rst' | sort)
diff --git a/auto_building_tools/build_tools/azure/test_pytest_soft_dependency.sh b/auto_building_tools/build_tools/azure/test_pytest_soft_dependency.sh
new file mode 100644
index 0000000..dbfb80f
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/test_pytest_soft_dependency.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+set -e
+
+# called when DISTRIB=="conda"
+source activate $VIRTUALENV
+conda remove -y py pytest || pip uninstall -y py pytest
+
+if [[ "$COVERAGE" == "true" ]]; then
+    # conda may remove coverage when uninstalling pytest and py
+ pip install coverage
+ # Need to append the coverage to the existing .coverage generated by
+ # running the tests. Make sure to reuse the same coverage
+ # configuration as the one used by the main pytest run to be
+ # able to combine the results.
+ CMD="coverage run --rcfile=$BUILD_SOURCESDIRECTORY/.coveragerc"
+else
+ CMD="python"
+fi
+
+# .coverage from running the tests is in TEST_DIR
+pushd $TEST_DIR
+$CMD -m sklearn.utils.tests.test_estimator_checks
+popd
diff --git a/auto_building_tools/build_tools/azure/test_script.sh b/auto_building_tools/build_tools/azure/test_script.sh
new file mode 100644
index 0000000..9be7d58
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/test_script.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+set -e
+
+# Defines the show_installed_libraries and activate_environment functions.
+source build_tools/shared.sh
+
+activate_environment
+
+if [[ "$BUILD_REASON" == "Schedule" ]]; then
+ # Enable global random seed randomization to discover seed-sensitive tests
+ # only on nightly builds.
+ # https://scikit-learn.org/stable/computing/parallelism.html#environment-variables
+ export SKLEARN_TESTS_GLOBAL_RANDOM_SEED=$(($RANDOM % 100))
+ echo "To reproduce this test run, set the following environment variable:"
+ echo " SKLEARN_TESTS_GLOBAL_RANDOM_SEED=$SKLEARN_TESTS_GLOBAL_RANDOM_SEED",
+ echo "See: https://scikit-learn.org/dev/computing/parallelism.html#sklearn-tests-global-random-seed"
+
+ # Enable global dtype fixture for all nightly builds to discover
+ # numerical-sensitive tests.
+ # https://scikit-learn.org/stable/computing/parallelism.html#environment-variables
+ export SKLEARN_RUN_FLOAT32_TESTS=1
+fi
+
+COMMIT_MESSAGE=$(python build_tools/azure/get_commit_message.py --only-show-message)
+
+if [[ "$COMMIT_MESSAGE" =~ \[float32\] ]]; then
+ echo "float32 tests will be run due to commit message"
+ export SKLEARN_RUN_FLOAT32_TESTS=1
+fi
+
+mkdir -p $TEST_DIR
+cp setup.cfg $TEST_DIR
+cd $TEST_DIR
+
+python -c "import joblib; print(f'Number of cores (physical): \
+{joblib.cpu_count()} ({joblib.cpu_count(only_physical_cores=True)})')"
+python -c "import sklearn; sklearn.show_versions()"
+
+show_installed_libraries
+
+TEST_CMD="python -m pytest --showlocals --durations=20 --junitxml=$JUNITXML"
+
+if [[ "$COVERAGE" == "true" ]]; then
+    # Note: --cov-report= is used to disable the long text output report in the
+    # CI logs. The coverage data is consolidated by codecov to get an online
+    # web report across all the platforms, so there is no need for this text
+    # report, which otherwise hides the test failures and forces long scrolls in
+    # the CI logs.
+ export COVERAGE_PROCESS_START="$BUILD_SOURCESDIRECTORY/.coveragerc"
+ TEST_CMD="$TEST_CMD --cov-config='$COVERAGE_PROCESS_START' --cov sklearn --cov-report="
+fi
+
+if [[ "$PYTEST_XDIST_VERSION" != "none" ]]; then
+ XDIST_WORKERS=$(python -c "import joblib; print(joblib.cpu_count(only_physical_cores=True))")
+ TEST_CMD="$TEST_CMD -n$XDIST_WORKERS"
+fi
+
+if [[ -n "$SELECTED_TESTS" ]]; then
+ TEST_CMD="$TEST_CMD -k $SELECTED_TESTS"
+
+ # Override to make selected tests run on all random seeds
+ export SKLEARN_TESTS_GLOBAL_RANDOM_SEED="all"
+fi
+
+TEST_CMD="$TEST_CMD --pyargs sklearn"
+
+set -x
+eval "$TEST_CMD"
+set +x
diff --git a/auto_building_tools/build_tools/azure/test_script_pyodide.sh b/auto_building_tools/build_tools/azure/test_script_pyodide.sh
new file mode 100644
index 0000000..d1aa207
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/test_script_pyodide.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -e
+
+# We are using a pytest js wrapper script to run tests inside Pyodide. Maybe
+# one day we can use a Pyodide venv instead but at the time of writing
+# (2023-09-27) there is an issue with scipy.linalg in a Pyodide venv, see
+# https://github.com/pyodide/pyodide/issues/3865 for more details.
+node build_tools/azure/pytest-pyodide.js --pyargs sklearn --durations 20 --showlocals
diff --git a/auto_building_tools/build_tools/azure/ubuntu_atlas_lock.txt b/auto_building_tools/build_tools/azure/ubuntu_atlas_lock.txt
new file mode 100644
index 0000000..4e4283d
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/ubuntu_atlas_lock.txt
@@ -0,0 +1,43 @@
+#
+# This file is autogenerated by pip-compile with Python 3.10
+# by the following command:
+#
+# pip-compile --output-file=build_tools/azure/ubuntu_atlas_lock.txt build_tools/azure/ubuntu_atlas_requirements.txt
+#
+cython==3.0.10
+ # via -r build_tools/azure/ubuntu_atlas_requirements.txt
+exceptiongroup==1.2.2
+ # via pytest
+execnet==2.1.1
+ # via pytest-xdist
+iniconfig==2.0.0
+ # via pytest
+joblib==1.2.0
+ # via -r build_tools/azure/ubuntu_atlas_requirements.txt
+meson==1.5.1
+ # via meson-python
+meson-python==0.16.0
+ # via -r build_tools/azure/ubuntu_atlas_requirements.txt
+ninja==1.11.1.1
+ # via -r build_tools/azure/ubuntu_atlas_requirements.txt
+packaging==24.1
+ # via
+ # meson-python
+ # pyproject-metadata
+ # pytest
+pluggy==1.5.0
+ # via pytest
+pyproject-metadata==0.8.0
+ # via meson-python
+pytest==8.3.2
+ # via
+ # -r build_tools/azure/ubuntu_atlas_requirements.txt
+ # pytest-xdist
+pytest-xdist==3.6.1
+ # via -r build_tools/azure/ubuntu_atlas_requirements.txt
+threadpoolctl==3.1.0
+ # via -r build_tools/azure/ubuntu_atlas_requirements.txt
+tomli==2.0.1
+ # via
+ # meson-python
+ # pytest
diff --git a/auto_building_tools/build_tools/azure/ubuntu_atlas_requirements.txt b/auto_building_tools/build_tools/azure/ubuntu_atlas_requirements.txt
new file mode 100644
index 0000000..dfb0cfe
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/ubuntu_atlas_requirements.txt
@@ -0,0 +1,10 @@
+# DO NOT EDIT: this file is generated from the specification found in the
+# following script to centralize the configuration for CI builds:
+# build_tools/update_environments_and_lock_files.py
+cython==3.0.10 # min
+joblib==1.2.0 # min
+threadpoolctl==3.1.0 # min
+pytest
+pytest-xdist
+ninja
+meson-python
diff --git a/auto_building_tools/build_tools/azure/upload_codecov.sh b/auto_building_tools/build_tools/azure/upload_codecov.sh
new file mode 100644
index 0000000..0e87b2d
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/upload_codecov.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+set -e
+
+# Do not upload to codecov on forks
+if [[ "$BUILD_REPOSITORY_NAME" != "scikit-learn/scikit-learn" ]]; then
+ exit 0
+fi
+
+# When we update the codecov uploader version, we need to update the checksums.
+# The checksum for each codecov binary is available at
+# https://uploader.codecov.io e.g. for linux
+# https://uploader.codecov.io/v0.7.1/linux/codecov.SHA256SUM.
+
+# Instead of hardcoding a specific version and signature in this script, it
+# would be possible to use the "latest" symlink URL but then we need to
+# download both the codecov binary and the codecov.SHA256SUM file each time
+# and check the signatures with the codecov gpg key as well, see:
+# https://docs.codecov.com/docs/codecov-uploader#integrity-checking-the-uploader
+# However this approach would yield a larger number of downloads from
+# codecov.io and keybase.io, therefore increasing the risk of running into
+# network failures.
+CODECOV_UPLOADER_VERSION=0.7.1
+CODECOV_BASE_URL="https://uploader.codecov.io/v$CODECOV_UPLOADER_VERSION"
+
+
+# Check that the git repo is located at the expected location:
+if [[ ! -d "$BUILD_REPOSITORY_LOCALPATH/.git" ]]; then
+ echo "Could not find the git checkout at $BUILD_REPOSITORY_LOCALPATH"
+ exit 1
+fi
+
+# Check that the combined coverage file exists at the expected location:
+export COVERAGE_XML="$BUILD_REPOSITORY_LOCALPATH/coverage.xml"
+if [[ ! -f "$COVERAGE_XML" ]]; then
+ echo "Could not find the combined coverage file at $COVERAGE_XML"
+ exit 1
+fi
+
+if [[ $OSTYPE == *"linux"* ]]; then
+ curl -Os "$CODECOV_BASE_URL/linux/codecov"
+ SHA256SUM="b9282b8b43eef83f722646d8992c4dd36563046afe0806722184e7e9923a6d7b codecov"
+ echo "$SHA256SUM" | shasum -a256 -c
+ chmod +x codecov
+ ./codecov -t ${CODECOV_TOKEN} -R $BUILD_REPOSITORY_LOCALPATH -f coverage.xml -Z --verbose
+elif [[ $OSTYPE == *"darwin"* ]]; then
+ curl -Os "$CODECOV_BASE_URL/macos/codecov"
+ SHA256SUM="e4ce34c144d3195eccb7f8b9ca8de092d2a4be114d927ca942500f3a6326225c codecov"
+ echo "$SHA256SUM" | shasum -a256 -c
+ chmod +x codecov
+ ./codecov -t ${CODECOV_TOKEN} -R $BUILD_REPOSITORY_LOCALPATH -f coverage.xml -Z --verbose
+else
+ curl -Os "$CODECOV_BASE_URL/windows/codecov.exe"
+ SHA256SUM="f5de88026f061ff08b88a5895f9c11855523924ceb8174e027403dd20fa5e4d6 codecov.exe"
+ echo "$SHA256SUM" | sha256sum -c
+ ./codecov.exe -t ${CODECOV_TOKEN} -R $BUILD_REPOSITORY_LOCALPATH -f coverage.xml -Z --verbose
+fi
diff --git a/auto_building_tools/build_tools/azure/windows.yml b/auto_building_tools/build_tools/azure/windows.yml
new file mode 100644
index 0000000..1727da4
--- /dev/null
+++ b/auto_building_tools/build_tools/azure/windows.yml
@@ -0,0 +1,85 @@
+
+parameters:
+ name: ''
+ vmImage: ''
+ matrix: []
+ dependsOn: []
+ condition: ne(variables['Build.Reason'], 'Schedule')
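+# A hedged usage sketch of how the main azure-pipelines.yml is expected to
+# consume this template (job name, image and matrix entries below are
+# illustrative, not the actual configuration):
+#   - template: build_tools/azure/windows.yml
+#     parameters:
+#       name: Windows
+#       vmImage: windows-latest
+#       matrix:
+#         pymin_pip:
+#           PYTHON_VERSION: '3.9'
+#           PYTHON_ARCH: '64'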
+
+jobs:
+- job: ${{ parameters.name }}
+ dependsOn: ${{ parameters.dependsOn }}
+ condition: ${{ parameters.condition }}
+ pool:
+ vmImage: ${{ parameters.vmImage }}
+ variables:
+ VIRTUALENV: 'testvenv'
+ JUNITXML: 'test-data.xml'
+ SKLEARN_SKIP_NETWORK_TESTS: '1'
+ PYTEST_XDIST_VERSION: 'latest'
+ TEST_DIR: '$(Agent.WorkFolder)/tmp_folder'
+ SHOW_SHORT_SUMMARY: 'false'
+ strategy:
+ matrix:
+ ${{ insert }}: ${{ parameters.matrix }}
+
+ steps:
+ - bash: python build_tools/azure/get_selected_tests.py
+ displayName: Check selected tests for all random seeds
+ condition: eq(variables['Build.Reason'], 'PullRequest')
+ - bash: echo "##vso[task.prependpath]$CONDA/Scripts"
+ displayName: Add conda to PATH
+ condition: startsWith(variables['DISTRIB'], 'conda')
+ - task: UsePythonVersion@0
+ inputs:
+ versionSpec: '$(PYTHON_VERSION)'
+ addToPath: true
+ architecture: 'x86'
+ displayName: Use 32 bit System Python
+ condition: and(succeeded(), eq(variables['PYTHON_ARCH'], '32'))
+ - bash: ./build_tools/azure/install.sh
+ displayName: 'Install'
+ - bash: ./build_tools/azure/test_script.sh
+ displayName: 'Test Library'
+ - bash: ./build_tools/azure/combine_coverage_reports.sh
+ condition: and(succeeded(), eq(variables['COVERAGE'], 'true'),
+ eq(variables['SELECTED_TESTS'], ''))
+ displayName: 'Combine coverage'
+ - task: PublishTestResults@2
+ inputs:
+ testResultsFiles: '$(TEST_DIR)/$(JUNITXML)'
+ testRunTitle: ${{ format('{0}-$(Agent.JobName)', parameters.name) }}
+ displayName: 'Publish Test Results'
+ condition: succeededOrFailed()
+ - bash: |
+ set -ex
+ if [[ $(BOT_GITHUB_TOKEN) == "" ]]; then
+ echo "GitHub Token is not set. Issue tracker will not be updated."
+ exit
+ fi
+
+ LINK_TO_RUN="https://dev.azure.com/$BUILD_REPOSITORY_NAME/_build/results?buildId=$BUILD_BUILDID&view=logs&j=$SYSTEM_JOBID"
+ CI_NAME="$SYSTEM_JOBIDENTIFIER"
+ ISSUE_REPO="$BUILD_REPOSITORY_NAME"
+
+ $(pyTools.pythonLocation)/bin/pip install defusedxml PyGithub
+ $(pyTools.pythonLocation)/bin/python maint_tools/update_tracking_issue.py \
+ $(BOT_GITHUB_TOKEN) \
+ $CI_NAME \
+ $ISSUE_REPO \
+ $LINK_TO_RUN \
+ --junit-file $JUNIT_FILE \
+ --auto-close false
+ displayName: 'Update issue tracker'
+ env:
+ JUNIT_FILE: $(TEST_DIR)/$(JUNITXML)
+ condition: and(succeededOrFailed(), eq(variables['CREATE_ISSUE_ON_TRACKER'], 'true'),
+ eq(variables['Build.Reason'], 'Schedule'))
+ - bash: ./build_tools/azure/upload_codecov.sh
+ condition: and(succeeded(),
+ eq(variables['COVERAGE'], 'true'),
+ eq(variables['SELECTED_TESTS'], ''))
+ displayName: 'Upload To Codecov'
+ retryCountOnTaskFailure: 5
+ env:
+ CODECOV_TOKEN: $(CODECOV_TOKEN)
diff --git a/auto_building_tools/build_tools/circle/build_doc.sh b/auto_building_tools/build_tools/circle/build_doc.sh
new file mode 100644
index 0000000..5555468
--- /dev/null
+++ b/auto_building_tools/build_tools/circle/build_doc.sh
@@ -0,0 +1,251 @@
+#!/usr/bin/env bash
+set -e
+
+# Decide what kind of documentation build to run, and run it.
+#
+# If the last commit message has a "[doc skip]" marker, do not build
+# the doc. Conversely, if a "[doc build]" marker is found, build the doc
+# regardless of the subsequent rules.
+#
+# We always build the documentation for jobs that are not related to a specific
+# PR (e.g. a merge to main or a maintenance branch).
+#
+# If this is a PR, do a full build if there are some files in this PR that are
+# under the "doc/" or "examples/" folders, otherwise perform a quick build.
+#
+# If the inspection of the current commit fails for any reason, the default
+# behavior is to perform a quick build of the documentation.
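+#
+# For example, a contributor can skip or force the doc build from the last
+# commit message using the markers described above (the commit text itself is
+# illustrative):
+#   git commit --allow-empty -m "Trigger a full doc build [doc build]"
+#   git commit --allow-empty -m "CI config change only [doc skip]"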
+
+# defines the get_dep and show_installed_libraries functions
+source build_tools/shared.sh
+
+if [ -n "$GITHUB_ACTION" ]
+then
+ # Map the variables from Github Action to CircleCI
+ CIRCLE_SHA1=$(git log -1 --pretty=format:%H)
+
+ CIRCLE_JOB=$GITHUB_JOB
+
+ if [ "$GITHUB_EVENT_NAME" == "pull_request" ]
+ then
+ CIRCLE_BRANCH=$GITHUB_HEAD_REF
+ CI_PULL_REQUEST=true
+ else
+ CIRCLE_BRANCH=$GITHUB_REF_NAME
+ fi
+fi
+
+get_build_type() {
+ if [ -z "$CIRCLE_SHA1" ]
+ then
+ echo SKIP: undefined CIRCLE_SHA1
+ return
+ fi
+ commit_msg=$(git log --format=%B -n 1 $CIRCLE_SHA1)
+ if [ -z "$commit_msg" ]
+ then
+ echo QUICK BUILD: failed to inspect commit $CIRCLE_SHA1
+ return
+ fi
+ if [[ "$commit_msg" =~ \[doc\ skip\] ]]
+ then
+ echo SKIP: [doc skip] marker found
+ return
+ fi
+ if [[ "$commit_msg" =~ \[doc\ quick\] ]]
+ then
+ echo QUICK: [doc quick] marker found
+ return
+ fi
+ if [[ "$commit_msg" =~ \[doc\ build\] ]]
+ then
+ echo BUILD: [doc build] marker found
+ return
+ fi
+ if [ -z "$CI_PULL_REQUEST" ]
+ then
+ echo BUILD: not a pull request
+ return
+ fi
+ git_range="origin/main...$CIRCLE_SHA1"
+    # Use a brace group rather than a subshell so that "return" actually
+    # exits the function when the fetch fails.
+    git fetch origin main >&2 || { echo QUICK BUILD: failed to get changed filenames for $git_range; return; }
+ filenames=$(git diff --name-only $git_range)
+ if [ -z "$filenames" ]
+ then
+ echo QUICK BUILD: no changed filenames for $git_range
+ return
+ fi
+ changed_examples=$(echo "$filenames" | grep -E "^examples/(.*/)*plot_")
+
+    # The following extracts the list of example python files that
+    # sphinx-gallery needs to run in order to generate the png files used as
+    # figures or images in the documentation's .rst files.
+    # If the contributor changes a .rst file in a PR, we need to run all the
+    # examples mentioned in that file so that sphinx can build the
+    # documentation without generating spurious warnings about missing png
+    # files.
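+    #
+    # As a worked example (illustrative filename): an rst line such as
+    #   .. figure:: ../auto_examples/cluster/images/sphx_glr_plot_kmeans_digits_001.png
+    # is reduced below to the example script name plot_kmeans_digits.py,
+    # which is then added to the pattern of examples to run.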
+
+ if [[ -n "$filenames" ]]
+ then
+ # get rst files
+ rst_files="$(echo "$filenames" | grep -E "rst$")"
+
+ # get lines with figure or images
+ img_fig_lines="$(echo "$rst_files" | xargs grep -shE "(figure|image)::")"
+
+ # get only auto_examples
+ auto_example_files="$(echo "$img_fig_lines" | grep auto_examples | awk -F "/" '{print $NF}')"
+
+    # remove the "sphx_glr_" prefix and replace _(\d\d\d|thumb).png with .py
+ scripts_names="$(echo "$auto_example_files" | sed 's/sphx_glr_//' | sed -E 's/_([[:digit:]][[:digit:]][[:digit:]]|thumb).png/.py/')"
+
+ # get unique values
+ examples_in_rst="$(echo "$scripts_names" | uniq )"
+ fi
+
+ # executed only if there are examples in the modified rst files
+ if [[ -n "$examples_in_rst" ]]
+ then
+ if [[ -n "$changed_examples" ]]
+ then
+ changed_examples="$changed_examples|$examples_in_rst"
+ else
+ changed_examples="$examples_in_rst"
+ fi
+ fi
+
+ if [[ -n "$changed_examples" ]]
+ then
+ echo BUILD: detected examples/ filename modified in $git_range: $changed_examples
+ pattern=$(echo "$changed_examples" | paste -sd '|')
+ # pattern for examples to run is the last line of output
+ echo "$pattern"
+ return
+ fi
+ echo QUICK BUILD: no examples/ filename modified in $git_range:
+ echo "$filenames"
+}
+
+build_type=$(get_build_type)
+if [[ "$build_type" =~ ^SKIP ]]
+then
+ exit 0
+fi
+
+if [[ "$CIRCLE_BRANCH" =~ ^main$|^[0-9]+\.[0-9]+\.X$ && -z "$CI_PULL_REQUEST" ]]
+then
+ # ZIP linked into HTML
+ make_args=dist
+elif [[ "$build_type" =~ ^QUICK ]]
+then
+ make_args=html-noplot
+elif [[ "$build_type" =~ ^'BUILD: detected examples' ]]
+then
+ # pattern for examples to run is the last line of output
+ pattern=$(echo "$build_type" | tail -n 1)
+ make_args="html EXAMPLES_PATTERN=$pattern"
+else
+ make_args=html
+fi
+
+# Installing required system packages to support the rendering of math
+# notation in the HTML documentation and to optimize the image files
+sudo -E apt-get -yq update --allow-releaseinfo-change
+sudo -E apt-get -yq --no-install-suggests --no-install-recommends \
+ install dvipng gsfonts ccache zip optipng
+
+# deactivate circleci virtualenv and setup a conda env instead
+if [[ `type -t deactivate` ]]; then
+ deactivate
+fi
+
+# Install Miniforge
+MINIFORGE_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh"
+curl -L --retry 10 $MINIFORGE_URL -o miniconda.sh
+MINIFORGE_PATH=$HOME/miniforge3
+bash ./miniconda.sh -b -p $MINIFORGE_PATH
+source $MINIFORGE_PATH/etc/profile.d/conda.sh
+conda activate
+
+export PATH="/usr/lib/ccache:$PATH"
+ccache -M 512M
+export CCACHE_COMPRESS=1
+
+create_conda_environment_from_lock_file $CONDA_ENV_NAME $LOCK_FILE
+conda activate $CONDA_ENV_NAME
+
+show_installed_libraries
+
+pip install -e . --no-build-isolation
+
+echo "ccache build summary:"
+ccache -s
+
+export OMP_NUM_THREADS=1
+
+if [[ "$CIRCLE_BRANCH" =~ ^main$ && -z "$CI_PULL_REQUEST" ]]
+then
+ # List available documentation versions if on main
+ python build_tools/circle/list_versions.py --json doc/js/versions.json --rst doc/versions.rst
+fi
+
+
+# pipefail is needed so that a failure of "make" is propagated through the
+# pipe to "tee" and the script exits with a non-zero code
+set -o pipefail && cd doc && make $make_args 2>&1 | tee ~/log.txt
+
+cd -
+set +o pipefail
+
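+# affected_doc_paths maps the files changed in the PR to the HTML pages that
+# may have been affected, e.g. (illustrative input/output, not actual data):
+#   doc/modules/clustering.rst             -> modules/clustering.html
+#   examples/cluster/plot_kmeans_digits.py -> auto_examples/cluster/plot_kmeans_digits.html
+# For changed sklearn/ source files, the generated API pages that link to
+# them are looked up in the built documentation.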
+affected_doc_paths() {
+ files=$(git diff --name-only origin/main...$CIRCLE_SHA1)
+ echo "$files" | grep ^doc/.*\.rst | sed 's/^doc\/\(.*\)\.rst$/\1.html/'
+ echo "$files" | grep ^examples/.*.py | sed 's/^\(.*\)\.py$/auto_\1.html/'
+ sklearn_files=$(echo "$files" | grep '^sklearn/')
+ if [ -n "$sklearn_files" ]
+ then
+ grep -hlR -f<(echo "$sklearn_files" | sed 's/^/scikit-learn\/blob\/[a-z0-9]*\//') doc/_build/html/stable/modules/generated | cut -d/ -f5-
+ fi
+}
+
+affected_doc_warnings() {
+ files=$(git diff --name-only origin/main...$CIRCLE_SHA1)
+ # Look for sphinx warnings only in files affected by the PR
+ if [ -n "$files" ]
+ then
+ for af in ${files[@]}
+ do
+ warn+=`grep WARNING ~/log.txt | grep $af`
+ done
+ fi
+ echo "$warn"
+}
+
+if [ -n "$CI_PULL_REQUEST" ]
+then
+ echo "The following documentation warnings may have been generated by PR #$CI_PULL_REQUEST:"
+ warnings=$(affected_doc_warnings)
+ if [ -z "$warnings" ]
+ then
+ warnings="/home/circleci/project/ no warnings"
+ fi
+ echo "$warnings"
+
+ echo "The following documentation files may have been changed by PR #$CI_PULL_REQUEST:"
+ affected=$(affected_doc_paths)
+ echo "$affected"
+ (
+ echo '