From 2aebc88c028fd5c0c42f908bc0a03f11beb0f13f Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Thu, 4 Jul 2024 10:46:54 -0700
Subject: [PATCH] Refactor build / dist to use pyproject.toml (#909)

* Move training -> open_clip_train since it's being installed as an import package in site-packages

* Remove .gitignore from train package

* Update root gitignore

* Update training module name to open_clip_train

* switch from setup.py -> pyproject.toml

* open_clip.__version__

* make [test] depend on [training]

* pip install .[test] for CI

* Change references training.main -> open_clip_train.main
---
 .github/workflows/ci.yml                          |  4 +-
 .github/workflows/python-publish.yml              |  2 +-
 .gitignore                                        |  6 +-
 README.md                                         | 20 ++---
 .../clipa/vit_b16/i50_t16_finetune.sh             |  2 +-
 .../clipa/vit_b16/i50_t16_pretrain.sh             |  2 +-
 .../clipa/vit_l16/i17_t16_finetune.sh             |  2 +-
 .../clipa/vit_l16/i17_t16_pretrain.sh             |  2 +-
 .../clipa/vit_l16/i37_t8_finetune.sh              |  2 +-
 .../clipa/vit_l16/i37_t8_pretrain.sh              |  2 +-
 .../clipav2/vit_h14/i257_t32_finetunex4.sh        |  2 +-
 .../clipav2/vit_h14/i50_t8_pretrain.sh            |  2 +-
 .../clipav2/vit_h14/i577_t32_finetunex1.sh        |  2 +-
 docs/script_examples/stability_example.sh         |  2 +-
 pyproject.toml                                    | 79 +++++++++++++++++++
 scripts/clipav1_vit_l16_i37_t8.sh                 |  2 +-
 ...vit_h14_i84_224_336_cl32_gap_datacomp1b.sh     |  2 +-
 scripts/h14_224_32_finetune.sh                    |  2 +-
 scripts/h14_84_8_pretrain.sh                      |  2 +-
 setup.py                                          | 63 ---------------
 src/open_clip/__init__.py                         |  2 +
 src/{training => open_clip_train}/__init__.py     |  0
 src/{training => open_clip_train}/data.py         |  0
 src/{training => open_clip_train}/distributed.py  |  0
 src/{training => open_clip_train}/file_utils.py   |  0
 src/{training => open_clip_train}/logger.py       |  0
 src/{training => open_clip_train}/main.py         | 14 ++--
 src/{training => open_clip_train}/params.py       |  0
 src/{training => open_clip_train}/precision.py    |  0
 src/{training => open_clip_train}/profiler.py     |  0
 src/{training => open_clip_train}/scheduler.py    |  0
 src/{training => open_clip_train}/train.py        |  6 +-
 src/{training => open_clip_train}/zero_shot.py    |  2 +-
 src/training/.gitignore                           |  1 -
 tests/test_hf_model.py                            |  1 +
 tests/test_num_shards.py                          |  2 +-
 tests/test_training_simple.py                     |  3 +-
 tests/test_wds.py                                 |  6 +-
 38 files changed, 127 insertions(+), 112 deletions(-)
 create mode 100644 pyproject.toml
 delete mode 100644 setup.py
 rename src/{training => open_clip_train}/__init__.py (100%)
 rename src/{training => open_clip_train}/data.py (100%)
 rename src/{training => open_clip_train}/distributed.py (100%)
 rename src/{training => open_clip_train}/file_utils.py (100%)
 rename src/{training => open_clip_train}/logger.py (100%)
 rename src/{training => open_clip_train}/main.py (97%)
 rename src/{training => open_clip_train}/params.py (100%)
 rename src/{training => open_clip_train}/precision.py (100%)
 rename src/{training => open_clip_train}/profiler.py (100%)
 rename src/{training => open_clip_train}/scheduler.py (100%)
 rename src/{training => open_clip_train}/train.py (98%)
 rename src/{training => open_clip_train}/zero_shot.py (98%)
 delete mode 100644 src/training/.gitignore

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2681b943e..0449d4daf 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -68,9 +68,7 @@ jobs:
         run: |
           python3 -m venv .env
           source .env/bin/activate
-          make install
-          make install-test
-          make install-training
+          pip install -e .[test]
       - name: Prepare test data
         run: |
           source .env/bin/activate
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index f336b13ba..afb4cfc80 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -33,5 +33,5 @@ jobs:
         env:
           TWINE_USERNAME: __token__
           TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
         run: |
-          python setup.py sdist bdist_wheel
+          python -m build
           twine upload dist/*
diff --git a/.gitignore b/.gitignore
index 960651015..6eab86277 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
-logs/
-wandb/
+**/logs/
+**/wandb/
 models/
 features/
 results/
@@ -150,4 +150,4 @@ src/debug
 core.*
 
 # Allow
-!src/evaluation/misc/results_dbs/*
\ No newline at end of file
+!src/evaluation/misc/results_dbs/*
diff --git a/README.md b/README.md
index edfdff691..bd28f0a33 100644
--- a/README.md
+++ b/README.md
@@ -169,7 +169,7 @@ Running regression tests against a specific git revision or tag:
 ### Sample single-process running code:
 
 ```bash
-python -m training.main \
+python -m open_clip_train.main \
     --save-frequency 1 \
     --zeroshot-frequency 1 \
    --report-to tensorboard \
@@ -234,7 +234,7 @@ a job on a node of 4 GPUs:
 
 ```bash
 cd open_clip/src
-torchrun --nproc_per_node 4 -m training.main \
+torchrun --nproc_per_node 4 -m open_clip_train.main \
     --train-data '/data/cc12m/cc12m-train-{0000..2175}.tar' \
     --train-num-samples 10968539 \
     --dataset-type webdataset \
@@ -253,7 +253,7 @@ of nodes and host node.
 cd open_clip/src
 torchrun --nproc_per_node=4 \
     --rdzv_endpoint=$HOSTE_NODE_ADDR \
-    -m training.main \
+    -m open_clip_train.main \
     --train-data '/data/cc12m/cc12m-train-{0000..2175}.tar' \
     --train-num-samples 10968539 \
     --dataset-type webdataset \
@@ -289,7 +289,7 @@ export MASTER_ADDR=$master_addr
 
 cd /shared/open_clip
 export PYTHONPATH="$PYTHONPATH:$PWD/src"
-srun --cpu_bind=v --accel-bind=gn python -u src/training/main.py \
+srun --cpu_bind=v --accel-bind=gn python -u src/open_clip_train/main.py \
     --save-frequency 1 \
     --report-to tensorboard \
     --train-data="/data/LAION-400M/{00000..41455}.tar" \
@@ -307,7 +307,7 @@ srun --cpu_bind=v --accel-bind=gn python -u src/training/main.py \
 ### Resuming from a checkpoint:
 
 ```bash
-python -m training.main \
+python -m open_clip_train.main \
     --train-data="/path/to/train_data.csv" \
     --val-data="/path/to/validation_data.csv" \
     --resume /path/to/checkpoints/epoch_K.pt
@@ -376,7 +376,7 @@ pd.DataFrame.from_dict(future_df).to_csv(
 ```
 This should create a CSV dataset that one can use to fine-tune CoCa with open_clip
 ```bash
-python -m training.main \
+python -m open_clip_train.main \
     --dataset-type "csv" \
     --train-data "path/to/data/dir/train2014.csv" \
     --warmup 1000 \
@@ -392,7 +392,7 @@ python -m training.main \
     --log-every-n-steps 100
 ```
 
-This is a general setting; open_clip has many parameters that can be set, ```python -m training.main --help``` should show them. The only relevant changes compared to pre-training are the two arguments
+This is a general setting; open_clip has many parameters that can be set, ```python -m open_clip_train.main --help``` should show them. The only relevant changes compared to pre-training are the two arguments
 
 ```bash
 --coca-contrastive-loss-weight 0
 --coca-caption-loss-weight 1
 ```
 
 which make the model only train the generative side.
@@ -404,7 +404,7 @@ which make the model only train the generative side.
 If you wish to use different language models as the text encoder for CLIP you can do so by using one of the Hugging Face model configs in ```src/open_clip/model_configs``` and passing in its tokenizer as the ```--model``` and ```--hf-tokenizer-name``` parameters respectively. Currently we only support RoBERTa ("test-roberta" config); however, adding new models should be trivial. You can also determine how many layers, from the end, to leave unfrozen with the ```--lock-text-unlocked-layers``` parameter.
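 A quick way to check which model configs are visible to your install before picking one is the public `list_models` helper. The snippet below is a minimal sketch; it assumes an installed open_clip, and the exact config names returned depend on your version:
 ```python
 import open_clip
 
 # All model config names known to open_clip; HF text-encoder configs
 # such as the RoBERTa-based ones appear under their config file names.
 print([name for name in open_clip.list_models() if "roberta" in name.lower()])
 ```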
 Here's an example command to train CLIP with the RoBERTa LM that has its last 10 layers unfrozen:
 ```bash
-python -m training.main \
+python -m open_clip_train.main \
     --train-data="pipe:aws s3 cp s3://s-mas/cc3m/{00000..00329}.tar -" \
     --train-num-samples 3000000 \
     --val-data="pipe:aws s3 cp s3://s-mas/cc3m/{00330..00331}.tar -" \
@@ -453,7 +453,7 @@ We recommend https://github.com/LAION-AI/CLIP_benchmark#how-to-use for systematic
 ### Evaluating local checkpoint:
 
 ```bash
-python -m training.main \
+python -m open_clip_train.main \
     --val-data="/path/to/validation_data.csv"  \
     --model RN101 \
     --pretrained /path/to/checkpoints/epoch_K.pt
@@ -462,7 +462,7 @@ python -m training.main \
 ### Evaluating hosted pretrained checkpoint on ImageNet zero-shot prediction:
 
 ```bash
-python -m training.main \
+python -m open_clip_train.main \
     --imagenet-val /path/to/imagenet/validation \
     --model ViT-B-32-quickgelu \
     --pretrained laion400m_e32
diff --git a/docs/script_examples/clipa/vit_b16/i50_t16_finetune.sh b/docs/script_examples/clipa/vit_b16/i50_t16_finetune.sh
index 7691742b2..b8d446f1d 100644
--- a/docs/script_examples/clipa/vit_b16/i50_t16_finetune.sh
+++ b/docs/script_examples/clipa/vit_b16/i50_t16_finetune.sh
@@ -1,4 +1,4 @@
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/docs/script_examples/clipa/vit_b16/i50_t16_pretrain.sh b/docs/script_examples/clipa/vit_b16/i50_t16_pretrain.sh
index 662c192d5..8985f7811 100644
--- a/docs/script_examples/clipa/vit_b16/i50_t16_pretrain.sh
+++ b/docs/script_examples/clipa/vit_b16/i50_t16_pretrain.sh
@@ -1,4 +1,4 @@
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/docs/script_examples/clipa/vit_l16/i17_t16_finetune.sh b/docs/script_examples/clipa/vit_l16/i17_t16_finetune.sh
index a4a03e6d4..b4a024c5f 100644
--- a/docs/script_examples/clipa/vit_l16/i17_t16_finetune.sh
+++ b/docs/script_examples/clipa/vit_l16/i17_t16_finetune.sh
@@ -1,4 +1,4 @@
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/docs/script_examples/clipa/vit_l16/i17_t16_pretrain.sh b/docs/script_examples/clipa/vit_l16/i17_t16_pretrain.sh
index ecf0c1f7c..cf729876a 100644
--- a/docs/script_examples/clipa/vit_l16/i17_t16_pretrain.sh
+++ b/docs/script_examples/clipa/vit_l16/i17_t16_pretrain.sh
@@ -1,4 +1,4 @@
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/docs/script_examples/clipa/vit_l16/i37_t8_finetune.sh b/docs/script_examples/clipa/vit_l16/i37_t8_finetune.sh
index 5437a3435..5d82e18dc 100644
--- a/docs/script_examples/clipa/vit_l16/i37_t8_finetune.sh
+++ b/docs/script_examples/clipa/vit_l16/i37_t8_finetune.sh
@@ -1,4 +1,4 @@
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/docs/script_examples/clipa/vit_l16/i37_t8_pretrain.sh b/docs/script_examples/clipa/vit_l16/i37_t8_pretrain.sh
index 5dcd5c8c2..83b1a2f08 100644
--- a/docs/script_examples/clipa/vit_l16/i37_t8_pretrain.sh
+++ b/docs/script_examples/clipa/vit_l16/i37_t8_pretrain.sh
@@ -1,4 +1,4 @@
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/docs/script_examples/clipav2/vit_h14/i257_t32_finetunex4.sh b/docs/script_examples/clipav2/vit_h14/i257_t32_finetunex4.sh
index 40b9c4a9f..fad0b134a 100644
--- a/docs/script_examples/clipav2/vit_h14/i257_t32_finetunex4.sh
+++ b/docs/script_examples/clipav2/vit_h14/i257_t32_finetunex4.sh
@@ -1,7 +1,7 @@
 # have not been tested. use it at your own discretion
 # the original experiment was run on tpu v3-256.
 # this example script assumes 8 gpus, each with huge memory. Tune batchsize, warmup, and lr accordingly if you have different machine setups.
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/docs/script_examples/clipav2/vit_h14/i50_t8_pretrain.sh b/docs/script_examples/clipav2/vit_h14/i50_t8_pretrain.sh
index f98ea9c25..f7e9a5fe1 100644
--- a/docs/script_examples/clipav2/vit_h14/i50_t8_pretrain.sh
+++ b/docs/script_examples/clipav2/vit_h14/i50_t8_pretrain.sh
@@ -1,7 +1,7 @@
 # have not been tested. use it at your own discretion
 # the original experiment was run on tpu v3-256.
 # this example script assumes 8 gpus, each with huge memory. Tune batchsize, warmup, and lr accordingly if you have different machine setups.
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/docs/script_examples/clipav2/vit_h14/i577_t32_finetunex1.sh b/docs/script_examples/clipav2/vit_h14/i577_t32_finetunex1.sh
index 232bb8fcf..1473c6c2b 100644
--- a/docs/script_examples/clipav2/vit_h14/i577_t32_finetunex1.sh
+++ b/docs/script_examples/clipav2/vit_h14/i577_t32_finetunex1.sh
@@ -1,7 +1,7 @@
 # have not been tested. use it at your own discretion
 # the original experiment was run on tpu v3-256.
 # this example script assumes 8 gpus, each with huge memory. Tune batchsize, warmup, and lr accordingly if you have different machine setups.
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/docs/script_examples/stability_example.sh b/docs/script_examples/stability_example.sh
index f2801ee08..7eaa952be 100644
--- a/docs/script_examples/stability_example.sh
+++ b/docs/script_examples/stability_example.sh
@@ -34,7 +34,7 @@ export PYTHONPATH="$PYTHONPATH:/admin/home-mitchellw/open_clip/src"
 
 EXP_NAME="test-B-32-laion5b-lr1e-3-bs90k"
 
-srun --comment laion --cpu_bind=v --accel-bind=gn python -m training.main \
+srun --comment laion --cpu_bind=v --accel-bind=gn python -m open_clip_train.main \
     --save-frequency 1 \
     --train-data="pipe:aws s3 cp s3://s-datasets/laion5b/{laion2B-data/{000000..231349}.tar,laion2B-multi-data/{000000..226687}.tar,laion1B-nolang-data/{000000..127231}.tar} -" \
     --train-num-samples 135646078 \
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 000000000..4fafb6293
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,79 @@
+[build-system]
+requires = ["pdm-backend"]
+build-backend = "pdm.backend"
+
+[project]
+name = "open_clip_torch"
+# NOTE for full list of authors see https://github.com/mlfoundations/open_clip?tab=readme-ov-file#citing
+# below covers most active / recent maintainers
+authors = [
+    {name = "Ross Wightman", email = "ross@huggingface.co"},
+    {name = "Gabriel Ilharco"},
+    {name = "Mitchell Wortsman"},
+    {name = "Romain Beaumont"},
+]
+description = "Open reproduction of contrastive language-image pretraining (CLIP) and related."
+readme = "README.md"
+requires-python = ">=3.8"
+keywords = ["pytorch", "clip", "image-text", "language-image", "multimodal"]
+license = {text = "MIT"}
+classifiers = [
+    'Development Status :: 4 - Beta',
+    'Intended Audience :: Education',
+    'Intended Audience :: Science/Research',
+    'License :: OSI Approved :: MIT License',
+    'Programming Language :: Python :: 3.8',
+    'Programming Language :: Python :: 3.9',
+    'Programming Language :: Python :: 3.10',
+    'Programming Language :: Python :: 3.11',
+    'Programming Language :: Python :: 3.12',
+    'Topic :: Scientific/Engineering',
+    'Topic :: Scientific/Engineering :: Artificial Intelligence',
+    'Topic :: Software Development',
+    'Topic :: Software Development :: Libraries',
+    'Topic :: Software Development :: Libraries :: Python Modules',
+]
+dependencies = [
+    'torch>=1.9.0',
+    'torchvision',
+    'regex',
+    'ftfy',
+    'tqdm',
+    'huggingface-hub',
+    'timm',
+]
+dynamic = ["version"]
+
+[project.optional-dependencies]
+training = [
+    'torch>=2.0',
+    'webdataset>=0.2.5',
+    'pandas',
+    'transformers[sentencepiece]',
+    'timm>=1.0.7',
+    'fsspec',
+]
+test = [
+    'pytest-split',
+    'pytest',
+    'open_clip_torch[training]'
+]
+
+[project.urls]
+homepage = "https://github.com/mlfoundations/open_clip"
+repository = "https://github.com/mlfoundations/open_clip"
+
+[tool.pdm.version]
+source = "file"
+path = "src/open_clip/version.py"
+
+[tool.pdm.build]
+excludes = ["./**/.git", "./**/logs/*"]
+package-dir = "src"
+includes = ["src/open_clip", "src/open_clip_train"]
+
+[tool.pytest.ini_options]
+testpaths = ['tests']
+markers = [
+    'regression_test'
+]
\ No newline at end of file
diff --git a/scripts/clipav1_vit_l16_i37_t8.sh b/scripts/clipav1_vit_l16_i37_t8.sh
index d3ff0901e..b84528a9b 100644
--- a/scripts/clipav1_vit_l16_i37_t8.sh
+++ b/scripts/clipav1_vit_l16_i37_t8.sh
@@ -1,5 +1,5 @@
 # eval on a single gpu
-CUDA_VISIBLE_DEVICES=2 TORCH_CUDNN_V8_API_ENABLED=1 TFDS_PREFETCH_SIZE=8192 python3 -m training.main \
+CUDA_VISIBLE_DEVICES=2 TORCH_CUDNN_V8_API_ENABLED=1 TFDS_PREFETCH_SIZE=8192 python3 -m open_clip_train.main \
     --model ViT-L-16-CL32-GAP \
     --pretrained "/path/to/clipa_vit_l16_i37_t8.pt" \
     --seed 0 \
diff --git a/scripts/clipav2_vit_h14_i84_224_336_cl32_gap_datacomp1b.sh b/scripts/clipav2_vit_h14_i84_224_336_cl32_gap_datacomp1b.sh
index 7f22386c3..434398b1f 100644
--- a/scripts/clipav2_vit_h14_i84_224_336_cl32_gap_datacomp1b.sh
+++ b/scripts/clipav2_vit_h14_i84_224_336_cl32_gap_datacomp1b.sh
@@ -1,4 +1,4 @@
-CUDA_VISIBLE_DEVICES=1 python3 -m training.main \
+CUDA_VISIBLE_DEVICES=1 python3 -m open_clip_train.main \
     --model ViT-H-14-CL32-GAP-BigVision \
     --pretrained "/path/to/vit_h14_i84_224_336_cl32_gap_datacomp1b.pt" \
     --force-image-size 336 \
diff --git a/scripts/h14_224_32_finetune.sh b/scripts/h14_224_32_finetune.sh
index 7026b6415..82c296a1f 100644
--- a/scripts/h14_224_32_finetune.sh
+++ b/scripts/h14_224_32_finetune.sh
@@ -1,5 +1,5 @@
 # 64k batchsize for 2.048e-3 lr
-TORCH_CUDNN_V8_API_ENABLED=1 torchrun --nproc_per_node 8 -m training.main \
+TORCH_CUDNN_V8_API_ENABLED=1 torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/scripts/h14_84_8_pretrain.sh b/scripts/h14_84_8_pretrain.sh
index 4130ee5f6..c430ba615 100644
--- a/scripts/h14_84_8_pretrain.sh
+++ b/scripts/h14_84_8_pretrain.sh
@@ -1,5 +1,5 @@
 # 64k batchsize for 2.048e-3 lr
-TORCH_CUDNN_V8_API_ENABLED=1 torchrun --nproc_per_node 8 -m training.main \
+TORCH_CUDNN_V8_API_ENABLED=1 torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 171546855..000000000
--- a/setup.py
+++ /dev/null
@@ -1,63 +0,0 @@
-""" Setup
-"""
-from setuptools import setup, find_packages
-from codecs import open
-from os import path
-
-here = path.abspath(path.dirname(__file__))
-
-# Get the long description from the README file
-with open(path.join(here, 'README.md'), encoding='utf-8') as f:
-    long_description = f.read()
-
-def _read_reqs(relpath):
-    fullpath = path.join(path.dirname(__file__), relpath)
-    with open(fullpath) as f:
-        return [s.strip() for s in f.readlines() if (s.strip() and not s.startswith("#"))]
-
-REQUIREMENTS = _read_reqs("requirements.txt")
-TRAINING_REQUIREMENTS = _read_reqs("requirements-training.txt")
-
-exec(open('src/open_clip/version.py').read())
-setup(
-    name='open_clip_torch',
-    version=__version__,
-    description='OpenCLIP',
-    license='MIT',
-    long_description=long_description,
-    long_description_content_type='text/markdown',
-    url='https://github.com/mlfoundations/open_clip',
-    author='',
-    author_email='',
-    classifiers=[
-        # How mature is this project? Common values are
-        #   3 - Alpha
-        #   4 - Beta
-        #   5 - Production/Stable
-        'Development Status :: 4 - Beta',
-        'Intended Audience :: Education',
-        'Intended Audience :: Science/Research',
-        'License :: OSI Approved :: Apache Software License',
-        'Programming Language :: Python :: 3.8',
-        'Programming Language :: Python :: 3.9',
-        'Programming Language :: Python :: 3.10',
-        'Programming Language :: Python :: 3.11',
-        'Programming Language :: Python :: 3.12',
-        'Topic :: Scientific/Engineering',
-        'Topic :: Scientific/Engineering :: Artificial Intelligence',
-        'Topic :: Software Development',
-        'Topic :: Software Development :: Libraries',
-        'Topic :: Software Development :: Libraries :: Python Modules',
-    ],
-
-    # Note that this is a string of words separated by whitespace, not a list.
-    keywords='CLIP pretrained',
-    package_dir={'': 'src'},
-    packages=find_packages(where='src'),
-    include_package_data=True,
-    install_requires=REQUIREMENTS,
-    extras_require={
-        "training": TRAINING_REQUIREMENTS,
-    },
-    python_requires='>=3.8',
-)
diff --git a/src/open_clip/__init__.py b/src/open_clip/__init__.py
index 23856a3f1..d0419b4d7 100644
--- a/src/open_clip/__init__.py
+++ b/src/open_clip/__init__.py
@@ -1,3 +1,5 @@
+from .version import __version__
+
 from .coca_model import CoCa
 from .constants import OPENAI_DATASET_MEAN, OPENAI_DATASET_STD
 from .factory import create_model, create_model_and_transforms, create_model_from_pretrained, get_tokenizer, create_loss
diff --git a/src/training/__init__.py b/src/open_clip_train/__init__.py
similarity index 100%
rename from src/training/__init__.py
rename to src/open_clip_train/__init__.py
diff --git a/src/training/data.py b/src/open_clip_train/data.py
similarity index 100%
rename from src/training/data.py
rename to src/open_clip_train/data.py
diff --git a/src/training/distributed.py b/src/open_clip_train/distributed.py
similarity index 100%
rename from src/training/distributed.py
rename to src/open_clip_train/distributed.py
diff --git a/src/training/file_utils.py b/src/open_clip_train/file_utils.py
similarity index 100%
rename from src/training/file_utils.py
rename to src/open_clip_train/file_utils.py
diff --git a/src/training/logger.py b/src/open_clip_train/logger.py
similarity index 100%
rename from src/training/logger.py
rename to src/open_clip_train/logger.py
diff --git a/src/training/main.py b/src/open_clip_train/main.py
similarity index 97%
rename from src/training/main.py
rename to src/open_clip_train/main.py
index 94496999f..591ea1d32 100644
--- a/src/training/main.py
+++ b/src/open_clip_train/main.py
@@ -29,13 +29,13 @@
     hvd = None
 
 from open_clip import create_model_and_transforms, trace_model, get_tokenizer, create_loss
-from training.data import get_data
-from training.distributed import is_master, init_distributed_device, broadcast_object
-from training.logger import setup_logging
-from training.params import parse_args
-from training.scheduler import cosine_lr, const_lr, const_lr_cooldown
-from training.train import train_one_epoch, evaluate
-from training.file_utils import pt_load, check_exists, start_sync_process, remote_sync
+from open_clip_train.data import get_data
+from open_clip_train.distributed import is_master, init_distributed_device, broadcast_object
+from open_clip_train.logger import setup_logging
+from open_clip_train.params import parse_args
+from open_clip_train.scheduler import cosine_lr, const_lr, const_lr_cooldown
+from open_clip_train.train import train_one_epoch, evaluate
+from open_clip_train.file_utils import pt_load, check_exists, start_sync_process, remote_sync
 
 LATEST_CHECKPOINT_NAME = "epoch_latest.pt"
diff --git a/src/training/params.py b/src/open_clip_train/params.py
similarity index 100%
rename from src/training/params.py
rename to src/open_clip_train/params.py
diff --git a/src/training/precision.py b/src/open_clip_train/precision.py
similarity index 100%
rename from src/training/precision.py
rename to src/open_clip_train/precision.py
diff --git a/src/training/profiler.py b/src/open_clip_train/profiler.py
similarity index 100%
rename from src/training/profiler.py
rename to src/open_clip_train/profiler.py
diff --git a/src/training/scheduler.py b/src/open_clip_train/scheduler.py
similarity index 100%
rename from src/training/scheduler.py
rename to src/open_clip_train/scheduler.py
diff --git a/src/training/train.py b/src/open_clip_train/train.py
similarity index 98%
rename from src/training/train.py
rename to src/open_clip_train/train.py
index 1a2950083..f96589d4b 100644
--- a/src/training/train.py
+++ b/src/open_clip_train/train.py
@@ -15,9 +15,9 @@
     wandb = None
 
 from open_clip import get_input_dtype, CLIP, CustomTextCLIP
-from .distributed import is_master
-from .zero_shot import zero_shot_eval
-from .precision import get_autocast
+from open_clip_train.distributed import is_master
+from open_clip_train.zero_shot import zero_shot_eval
+from open_clip_train.precision import get_autocast
 
 
 class AverageMeter(object):
diff --git a/src/training/zero_shot.py b/src/open_clip_train/zero_shot.py
similarity index 98%
rename from src/training/zero_shot.py
rename to src/open_clip_train/zero_shot.py
index e04c9c0fb..dd694ba57 100644
--- a/src/training/zero_shot.py
+++ b/src/open_clip_train/zero_shot.py
@@ -5,7 +5,7 @@
 
 from open_clip import get_input_dtype, get_tokenizer, build_zero_shot_classifier, \
     IMAGENET_CLASSNAMES, OPENAI_IMAGENET_TEMPLATES
-from .precision import get_autocast
+from open_clip_train.precision import get_autocast
 
 
 def accuracy(output, target, topk=(1,)):
diff --git a/src/training/.gitignore b/src/training/.gitignore
deleted file mode 100644
index 333c1e910..000000000
--- a/src/training/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-logs/
diff --git a/tests/test_hf_model.py b/tests/test_hf_model.py
index 1deb00da8..6307b651a 100644
--- a/tests/test_hf_model.py
+++ b/tests/test_hf_model.py
@@ -4,6 +4,7 @@
 from open_clip.hf_model import _POOLERS, HFTextEncoder
 from transformers import AutoConfig
 from transformers.modeling_outputs import BaseModelOutput
+
 # test poolers
 def test_poolers():
     bs, sl, d = 2, 10, 5
diff --git a/tests/test_num_shards.py b/tests/test_num_shards.py
index 70ca8fecc..5210c1446 100644
--- a/tests/test_num_shards.py
+++ b/tests/test_num_shards.py
@@ -1,6 +1,6 @@
 import pytest
 
-from training.data import get_dataset_size
+from open_clip_train.data import get_dataset_size
 
 @pytest.mark.parametrize(
     "shards,expected_size",
diff --git a/tests/test_training_simple.py b/tests/test_training_simple.py
index 70998d592..58b33a3c1 100644
--- a/tests/test_training_simple.py
+++ b/tests/test_training_simple.py
@@ -2,9 +2,8 @@
 import os
 import sys
 import pytest
-from PIL import Image
 import torch
-from training.main import main
+from open_clip_train.main import main
 
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
diff --git a/tests/test_wds.py b/tests/test_wds.py
index 3c7f8948a..a31039028 100644
--- a/tests/test_wds.py
+++ b/tests/test_wds.py
@@ -6,9 +6,9 @@
 import io
 from PIL import Image
 
-from training.data import get_wds_dataset
-from training.params import parse_args
-from training.main import random_seed
+from open_clip_train.data import get_wds_dataset
+from open_clip_train.params import parse_args
+from open_clip_train.main import random_seed
 
 TRAIN_NUM_SAMPLES = 10_000
 RTOL = 0.2
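
After applying the patch and reinstalling (e.g. `pip install -e .[training]`), the two user-visible changes can be smoke-tested from Python. This is a minimal sketch, assuming the optional training dependencies are installed; `parse_args` accepts an argv-style list, so an empty list yields the training defaults:

```python
import open_clip
from open_clip_train.params import parse_args  # import path was `training.params` before this patch

# __version__ is now re-exported at the package root (from src/open_clip/version.py)
print(open_clip.__version__)

# The renamed training package resolves; default args confirm the CLI still parses.
args = parse_args([])
print(args.model, args.batch_size)
```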