diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 000000000..c74de18e6
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,3 @@
+include src/open_clip/bpe_simple_vocab_16e6.txt.gz
+include src/open_clip/model_configs/*.json
+
diff --git a/setup.py b/setup.py
new file mode 100644
index 000000000..07c29756b
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,56 @@
+""" Setup
+"""
+from setuptools import setup, find_packages
+from codecs import open
+from os import path
+
+here = path.abspath(path.dirname(__file__))
+
+# Get the long description from the README file
+with open(path.join(here, 'README.md'), encoding='utf-8') as f:
+    long_description = f.read()
+
+exec(open('src/open_clip/version.py').read())
+setup(
+    name='open_clip',
+    version=__version__,
+    description='OpenCLIP',
+    long_description=long_description,
+    long_description_content_type='text/markdown',
+    url='https://github.com/mlfoundations/open_clip',
+    author='',
+    author_email='',
+    classifiers=[
+        # How mature is this project? Common values are
+        #   3 - Alpha
+        #   4 - Beta
+        #   5 - Production/Stable
+        'Development Status :: 3 - Alpha',
+        'Intended Audience :: Education',
+        'Intended Audience :: Science/Research',
+        'License :: OSI Approved :: Apache Software License',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
+        'Topic :: Scientific/Engineering',
+        'Topic :: Scientific/Engineering :: Artificial Intelligence',
+        'Topic :: Software Development',
+        'Topic :: Software Development :: Libraries',
+        'Topic :: Software Development :: Libraries :: Python Modules',
+    ],
+
+    # Note that this is a string of words separated by whitespace, not a list.
+    keywords='CLIP pretrained',
+    package_dir={'': 'src'},
+    packages=find_packages(where='src', exclude=['training']),
+    include_package_data=True,
+    install_requires=[
+        'torch >= 1.9',
+        'torchvision',
+        'webdataset >= 0.2.5',
+        'ftfy',
+        'regex',
+    ],
+    python_requires='>=3.7',
+)
diff --git a/src/clip/__init__.py b/src/open_clip/__init__.py
similarity index 86%
rename from src/clip/__init__.py
rename to src/open_clip/__init__.py
index 1d4154a7b..1593ca4ef 100644
--- a/src/clip/__init__.py
+++ b/src/open_clip/__init__.py
@@ -1,7 +1,7 @@
 from .factory import create_model_and_transforms
 from .loss import ClipLoss
 from .model import CLIP, CLIPTextCfg, CLIPVisionCfg, convert_weights_to_fp16
-from .openai import load_openai
+from .openai import load_openai_model, list_openai_models
 from .pretrained import list_pretrained, list_pretrained_tag_models, list_pretrained_model_tags,\
     get_pretrained_url, download_pretrained
 from .tokenizer import SimpleTokenizer, tokenize
diff --git a/src/clip/bpe_simple_vocab_16e6.txt.gz b/src/open_clip/bpe_simple_vocab_16e6.txt.gz
similarity index 100%
rename from src/clip/bpe_simple_vocab_16e6.txt.gz
rename to src/open_clip/bpe_simple_vocab_16e6.txt.gz
diff --git a/src/clip/factory.py b/src/open_clip/factory.py
similarity index 90%
rename from src/clip/factory.py
rename to src/open_clip/factory.py
index c834329e1..b3a61e5f6 100644
--- a/src/clip/factory.py
+++ b/src/open_clip/factory.py
@@ -6,7 +6,7 @@
 import torch

 from .model import CLIP, convert_weights_to_fp16
-from .openai import load_openai
+from .openai import load_openai_model
 from .pretrained import get_pretrained_url, download_pretrained
 from .transform import image_transform

@@ -25,14 +25,14 @@ def load_state_dict(checkpoint_path: str, map_location='cpu'):
 def create_model_and_transforms(
         model_name: str,
         pretrained: str,
-        precision: str,
-        device: torch.device,
+        precision: str = 'fp32',
+        device: torch.device = torch.device('cpu'),
         force_quick_gelu: bool = False,
 ):
     pretrained = pretrained.lower()
     if pretrained == 'openai':
         logging.info(f'Loading pretrained {model_name} from OpenAI.')
-        model, preprocess_train, preprocess_val = load_openai(model_name, device=device, jit=False)
+        model, preprocess_train, preprocess_val = load_openai_model(model_name, device=device, jit=False)
         # See https://discuss.pytorch.org/t/valueerror-attemting-to-unscale-fp16-gradients/81372
         if precision == "amp" or precision == "fp32":
             model = model.float()
@@ -67,6 +67,7 @@ def create_model_and_transforms(
     model.to(device=device)

     if precision == "fp16":
+        assert device.type != 'cpu'
         convert_weights_to_fp16(model)

     return model, preprocess_train, preprocess_val
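The factory hunks above give create_model_and_transforms usable defaults (precision='fp32', device=CPU) and assert that fp16 conversion is not requested on a CPU device. A minimal sketch of a call after this change, not part of the patch; the 'ViT-B-32'/'openai' names are illustrative picks from the model configs and pretrained table:

# Sketch only: exercising the new create_model_and_transforms defaults.
import torch
from open_clip import create_model_and_transforms

# precision defaults to 'fp32' and device to torch.device('cpu'), so a CPU-only call needs no extra args.
model, preprocess_train, preprocess_val = create_model_and_transforms('ViT-B-32', pretrained='openai')

# fp16 weights are only converted off-CPU now; request them explicitly on a GPU device.
if torch.cuda.is_available():
    model_fp16, _, _ = create_model_and_transforms(
        'ViT-B-32', pretrained='openai', precision='fp16', device=torch.device('cuda'))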
diff --git a/src/clip/loss.py b/src/open_clip/loss.py
similarity index 100%
rename from src/clip/loss.py
rename to src/open_clip/loss.py
diff --git a/src/clip/model.py b/src/open_clip/model.py
similarity index 100%
rename from src/clip/model.py
rename to src/open_clip/model.py
diff --git a/src/clip/model_configs/RN101.json b/src/open_clip/model_configs/RN101.json
similarity index 100%
rename from src/clip/model_configs/RN101.json
rename to src/open_clip/model_configs/RN101.json
diff --git a/src/clip/model_configs/RN50.json b/src/open_clip/model_configs/RN50.json
similarity index 100%
rename from src/clip/model_configs/RN50.json
rename to src/open_clip/model_configs/RN50.json
diff --git a/src/clip/model_configs/RN50x16.json b/src/open_clip/model_configs/RN50x16.json
similarity index 100%
rename from src/clip/model_configs/RN50x16.json
rename to src/open_clip/model_configs/RN50x16.json
diff --git a/src/clip/model_configs/RN50x4.json b/src/open_clip/model_configs/RN50x4.json
similarity index 100%
rename from src/clip/model_configs/RN50x4.json
rename to src/open_clip/model_configs/RN50x4.json
diff --git a/src/clip/model_configs/ViT-B-16.json b/src/open_clip/model_configs/ViT-B-16.json
similarity index 100%
rename from src/clip/model_configs/ViT-B-16.json
rename to src/open_clip/model_configs/ViT-B-16.json
diff --git a/src/clip/model_configs/ViT-B-32.json b/src/open_clip/model_configs/ViT-B-32.json
similarity index 100%
rename from src/clip/model_configs/ViT-B-32.json
rename to src/open_clip/model_configs/ViT-B-32.json
diff --git a/src/clip/model_configs/ViT-L-14.json b/src/open_clip/model_configs/ViT-L-14.json
similarity index 100%
rename from src/clip/model_configs/ViT-L-14.json
rename to src/open_clip/model_configs/ViT-L-14.json
diff --git a/src/clip/model_configs/timm-efficientnetv2_rw_s.json b/src/open_clip/model_configs/timm-efficientnetv2_rw_s.json
similarity index 100%
rename from src/clip/model_configs/timm-efficientnetv2_rw_s.json
rename to src/open_clip/model_configs/timm-efficientnetv2_rw_s.json
diff --git a/src/clip/model_configs/timm-resnet50d.json b/src/open_clip/model_configs/timm-resnet50d.json
similarity index 100%
rename from src/clip/model_configs/timm-resnet50d.json
rename to src/open_clip/model_configs/timm-resnet50d.json
diff --git a/src/clip/model_configs/timm-resnetaa50d.json b/src/open_clip/model_configs/timm-resnetaa50d.json
similarity index 100%
rename from src/clip/model_configs/timm-resnetaa50d.json
rename to src/open_clip/model_configs/timm-resnetaa50d.json
diff --git a/src/clip/model_configs/timm-resnetblur50.json b/src/open_clip/model_configs/timm-resnetblur50.json
similarity index 100%
rename from src/clip/model_configs/timm-resnetblur50.json
rename to src/open_clip/model_configs/timm-resnetblur50.json
diff --git a/src/clip/model_configs/timm-swin_base_patch4_window7_224.json b/src/open_clip/model_configs/timm-swin_base_patch4_window7_224.json
similarity index 100%
rename from src/clip/model_configs/timm-swin_base_patch4_window7_224.json
rename to src/open_clip/model_configs/timm-swin_base_patch4_window7_224.json
diff --git a/src/clip/model_configs/timm-vit_base_patch16_224.json b/src/open_clip/model_configs/timm-vit_base_patch16_224.json
similarity index 100%
rename from src/clip/model_configs/timm-vit_base_patch16_224.json
rename to src/open_clip/model_configs/timm-vit_base_patch16_224.json
diff --git a/src/clip/model_configs/timm-vit_base_patch32_224.json b/src/open_clip/model_configs/timm-vit_base_patch32_224.json
similarity index 100%
rename from src/clip/model_configs/timm-vit_base_patch32_224.json
rename to src/open_clip/model_configs/timm-vit_base_patch32_224.json
diff --git a/src/clip/model_configs/timm-vit_small_patch16_224.json b/src/open_clip/model_configs/timm-vit_small_patch16_224.json
similarity index 100%
rename from src/clip/model_configs/timm-vit_small_patch16_224.json
rename to src/open_clip/model_configs/timm-vit_small_patch16_224.json
diff --git a/src/clip/openai.py b/src/open_clip/openai.py
similarity index 96%
rename from src/clip/openai.py
rename to src/open_clip/openai.py
index 9e2c2e00e..67933f69a 100644
--- a/src/clip/openai.py
+++ b/src/open_clip/openai.py
@@ -9,15 +9,15 @@
 from .pretrained import get_pretrained_url, list_pretrained_tag_models, download_pretrained
 from .transform import image_transform

-__all__ = ["available_openai_models", "load_openai"]
+__all__ = ["list_openai_models", "load_openai_model"]


-def available_openai_models() -> List[str]:
+def list_openai_models() -> List[str]:
     """Returns the names of available CLIP models"""
     return list_pretrained_tag_models('openai')


-def load_openai(
+def load_openai_model(
         name: str,
         device: Union[str, torch.device] = "cuda" if torch.cuda.is_available() else "cpu",
         jit=True,
@@ -45,7 +45,7 @@ def load_openai(
     elif os.path.isfile(name):
         model_path = name
     else:
-        raise RuntimeError(f"Model {name} not found; available models = {available_openai_models()}")
+        raise RuntimeError(f"Model {name} not found; available models = {list_openai_models()}")

     try:
         # loading JIT archive
diff --git a/src/clip/pretrained.py b/src/open_clip/pretrained.py
similarity index 92%
rename from src/clip/pretrained.py
rename to src/open_clip/pretrained.py
index 05918274f..549768c02 100644
--- a/src/clip/pretrained.py
+++ b/src/open_clip/pretrained.py
@@ -49,8 +49,11 @@
 }


-def list_pretrained(as_tuple: bool = False):
-    return [(k, t) if as_tuple else ':'.join([k, t]) for k in _PRETRAINED.keys() for t in _PRETRAINED[k].keys()]
+def list_pretrained(as_str: bool = False):
+    """ returns list of pretrained models
+    Returns a tuple (model_name, pretrain_tag) by default or 'name:tag' if as_str == True
+    """
+    return [':'.join([k, t]) if as_str else (k, t) for k in _PRETRAINED.keys() for t in _PRETRAINED[k].keys()]


 def list_pretrained_tag_models(tag: str):
diff --git a/src/clip/tokenizer.py b/src/open_clip/tokenizer.py
similarity index 100%
rename from src/clip/tokenizer.py
rename to src/open_clip/tokenizer.py
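The renames above (load_openai to load_openai_model, available_openai_models to list_openai_models) and the flipped list_pretrained flag (as_str instead of as_tuple) change how callers enumerate and load weights. A short sketch, not part of the patch, assuming the package is importable as open_clip; the 'RN50' tag is an illustrative OpenAI model name:

# Sketch only: enumerating and loading pretrained weights with the renamed helpers.
from open_clip import list_pretrained, list_openai_models, load_openai_model

print(list_pretrained())             # tuples such as ('RN50', 'openai') by default
print(list_pretrained(as_str=True))  # strings such as 'RN50:openai'
print(list_openai_models())          # names of the OpenAI-published CLIP checkpoints

# jit=False returns the state-dict-based model plus train/val preprocessing transforms.
model, preprocess_train, preprocess_val = load_openai_model('RN50', device='cpu', jit=False)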
diff --git a/src/clip/transform.py b/src/open_clip/transform.py
similarity index 98%
rename from src/clip/transform.py
rename to src/open_clip/transform.py
index a6616b0f4..7014c926f 100644
--- a/src/clip/transform.py
+++ b/src/open_clip/transform.py
@@ -27,4 +27,4 @@ def image_transform(
         _convert_to_rgb,
         ToTensor(),
         normalize,
-    ])
\ No newline at end of file
+    ])
diff --git a/src/open_clip/version.py b/src/open_clip/version.py
new file mode 100644
index 000000000..7fd229a32
--- /dev/null
+++ b/src/open_clip/version.py
@@ -0,0 +1 @@
+__version__ = '0.2.0'
diff --git a/src/training/data.py b/src/training/data.py
index 01a98efc3..9ce2ec138 100644
--- a/src/training/data.py
+++ b/src/training/data.py
@@ -22,7 +22,7 @@
 except ImportError:
     hvd = None

-from clip import tokenize
+from open_clip import tokenize


 class CsvDataset(Dataset):
diff --git a/src/training/main.py b/src/training/main.py
index d41314808..99171a817 100644
--- a/src/training/main.py
+++ b/src/training/main.py
@@ -10,8 +10,12 @@
 from torch import optim
 from torch.cuda.amp import GradScaler
 from torch.utils.tensorboard import SummaryWriter
+try:
+    import horovod.torch as hvd
+except ImportError:
+    hvd = None

-from clip.factory import create_model_and_transforms
+from open_clip import create_model_and_transforms
 from training.data import get_data
 from training.distributed import is_master, init_distributed_device, world_info_from_env
 from training.logger import setup_logging
@@ -19,11 +23,6 @@
 from training.scheduler import cosine_lr
 from training.train import train_one_epoch, evaluate

-try:
-    import horovod.torch as hvd
-except ImportError:
-    hvd = None
-

 def random_seed(seed=42, rank=0):
     torch.manual_seed(seed + rank)
diff --git a/src/training/train.py b/src/training/train.py
index cefef0e68..4ecb5218d 100644
--- a/src/training/train.py
+++ b/src/training/train.py
@@ -10,7 +10,7 @@
 import torch.nn.functional as F
 import wandb

-from clip.loss import ClipLoss
+from open_clip import ClipLoss

 from .distributed import is_master
 from .zero_shot import zero_shot_eval
diff --git a/src/training/zero_shot.py b/src/training/zero_shot.py
index 04fb42ed1..6fb2de76d 100644
--- a/src/training/zero_shot.py
+++ b/src/training/zero_shot.py
@@ -5,7 +5,7 @@
 import torch.nn.functional as F
 from tqdm import tqdm

-from clip import tokenize
+from open_clip import tokenize

 from .imagenet_zeroshot_data import imagenet_classnames, openai_imagenet_template
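Taken together, setup.py, MANIFEST.in, and the src/clip to src/open_clip rename make the library a proper installable package, and the training scripts now import everything from the top-level open_clip namespace. A hedged sketch of the downstream usage, not part of the patch, assuming an editable install (for example pip install -e . from the repo root) and an illustrative 'RN50'/'openai' model/tag pair:

# Sketch only: the new top-level import path used by the training code after this diff.
import torch
from open_clip import ClipLoss, create_model_and_transforms, tokenize  # ClipLoss is re-exported at top level

model, preprocess_train, preprocess_val = create_model_and_transforms('RN50', pretrained='openai')
text = tokenize(["a diagram", "a dog", "a cat"])

with torch.no_grad():
    text_features = model.encode_text(text)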