From c27d1232461f1c05915477731770b44f0a2a7ee7 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Sat, 26 Oct 2024 01:24:22 +0200 Subject: [PATCH 01/67] Removed torchtext from NGramTokenizer --- ludwig/utils/tokenizers.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/ludwig/utils/tokenizers.py b/ludwig/utils/tokenizers.py index 6fb2d2117a4..41f761fdf28 100644 --- a/ludwig/utils/tokenizers.py +++ b/ludwig/utils/tokenizers.py @@ -140,9 +140,25 @@ def __init__(self, ngram_size: int = 2, **kwargs): self.n = ngram_size or 2 def get_tokens(self, tokens: List[str]) -> List[str]: - from torchtext.data.utils import ngrams_iterator + return list(self._ngrams_iterator(tokens, ngrams=self.n)) - return list(ngrams_iterator(tokens, ngrams=self.n)) + def _ngrams_iterator(self, token_list, ngrams): + """Return an iterator that yields the given tokens and their ngrams. This code is taken from + https://pytorch.org/text/stable/_modules/torchtext/data/utils.html#ngrams_iterator. + + Args: + token_list: A list of tokens + ngrams: the number of ngrams. + """ + + def _get_ngrams(n): + return zip(*[token_list[i:] for i in range(n)]) + + for x in token_list: + yield x + for n in range(2, ngrams + 1): + for x in _get_ngrams(n): + yield " ".join(x) class SpacePunctuationStringToListTokenizer(torch.nn.Module): From 8e90e70c0efd4c0ee4370f6029a31c755d406d70 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Sat, 26 Oct 2024 02:51:41 +0200 Subject: [PATCH 02/67] Refactored SentencePieceTokenizer See: #4032 --- ludwig/utils/tokenizers.py | 8 +++----- tests/ludwig/utils/test_tokenizers.py | 14 +++++++++++++- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/ludwig/utils/tokenizers.py b/ludwig/utils/tokenizers.py index 41f761fdf28..a9005142115 100644 --- a/ludwig/utils/tokenizers.py +++ b/ludwig/utils/tokenizers.py @@ -1028,16 +1028,14 @@ def convert_token_to_id(self, token: str) -> int: class SentencePieceTokenizer(torch.nn.Module): - def __init__(self, pretrained_model_name_or_path: Optional[str] = None, **kwargs): + def __init__(self, **kwargs): super().__init__() - if pretrained_model_name_or_path is None: - pretrained_model_name_or_path = "https://download.pytorch.org/models/text/xlmr.sentencepiece.bpe.model" - self.tokenizer = torchtext.transforms.SentencePieceTokenizer(sp_model_path=pretrained_model_name_or_path) + self.tokenizer = load_pretrained_hf_tokenizer("FacebookAI/xlm-roberta-base") def forward(self, v: Union[str, List[str], torch.Tensor]): if isinstance(v, torch.Tensor): raise ValueError(f"Unsupported input: {v}") - return self.tokenizer(v) + return self.tokenizer.tokenize(v) class _BPETokenizer(torch.nn.Module): diff --git a/tests/ludwig/utils/test_tokenizers.py b/tests/ludwig/utils/test_tokenizers.py index 82f6d86bdff..ad7b6f732ce 100644 --- a/tests/ludwig/utils/test_tokenizers.py +++ b/tests/ludwig/utils/test_tokenizers.py @@ -4,7 +4,12 @@ import torch import torchtext -from ludwig.utils.tokenizers import EnglishLemmatizeFilterTokenizer, NgramTokenizer, StringSplitTokenizer +from ludwig.utils.tokenizers import ( + EnglishLemmatizeFilterTokenizer, + NgramTokenizer, + SentencePieceTokenizer, + StringSplitTokenizer, +) TORCHTEXT_0_14_0_HF_NAMES = [ "bert-base-uncased", @@ -85,3 +90,10 @@ def test_english_lemmatize_filter_tokenizer(): tokenizer = EnglishLemmatizeFilterTokenizer() tokens = tokenizer(inputs) assert len(tokens) > 0 + + +def test_sentence_piece_tokenizer(): + inputs = "This is a sentence. And this is another one." + tokenizer = SentencePieceTokenizer() + tokens = tokenizer(inputs) + assert tokens == ["▁This", "▁is", "▁a", "▁sentence", ".", "▁And", "▁this", "▁is", "▁another", "▁one", "."] From 92a3ec096572f613afc546cb514e534874d9e77c Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Mon, 28 Oct 2024 19:01:09 +0100 Subject: [PATCH 03/67] removed torchtext --- ludwig/utils/tokenizers.py | 342 +++---------------------------------- 1 file changed, 25 insertions(+), 317 deletions(-) diff --git a/ludwig/utils/tokenizers.py b/ludwig/utils/tokenizers.py index a9005142115..99cde68d51a 100644 --- a/ludwig/utils/tokenizers.py +++ b/ludwig/utils/tokenizers.py @@ -15,24 +15,16 @@ import logging from abc import abstractmethod -from typing import Any, Dict, List, Optional, Union +from typing import Any, List, Union import torch -import torchtext -from ludwig.constants import PADDING_SYMBOL, UNKNOWN_SYMBOL -from ludwig.utils.data_utils import load_json from ludwig.utils.hf_utils import load_pretrained_hf_tokenizer from ludwig.utils.nlp_utils import load_nlp_pipeline, process_text logger = logging.getLogger(__name__) -torchtext_version = torch.torch_version.TorchVersion(torchtext.__version__) TORCHSCRIPT_COMPATIBLE_TOKENIZERS = {"space", "space_punct", "comma", "underscore", "characters"} -TORCHTEXT_0_12_0_TOKENIZERS = {"sentencepiece", "clip", "gpt2bpe"} -TORCHTEXT_0_13_0_TOKENIZERS = {"bert"} - -HF_TOKENIZER_SAMPLE_INPUTS = ["UNwant\u00E9d,running", "ah\u535A\u63A8zz", " \tHeLLo!how \n Are yoU? [UNK]"] class BaseTokenizer: @@ -913,7 +905,7 @@ def convert_token_to_id(self, token: str) -> int: tokenizer_registry = { - # Torchscript-compatible tokenizers. Torchtext tokenizers are also available below (requires torchtext>=0.12.0). + # Torchscript-compatible tokenizers. "space": SpaceStringToListTokenizer, "space_punct": SpacePunctuationStringToListTokenizer, "ngram": NgramTokenizer, @@ -1021,231 +1013,40 @@ def convert_token_to_id(self, token: str) -> int: "multi_lemmatize_remove_stopwords": MultiLemmatizeRemoveStopwordsTokenizer, } -"""torchtext 0.12.0 tokenizers. - -Only available with torchtext>=0.12.0. -""" - - -class SentencePieceTokenizer(torch.nn.Module): - def __init__(self, **kwargs): - super().__init__() - self.tokenizer = load_pretrained_hf_tokenizer("FacebookAI/xlm-roberta-base") - - def forward(self, v: Union[str, List[str], torch.Tensor]): - if isinstance(v, torch.Tensor): - raise ValueError(f"Unsupported input: {v}") - return self.tokenizer.tokenize(v) - - -class _BPETokenizer(torch.nn.Module): - """Superclass for tokenizers that use BPE, such as CLIPTokenizer and GPT2BPETokenizer.""" - - def __init__(self, pretrained_model_name_or_path: str, vocab_file: str): - super().__init__() - self.str2idx, self.idx2str = self._init_vocab(vocab_file) - self.tokenizer = self._init_tokenizer(pretrained_model_name_or_path, vocab_file) - - def _init_vocab(self, vocab_file: str) -> Dict[str, str]: - """Loads the vocab from the vocab file.""" - str2idx = load_json(torchtext.utils.get_asset_local_path(vocab_file)) - _, idx2str = zip(*sorted((v, k) for k, v in str2idx.items())) - return str2idx, idx2str - - def _init_tokenizer(self, pretrained_model_name_or_path: str, vocab_file: str) -> Any: - """Initializes and returns the tokenizer.""" - raise NotImplementedError - - def forward(self, v: Union[str, List[str], torch.Tensor]) -> Any: - """Implements forward pass for tokenizer. - - BPE tokenizers from torchtext return ids directly, which is inconsistent with the Ludwig tokenizer API. The - below implementation works around this by converting the ids back to their original string tokens. - """ - if isinstance(v, torch.Tensor): - raise ValueError(f"Unsupported input: {v}") - - inputs: List[str] = [] - # Ludwig calls map on List[str] objects, so we need to handle individual strings as well. - if isinstance(v, str): - inputs.append(v) - else: - inputs.extend(v) - - token_ids = self.tokenizer(inputs) - assert torch.jit.isinstance(token_ids, List[List[str]]) - - tokens = [[self.idx2str[int(unit_idx)] for unit_idx in sequence] for sequence in token_ids] - return tokens[0] if isinstance(v, str) else tokens - - def get_vocab(self) -> Dict[str, str]: - return self.str2idx +class HFTokenizerShortcutFactory: + """This factory can be used to build HuggingFace tokenizers form a shortcut string. -class CLIPTokenizer(_BPETokenizer): - def __init__(self, pretrained_model_name_or_path: Optional[str] = None, vocab_file: Optional[str] = None, **kwargs): - if pretrained_model_name_or_path is None: - pretrained_model_name_or_path = "http://download.pytorch.org/models/text/clip_merges.bpe" - if vocab_file is None: - vocab_file = "http://download.pytorch.org/models/text/clip_encoder.json" - super().__init__(pretrained_model_name_or_path, vocab_file) - - def _init_tokenizer(self, pretrained_model_name_or_path: str, vocab_file: str): - return torchtext.transforms.CLIPTokenizer( - encoder_json_path=vocab_file, merges_path=pretrained_model_name_or_path - ) - - -class GPT2BPETokenizer(_BPETokenizer): - def __init__(self, pretrained_model_name_or_path: Optional[str] = None, vocab_file: Optional[str] = None, **kwargs): - if pretrained_model_name_or_path is None: - pretrained_model_name_or_path = "https://download.pytorch.org/models/text/gpt2_bpe_vocab.bpe" - if vocab_file is None: - vocab_file = "https://download.pytorch.org/models/text/gpt2_bpe_encoder.json" - super().__init__(pretrained_model_name_or_path, vocab_file) - - def _init_tokenizer(self, pretrained_model_name_or_path: str, vocab_file: str): - return torchtext.transforms.GPT2BPETokenizer( - encoder_json_path=vocab_file, vocab_bpe_path=pretrained_model_name_or_path - ) - + Those shortcuts were originally used for torchtext tokenizers. They also guarantee backward compatibility. + """ -tokenizer_registry.update( - { - "sentencepiece": SentencePieceTokenizer, - "clip": CLIPTokenizer, - "gpt2bpe": GPT2BPETokenizer, + MODELS = { + "sentencepiece": "FacebookAI/xlm-roberta-base", + "clip": "openai/clip-vit-base-patch32", + "gpt2bpe": "openai-community/gpt2", + "bert": "bert-base-uncased", } -) -TORCHSCRIPT_COMPATIBLE_TOKENIZERS.update(TORCHTEXT_0_12_0_TOKENIZERS) + @classmethod + def create_class(cls, model_name: str): + """Creating a tokenizer class from a model name.""" -class BERTTokenizer(torch.nn.Module): - def __init__( - self, - vocab_file: Optional[str] = None, - is_hf_tokenizer: Optional[bool] = False, - hf_tokenizer_attrs: Optional[Dict[str, Any]] = None, - **kwargs, - ): - super().__init__() - - if vocab_file is None: - # If vocab_file not passed in, use default "bert-base-uncased" vocab and kwargs. - kwargs = _get_bert_config("bert-base-uncased") - vocab_file = kwargs["vocab_file"] - vocab = self._init_vocab(vocab_file) - hf_tokenizer_attrs = { - "pad_token": "[PAD]", - "unk_token": "[UNK]", - "sep_token_id": vocab["[SEP]"], - "cls_token_id": vocab["[CLS]"], - } - else: - vocab = self._init_vocab(vocab_file) - - self.vocab = vocab - - self.is_hf_tokenizer = is_hf_tokenizer - if self.is_hf_tokenizer: - # Values used by Ludwig extracted from the corresponding HF model. - self.pad_token = hf_tokenizer_attrs["pad_token"] # Used as padding symbol - self.unk_token = hf_tokenizer_attrs["unk_token"] # Used as unknown symbol - self.cls_token_id = hf_tokenizer_attrs["cls_token_id"] # Used as start symbol. Only used if HF. - self.sep_token_id = hf_tokenizer_attrs["sep_token_id"] # Used as stop symbol. Only used if HF. - self.never_split = hf_tokenizer_attrs["all_special_tokens"] - else: - self.pad_token = PADDING_SYMBOL - self.unk_token = UNKNOWN_SYMBOL - self.cls_token_id = None - self.sep_token_id = None - self.never_split = [UNKNOWN_SYMBOL] - - tokenizer_kwargs = {} - if "do_lower_case" in kwargs: - tokenizer_kwargs["do_lower_case"] = kwargs["do_lower_case"] - if "strip_accents" in kwargs: - tokenizer_kwargs["strip_accents"] = kwargs["strip_accents"] - - # Return tokens as raw tokens only if not being used as a HF tokenizer. - self.return_tokens = not self.is_hf_tokenizer - - tokenizer_init_kwargs = { - **tokenizer_kwargs, - "vocab_path": vocab_file, - "return_tokens": self.return_tokens, - } - if torchtext_version >= (0, 14, 0): - # never_split kwarg added in torchtext 0.14.0 - tokenizer_init_kwargs["never_split"] = self.never_split - - self.tokenizer = torchtext.transforms.BERTTokenizer(**tokenizer_init_kwargs) - - def _init_vocab(self, vocab_file: str) -> Dict[str, int]: - from transformers.models.bert.tokenization_bert import load_vocab - - return load_vocab(vocab_file) - - def forward(self, v: Union[str, List[str], torch.Tensor]) -> Any: - """Implements forward pass for tokenizer. - - If the is_hf_tokenizer flag is set to True, then the output follows the HF convention, i.e. the output is an - List[List[int]] of tokens and the cls and sep tokens are automatically added as the start and stop symbols. - - If the is_hf_tokenizer flag is set to False, then the output follows the Ludwig convention, i.e. the output - is a List[List[str]] of tokens. - """ - if isinstance(v, torch.Tensor): - raise ValueError(f"Unsupported input: {v}") - - inputs: List[str] = [] - # Ludwig calls map on List[str] objects, so we need to handle individual strings as well. - if isinstance(v, str): - inputs.append(v) - else: - inputs.extend(v) - - if self.is_hf_tokenizer: - token_ids_str = self.tokenizer(inputs) - assert torch.jit.isinstance(token_ids_str, List[List[str]]) - # Must cast token_ids to ints because they are used directly as indices. - token_ids: List[List[int]] = [] - for token_ids_str_i in token_ids_str: - token_ids_i = [int(token_id_str) for token_id_str in token_ids_str_i] - token_ids_i = self._add_special_token_ids(token_ids_i) - token_ids.append(token_ids_i) - return token_ids[0] if isinstance(v, str) else token_ids - - tokens = self.tokenizer(inputs) - assert torch.jit.isinstance(tokens, List[List[str]]) - return tokens[0] if isinstance(v, str) else tokens + class DynamicHFTokenizer(torch.nn.Module): + def __init__(self, **kwargs): + super().__init__() + self.tokenizer = load_pretrained_hf_tokenizer(model_name, use_fast=False) - def get_vocab(self) -> Dict[str, int]: - return self.vocab + def forward(self, v: Union[str, List[str], torch.Tensor]): + if isinstance(v, torch.Tensor): + raise ValueError(f"Unsupported input: {v}") + return self.tokenizer.tokenize(v) - def get_pad_token(self) -> str: - return self.pad_token - - def get_unk_token(self) -> str: - return self.unk_token - - def _add_special_token_ids(self, token_ids: List[int]) -> List[int]: - """Adds special token ids to the token_ids list.""" - if torch.jit.isinstance(self.cls_token_id, int) and torch.jit.isinstance(self.sep_token_id, int): - token_ids.insert(0, self.cls_token_id) - token_ids.append(self.sep_token_id) - return token_ids - - def convert_token_to_id(self, token: str) -> int: - return self.vocab[token] + return DynamicHFTokenizer tokenizer_registry.update( - { - "bert": BERTTokenizer, - } + {name: HFTokenizerShortcutFactory.create_class(model) for name, model in HFTokenizerShortcutFactory.MODELS.items()} ) -TORCHSCRIPT_COMPATIBLE_TOKENIZERS.update(TORCHTEXT_0_13_0_TOKENIZERS) def get_hf_tokenizer(pretrained_model_name_or_path, **kwargs): @@ -1256,82 +1057,8 @@ def get_hf_tokenizer(pretrained_model_name_or_path, **kwargs): Returns: A torchscript-able HF tokenizer if it is available. Else, returns vanilla HF tokenizer. """ - from transformers import BertTokenizer, DistilBertTokenizer, ElectraTokenizer - - # HuggingFace has implemented a DO Repeat Yourself policy for models - # https://github.com/huggingface/transformers/issues/19303 - # We now need to manually track BERT-like tokenizers to map onto the TorchText implementation - # until PyTorch improves TorchScript to be able to compile HF tokenizers. This would require - # 1. Support for string inputs for torch.jit.trace, or - # 2. Support for `kwargs` in torch.jit.script - # This is populated in the `get_hf_tokenizer` since the set requires `transformers` to be installed - HF_BERTLIKE_TOKENIZER_CLS_SET = {BertTokenizer, DistilBertTokenizer, ElectraTokenizer} - - hf_name = pretrained_model_name_or_path - # use_fast=False to leverage python class inheritance - # cannot tokenize HF tokenizers directly because HF lacks strict typing and List[str] cannot be traced - hf_tokenizer = load_pretrained_hf_tokenizer(hf_name, use_fast=False) - - torchtext_tokenizer = None - if "bert" in TORCHSCRIPT_COMPATIBLE_TOKENIZERS and any( - isinstance(hf_tokenizer, cls) for cls in HF_BERTLIKE_TOKENIZER_CLS_SET - ): - tokenizer_kwargs = _get_bert_config(hf_name) - torchtext_tokenizer = BERTTokenizer( - **tokenizer_kwargs, - is_hf_tokenizer=True, - hf_tokenizer_attrs={ - "pad_token": hf_tokenizer.pad_token, - "unk_token": hf_tokenizer.unk_token, - "cls_token_id": hf_tokenizer.cls_token_id, - "sep_token_id": hf_tokenizer.sep_token_id, - "all_special_tokens": hf_tokenizer.all_special_tokens, - }, - ) - - use_torchtext = torchtext_tokenizer is not None - if use_torchtext: - # If a torchtext tokenizer is instantiable, tenatively we will use it. However, - # if the tokenizer does not pass (lightweight) validation, then we will fall back to the vanilla HF tokenizer. - # TODO(geoffrey): can we better validate tokenizer parity before swapping in the TorchText tokenizer? - # Samples from https://github.com/huggingface/transformers/blob/main/tests/models/bert/test_tokenization_bert.py - for sample_input in HF_TOKENIZER_SAMPLE_INPUTS: - hf_output = hf_tokenizer.encode(sample_input) - tt_output = torchtext_tokenizer(sample_input) - if hf_output != tt_output: - use_torchtext = False - logger.warning("Falling back to HuggingFace tokenizer because TorchText tokenizer failed validation.") - logger.warning(f"Sample input: {sample_input}\nHF output: {hf_output}\nTT output: {tt_output}") - break - - if use_torchtext: - logger.info(f"Loaded TorchText implementation of {hf_name} tokenizer") - return torchtext_tokenizer - else: - # If hf_name does not have a torchtext equivalent implementation, load the - # HuggingFace implementation. - logger.info(f"Loaded HuggingFace implementation of {hf_name} tokenizer") - return HFTokenizer(hf_name) - - -def _get_bert_config(hf_name): - """Gets configs from BERT tokenizers in HuggingFace. - - `vocab_file` is required for BERT tokenizers. `tokenizer_config.json` are optional keyword arguments used to - initialize the tokenizer object. If no `tokenizer_config.json` is found, then we instantiate the tokenizer with - default arguments. - """ - from huggingface_hub import hf_hub_download - from huggingface_hub.utils import EntryNotFoundError - - vocab_file = hf_hub_download(repo_id=hf_name, filename="vocab.txt") - - try: - tokenizer_config = load_json(hf_hub_download(repo_id=hf_name, filename="tokenizer_config.json")) - except EntryNotFoundError: - tokenizer_config = {} - return {"vocab_file": vocab_file, **tokenizer_config} + return HFTokenizer(pretrained_model_name_or_path) tokenizer_registry.update( @@ -1349,24 +1076,5 @@ def get_tokenizer_from_registry(tokenizer_name: str) -> torch.nn.Module: """ if tokenizer_name in tokenizer_registry: return tokenizer_registry[tokenizer_name] - - if ( - torch.torch_version.TorchVersion(torchtext.__version__) < (0, 12, 0) - and tokenizer_name in TORCHTEXT_0_12_0_TOKENIZERS - ): - raise KeyError( - f"torchtext>=0.12.0 is not installed, so '{tokenizer_name}' and the following tokenizers are not " - f"available: {TORCHTEXT_0_12_0_TOKENIZERS}" - ) - - if ( - torch.torch_version.TorchVersion(torchtext.__version__) < (0, 13, 0) - and tokenizer_name in TORCHTEXT_0_13_0_TOKENIZERS - ): - raise KeyError( - f"torchtext>=0.13.0 is not installed, so '{tokenizer_name}' and the following tokenizers are not " - f"available: {TORCHTEXT_0_13_0_TOKENIZERS}" - ) - # Tokenizer does not exist or is unavailable. raise KeyError(f"Invalid tokenizer name: '{tokenizer_name}'. Available tokenizers: {tokenizer_registry.keys()}") From 01f308ef523d40dc668c68bc1bfc364463114df2 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Tue, 29 Oct 2024 22:46:55 +0100 Subject: [PATCH 04/67] Rewrote tests --- tests/ludwig/utils/test_tokenizers.py | 94 +++++++++++---------------- 1 file changed, 38 insertions(+), 56 deletions(-) diff --git a/tests/ludwig/utils/test_tokenizers.py b/tests/ludwig/utils/test_tokenizers.py index ad7b6f732ce..0fa8104ed10 100644 --- a/tests/ludwig/utils/test_tokenizers.py +++ b/tests/ludwig/utils/test_tokenizers.py @@ -1,64 +1,10 @@ -import os - -import pytest -import torch -import torchtext - from ludwig.utils.tokenizers import ( EnglishLemmatizeFilterTokenizer, + get_tokenizer_from_registry, NgramTokenizer, - SentencePieceTokenizer, StringSplitTokenizer, ) -TORCHTEXT_0_14_0_HF_NAMES = [ - "bert-base-uncased", - "distilbert-base-uncased", - "google/electra-small-discriminator", - "dbmdz/bert-base-italian-cased", # Community model - "nreimers/MiniLM-L6-H384-uncased", # Community model - "emilyalsentzer/Bio_ClinicalBERT", # Community model - "bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12", # Community model -] - - -@pytest.mark.parametrize( - "pretrained_model_name_or_path", - [ - pytest.param( - model_name, - marks=[ - pytest.mark.skipif( - torch.torch_version.TorchVersion(torchtext.__version__) < (0, 14, 0), - reason="requires torchtext 0.14.0 or higher", - ), - ], - ) - for model_name in TORCHTEXT_0_14_0_HF_NAMES - ], -) -def test_bert_hf_tokenizer_parity(tmpdir, pretrained_model_name_or_path): - """Tests the BERTTokenizer implementation. - - Asserts both tokens and token IDs are the same by initializing the BERTTokenizer as a standalone tokenizer and as a - HF tokenizer. - """ - from ludwig.utils.tokenizers import get_hf_tokenizer, HFTokenizer - - inputs = "Hello, ``I'm'' ónë of 1,205,000 sentences!" - hf_tokenizer = HFTokenizer(pretrained_model_name_or_path) - torchtext_tokenizer = get_hf_tokenizer(pretrained_model_name_or_path) - - # Ensure that the tokenizer is scriptable - tokenizer_path = os.path.join(tmpdir, "tokenizer.pt") - torch.jit.script(torchtext_tokenizer).save(tokenizer_path) - torchtext_tokenizer = torch.jit.load(tokenizer_path) - - token_ids_expected = hf_tokenizer(inputs) - token_ids = torchtext_tokenizer(inputs) - - assert token_ids_expected == token_ids - def test_ngram_tokenizer(): inputs = "Hello, I'm a single sentence!" @@ -94,6 +40,42 @@ def test_english_lemmatize_filter_tokenizer(): def test_sentence_piece_tokenizer(): inputs = "This is a sentence. And this is another one." - tokenizer = SentencePieceTokenizer() + tokenizer = get_tokenizer_from_registry("sentencepiece")() tokens = tokenizer(inputs) assert tokens == ["▁This", "▁is", "▁a", "▁sentence", ".", "▁And", "▁this", "▁is", "▁another", "▁one", "."] + + +def test_clip_tokenizer(): + inputs = "This is a sentence. And this is another one." + tokenizer = get_tokenizer_from_registry("clip")() + tokens = tokenizer(inputs) + print(tokens) + assert tokens == [ + "this", + "is", + "a", + "sentence", + ".", + "and", + "this", + "is", + "another", + "one", + ".", + ] + + +def test_gpt2_bpe_tokenizer(): + inputs = "This is a sentence. And this is another one." + tokenizer = get_tokenizer_from_registry("gpt2bpe")() + tokens = tokenizer(inputs) + print(tokens) + assert tokens == ["This", "Ġis", "Ġa", "Ġsentence", ".", "ĠAnd", "Ġthis", "Ġis", "Ġanother", "Ġone", "."] + + +def test_bert_tokenizer(): + inputs = "This is a sentence. And this is another one." + tokenizer = get_tokenizer_from_registry("bert")() + tokens = tokenizer(inputs) + print(tokens) + assert tokens == ["this", "is", "a", "sentence", ".", "and", "this", "is", "another", "one", "."] From 6011fb15a2b324f69993cca2d025f46eb042bc85 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Wed, 30 Oct 2024 11:12:53 -0700 Subject: [PATCH 05/67] createda about file for hatch versioning and updated pyproject --- ludwig/__about__.py | 1 + pyproject.toml | 243 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 244 insertions(+) create mode 100644 ludwig/__about__.py diff --git a/ludwig/__about__.py b/ludwig/__about__.py new file mode 100644 index 00000000000..5b74fe54240 --- /dev/null +++ b/ludwig/__about__.py @@ -0,0 +1 @@ +__version__ = '1.13.0' \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 38ac51086cf..2293cba1cdb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,246 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "ludwig" +dynamic = ["version"] +description = "Declarative machine learning: End-to-end machine learning pipelines using data-driven configurations." +readme = "README.md" +license = { text = "Apache 2.0" } +requires-python = ">=3.8" +authors = [ + { name = "Piero Molino", email = "piero.molino@gmail.com" }, +] +keywords = [ + "computer", + "deep", + "deep_learning", + "language", + "learning", + "ludwig", + "machine", + "machine_learning", + "natural", + "processing", + "vision", +] +dependencies = [ + "absl-py", + "bitsandbytes<0.41.0", + "Cython>=0.25", + "dataclasses-json", + "datasets", + "filelock", + "fsspec[http]<=2023.10.0", + "getdaft==0.1.20", + "gpustat", + "h5py>=2.6,!=3.0.0", + "html5lib", + "imagecodecs", + "jsonschema>=4.5.0,<4.7", + "kaggle", + "lxml", + "marshmallow", + "marshmallow-dataclass==8.5.4", + "marshmallow-jsonschema", + "nltk", + "numpy>=1.15", + "openpyxl>=3.0.7", + "packaging", + "pandas>=1.0,!=1.1.5,<2.2.0", + "protobuf", + "psutil", + "py-cpuinfo==9.0.0", + "pyarrow<15.0.0", + "pydantic<2.0", + "pyxlsb>=1.0.8", + "PyYAML>=3.12,<6.0.1,!=5.4.*", + "requests", + "retry", + "rich~=12.4.4", + "sacremoses", + "scikit-learn", + "scipy>=0.18", + "sentencepiece", + "spacy>=2.3", + "tabulate>=0.7", + "tensorboard", + "tifffile", + "tokenizers>=0.15", + "torch>=2.0.0", + "torchaudio", + "torchinfo", + "torchmetrics>=0.11.0", + "torchtext", + "torchvision", + "tqdm", + "transformers>=4.42.3", + "urllib3<2", + "xlrd>=2.0.1", + "xlsxwriter>=1.4.3", + "xlwt", +] + +[project.optional-dependencies] +benchmarking = [ + "s3fs", +] +distributed = [ + "awscli", + "dask[dataframe]<2023.4.0", + "deepspeed!=0.11.0,<0.13.0", + "getdaft[ray]==0.1.20", + "GPUtil", + "pyarrow", + "ray[default,data,serve,tune]==2.3.1", + "tblib", + "tensorboardX<2.3", +] +explain = [ + "captum", +] +extra = [ + "horovod[pytorch]>=0.24.0,!=0.26.0", + "modin[ray]", + "predibase>=2023.10.2", +] +full = [ + "accelerate", + "awscli", + "captum", + "cartonml-nightly", + "dask[dataframe]<2023.4.0", + "deepspeed!=0.11.0,<0.13.0", + "faiss-cpu", + "fastapi", + "getdaft[ray]==0.1.20", + "GPUtil", + "hiplot", + "httpx", + "hummingbird-ml>=0.4.8", + "hyperopt", + "lightgbm", + "lightgbm-ray", + "loralib", + "matplotlib>3.4,<3.9.0; python_version > '3.6'", + "matplotlib>=3.0,<3.4; python_version <= '3.6'", + "neuropod==0.3.0rc6 ; platform_system != \"Windows\" and python_version < '3.9'", + "peft>=0.10.0", + "ptitprince", + "pyarrow", + "python-multipart", + "ray[default,data,serve,tune]==2.3.1", + "ray[default,tune]>=2.0.0", + "s3fs", + "seaborn>=0.7,<0.12", + "sentence-transformers", + "tblib", + "tensorboardX<2.3", + "uvicorn", +] +hyperopt = [ + "hyperopt", + "ray[default,tune]>=2.0.0", +] +llm = [ + "accelerate", + "faiss-cpu", + "loralib", + "peft>=0.10.0", + "sentence-transformers", +] +serve = [ + "cartonml-nightly", + "fastapi", + "httpx", + "neuropod==0.3.0rc6 ; platform_system != \"Windows\" and python_version < '3.9'", + "python-multipart", + "uvicorn", +] +test = [ + "accelerate", + "aim", + "awscli", + "ax-platform", + "bayesian-optimization", + "captum", + "cartonml-nightly", + "comet_ml", + "ConfigSpace==0.7.1", + "dask[dataframe]<2023.4.0", + "deepspeed!=0.11.0,<0.13.0", + "faiss-cpu", + "fastapi", + "flaml[blendsearch]", + "getdaft[ray]==0.1.20", + "GPUtil", + "HEBO", + "hiplot", + "hpbandster", + "httpx", + "hummingbird-ml>=0.4.8", + "hyperopt", + "lightgbm", + "lightgbm-ray", + "loralib", + "matplotlib>3.4,<3.9.0; python_version > '3.6'", + "matplotlib>=3.0,<3.4; python_version <= '3.6'", + "mlflow", + "neuropod==0.3.0rc6 ; platform_system != \"Windows\" and python_version < '3.9'", + "nevergrad", + "optuna", + "peft>=0.10.0", + "ptitprince", + "pyarrow", + "pytest", + "pytest-timeout", + "python-multipart", + "ray[default,data,serve,tune]==2.3.1", + "ray[default,tune]>=2.0.0", + "s3fs", + "s3fs>=2022.8.2", + "scikit-optimize", + "seaborn>=0.7,<0.12", + "sentence-transformers", + "six>=1.13.0", + "sqlalchemy<2", + "tblib", + "tensorboardX<2.3", + "tifffile", + "uvicorn", + "wandb<0.12.11", + "wget", + "zoopt", +] +tree = [ + "hummingbird-ml>=0.4.8", + "lightgbm", + "lightgbm-ray", +] +viz = [ + "hiplot", + "matplotlib>3.4,<3.9.0; python_version > '3.6'", + "matplotlib>=3.0,<3.4; python_version <= '3.6'", + "ptitprince", + "seaborn>=0.7,<0.12", +] + +[project.scripts] +ludwig = "ludwig.cli:main" + +[project.urls] +Download = "https://pypi.org/project/ludwig/" +Homepage = "https://github.com/ludwig-ai/ludwig" + +[tool.hatch.version] +path = "ludwig/__about__.py" + +[tool.hatch.build.targets.sdist] +include = [ + "/ludwig", +] + [tool.isort] profile = "black" line_length = 120 From fd6d1464d7653a5dbd790d7044f4c17cfc69797b Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Thu, 31 Oct 2024 17:01:50 -0700 Subject: [PATCH 06/67] moved to hatch build system in .toml file --- pyproject.toml | 110 ++++++++++++++++++++++--------- requirements.txt | 69 ------------------- requirements_benchmarking.txt | 1 - requirements_distributed.txt | 17 ----- requirements_explain.txt | 1 - requirements_extra.txt | 8 --- requirements_hyperopt.txt | 5 -- requirements_llm.txt | 7 -- requirements_serve.txt | 6 -- requirements_test.txt | 47 ------------- requirements_tree.txt | 3 - requirements_viz.txt | 5 -- setup.cfg | 37 ----------- setup.py | 69 ------------------- tests/integration_tests/utils.py | 12 +++- 15 files changed, 90 insertions(+), 307 deletions(-) delete mode 100644 requirements.txt delete mode 100644 requirements_benchmarking.txt delete mode 100644 requirements_distributed.txt delete mode 100644 requirements_explain.txt delete mode 100644 requirements_extra.txt delete mode 100644 requirements_hyperopt.txt delete mode 100644 requirements_llm.txt delete mode 100644 requirements_serve.txt delete mode 100644 requirements_test.txt delete mode 100644 requirements_tree.txt delete mode 100644 requirements_viz.txt delete mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/pyproject.toml b/pyproject.toml index 2293cba1cdb..e817c514602 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ dynamic = ["version"] description = "Declarative machine learning: End-to-end machine learning pipelines using data-driven configurations." readme = "README.md" license = { text = "Apache 2.0" } -requires-python = ">=3.8" +requires-python = ">=3.10" authors = [ { name = "Piero Molino", email = "piero.molino@gmail.com" }, ] @@ -55,25 +55,25 @@ dependencies = [ "pyarrow<15.0.0", "pydantic<2.0", "pyxlsb>=1.0.8", - "PyYAML>=3.12,<6.0.1,!=5.4.*", + "PyYAML==6.0.2", "requests", "retry", "rich~=12.4.4", "sacremoses", + "importlib", "scikit-learn", "scipy>=0.18", "sentencepiece", "spacy>=2.3", "tabulate>=0.7", "tensorboard", - "tifffile", "tokenizers>=0.15", - "torch>=2.0.0", - "torchaudio", + "torch==2.2.2", + "torchaudio==2.2.2", "torchinfo", "torchmetrics>=0.11.0", - "torchtext", - "torchvision", + "torchtext==0.17.2", + "torchvision==0.17.2", "tqdm", "transformers>=4.42.3", "urllib3<2", @@ -82,7 +82,12 @@ dependencies = [ "xlwt", ] +# Optional Dependencies [project.optional-dependencies] +dev = [ + "flake8", + "flake8-pyproject" +] benchmarking = [ "s3fs", ] @@ -158,7 +163,27 @@ serve = [ "python-multipart", "uvicorn", ] -test = [ +tree = [ + "hummingbird-ml>=0.4.8", + "lightgbm", + "lightgbm-ray", +] +viz = [ + "hiplot", + "matplotlib>3.4,<3.9.0; python_version > '3.6'", + "matplotlib>=3.0,<3.4; python_version <= '3.6'", + "ptitprince", + "seaborn>=0.7,<0.12", +] + +[project.urls] +Download = "https://pypi.org/project/ludwig/" +Homepage = "https://github.com/ludwig-ai/ludwig" +Website = "https://ludwig.ai/latest/" + +[tool.hatch.envs.hatch-test] +dependencies = [ + "gpy >=1.10.0", "accelerate", "aim", "awscli", @@ -195,9 +220,10 @@ test = [ "pyarrow", "pytest", "pytest-timeout", + "pytest-cov", "python-multipart", - "ray[default,data,serve,tune]==2.3.1", - "ray[default,tune]>=2.0.0", + #"ray[default,data,serve,tune]==2.3.1", + #"ray[default,tune]>=2.0.0", "s3fs", "s3fs>=2022.8.2", "scikit-optimize", @@ -209,29 +235,15 @@ test = [ "tensorboardX<2.3", "tifffile", "uvicorn", - "wandb<0.12.11", + "wandb", + #"wandb<0.12.11", "wget", - "zoopt", -] -tree = [ - "hummingbird-ml>=0.4.8", - "lightgbm", - "lightgbm-ray", -] -viz = [ - "hiplot", - "matplotlib>3.4,<3.9.0; python_version > '3.6'", - "matplotlib>=3.0,<3.4; python_version <= '3.6'", - "ptitprince", - "seaborn>=0.7,<0.12", + "zoopt" ] -[project.scripts] -ludwig = "ludwig.cli:main" - -[project.urls] -Download = "https://pypi.org/project/ludwig/" -Homepage = "https://github.com/ludwig-ai/ludwig" +#[tool.hatch.envs.hatch-test] +#setup = "pip install -e .[test]" +#run = "pytest {args:test}" [tool.hatch.version] path = "ludwig/__about__.py" @@ -239,7 +251,45 @@ path = "ludwig/__about__.py" [tool.hatch.build.targets.sdist] include = [ "/ludwig", + "/tests" +] + +# ------- flake8 ---------- +[tool.flake8] +max-line-length = 120 +exclude = [ + ".tox", + "*.egg", + "*_pb2.py", + "build", + "temp" ] +select = ["E", "W", "F"] +doctests = true +verbose = 2 +format = "pylint" +ignore = [ + "E731", + "W503", + "E203", + "E231", + "E241", + "E221", + "E225", + "E226", + "E241", + "E271", + "E275" +] + +[tool.hatch.envs.lint] +dependencies = [ + "flake8", + "flake8-pyproject" +] + +[tool.hatch.envs.lint.scripts] +style = "flake8 ." [tool.isort] profile = "black" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index d293073e2c7..00000000000 --- a/requirements.txt +++ /dev/null @@ -1,69 +0,0 @@ -Cython>=0.25 -h5py>=2.6,!=3.0.0 -numpy>=1.15 -# GitHub Issue for Pinning Pandas < 2.2.0: https://github.com/ludwig-ai/ludwig/issues/3907 -pandas>=1.0,!=1.1.5,<2.2.0 -scipy>=0.18 -tabulate>=0.7 -scikit-learn -tqdm -torch>=2.0.0 -torchaudio -torchtext -torchvision -pydantic<2.0 -transformers>=4.42.3 -tifffile -imagecodecs -tokenizers>=0.15 -spacy>=2.3 -PyYAML>=3.12,<6.0.1,!=5.4.* #Exlude PyYAML 5.4.* due to incompatibility with awscli -absl-py -kaggle -requests -fsspec[http]<=2023.10.0 -dataclasses-json -jsonschema>=4.5.0,<4.7 -marshmallow -marshmallow-jsonschema -marshmallow-dataclass==8.5.4 -tensorboard -nltk # Required for rouge scores. -torchmetrics>=0.11.0 -torchinfo -filelock -psutil -protobuf -py-cpuinfo==9.0.0 -gpustat -rich~=12.4.4 -packaging -retry - -# required for TransfoXLTokenizer when using transformer_xl -sacremoses -sentencepiece - -# requirements for daft -# NOTE: daft needs to be <0.2 because of deprecation of fsspec argument in Daft -# Pinned for consistency with ludwig-ray docker image. -getdaft==0.1.20 - -# requirement for various paged and 8-bit optimizers -bitsandbytes<0.41.0 - -# new data format support -xlwt # excel -xlrd>=2.0.1 # excel -xlsxwriter>=1.4.3 # excel -openpyxl>=3.0.7 # excel -pyxlsb>=1.0.8 # excel -pyarrow<15.0.0 # parquet -lxml # html -html5lib # html - -# requirement for loading hugging face datasets -datasets - -# pin required for torch 2.1.0 -urllib3<2 diff --git a/requirements_benchmarking.txt b/requirements_benchmarking.txt deleted file mode 100644 index a51391a9488..00000000000 --- a/requirements_benchmarking.txt +++ /dev/null @@ -1 +0,0 @@ -s3fs diff --git a/requirements_distributed.txt b/requirements_distributed.txt deleted file mode 100644 index e39ee755950..00000000000 --- a/requirements_distributed.txt +++ /dev/null @@ -1,17 +0,0 @@ -# requirements for dask -dask[dataframe]<2023.4.0 -pyarrow - -# requirements for ray -ray[default,data,serve,tune]==2.3.1 -tensorboardX<2.3 -GPUtil -tblib -awscli - -# https://github.com/microsoft/DeepSpeed/issues/4473 -# https://github.com/ludwig-ai/ludwig/issues/3905 -deepspeed!=0.11.0,<0.13.0 - -# requirements for daft -getdaft[ray]==0.1.20 diff --git a/requirements_explain.txt b/requirements_explain.txt deleted file mode 100644 index 7a4edb90b8e..00000000000 --- a/requirements_explain.txt +++ /dev/null @@ -1 +0,0 @@ -captum diff --git a/requirements_extra.txt b/requirements_extra.txt deleted file mode 100644 index 26fe48eb998..00000000000 --- a/requirements_extra.txt +++ /dev/null @@ -1,8 +0,0 @@ -# requirements for horovod -horovod[pytorch]>=0.24.0,!=0.26.0 - -# alternative to Dask -modin[ray] - -# Allows users to upload -predibase>=2023.10.2 diff --git a/requirements_hyperopt.txt b/requirements_hyperopt.txt deleted file mode 100644 index 3b85fea598c..00000000000 --- a/requirements_hyperopt.txt +++ /dev/null @@ -1,5 +0,0 @@ -ray[default,tune]>=2.0.0 - -# required for Ray Tune Search Algorithm support for AutoML -#search_alg: hyperopt -hyperopt diff --git a/requirements_llm.txt b/requirements_llm.txt deleted file mode 100644 index c691bc0bac3..00000000000 --- a/requirements_llm.txt +++ /dev/null @@ -1,7 +0,0 @@ -sentence-transformers -faiss-cpu - -accelerate -loralib - -peft>=0.10.0 diff --git a/requirements_serve.txt b/requirements_serve.txt deleted file mode 100644 index 353adbb3f5c..00000000000 --- a/requirements_serve.txt +++ /dev/null @@ -1,6 +0,0 @@ -uvicorn -httpx -fastapi -python-multipart -neuropod==0.3.0rc6 ; platform_system != "Windows" and python_version < '3.9' -cartonml-nightly diff --git a/requirements_test.txt b/requirements_test.txt deleted file mode 100644 index f42f76db6a7..00000000000 --- a/requirements_test.txt +++ /dev/null @@ -1,47 +0,0 @@ -pytest -pytest-timeout -tifffile -wget -six>=1.13.0 -aim -wandb<0.12.11 -comet_ml -mlflow - -# For testing optional Ray Tune Search Algorithms -# search_alg: bohb -hpbandster -ConfigSpace==0.7.1 - -# search_alg: ax -ax-platform - -# Pinning because aimstack does not support 2.x.x - https://github.com/aimhubio/aim/issues/2514 -sqlalchemy<2 - -# search_alg: bayesopt -bayesian-optimization - -# search_alg: cfo and blendsearch -flaml[blendsearch] - -# Disabling due to numpy installation failure https://github.com/ludwig-ai/ludwig/actions/runs/4737879639/jobs/8411146481 -# search_alg: dragonfly -# dragonfly-opt - -# search_alg: hebo -HEBO - -# search_alg: nevergrad -nevergrad - -# search_alg: optuna -optuna - -# search_alg: skopt -scikit-optimize - -# search_alg: zoopt -zoopt - -s3fs>=2022.8.2 diff --git a/requirements_tree.txt b/requirements_tree.txt deleted file mode 100644 index f2153b1f3e0..00000000000 --- a/requirements_tree.txt +++ /dev/null @@ -1,3 +0,0 @@ -hummingbird-ml>=0.4.8 -lightgbm -lightgbm-ray diff --git a/requirements_viz.txt b/requirements_viz.txt deleted file mode 100644 index a33a1d546f3..00000000000 --- a/requirements_viz.txt +++ /dev/null @@ -1,5 +0,0 @@ -matplotlib>3.4,<3.9.0; python_version > '3.6' -matplotlib>=3.0,<3.4; python_version <= '3.6' -seaborn>=0.7,<0.12 -hiplot -ptitprince diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index c9095299645..00000000000 --- a/setup.cfg +++ /dev/null @@ -1,37 +0,0 @@ -[flake8] -max-line-length = 120 -exclude = - .tox, - *.egg, - *_pb2.py, - build, - temp - -select = E,W,F -doctests = True -verbose = 2 -# https://pep8.readthedocs.io/en/latest/intro.html#error-codes -format = pylint -ignore = - # Ignore "Do not assign a lambda expression, use a def" - E731 - # Ignore "Line break occurred before a binary operator" - W503 - # Ignore "whitespace before ':'" - E203 - # Ignore "missing whitespace after ':'" - E231 - # Ignore "multiple spaces after ':'" - E241 - # Ignore "multiple spaces before operator" - E221 - # Ignore "whitespace around operator" - E225 - # Ignore "whitespace around arithmetic operator" - E226 - # Ignore "multiple spaces after ':'" - E241 - # Ignore "multiple spaces after keyword" - E271 - # Ignore "missing whitespace after keyword" - E275 diff --git a/setup.py b/setup.py deleted file mode 100644 index 7276b5fabf7..00000000000 --- a/setup.py +++ /dev/null @@ -1,69 +0,0 @@ -"""Ludwig: Data-centric declarative deep learning framework.""" -from codecs import open -from os import path - -from setuptools import find_packages, setup - -here = path.abspath(path.dirname(__file__)) - -# Get the long description from the README.md file -with open(path.join(here, "README.md"), encoding="utf-8") as f: - long_description = f.read() - -with open(path.join(here, "requirements.txt"), encoding="utf-8") as f: - requirements = [line.strip() for line in f if line] - -extra_requirements = {} - -with open(path.join(here, "requirements_serve.txt"), encoding="utf-8") as f: - extra_requirements["serve"] = [line.strip() for line in f if line] - -with open(path.join(here, "requirements_viz.txt"), encoding="utf-8") as f: - extra_requirements["viz"] = [line.strip() for line in f if line] - -with open(path.join(here, "requirements_distributed.txt"), encoding="utf-8") as f: - extra_requirements["distributed"] = [line.strip() for line in f if line] - -with open(path.join(here, "requirements_hyperopt.txt"), encoding="utf-8") as f: - extra_requirements["hyperopt"] = [line.strip() for line in f if line] - -with open(path.join(here, "requirements_tree.txt"), encoding="utf-8") as f: - extra_requirements["tree"] = [line.strip() for line in f if line] - -with open(path.join(here, "requirements_llm.txt"), encoding="utf-8") as f: - extra_requirements["llm"] = [line.strip() for line in f if line] - -with open(path.join(here, "requirements_explain.txt"), encoding="utf-8") as f: - extra_requirements["explain"] = [line.strip() for line in f if line] - -with open(path.join(here, "requirements_benchmarking.txt"), encoding="utf-8") as f: - extra_requirements["benchmarking"] = [line.strip() for line in f if line] - -extra_requirements["full"] = [item for sublist in extra_requirements.values() for item in sublist] - -with open(path.join(here, "requirements_test.txt"), encoding="utf-8") as f: - extra_requirements["test"] = extra_requirements["full"] + [line.strip() for line in f if line] - -with open(path.join(here, "requirements_extra.txt"), encoding="utf-8") as f: - extra_requirements["extra"] = [line.strip() for line in f if line] - -setup( - name="ludwig", - version="0.10.4.dev", - description="Declarative machine learning: End-to-end machine learning pipelines using data-driven configurations.", - long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/ludwig-ai/ludwig", - download_url="https://pypi.org/project/ludwig/", - author="Piero Molino", - author_email="piero.molino@gmail.com", - license="Apache 2.0", - keywords="ludwig deep learning deep_learning machine machine_learning natural language processing computer vision", - packages=find_packages(exclude=["contrib", "docs", "tests"]), - python_requires=">=3.8", - include_package_data=True, - package_data={"ludwig": ["etc/*", "examples/*.py"]}, - install_requires=requirements, - extras_require=extra_requirements, - entry_points={"console_scripts": ["ludwig=ludwig.cli:main"]}, -) diff --git a/tests/integration_tests/utils.py b/tests/integration_tests/utils.py index 6eb6f23f564..a6591f28510 100644 --- a/tests/integration_tests/utils.py +++ b/tests/integration_tests/utils.py @@ -23,7 +23,7 @@ import tempfile import traceback import uuid -from distutils.util import strtobool +#from distutils.util import strtobool from typing import Any, Dict, List, Optional, Set, Tuple, TYPE_CHECKING, Union import cloudpickle @@ -123,6 +123,14 @@ def train(self, *args, save_path=MODEL_FILE_NAME, **kwargs): with tempfile.TemporaryDirectory() as tmpdir: return super().train(*args, save_path=tmpdir, **kwargs) +def str2bool(val): + val = val.lower() + if val in ('y', 'yes', 't', 'true', 'on', '1'): + return 1 + elif val in ('n', 'no', 'f', 'false', 'off', '0'): + return 0 + else: + raise ValueError("invalid truth value {!r}".format(val)) def parse_flag_from_env(key, default=False): try: @@ -135,7 +143,7 @@ def parse_flag_from_env(key, default=False): try: if isinstance(value, bool): return 1 if value else 0 - _value = strtobool(value) + _value = str2bool(value) except ValueError: # More values are supported, but let's keep the message simple. raise ValueError(f"If set, {key} must be yes or no.") From 7e2574f948e09aa1b096d3c147058f9a96cfa43e Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Thu, 7 Nov 2024 20:25:32 -0800 Subject: [PATCH 07/67] added dockerfile for hatch --- docker/ludwig_hatch/Dockerfile | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 docker/ludwig_hatch/Dockerfile diff --git a/docker/ludwig_hatch/Dockerfile b/docker/ludwig_hatch/Dockerfile new file mode 100644 index 00000000000..a90d9073240 --- /dev/null +++ b/docker/ludwig_hatch/Dockerfile @@ -0,0 +1,17 @@ +FROM python:3.12 + +ENV PATH="/root/.local/bin:$PATH" +RUN apt-get -y update +RUN apt-get -y install pipx +RUN apt-get -y install git libsndfile1 build-essential g++ cmake ffmpeg sox libsox-dev +RUN pipx ensurepath --force +RUN pipx install hatch +RUN python3 -m pip install --upgrade pipx +WORKDIR /ludwig +#COPY /ludwig/ . +COPY . . + +RUN hatch env create +RUN hatch build + +ENTRYPOINT ["ludwig"] \ No newline at end of file From 8e9d32d1973708d42e6ae554f1876850e9110f67 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Sun, 17 Nov 2024 07:06:58 +0100 Subject: [PATCH 08/67] Altered dependencies so it works for hatch env create --- .pre-commit-config.yaml | 2 +- pyproject.toml | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1c6390db514..d25aac6c912 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,7 +34,7 @@ repos: - id: pyupgrade args: [--py36-plus] - repo: https://github.com/PyCQA/docformatter - rev: v1.5.1 + rev: 06907d0 hooks: - id: docformatter args: [--in-place, --wrap-summaries=115, --wrap-descriptions=120] diff --git a/pyproject.toml b/pyproject.toml index e817c514602..e049bde4b79 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,7 @@ keywords = [ dependencies = [ "absl-py", "bitsandbytes<0.41.0", - "Cython>=0.25", + "Cython>=0.25,<1.0", "dataclasses-json", "datasets", "filelock", @@ -45,7 +45,7 @@ dependencies = [ "marshmallow-dataclass==8.5.4", "marshmallow-jsonschema", "nltk", - "numpy>=1.15", + "numpy==1.26", "openpyxl>=3.0.7", "packaging", "pandas>=1.0,!=1.1.5,<2.2.0", @@ -61,10 +61,10 @@ dependencies = [ "rich~=12.4.4", "sacremoses", "importlib", - "scikit-learn", + "scikit-learn==1.3", "scipy>=0.18", "sentencepiece", - "spacy>=2.3", + "spacy", "tabulate>=0.7", "tensorboard", "tokenizers>=0.15", From 9c7993511806249ee26089673cfd59f86a6bb849 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Sun, 17 Nov 2024 07:45:36 +0100 Subject: [PATCH 09/67] Updated pytest workflows python versions --- .github/workflows/pytest.yml | 8 ++++---- pyproject.toml | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 03a307da140..30866696bf1 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -22,7 +22,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.10", "3.11", "3.12"] test-markers: ["not distributed", "distributed"] include: - python-version: "3.8" @@ -192,10 +192,10 @@ jobs: - name: Upload Unit Test Results if: ${{ always() && !env.ACT }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }}) - path: pytest.xml + path: pytest-${{ matrix.python-version }}-${{ matrix.test-markers }}.xml integration-tests: name: ${{ matrix.test-markers }} @@ -560,7 +560,7 @@ jobs: steps: - name: Upload if: ${{ !env.ACT }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: Event File path: ${{ github.event_path }} diff --git a/pyproject.toml b/pyproject.toml index e049bde4b79..d3386d87a4f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,7 +86,8 @@ dependencies = [ [project.optional-dependencies] dev = [ "flake8", - "flake8-pyproject" + "flake8-pyproject", + "pre-commit", ] benchmarking = [ "s3fs", From 0a0617165a5b0ee90bf0e3d3c869f7be52209842 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Sun, 17 Nov 2024 07:52:22 +0100 Subject: [PATCH 10/67] Fixed minimal test python version --- .github/workflows/pytest.yml | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 30866696bf1..72b996d0d22 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -25,14 +25,6 @@ jobs: python-version: ["3.10", "3.11", "3.12"] test-markers: ["not distributed", "distributed"] include: - - python-version: "3.8" - pytorch-version: 2.0.0 - torchscript-version: 1.10.2 - ray-version: 2.3.1 - - python-version: "3.9" - pytorch-version: 2.1.1 - torchscript-version: 1.10.2 - ray-version: 2.3.1 - python-version: "3.10" # pytorch-version: nightly pytorch-version: 2.2.1 @@ -368,10 +360,10 @@ jobs: timeout-minutes: 15 steps: - uses: actions/checkout@v2 - - name: Set up Python 3.8 + - name: Set up Python 3.10 uses: actions/setup-python@v2 with: - python-version: 3.8 + python-version: 3.10 - name: Setup Linux if: runner.os == 'linux' @@ -388,9 +380,7 @@ jobs: python --version pip --version python -m pip install -U pip - pip install ray==2.3.0 - pip install '.' - pip install torch==2.0.0 torchtext torchvision torchaudio + pip install -e '.' pip list shell: bash - name: Check Install From 4256075a961ff10eea897a468fc4a818bba0a3de Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Sun, 17 Nov 2024 07:53:25 +0100 Subject: [PATCH 11/67] Fixed small error with versioning naming --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 72b996d0d22..75389cff1a8 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -363,7 +363,7 @@ jobs: - name: Set up Python 3.10 uses: actions/setup-python@v2 with: - python-version: 3.10 + python-version: "3.10" - name: Setup Linux if: runner.os == 'linux' From a09ef9561325913d5833355d4a4e5a56dd2da210 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Sun, 17 Nov 2024 08:18:53 +0100 Subject: [PATCH 12/67] Adding ludwig script to pyproject.toml --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index d3386d87a4f..af76319a73f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -182,6 +182,9 @@ Download = "https://pypi.org/project/ludwig/" Homepage = "https://github.com/ludwig-ai/ludwig" Website = "https://ludwig.ai/latest/" +[project.scripts] +ludwig = "ludwig.cli:main" + [tool.hatch.envs.hatch-test] dependencies = [ "gpy >=1.10.0", From 687c8eaa502e9f64820655c07bf9b0065174b431 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Sun, 17 Nov 2024 08:27:38 +0100 Subject: [PATCH 13/67] added tifffile to dependencies --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index af76319a73f..80e49d5dd51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,7 @@ dependencies = [ "xlrd>=2.0.1", "xlsxwriter>=1.4.3", "xlwt", + "tifffile" ] # Optional Dependencies @@ -237,7 +238,6 @@ dependencies = [ "sqlalchemy<2", "tblib", "tensorboardX<2.3", - "tifffile", "uvicorn", "wandb", #"wandb<0.12.11", From 123b678f4e59d371df7aa27e356b420d45d14ceb Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Sun, 17 Nov 2024 08:30:32 +0100 Subject: [PATCH 14/67] Bumped Python Version for Minimal Test --- .github/workflows/pytest.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 75389cff1a8..9e118c2b175 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -360,10 +360,10 @@ jobs: timeout-minutes: 15 steps: - uses: actions/checkout@v2 - - name: Set up Python 3.10 + - name: Set up Python 3.12 uses: actions/setup-python@v2 with: - python-version: "3.10" + python-version: "3.12" - name: Setup Linux if: runner.os == 'linux' From da47379eccd4745b5dcbb0bef0e3bab1795b13db Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Sun, 17 Nov 2024 15:08:28 +0100 Subject: [PATCH 15/67] fixed tifffile dep. and removed tests from pytest.yml --- .github/workflows/pytest.yml | 612 +++++++++++++++++------------------ pyproject.toml | 53 +-- 2 files changed, 318 insertions(+), 347 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 9e118c2b175..449d4754705 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -16,342 +16,342 @@ concurrency: cancel-in-progress: true jobs: - pytest: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python-version: ["3.10", "3.11", "3.12"] - test-markers: ["not distributed", "distributed"] - include: - - python-version: "3.10" - # pytorch-version: nightly - pytorch-version: 2.2.1 - torchscript-version: 1.10.2 - ray-version: 2.3.1 - env: - PYTORCH: ${{ matrix.pytorch-version }} - MARKERS: ${{ matrix.test-markers }} - NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod" - NEUROPOD_VERISON: "0.3.0-rc6" - TORCHSCRIPT_VERSION: ${{ matrix.torchscript-version }} - RAY_VERSION: ${{ matrix.ray-version }} - AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }} - KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }} - KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }} - IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }} - - name: py${{ matrix.python-version }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}, ray ${{ matrix.ray-version }} - services: - minio: - image: fclairamb/minio-github-actions - env: - MINIO_ACCESS_KEY: minio - MINIO_SECRET_KEY: minio123 - ports: - - 9000:9000 - - timeout-minutes: 150 - steps: - - name: Setup ludwigai/ludwig-ray container for local testing with act. - if: ${{ env.ACT }} - run: | - curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash - - sudo apt-get install -y nodejs - sudo mkdir -p /opt/hostedtoolcache/ - sudo chmod 777 -R /opt/hostedtoolcache/ - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} + # pytest: + # runs-on: ${{ matrix.os }} + # strategy: + # fail-fast: false + # matrix: + # os: [ubuntu-latest] + # python-version: ["3.10", "3.11", "3.12"] + # test-markers: ["not distributed", "distributed"] + # include: + # - python-version: "3.10" + # # pytorch-version: nightly + # pytorch-version: 2.2.1 + # torchscript-version: 1.10.2 + # ray-version: 2.3.1 + # env: + # PYTORCH: ${{ matrix.pytorch-version }} + # MARKERS: ${{ matrix.test-markers }} + # NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod" + # NEUROPOD_VERISON: "0.3.0-rc6" + # TORCHSCRIPT_VERSION: ${{ matrix.torchscript-version }} + # RAY_VERSION: ${{ matrix.ray-version }} + # AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }} + # AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }} + # KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }} + # KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }} + # IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }} + + # name: py${{ matrix.python-version }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}, ray ${{ matrix.ray-version }} + # services: + # minio: + # image: fclairamb/minio-github-actions + # env: + # MINIO_ACCESS_KEY: minio + # MINIO_SECRET_KEY: minio123 + # ports: + # - 9000:9000 - - name: Setup Linux - if: runner.os == 'linux' - run: | - sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev + # timeout-minutes: 150 + # steps: + # - name: Setup ludwigai/ludwig-ray container for local testing with act. + # if: ${{ env.ACT }} + # run: | + # curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash - + # sudo apt-get install -y nodejs + # sudo mkdir -p /opt/hostedtoolcache/ + # sudo chmod 777 -R /opt/hostedtoolcache/ + # - uses: actions/checkout@v2 + # - name: Set up Python ${{ matrix.python-version }} + # uses: actions/setup-python@v2 + # with: + # python-version: ${{ matrix.python-version }} - - name: Setup macOS - if: runner.os == 'macOS' - run: | - brew install libuv + # - name: Setup Linux + # if: runner.os == 'linux' + # run: | + # sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev - - name: pip cache - if: ${{ !env.ACT }} - uses: actions/cache@v2 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt', '.github/workflows/pytest.yml') }} + # - name: Setup macOS + # if: runner.os == 'macOS' + # run: | + # brew install libuv - - name: Debug out of space - run: | - du -h -d 1 ~ - df -h + # - name: pip cache + # if: ${{ !env.ACT }} + # uses: actions/cache@v2 + # with: + # path: ~/.cache/pip + # key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt', '.github/workflows/pytest.yml') }} - - name: Install dependencies - run: | - python --version - pip --version - python -m pip install -U pip - cmake --version - - # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job. - cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt - cat requirements_distributed.txt | sed '/^ray[\[]/d' - - if [ "$MARKERS" != "distributed" ]; then - # Skip distributed and hyperopt requirements to test optional imports - echo > requirements-temp && mv requirements-temp requirements_distributed.txt - echo > requirements-temp && mv requirements-temp requirements_hyperopt.txt - - # Skip distributed tree requirement (lightgbm-ray) - cat requirements_tree.txt | sed '/^lightgbm-ray/d' > requirements-temp && mv requirements-temp requirements_tree.txt - else - if [ "$RAY_VERSION" == "nightly" ]; then - # NOTE: hardcoded for python 3.10 on Linux - echo "ray[default,data,serve,tune] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl" >> requirements_distributed.txt - else - echo "ray[default,data,serve,tune]==$RAY_VERSION" >> requirements_distributed.txt - fi - fi - - if [ "$PYTORCH" == "nightly" ]; then - extra_index_url=https://download.pytorch.org/whl/nightly/cpu - pip install --pre torch torchtext torchvision torchaudio --extra-index-url $extra_index_url - - else - extra_index_url=https://download.pytorch.org/whl/cpu - pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url - fi - - pip install '.[test]' --extra-index-url $extra_index_url - pip list + # - name: Debug out of space + # run: | + # du -h -d 1 ~ + # df -h - if [ "$PYTORCH" == "nightly" ]; then - python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release >= version.parse(\"2.0.0\").release, f\"torch {version.parse(torch.__version__).release} < version.parse(\'2.0.0\').release\"" - else - python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release == version.parse(\"$PYTORCH\").release, f\"torch {version.parse(torch.__version__).release} != version.parse(\'$PYTORCH\').release\"" - fi - - if [ "$MARKERS" == "distributed" ]; then - python -c "from packaging import version; import ray; assert version.parse(ray.__version__).release == version.parse(\"$RAY_VERSION\").release, f\"ray {version.parse(ray.__version__).release} != version.parse(\'$RAY_VERSION\').release\"" - else - python -c "import importlib.util; assert importlib.util.find_spec('ray') is None, \"found ray but expected it to not be installed\"" - fi - shell: bash + # - name: Install dependencies + # run: | + # python --version + # pip --version + # python -m pip install -U pip + # cmake --version - - name: Install Neuropod backend - run: | - sudo mkdir -p "$NEUROPOD_BASE_DIR" - curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERSION }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR" - shell: bash + # # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job. + # cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt + # cat requirements_distributed.txt | sed '/^ray[\[]/d' - - name: Unit Tests - run: | - RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod and not llm" --junitxml pytest.xml tests/ludwig + # if [ "$MARKERS" != "distributed" ]; then + # # Skip distributed and hyperopt requirements to test optional imports + # echo > requirements-temp && mv requirements-temp requirements_distributed.txt + # echo > requirements-temp && mv requirements-temp requirements_hyperopt.txt - - name: Regression Tests - run: | - RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod or benchmark and not llm" --junitxml pytest.xml tests/regression_tests - - # Skip Horovod and replace with DDP. - # https://github.com/ludwig-ai/ludwig/issues/3468 - # - name: Install Horovod if necessary - # if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly' - # env: - # HOROVOD_WITH_PYTORCH: 1 - # HOROVOD_WITHOUT_MPI: 1 - # HOROVOD_WITHOUT_TENSORFLOW: 1 - # HOROVOD_WITHOUT_MXNET: 1 - # run: | - # pip install -r requirements_extra.txt - # HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true) - # if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then - # pip uninstall -y horovod - # pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master - # fi - # horovodrun --check-build - # shell: bash - - # Skip Horovod tests and replace with DDP. - # https://github.com/ludwig-ai/ludwig/issues/3468 - # - name: Horovod Tests - # if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly' - # run: | - # RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and horovod and not slow and not combinatorial and not llm" --junitxml pytest.xml tests/ - - - name: Upload Unit Test Results - if: ${{ always() && !env.ACT }} - uses: actions/upload-artifact@v4 - with: - name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }}) - path: pytest-${{ matrix.python-version }}-${{ matrix.test-markers }}.xml + # # Skip distributed tree requirement (lightgbm-ray) + # cat requirements_tree.txt | sed '/^lightgbm-ray/d' > requirements-temp && mv requirements-temp requirements_tree.txt + # else + # if [ "$RAY_VERSION" == "nightly" ]; then + # # NOTE: hardcoded for python 3.10 on Linux + # echo "ray[default,data,serve,tune] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl" >> requirements_distributed.txt + # else + # echo "ray[default,data,serve,tune]==$RAY_VERSION" >> requirements_distributed.txt + # fi + # fi - integration-tests: - name: ${{ matrix.test-markers }} - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - test-markers: - - "integration_tests_a" - - "integration_tests_b" - - "integration_tests_c" - - "integration_tests_d" - - "integration_tests_e" - - "integration_tests_f" - - env: - AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }} - KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }} - KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }} - IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }} - MARKERS: ${{ matrix.test-markers }} - - services: - minio: - image: fclairamb/minio-github-actions - env: - MINIO_ACCESS_KEY: minio - MINIO_SECRET_KEY: minio123 - ports: - - 9000:9000 - - timeout-minutes: 90 - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.10 - uses: actions/setup-python@v2 - with: - python-version: "3.10" + # if [ "$PYTORCH" == "nightly" ]; then + # extra_index_url=https://download.pytorch.org/whl/nightly/cpu + # pip install --pre torch torchtext torchvision torchaudio --extra-index-url $extra_index_url - - name: Setup Linux - if: runner.os == 'linux' - run: | - sudo apt-get update && sudo apt-get install -y cmake libsndfile1 + # else + # extra_index_url=https://download.pytorch.org/whl/cpu + # pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url + # fi - - name: Setup macOS - if: runner.os == 'macOS' - run: | - brew install libuv + # pip install '.[test]' --extra-index-url $extra_index_url + # pip list - - name: Install dependencies - run: | - python --version - pip --version - python -m pip install -U pip + # if [ "$PYTORCH" == "nightly" ]; then + # python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release >= version.parse(\"2.0.0\").release, f\"torch {version.parse(torch.__version__).release} < version.parse(\'2.0.0\').release\"" + # else + # python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release == version.parse(\"$PYTORCH\").release, f\"torch {version.parse(torch.__version__).release} != version.parse(\'$PYTORCH\').release\"" + # fi - # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job. - cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt - cat requirements_distributed.txt | sed '/^ray[\[]/d' - pip install torch==2.0.0 torchtext torchvision torchaudio - pip install ray==2.3.0 - pip install '.[test]' - pip list - shell: bash + # if [ "$MARKERS" == "distributed" ]; then + # python -c "from packaging import version; import ray; assert version.parse(ray.__version__).release == version.parse(\"$RAY_VERSION\").release, f\"ray {version.parse(ray.__version__).release} != version.parse(\'$RAY_VERSION\').release\"" + # else + # python -c "import importlib.util; assert importlib.util.find_spec('ray') is None, \"found ray but expected it to not be installed\"" + # fi + # shell: bash - - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main - with: - tool-cache: false - android: true - dotnet: true - haskell: true - large-packages: false - docker-images: true - swap-storage: true - - - name: Clean out /tmp directory - run: | - sudo rm -rf /tmp/* + # - name: Install Neuropod backend + # run: | + # sudo mkdir -p "$NEUROPOD_BASE_DIR" + # curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERSION }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR" + # shell: bash - - name: Integration Tests - run: | - RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and $MARKERS" --junitxml pytest.xml tests/integration_tests + # - name: Unit Tests + # run: | + # RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod and not llm" --junitxml pytest.xml tests/ludwig - llm-tests: - name: LLM Tests - runs-on: ubuntu-latest + # - name: Regression Tests + # run: | + # RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod or benchmark and not llm" --junitxml pytest.xml tests/regression_tests + + # # Skip Horovod and replace with DDP. + # # https://github.com/ludwig-ai/ludwig/issues/3468 + # # - name: Install Horovod if necessary + # # if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly' + # # env: + # # HOROVOD_WITH_PYTORCH: 1 + # # HOROVOD_WITHOUT_MPI: 1 + # # HOROVOD_WITHOUT_TENSORFLOW: 1 + # # HOROVOD_WITHOUT_MXNET: 1 + # # run: | + # # pip install -r requirements_extra.txt + # # HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true) + # # if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then + # # pip uninstall -y horovod + # # pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master + # # fi + # # horovodrun --check-build + # # shell: bash + + # # Skip Horovod tests and replace with DDP. + # # https://github.com/ludwig-ai/ludwig/issues/3468 + # # - name: Horovod Tests + # # if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly' + # # run: | + # # RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and horovod and not slow and not combinatorial and not llm" --junitxml pytest.xml tests/ - timeout-minutes: 60 - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 - with: - python-version: 3.9 + # - name: Upload Unit Test Results + # if: ${{ always() && !env.ACT }} + # uses: actions/upload-artifact@v4 + # with: + # name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }}) + # path: pytest-${{ matrix.python-version }}-${{ matrix.test-markers }}.xml - - name: Setup Linux - if: runner.os == 'linux' - run: | - sudo apt-get update && sudo apt-get install -y cmake libsndfile1 + # integration-tests: + # name: ${{ matrix.test-markers }} + # runs-on: ubuntu-latest + # strategy: + # fail-fast: false + # matrix: + # test-markers: + # - "integration_tests_a" + # - "integration_tests_b" + # - "integration_tests_c" + # - "integration_tests_d" + # - "integration_tests_e" + # - "integration_tests_f" - - name: Setup macOS - if: runner.os == 'macOS' - run: | - brew install libuv + # env: + # AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }} + # AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }} + # KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }} + # KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }} + # IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }} + # MARKERS: ${{ matrix.test-markers }} + + # services: + # minio: + # image: fclairamb/minio-github-actions + # env: + # MINIO_ACCESS_KEY: minio + # MINIO_SECRET_KEY: minio123 + # ports: + # - 9000:9000 - - name: Install dependencies - run: | - python --version - pip --version - python -m pip install -U pip + # timeout-minutes: 90 + # steps: + # - uses: actions/checkout@v2 + # - name: Set up Python 3.10 + # uses: actions/setup-python@v2 + # with: + # python-version: "3.10" - # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job. - cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt - cat requirements_distributed.txt | sed '/^ray[\[]/d' - pip install torch==2.0.0 torchtext torchvision torchaudio - pip install ray==2.3.0 - pip install '.[test]' - pip list - shell: bash + # - name: Setup Linux + # if: runner.os == 'linux' + # run: | + # sudo apt-get update && sudo apt-get install -y cmake libsndfile1 - - name: LLM Tests - run: | - pytest -vs --durations 100 -m "llm" --junitxml pytest.xml tests + # - name: Setup macOS + # if: runner.os == 'macOS' + # run: | + # brew install libuv - combinatorial-tests: - name: Combinatorial Tests - runs-on: ubuntu-latest + # - name: Install dependencies + # run: | + # python --version + # pip --version + # python -m pip install -U pip - timeout-minutes: 60 - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 + # # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job. + # cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt + # cat requirements_distributed.txt | sed '/^ray[\[]/d' + # pip install torch==2.0.0 torchtext torchvision torchaudio + # pip install ray==2.3.0 + # pip install '.[test]' + # pip list + # shell: bash - - name: Setup Linux - if: runner.os == 'linux' - run: | - sudo apt-get update && sudo apt-get install -y cmake libsndfile1 + # - name: Free Disk Space (Ubuntu) + # uses: jlumbroso/free-disk-space@main + # with: + # tool-cache: false + # android: true + # dotnet: true + # haskell: true + # large-packages: false + # docker-images: true + # swap-storage: true + + # - name: Clean out /tmp directory + # run: | + # sudo rm -rf /tmp/* - - name: Setup macOS - if: runner.os == 'macOS' - run: | - brew install libuv + # - name: Integration Tests + # run: | + # RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and $MARKERS" --junitxml pytest.xml tests/integration_tests - - name: Install dependencies - run: | - python --version - pip --version - python -m pip install -U pip - pip install torch==2.0.0 torchtext torchvision torchaudio - pip install '.[test]' - pip list - shell: bash + # llm-tests: + # name: LLM Tests + # runs-on: ubuntu-latest - - name: Testing combinatorial config generation code - run: | - pytest -vs --durations 100 -m "combinatorial" --junitxml pytest.xml tests/ludwig/config_sampling + # timeout-minutes: 60 + # steps: + # - uses: actions/checkout@v2 + # - name: Set up Python 3.9 + # uses: actions/setup-python@v2 + # with: + # python-version: 3.9 - - name: Combinatorial Tests - run: | - pytest -rx --durations 100 -m "combinatorial" --junitxml pytest.xml tests/training_success + # - name: Setup Linux + # if: runner.os == 'linux' + # run: | + # sudo apt-get update && sudo apt-get install -y cmake libsndfile1 + + # - name: Setup macOS + # if: runner.os == 'macOS' + # run: | + # brew install libuv + + # - name: Install dependencies + # run: | + # python --version + # pip --version + # python -m pip install -U pip + + # # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job. + # cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt + # cat requirements_distributed.txt | sed '/^ray[\[]/d' + # pip install torch==2.0.0 torchtext torchvision torchaudio + # pip install ray==2.3.0 + # pip install '.[test]' + # pip list + # shell: bash + + # - name: LLM Tests + # run: | + # pytest -vs --durations 100 -m "llm" --junitxml pytest.xml tests + + # combinatorial-tests: + # name: Combinatorial Tests + # runs-on: ubuntu-latest + + # timeout-minutes: 60 + # steps: + # - uses: actions/checkout@v2 + # - name: Set up Python 3.8 + # uses: actions/setup-python@v2 + # with: + # python-version: 3.8 + + # - name: Setup Linux + # if: runner.os == 'linux' + # run: | + # sudo apt-get update && sudo apt-get install -y cmake libsndfile1 + + # - name: Setup macOS + # if: runner.os == 'macOS' + # run: | + # brew install libuv + + # - name: Install dependencies + # run: | + # python --version + # pip --version + # python -m pip install -U pip + # pip install torch==2.0.0 torchtext torchvision torchaudio + # pip install '.[test]' + # pip list + # shell: bash + + # - name: Testing combinatorial config generation code + # run: | + # pytest -vs --durations 100 -m "combinatorial" --junitxml pytest.xml tests/ludwig/config_sampling + + # - name: Combinatorial Tests + # run: | + # pytest -rx --durations 100 -m "combinatorial" --junitxml pytest.xml tests/training_success test-minimal-install: name: Test Minimal Install diff --git a/pyproject.toml b/pyproject.toml index 80e49d5dd51..b334056870b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,9 +9,7 @@ description = "Declarative machine learning: End-to-end machine learning pipelin readme = "README.md" license = { text = "Apache 2.0" } requires-python = ">=3.10" -authors = [ - { name = "Piero Molino", email = "piero.molino@gmail.com" }, -] +authors = [{ name = "Piero Molino", email = "piero.molino@gmail.com" }] keywords = [ "computer", "deep", @@ -80,19 +78,13 @@ dependencies = [ "xlrd>=2.0.1", "xlsxwriter>=1.4.3", "xlwt", - "tifffile" + "tifffile==2024.9.20", ] # Optional Dependencies [project.optional-dependencies] -dev = [ - "flake8", - "flake8-pyproject", - "pre-commit", -] -benchmarking = [ - "s3fs", -] +dev = ["flake8", "flake8-pyproject", "pre-commit"] +benchmarking = ["s3fs"] distributed = [ "awscli", "dask[dataframe]<2023.4.0", @@ -104,9 +96,7 @@ distributed = [ "tblib", "tensorboardX<2.3", ] -explain = [ - "captum", -] +explain = ["captum"] extra = [ "horovod[pytorch]>=0.24.0,!=0.26.0", "modin[ray]", @@ -146,10 +136,7 @@ full = [ "tensorboardX<2.3", "uvicorn", ] -hyperopt = [ - "hyperopt", - "ray[default,tune]>=2.0.0", -] +hyperopt = ["hyperopt", "ray[default,tune]>=2.0.0"] llm = [ "accelerate", "faiss-cpu", @@ -165,11 +152,7 @@ serve = [ "python-multipart", "uvicorn", ] -tree = [ - "hummingbird-ml>=0.4.8", - "lightgbm", - "lightgbm-ray", -] +tree = ["hummingbird-ml>=0.4.8", "lightgbm", "lightgbm-ray"] viz = [ "hiplot", "matplotlib>3.4,<3.9.0; python_version > '3.6'", @@ -242,7 +225,7 @@ dependencies = [ "wandb", #"wandb<0.12.11", "wget", - "zoopt" + "zoopt", ] #[tool.hatch.envs.hatch-test] @@ -253,21 +236,12 @@ dependencies = [ path = "ludwig/__about__.py" [tool.hatch.build.targets.sdist] -include = [ - "/ludwig", - "/tests" -] +include = ["/ludwig", "/tests"] # ------- flake8 ---------- [tool.flake8] max-line-length = 120 -exclude = [ - ".tox", - "*.egg", - "*_pb2.py", - "build", - "temp" -] +exclude = [".tox", "*.egg", "*_pb2.py", "build", "temp"] select = ["E", "W", "F"] doctests = true verbose = 2 @@ -283,14 +257,11 @@ ignore = [ "E226", "E241", "E271", - "E275" + "E275", ] [tool.hatch.envs.lint] -dependencies = [ - "flake8", - "flake8-pyproject" -] +dependencies = ["flake8", "flake8-pyproject"] [tool.hatch.envs.lint.scripts] style = "flake8 ." From 68fb83699e447153150abfd4847b75f71381c79b Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Sun, 17 Nov 2024 16:36:07 +0100 Subject: [PATCH 16/67] Add combinatorial tests to pytest.yaml --- .github/workflows/pytest.yml | 65 ++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 33 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 449d4754705..56807220e2b 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -313,45 +313,44 @@ jobs: # run: | # pytest -vs --durations 100 -m "llm" --junitxml pytest.xml tests - # combinatorial-tests: - # name: Combinatorial Tests - # runs-on: ubuntu-latest + combinatorial-tests: + name: Combinatorial Tests + runs-on: ubuntu-latest - # timeout-minutes: 60 - # steps: - # - uses: actions/checkout@v2 - # - name: Set up Python 3.8 - # uses: actions/setup-python@v2 - # with: - # python-version: 3.8 + timeout-minutes: 60 + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.12 + uses: actions/setup-python@v2 + with: + python-version: "3.12" - # - name: Setup Linux - # if: runner.os == 'linux' - # run: | - # sudo apt-get update && sudo apt-get install -y cmake libsndfile1 + - name: Setup Linux + if: runner.os == 'linux' + run: | + sudo apt-get update && sudo apt-get install -y cmake libsndfile1 - # - name: Setup macOS - # if: runner.os == 'macOS' - # run: | - # brew install libuv + - name: Setup macOS + if: runner.os == 'macOS' + run: | + brew install libuv - # - name: Install dependencies - # run: | - # python --version - # pip --version - # python -m pip install -U pip - # pip install torch==2.0.0 torchtext torchvision torchaudio - # pip install '.[test]' - # pip list - # shell: bash + - name: Install dependencies + run: | + python --version + pip --version + python -m pip install -U pip + pip install '.[test]' + pip list + shell: bash - # - name: Testing combinatorial config generation code - # run: | - # pytest -vs --durations 100 -m "combinatorial" --junitxml pytest.xml tests/ludwig/config_sampling + - name: Testing combinatorial config generation code + run: | + pytest -vs --durations 100 -m "combinatorial" --junitxml pytest.xml tests/ludwig/config_sampling - # - name: Combinatorial Tests - # run: | - # pytest -rx --durations 100 -m "combinatorial" --junitxml pytest.xml tests/training_success + - name: Combinatorial Tests + run: | + pytest -rx --durations 100 -m "combinatorial" --junitxml pytest.xml tests/training_success test-minimal-install: name: Test Minimal Install From 6a896f9884b2333d943970363f104cabf0e6aba9 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Wed, 20 Nov 2024 00:01:30 +0100 Subject: [PATCH 17/67] Refined pyproject toml --- pyproject.toml | 56 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b334056870b..0fa21345085 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["hatchling"] +requires = ["hatchling", "setuptools>=65.0"] build-backend = "hatchling.build" [project] @@ -46,7 +46,7 @@ dependencies = [ "numpy==1.26", "openpyxl>=3.0.7", "packaging", - "pandas>=1.0,!=1.1.5,<2.2.0", + "pandas", "protobuf", "psutil", "py-cpuinfo==9.0.0", @@ -58,7 +58,7 @@ dependencies = [ "retry", "rich~=12.4.4", "sacremoses", - "importlib", + #"importlib", "scikit-learn==1.3", "scipy>=0.18", "sentencepiece", @@ -83,25 +83,6 @@ dependencies = [ # Optional Dependencies [project.optional-dependencies] -dev = ["flake8", "flake8-pyproject", "pre-commit"] -benchmarking = ["s3fs"] -distributed = [ - "awscli", - "dask[dataframe]<2023.4.0", - "deepspeed!=0.11.0,<0.13.0", - "getdaft[ray]==0.1.20", - "GPUtil", - "pyarrow", - "ray[default,data,serve,tune]==2.3.1", - "tblib", - "tensorboardX<2.3", -] -explain = ["captum"] -extra = [ - "horovod[pytorch]>=0.24.0,!=0.26.0", - "modin[ray]", - "predibase>=2023.10.2", -] full = [ "accelerate", "awscli", @@ -136,6 +117,28 @@ full = [ "tensorboardX<2.3", "uvicorn", ] + +dev = ["flake8", "flake8-pyproject", "pre-commit", "setuptools"] + +test = ["pytest", "pytest-timeout", "wget", "six>=1.13.0", "cloudpickle"] +benchmarking = ["s3fs"] +distributed = [ + "awscli", + "dask[dataframe]<2023.4.0", + "deepspeed!=0.11.0,<0.13.0", + "getdaft[ray]==0.1.20", + "GPUtil", + "pyarrow", + "ray[default,data,serve,tune]==2.3.1", + "tblib", + "tensorboardX<2.3", +] +explain = ["captum"] +extra = [ + "horovod[pytorch]>=0.24.0,!=0.26.0", + "modin[ray]", + "predibase>=2023.10.2", +] hyperopt = ["hyperopt", "ray[default,tune]>=2.0.0"] llm = [ "accelerate", @@ -266,6 +269,15 @@ dependencies = ["flake8", "flake8-pyproject"] [tool.hatch.envs.lint.scripts] style = "flake8 ." +[tool.hatch.envs.default] +python = "3.11" +dependencies = ["setuptools>=65.0"] + +[tool.hatch.envs.dev] +python = "3.11" +dependencies = [".[dev]"] + + [tool.isort] profile = "black" line_length = 120 From 8d0f0c4816c7b273d1e7d55af0ef7fa34d863323 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Wed, 20 Nov 2024 00:35:38 +0100 Subject: [PATCH 18/67] Removed importlib from dependencies --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0fa21345085..5e948171f1c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,6 @@ dependencies = [ "retry", "rich~=12.4.4", "sacremoses", - #"importlib", "scikit-learn==1.3", "scipy>=0.18", "sentencepiece", From a926588b6e24463c843e2e07ea22a44713519292 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Wed, 20 Nov 2024 01:30:23 +0100 Subject: [PATCH 19/67] bump torch version to 2.4.1 --- pyproject.toml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5e948171f1c..7167dd0da32 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,12 +65,12 @@ dependencies = [ "tabulate>=0.7", "tensorboard", "tokenizers>=0.15", - "torch==2.2.2", - "torchaudio==2.2.2", + "torch==2.4.1", + "torchaudio==2.4.1", "torchinfo", "torchmetrics>=0.11.0", - "torchtext==0.17.2", - "torchvision==0.17.2", + #"torchtext==0.17.2", + "torchvision==0.19.1", "tqdm", "transformers>=4.42.3", "urllib3<2", @@ -269,11 +269,11 @@ dependencies = ["flake8", "flake8-pyproject"] style = "flake8 ." [tool.hatch.envs.default] -python = "3.11" +python = "3.12" dependencies = ["setuptools>=65.0"] [tool.hatch.envs.dev] -python = "3.11" +python = "3.12" dependencies = [".[dev]"] From 5b288f2923c19da3a5675258e632861322533e2b Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Wed, 20 Nov 2024 01:35:07 +0100 Subject: [PATCH 20/67] fallback to eager for torch dynamo to prefent error --- ludwig/schema/trainer.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ludwig/schema/trainer.py b/ludwig/schema/trainer.py index 322e4df2b06..a164e86fed5 100644 --- a/ludwig/schema/trainer.py +++ b/ludwig/schema/trainer.py @@ -1,8 +1,10 @@ +# flake8: noqa: E501 import re from abc import ABC from typing import Optional, Type, Union import torch +import torch._dynamo from packaging.version import parse as parse_version from ludwig.api_annotations import DeveloperAPI @@ -33,6 +35,9 @@ _torch_200 = parse_version(torch.__version__) >= parse_version("2.0") +# this is a workarrpund to avoid an error regarding torch.compile. +# TODO Fix torch.compile and dynamo problems +torch._dynamo.config.suppress_errors = True trainer_schema_registry = Registry() _llm_trainer_schema_registry = Registry() From 26fe1355c1bb4b48ad646c27ca30e1ac6e75a380 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Wed, 20 Nov 2024 01:41:38 +0100 Subject: [PATCH 21/67] added flake8 ignore line length error --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 7167dd0da32..eede457385e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -260,6 +260,7 @@ ignore = [ "E241", "E271", "E275", + "E501", ] [tool.hatch.envs.lint] From 551eb400179a19fb9df2f84d85b48570cf760d82 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Wed, 20 Nov 2024 02:36:01 +0100 Subject: [PATCH 22/67] added pytest suit to jobs --- .github/workflows/pytest.yml | 326 +++++++++++++++++------------------ 1 file changed, 163 insertions(+), 163 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 56807220e2b..82702a91635 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -16,178 +16,178 @@ concurrency: cancel-in-progress: true jobs: - # pytest: - # runs-on: ${{ matrix.os }} - # strategy: - # fail-fast: false - # matrix: - # os: [ubuntu-latest] - # python-version: ["3.10", "3.11", "3.12"] - # test-markers: ["not distributed", "distributed"] - # include: - # - python-version: "3.10" - # # pytorch-version: nightly - # pytorch-version: 2.2.1 - # torchscript-version: 1.10.2 - # ray-version: 2.3.1 - # env: - # PYTORCH: ${{ matrix.pytorch-version }} - # MARKERS: ${{ matrix.test-markers }} - # NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod" - # NEUROPOD_VERISON: "0.3.0-rc6" - # TORCHSCRIPT_VERSION: ${{ matrix.torchscript-version }} - # RAY_VERSION: ${{ matrix.ray-version }} - # AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }} - # AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }} - # KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }} - # KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }} - # IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }} - - # name: py${{ matrix.python-version }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}, ray ${{ matrix.ray-version }} - # services: - # minio: - # image: fclairamb/minio-github-actions - # env: - # MINIO_ACCESS_KEY: minio - # MINIO_SECRET_KEY: minio123 - # ports: - # - 9000:9000 - - # timeout-minutes: 150 - # steps: - # - name: Setup ludwigai/ludwig-ray container for local testing with act. - # if: ${{ env.ACT }} - # run: | - # curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash - - # sudo apt-get install -y nodejs - # sudo mkdir -p /opt/hostedtoolcache/ - # sudo chmod 777 -R /opt/hostedtoolcache/ - # - uses: actions/checkout@v2 - # - name: Set up Python ${{ matrix.python-version }} - # uses: actions/setup-python@v2 - # with: - # python-version: ${{ matrix.python-version }} - - # - name: Setup Linux - # if: runner.os == 'linux' - # run: | - # sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev - - # - name: Setup macOS - # if: runner.os == 'macOS' - # run: | - # brew install libuv - - # - name: pip cache - # if: ${{ !env.ACT }} - # uses: actions/cache@v2 - # with: - # path: ~/.cache/pip - # key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt', '.github/workflows/pytest.yml') }} - - # - name: Debug out of space - # run: | - # du -h -d 1 ~ - # df -h - - # - name: Install dependencies - # run: | - # python --version - # pip --version - # python -m pip install -U pip - # cmake --version - - # # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job. - # cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt - # cat requirements_distributed.txt | sed '/^ray[\[]/d' - - # if [ "$MARKERS" != "distributed" ]; then - # # Skip distributed and hyperopt requirements to test optional imports - # echo > requirements-temp && mv requirements-temp requirements_distributed.txt - # echo > requirements-temp && mv requirements-temp requirements_hyperopt.txt - - # # Skip distributed tree requirement (lightgbm-ray) - # cat requirements_tree.txt | sed '/^lightgbm-ray/d' > requirements-temp && mv requirements-temp requirements_tree.txt - # else - # if [ "$RAY_VERSION" == "nightly" ]; then - # # NOTE: hardcoded for python 3.10 on Linux - # echo "ray[default,data,serve,tune] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl" >> requirements_distributed.txt - # else - # echo "ray[default,data,serve,tune]==$RAY_VERSION" >> requirements_distributed.txt - # fi - # fi + pytest: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + python-version: ["3.10", "3.11", "3.12"] + test-markers: ["not distributed", "distributed"] + include: + - python-version: "3.10" + # pytorch-version: nightly + pytorch-version: 2.2.1 + torchscript-version: 1.10.2 + ray-version: 2.3.1 + env: + PYTORCH: ${{ matrix.pytorch-version }} + MARKERS: ${{ matrix.test-markers }} + NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod" + NEUROPOD_VERISON: "0.3.0-rc6" + TORCHSCRIPT_VERSION: ${{ matrix.torchscript-version }} + RAY_VERSION: ${{ matrix.ray-version }} + AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }} + KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }} + KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }} + IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }} + + name: py${{ matrix.python-version }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}, ray ${{ matrix.ray-version }} + services: + minio: + image: fclairamb/minio-github-actions + env: + MINIO_ACCESS_KEY: minio + MINIO_SECRET_KEY: minio123 + ports: + - 9000:9000 + + timeout-minutes: 150 + steps: + - name: Setup ludwigai/ludwig-ray container for local testing with act. + if: ${{ env.ACT }} + run: | + curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash - + sudo apt-get install -y nodejs + sudo mkdir -p /opt/hostedtoolcache/ + sudo chmod 777 -R /opt/hostedtoolcache/ + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} - # if [ "$PYTORCH" == "nightly" ]; then - # extra_index_url=https://download.pytorch.org/whl/nightly/cpu - # pip install --pre torch torchtext torchvision torchaudio --extra-index-url $extra_index_url + - name: Setup Linux + if: runner.os == 'linux' + run: | + sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev - # else - # extra_index_url=https://download.pytorch.org/whl/cpu - # pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url - # fi + - name: Setup macOS + if: runner.os == 'macOS' + run: | + brew install libuv - # pip install '.[test]' --extra-index-url $extra_index_url - # pip list + - name: pip cache + if: ${{ !env.ACT }} + uses: actions/cache@v2 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt', '.github/workflows/pytest.yml') }} - # if [ "$PYTORCH" == "nightly" ]; then - # python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release >= version.parse(\"2.0.0\").release, f\"torch {version.parse(torch.__version__).release} < version.parse(\'2.0.0\').release\"" - # else - # python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release == version.parse(\"$PYTORCH\").release, f\"torch {version.parse(torch.__version__).release} != version.parse(\'$PYTORCH\').release\"" - # fi + - name: Debug out of space + run: | + du -h -d 1 ~ + df -h - # if [ "$MARKERS" == "distributed" ]; then - # python -c "from packaging import version; import ray; assert version.parse(ray.__version__).release == version.parse(\"$RAY_VERSION\").release, f\"ray {version.parse(ray.__version__).release} != version.parse(\'$RAY_VERSION\').release\"" - # else - # python -c "import importlib.util; assert importlib.util.find_spec('ray') is None, \"found ray but expected it to not be installed\"" - # fi - # shell: bash + - name: Install dependencies + run: | + python --version + pip --version + python -m pip install -U pip + cmake --version + + # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job. + cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt + cat requirements_distributed.txt | sed '/^ray[\[]/d' + + if [ "$MARKERS" != "distributed" ]; then + # Skip distributed and hyperopt requirements to test optional imports + echo > requirements-temp && mv requirements-temp requirements_distributed.txt + echo > requirements-temp && mv requirements-temp requirements_hyperopt.txt + + # Skip distributed tree requirement (lightgbm-ray) + cat requirements_tree.txt | sed '/^lightgbm-ray/d' > requirements-temp && mv requirements-temp requirements_tree.txt + else + if [ "$RAY_VERSION" == "nightly" ]; then + # NOTE: hardcoded for python 3.10 on Linux + echo "ray[default,data,serve,tune] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl" >> requirements_distributed.txt + else + echo "ray[default,data,serve,tune]==$RAY_VERSION" >> requirements_distributed.txt + fi + fi + + if [ "$PYTORCH" == "nightly" ]; then + extra_index_url=https://download.pytorch.org/whl/nightly/cpu + pip install --pre torch torchtext torchvision torchaudio --extra-index-url $extra_index_url + + else + extra_index_url=https://download.pytorch.org/whl/cpu + pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url + fi + + pip install '.[test]' --extra-index-url $extra_index_url + pip list - # - name: Install Neuropod backend - # run: | - # sudo mkdir -p "$NEUROPOD_BASE_DIR" - # curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERSION }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR" - # shell: bash + if [ "$PYTORCH" == "nightly" ]; then + python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release >= version.parse(\"2.0.0\").release, f\"torch {version.parse(torch.__version__).release} < version.parse(\'2.0.0\').release\"" + else + python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release == version.parse(\"$PYTORCH\").release, f\"torch {version.parse(torch.__version__).release} != version.parse(\'$PYTORCH\').release\"" + fi + + if [ "$MARKERS" == "distributed" ]; then + python -c "from packaging import version; import ray; assert version.parse(ray.__version__).release == version.parse(\"$RAY_VERSION\").release, f\"ray {version.parse(ray.__version__).release} != version.parse(\'$RAY_VERSION\').release\"" + else + python -c "import importlib.util; assert importlib.util.find_spec('ray') is None, \"found ray but expected it to not be installed\"" + fi + shell: bash - # - name: Unit Tests - # run: | - # RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod and not llm" --junitxml pytest.xml tests/ludwig + - name: Install Neuropod backend + run: | + sudo mkdir -p "$NEUROPOD_BASE_DIR" + curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERSION }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR" + shell: bash - # - name: Regression Tests - # run: | - # RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod or benchmark and not llm" --junitxml pytest.xml tests/regression_tests - - # # Skip Horovod and replace with DDP. - # # https://github.com/ludwig-ai/ludwig/issues/3468 - # # - name: Install Horovod if necessary - # # if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly' - # # env: - # # HOROVOD_WITH_PYTORCH: 1 - # # HOROVOD_WITHOUT_MPI: 1 - # # HOROVOD_WITHOUT_TENSORFLOW: 1 - # # HOROVOD_WITHOUT_MXNET: 1 - # # run: | - # # pip install -r requirements_extra.txt - # # HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true) - # # if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then - # # pip uninstall -y horovod - # # pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master - # # fi - # # horovodrun --check-build - # # shell: bash - - # # Skip Horovod tests and replace with DDP. - # # https://github.com/ludwig-ai/ludwig/issues/3468 - # # - name: Horovod Tests - # # if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly' - # # run: | - # # RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and horovod and not slow and not combinatorial and not llm" --junitxml pytest.xml tests/ + - name: Unit Tests + run: | + RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod and not llm" --junitxml pytest.xml tests/ludwig - # - name: Upload Unit Test Results - # if: ${{ always() && !env.ACT }} - # uses: actions/upload-artifact@v4 - # with: - # name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }}) - # path: pytest-${{ matrix.python-version }}-${{ matrix.test-markers }}.xml + - name: Regression Tests + run: | + RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod or benchmark and not llm" --junitxml pytest.xml tests/regression_tests + + # Skip Horovod and replace with DDP. + # https://github.com/ludwig-ai/ludwig/issues/3468 + # - name: Install Horovod if necessary + # if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly' + # env: + # HOROVOD_WITH_PYTORCH: 1 + # HOROVOD_WITHOUT_MPI: 1 + # HOROVOD_WITHOUT_TENSORFLOW: 1 + # HOROVOD_WITHOUT_MXNET: 1 + # run: | + # pip install -r requirements_extra.txt + # HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true) + # if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then + # pip uninstall -y horovod + # pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master + # fi + # horovodrun --check-build + # shell: bash + + # Skip Horovod tests and replace with DDP. + # https://github.com/ludwig-ai/ludwig/issues/3468 + # - name: Horovod Tests + # if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly' + # run: | + # RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and horovod and not slow and not combinatorial and not llm" --junitxml pytest.xml tests/ + + - name: Upload Unit Test Results + if: ${{ always() && !env.ACT }} + uses: actions/upload-artifact@v4 + with: + name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }}) + path: pytest-${{ matrix.python-version }}-${{ matrix.test-markers }}.xml # integration-tests: # name: ${{ matrix.test-markers }} From e4161c6085b5d42e75631b88581725c41bc5a9e0 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Wed, 20 Nov 2024 13:14:35 +0100 Subject: [PATCH 23/67] Refactored Matrix Tests --- .github/workflows/pytest.yml | 59 +++--------------------------------- 1 file changed, 5 insertions(+), 54 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 82702a91635..2676bb1003e 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -23,27 +23,19 @@ jobs: matrix: os: [ubuntu-latest] python-version: ["3.10", "3.11", "3.12"] - test-markers: ["not distributed", "distributed"] - include: - - python-version: "3.10" - # pytorch-version: nightly - pytorch-version: 2.2.1 - torchscript-version: 1.10.2 - ray-version: 2.3.1 + test-markers: ["not distributed"] #["not distributed", "distributed"] + env: - PYTORCH: ${{ matrix.pytorch-version }} MARKERS: ${{ matrix.test-markers }} NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod" NEUROPOD_VERISON: "0.3.0-rc6" - TORCHSCRIPT_VERSION: ${{ matrix.torchscript-version }} - RAY_VERSION: ${{ matrix.ray-version }} AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }} KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }} KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }} IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }} - name: py${{ matrix.python-version }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}, ray ${{ matrix.ray-version }} + name: py${{ matrix.python-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }} services: minio: image: fclairamb/minio-github-actions @@ -83,7 +75,7 @@ jobs: uses: actions/cache@v2 with: path: ~/.cache/pip - key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt', '.github/workflows/pytest.yml') }} + key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-${{ matrix.test-markers }}-${{ hashFiles('.github/workflows/pytest.yml') }} - name: Debug out of space run: | @@ -97,49 +89,8 @@ jobs: python -m pip install -U pip cmake --version - # remove torch and ray from the dependencies so we can add them depending on the matrix args for the job. - cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt - cat requirements_distributed.txt | sed '/^ray[\[]/d' - - if [ "$MARKERS" != "distributed" ]; then - # Skip distributed and hyperopt requirements to test optional imports - echo > requirements-temp && mv requirements-temp requirements_distributed.txt - echo > requirements-temp && mv requirements-temp requirements_hyperopt.txt - - # Skip distributed tree requirement (lightgbm-ray) - cat requirements_tree.txt | sed '/^lightgbm-ray/d' > requirements-temp && mv requirements-temp requirements_tree.txt - else - if [ "$RAY_VERSION" == "nightly" ]; then - # NOTE: hardcoded for python 3.10 on Linux - echo "ray[default,data,serve,tune] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl" >> requirements_distributed.txt - else - echo "ray[default,data,serve,tune]==$RAY_VERSION" >> requirements_distributed.txt - fi - fi - - if [ "$PYTORCH" == "nightly" ]; then - extra_index_url=https://download.pytorch.org/whl/nightly/cpu - pip install --pre torch torchtext torchvision torchaudio --extra-index-url $extra_index_url - - else - extra_index_url=https://download.pytorch.org/whl/cpu - pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url - fi - - pip install '.[test]' --extra-index-url $extra_index_url + pip install '.[test]' pip list - - if [ "$PYTORCH" == "nightly" ]; then - python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release >= version.parse(\"2.0.0\").release, f\"torch {version.parse(torch.__version__).release} < version.parse(\'2.0.0\').release\"" - else - python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release == version.parse(\"$PYTORCH\").release, f\"torch {version.parse(torch.__version__).release} != version.parse(\'$PYTORCH\').release\"" - fi - - if [ "$MARKERS" == "distributed" ]; then - python -c "from packaging import version; import ray; assert version.parse(ray.__version__).release == version.parse(\"$RAY_VERSION\").release, f\"ray {version.parse(ray.__version__).release} != version.parse(\'$RAY_VERSION\').release\"" - else - python -c "import importlib.util; assert importlib.util.find_spec('ray') is None, \"found ray but expected it to not be installed\"" - fi shell: bash - name: Install Neuropod backend From 01533ef728b004f6ab74bfc964d894588f254f3b Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Wed, 20 Nov 2024 13:21:03 +0100 Subject: [PATCH 24/67] Remove Neuropod from tests. --- .github/workflows/pytest.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 2676bb1003e..16b8a39e626 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -27,8 +27,6 @@ jobs: env: MARKERS: ${{ matrix.test-markers }} - NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod" - NEUROPOD_VERISON: "0.3.0-rc6" AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }} KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }} @@ -93,12 +91,6 @@ jobs: pip list shell: bash - - name: Install Neuropod backend - run: | - sudo mkdir -p "$NEUROPOD_BASE_DIR" - curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERSION }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR" - shell: bash - - name: Unit Tests run: | RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod and not llm" --junitxml pytest.xml tests/ludwig From b2a145409711607f89c8541de9f22d8dea5b4b17 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Mon, 25 Nov 2024 14:48:58 +0100 Subject: [PATCH 25/67] Further removed torchtext --- .github/workflows/pytest_slow.yml | 2 +- docker/ludwig-ray-gpu/Dockerfile | 2 +- docker/ludwig-ray/Dockerfile | 2 +- docker/ludwig/Dockerfile | 2 +- ludwig/decoders/llm_decoders.py | 2 +- pyproject.toml | 138 +++++------------- tests/integration_tests/test_torchscript.py | 29 +--- .../ludwig/features/test_sequence_features.py | 18 +-- 8 files changed, 47 insertions(+), 148 deletions(-) diff --git a/.github/workflows/pytest_slow.yml b/.github/workflows/pytest_slow.yml index f2f8b493bad..9e93c2903fc 100644 --- a/.github/workflows/pytest_slow.yml +++ b/.github/workflows/pytest_slow.yml @@ -50,7 +50,7 @@ jobs: python --version pip --version python -m pip install -U pip - pip install torch==2.1.0 torchtext torchvision torchaudio + pip install torch==2.1.0 torchvision torchaudio pip install ray==2.3.1 pip install '.[test]' diff --git a/docker/ludwig-ray-gpu/Dockerfile b/docker/ludwig-ray-gpu/Dockerfile index 7721126f931..af434aac6a5 100644 --- a/docker/ludwig-ray-gpu/Dockerfile +++ b/docker/ludwig-ray-gpu/Dockerfile @@ -50,7 +50,7 @@ RUN pip install -U pip WORKDIR /ludwig -RUN pip install --no-cache-dir torch==2.1.0 torchtext torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 +RUN pip install --no-cache-dir torch==2.1.0 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 COPY . . RUN pip install --no-cache-dir '.[full]' --extra-index-url https://download.pytorch.org/whl/cu118 diff --git a/docker/ludwig-ray/Dockerfile b/docker/ludwig-ray/Dockerfile index 6075cae2e89..2c460e4a5ff 100644 --- a/docker/ludwig-ray/Dockerfile +++ b/docker/ludwig-ray/Dockerfile @@ -36,7 +36,7 @@ RUN pip install -U pip WORKDIR /ludwig -RUN pip install --no-cache-dir torch==2.1.0 torchtext torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu +RUN pip install --no-cache-dir torch==2.1.0 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu COPY . . RUN pip install --no-cache-dir '.[full]' --extra-index-url https://download.pytorch.org/whl/cpu diff --git a/docker/ludwig/Dockerfile b/docker/ludwig/Dockerfile index 73a5285380f..a94e014cd0e 100644 --- a/docker/ludwig/Dockerfile +++ b/docker/ludwig/Dockerfile @@ -24,7 +24,7 @@ RUN pip install -U pip WORKDIR /ludwig -RUN pip install --no-cache-dir torch==2.0.0 torchtext torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu +RUN pip install --no-cache-dir torch==2.0.0 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu COPY . . RUN pip install --no-cache-dir '.[full]' diff --git a/ludwig/decoders/llm_decoders.py b/ludwig/decoders/llm_decoders.py index eafc84bacc1..5763f5a5868 100644 --- a/ludwig/decoders/llm_decoders.py +++ b/ludwig/decoders/llm_decoders.py @@ -1,3 +1,4 @@ +# flake8: noqa: E501 import logging import re from typing import Any, Dict, List, Union @@ -91,7 +92,6 @@ def __init__( # Transformer Tokenizers self.tokenizer_vocab_size = self.tokenizer.tokenizer.vocab_size else: - # TorchText Tokenizers self.tokenizer_vocab_size = len(self.tokenizer.vocab) # Maximum number of new tokens that will be generated diff --git a/pyproject.toml b/pyproject.toml index eede457385e..8ab3de81452 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ keywords = [ "processing", "vision", ] + dependencies = [ "absl-py", "bitsandbytes<0.41.0", @@ -69,7 +70,6 @@ dependencies = [ "torchaudio==2.4.1", "torchinfo", "torchmetrics>=0.11.0", - #"torchtext==0.17.2", "torchvision==0.19.1", "tqdm", "transformers>=4.42.3", @@ -78,48 +78,42 @@ dependencies = [ "xlsxwriter>=1.4.3", "xlwt", "tifffile==2024.9.20", + "onnx", ] -# Optional Dependencies [project.optional-dependencies] -full = [ - "accelerate", - "awscli", - "captum", - "cartonml-nightly", - "dask[dataframe]<2023.4.0", - "deepspeed!=0.11.0,<0.13.0", - "faiss-cpu", - "fastapi", - "getdaft[ray]==0.1.20", - "GPUtil", - "hiplot", - "httpx", - "hummingbird-ml>=0.4.8", - "hyperopt", - "lightgbm", - "lightgbm-ray", - "loralib", - "matplotlib>3.4,<3.9.0; python_version > '3.6'", - "matplotlib>=3.0,<3.4; python_version <= '3.6'", - "neuropod==0.3.0rc6 ; platform_system != \"Windows\" and python_version < '3.9'", - "peft>=0.10.0", - "ptitprince", - "pyarrow", - "python-multipart", - "ray[default,data,serve,tune]==2.3.1", - "ray[default,tune]>=2.0.0", - "s3fs", - "seaborn>=0.7,<0.12", - "sentence-transformers", - "tblib", - "tensorboardX<2.3", - "uvicorn", -] - dev = ["flake8", "flake8-pyproject", "pre-commit", "setuptools"] +test = [ + # Core testing + "pytest", + "pytest-timeout", + "pytest-cov", + "tifffile", + "wget", + "six>=1.13.0", -test = ["pytest", "pytest-timeout", "wget", "six>=1.13.0", "cloudpickle"] + # Logging and experiment tracking + "aim", + "wandb<0.12.11", + "comet_ml", + "mlflow", + "sqlalchemy<2", # Pinned for aimstack compatibility + + # Ray Tune Search Algorithms + "hpbandster", # BOHB algorithm + "ConfigSpace==0.7.1", + "ax-platform", # AX algorithm + "bayesian-optimization", # Bayesian optimization + "flaml[blendsearch]", # CFO and blendsearch + "HEBO", # HEBO algorithm + "nevergrad", # Nevergrad algorithm + "optuna", # Optuna algorithm + "scikit-optimize", # SKopt algorithm + "zoopt", # ZOOpt algorithm + + # Storage + "s3fs>=2022.8.2", +] benchmarking = ["s3fs"] distributed = [ "awscli", @@ -153,6 +147,7 @@ serve = [ "neuropod==0.3.0rc6 ; platform_system != \"Windows\" and python_version < '3.9'", "python-multipart", "uvicorn", + "starlette", ] tree = ["hummingbird-ml>=0.4.8", "lightgbm", "lightgbm-ray"] viz = [ @@ -171,69 +166,6 @@ Website = "https://ludwig.ai/latest/" [project.scripts] ludwig = "ludwig.cli:main" -[tool.hatch.envs.hatch-test] -dependencies = [ - "gpy >=1.10.0", - "accelerate", - "aim", - "awscli", - "ax-platform", - "bayesian-optimization", - "captum", - "cartonml-nightly", - "comet_ml", - "ConfigSpace==0.7.1", - "dask[dataframe]<2023.4.0", - "deepspeed!=0.11.0,<0.13.0", - "faiss-cpu", - "fastapi", - "flaml[blendsearch]", - "getdaft[ray]==0.1.20", - "GPUtil", - "HEBO", - "hiplot", - "hpbandster", - "httpx", - "hummingbird-ml>=0.4.8", - "hyperopt", - "lightgbm", - "lightgbm-ray", - "loralib", - "matplotlib>3.4,<3.9.0; python_version > '3.6'", - "matplotlib>=3.0,<3.4; python_version <= '3.6'", - "mlflow", - "neuropod==0.3.0rc6 ; platform_system != \"Windows\" and python_version < '3.9'", - "nevergrad", - "optuna", - "peft>=0.10.0", - "ptitprince", - "pyarrow", - "pytest", - "pytest-timeout", - "pytest-cov", - "python-multipart", - #"ray[default,data,serve,tune]==2.3.1", - #"ray[default,tune]>=2.0.0", - "s3fs", - "s3fs>=2022.8.2", - "scikit-optimize", - "seaborn>=0.7,<0.12", - "sentence-transformers", - "six>=1.13.0", - "sqlalchemy<2", - "tblib", - "tensorboardX<2.3", - "uvicorn", - "wandb", - #"wandb<0.12.11", - "wget", - "zoopt", -] - -#[tool.hatch.envs.hatch-test] -#setup = "pip install -e .[test]" -#run = "pytest {args:test}" - [tool.hatch.version] path = "ludwig/__about__.py" @@ -270,11 +202,11 @@ dependencies = ["flake8", "flake8-pyproject"] style = "flake8 ." [tool.hatch.envs.default] -python = "3.12" +python = "3.11" dependencies = ["setuptools>=65.0"] [tool.hatch.envs.dev] -python = "3.12" +python = "3.11" dependencies = [".[dev]"] diff --git a/tests/integration_tests/test_torchscript.py b/tests/integration_tests/test_torchscript.py index 198089fed88..bb06371f733 100644 --- a/tests/integration_tests/test_torchscript.py +++ b/tests/integration_tests/test_torchscript.py @@ -1,3 +1,4 @@ +# flake8: noqa: E501 # Copyright (c) 2023 Predibase, Inc., 2019 Uber Technologies, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,7 +22,6 @@ import pandas as pd import pytest import torch -import torchtext from ludwig.api import LudwigModel from ludwig.backend import RAY @@ -408,32 +408,7 @@ def test_torchscript_e2e_text(tmpdir, csv_filename): validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path) -@pytest.mark.skipif( - torch.torch_version.TorchVersion(torchtext.__version__) < (0, 14, 0), - reason="requires torchtext 0.14.0 or higher", -) -@pytest.mark.integration_tests_e -def test_torchscript_e2e_text_hf_tokenizer(tmpdir, csv_filename): - data_csv_path = os.path.join(tmpdir, csv_filename) - input_features = [text_feature(encoder={"vocab_size": 3, "type": "bert"})] - output_features = [ - category_feature(), - ] - backend = LocalTestBackend() - config = { - "input_features": input_features, - "output_features": output_features, - TRAINER: {"epochs": 2, BATCH_SIZE: 128, EVAL_BATCH_SIZE: 128}, - } - training_data_csv_path = generate_data(input_features, output_features, data_csv_path) - - validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path) - - -@pytest.mark.skipif( - torch.torch_version.TorchVersion(torchtext.__version__) < (0, 14, 0), - reason="requires torchtext 0.14.0 or higher", -) +@pytest.mark.skip() @pytest.mark.integration_tests_e def test_torchscript_e2e_text_hf_tokenizer_truncated_sequence(tmpdir, csv_filename): data_csv_path = os.path.join(tmpdir, csv_filename) diff --git a/tests/ludwig/features/test_sequence_features.py b/tests/ludwig/features/test_sequence_features.py index ed158475aea..e4e84d7ef39 100644 --- a/tests/ludwig/features/test_sequence_features.py +++ b/tests/ludwig/features/test_sequence_features.py @@ -1,9 +1,9 @@ +# flake8: noqa: E501 from typing import List, Tuple import numpy as np import pytest import torch -import torchtext from ludwig.constants import ENCODER_OUTPUT, LAST_HIDDEN, LOGITS, SEQUENCE, TEXT, TYPE from ludwig.features.sequence_feature import _SequencePreprocessing, SequenceInputFeature, SequenceOutputFeature @@ -192,9 +192,7 @@ def test_text_preproc_module_space_punct_tokenizer(): ) -@pytest.mark.skipif( - torch.torch_version.TorchVersion(torchtext.__version__) < (0, 12, 0), reason="requires torchtext 0.12.0 or higher" -) +@pytest.mark.skip() def test_sequence_preproc_module_sentencepiece_tokenizer(): metadata = { "preprocessing": { @@ -227,9 +225,7 @@ def test_sequence_preproc_module_sentencepiece_tokenizer(): ) -@pytest.mark.skipif( - torch.torch_version.TorchVersion(torchtext.__version__) < (0, 12, 0), reason="requires torchtext 0.12.0 or higher" -) +@pytest.mark.skip() def test_sequence_preproc_module_clip_tokenizer(): metadata = { "preprocessing": { @@ -260,9 +256,7 @@ def test_sequence_preproc_module_clip_tokenizer(): ) -@pytest.mark.skipif( - torch.torch_version.TorchVersion(torchtext.__version__) < (0, 12, 0), reason="requires torchtext 0.12.0 or higher" -) +@pytest.mark.skip() def test_sequence_preproc_module_gpt2bpe_tokenizer(): metadata = { "preprocessing": { @@ -296,9 +290,7 @@ def test_sequence_preproc_module_gpt2bpe_tokenizer(): ) -@pytest.mark.skipif( - torch.torch_version.TorchVersion(torchtext.__version__) < (0, 13, 0), reason="requires torchtext 0.13.0 or higher" -) +@pytest.mark.skip() def test_sequence_preproc_module_bert_tokenizer(): metadata = { "preprocessing": { From 80dc452a5e2b02635c68e7a9ec6bd84ad31db538 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Mon, 25 Nov 2024 15:00:28 +0100 Subject: [PATCH 26/67] try to fix openblas issue --- .github/workflows/pytest.yml | 2 +- .gitignore | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 16b8a39e626..f0bc5f2a3aa 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -61,7 +61,7 @@ jobs: - name: Setup Linux if: runner.os == 'linux' run: | - sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev + sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev libopenblas-dev - name: Setup macOS if: runner.os == 'macOS' diff --git a/.gitignore b/.gitignore index 1c234c6996e..a4166a54fab 100644 --- a/.gitignore +++ b/.gitignore @@ -140,3 +140,5 @@ examples/*/visualizations/ # benchmarking configs ludwig/benchmarking/configs/ +pytest.xml +ludwig.code-workspace From f45d04653fd10e63121830d1513863b15ee5ee58 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Tue, 26 Nov 2024 21:30:14 +0100 Subject: [PATCH 27/67] test pythran 0.9 --- .github/workflows/pytest.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index f0bc5f2a3aa..188c375f604 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -61,7 +61,7 @@ jobs: - name: Setup Linux if: runner.os == 'linux' run: | - sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev libopenblas-dev + sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev pythran~=0.9.0 libopenblas-dev - name: Setup macOS if: runner.os == 'macOS' @@ -263,10 +263,10 @@ jobs: timeout-minutes: 60 steps: - uses: actions/checkout@v2 - - name: Set up Python 3.12 - uses: actions/setup-python@v2 + - name: Set up Python 3.11 + uses: actions/setup-python@v5 with: - python-version: "3.12" + python-version: "3.11" - name: Setup Linux if: runner.os == 'linux' @@ -302,10 +302,10 @@ jobs: timeout-minutes: 15 steps: - uses: actions/checkout@v2 - - name: Set up Python 3.12 + - name: Set up Python 3.11 uses: actions/setup-python@v2 with: - python-version: "3.12" + python-version: "3.11" - name: Setup Linux if: runner.os == 'linux' From 802748c38c44e9a0d416f95b321fbeb375c77592 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Tue, 26 Nov 2024 21:31:24 +0100 Subject: [PATCH 28/67] fix version code --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 188c375f604..f86c50cb34a 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -61,7 +61,7 @@ jobs: - name: Setup Linux if: runner.os == 'linux' run: | - sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev pythran~=0.9.0 libopenblas-dev + sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev "pythran~=0.9.0" libopenblas-dev - name: Setup macOS if: runner.os == 'macOS' From 68566b84611bca3eeb55ed4784ce8db40e7b22b2 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Tue, 26 Nov 2024 21:33:24 +0100 Subject: [PATCH 29/67] second fix version code --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index f86c50cb34a..73765c1100a 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -61,7 +61,7 @@ jobs: - name: Setup Linux if: runner.os == 'linux' run: | - sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev "pythran~=0.9.0" libopenblas-dev + sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev "pythran=0.9.*" libopenblas-dev - name: Setup macOS if: runner.os == 'macOS' From a108bb5a039a2b1b37bd71bea76113a3fc0b87f9 Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Tue, 26 Nov 2024 21:37:35 +0100 Subject: [PATCH 30/67] pyhtran via pip --- .github/workflows/pytest.yml | 2 +- pyproject.toml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 73765c1100a..a76e0e35c83 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -61,7 +61,7 @@ jobs: - name: Setup Linux if: runner.os == 'linux' run: | - sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev "pythran=0.9.*" libopenblas-dev + sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev libopenblas-dev - name: Setup macOS if: runner.os == 'macOS' diff --git a/pyproject.toml b/pyproject.toml index 8ab3de81452..8953743c5f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ dependencies = [ "py-cpuinfo==9.0.0", "pyarrow<15.0.0", "pydantic<2.0", + "pythran>=0.9", "pyxlsb>=1.0.8", "PyYAML==6.0.2", "requests", From 3607bd28cd172bc392a94a63b0908aea3177e13b Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Tue, 26 Nov 2024 21:48:41 +0100 Subject: [PATCH 31/67] added prefer binary --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index a76e0e35c83..f7b32b937ee 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -87,7 +87,7 @@ jobs: python -m pip install -U pip cmake --version - pip install '.[test]' + pip install --prefer-binary '.[test]' pip list shell: bash From c1af754ca0210d9529b8dae78ae6155deeb37fe3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 1 Dec 2024 22:25:55 +0000 Subject: [PATCH 32/67] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docker/ludwig_hatch/Dockerfile | 2 +- ludwig/__about__.py | 2 +- ludwig/api.py | 15 ++-- ludwig/automl/base_config.py | 32 +++------ ludwig/backend/_ray210_compat.py | 9 +-- ludwig/backend/datasource.py | 4 +- ludwig/backend/deepspeed.py | 2 +- ludwig/callbacks.py | 3 +- ludwig/contrib.py | 1 - ludwig/contribs/__init__.py | 1 - ludwig/data/preprocessing.py | 6 +- ludwig/data/sampler.py | 4 +- ludwig/datasets/__init__.py | 8 +-- ludwig/datasets/loaders/mnist.py | 17 ++--- ludwig/datasets/loaders/split_loaders.py | 6 +- ludwig/distributed/_ray_210_compat.py | 3 +- ludwig/distributed/deepspeed.py | 2 +- ludwig/experiment.py | 9 +-- ludwig/explain/captum.py | 5 +- ludwig/features/image_feature.py | 35 ++++----- ludwig/models/llm.py | 2 +- ludwig/preprocess.py | 7 +- ludwig/schema/features/utils.py | 2 - ludwig/schema/llms/model_parameters.py | 7 +- ludwig/schema/split.py | 4 +- ludwig/train.py | 7 +- ludwig/trainers/trainer_lightgbm.py | 3 +- ludwig/utils/automl/type_inference.py | 4 +- ludwig/utils/calibration.py | 3 +- ludwig/utils/data_utils.py | 10 ++- ludwig/utils/horovod_utils.py | 9 ++- ludwig/utils/image_utils.py | 13 ++-- ludwig/utils/torch_utils.py | 12 +--- ludwig/utils/triton_utils.py | 7 +- ludwig/utils/upload_utils.py | 4 +- ludwig/visualize.py | 71 ++++++------------- .../scripts/run_train_horovod.py | 2 +- tests/integration_tests/test_explain.py | 2 +- tests/integration_tests/test_visualization.py | 6 +- .../test_visualization_api.py | 64 ++++++----------- tests/integration_tests/utils.py | 11 +-- tests/ludwig/config_validation/test_checks.py | 4 +- .../marshmallow/test_marshmallow_misc.py | 2 +- tests/ludwig/utils/test_hyperopt_ray_utils.py | 2 +- 44 files changed, 158 insertions(+), 266 deletions(-) diff --git a/docker/ludwig_hatch/Dockerfile b/docker/ludwig_hatch/Dockerfile index a90d9073240..0bb3a038b61 100644 --- a/docker/ludwig_hatch/Dockerfile +++ b/docker/ludwig_hatch/Dockerfile @@ -14,4 +14,4 @@ COPY . . RUN hatch env create RUN hatch build -ENTRYPOINT ["ludwig"] \ No newline at end of file +ENTRYPOINT ["ludwig"] diff --git a/ludwig/__about__.py b/ludwig/__about__.py index 5b74fe54240..9a34ccc9fa7 100644 --- a/ludwig/__about__.py +++ b/ludwig/__about__.py @@ -1 +1 @@ -__version__ = '1.13.0' \ No newline at end of file +__version__ = "1.13.0" diff --git a/ludwig/api.py b/ludwig/api.py index 691219e201a..063853a7104 100644 --- a/ludwig/api.py +++ b/ludwig/api.py @@ -2015,9 +2015,9 @@ def to_torchscript( # Inputs :param model_only (bool, optional): If True, only the ECD model will be converted to Torchscript. Else, - preprocessing and postprocessing steps will also be converted to Torchscript. - :param device (TorchDevice, optional): If None, the model will be converted to Torchscript on the same device to - ensure maximum model parity. + preprocessing and postprocessing steps will also be converted to Torchscript. :param device (TorchDevice, + optional): If None, the model will be converted to Torchscript on the same device to ensure maximum model + parity. # Returns @@ -2086,11 +2086,8 @@ def create_model(config_obj: Union[ModelConfig, dict], random_seed: int = defaul # Inputs :param config_obj: (Union[Config, dict]) Ludwig config object - :param random_seed: (int, default: ludwig default random seed) Random - seed used for weights initialization, - splits and any other random function. - - # Return + :param random_seed: (int, default: ludwig default random seed) Random seed used for weights initialization, + splits and any other random function. # Return :return: (ludwig.models.BaseModel) Instance of the Ludwig model object. """ if isinstance(config_obj, dict): @@ -2136,7 +2133,7 @@ def is_merge_and_unload_set(self) -> bool: # Return - :return (bool): whether merge_and_unload should be done. + :return (bool): whether merge_and_unload should be done. """ # TODO: In the future, it may be possible to move up the model type check into the BaseModel class. return self.config_obj.model_type == MODEL_LLM and self.model.is_merge_and_unload_set() diff --git a/ludwig/automl/base_config.py b/ludwig/automl/base_config.py index 5384c643a50..76e2e248281 100644 --- a/ludwig/automl/base_config.py +++ b/ludwig/automl/base_config.py @@ -79,9 +79,8 @@ class DatasetInfo: def allocate_experiment_resources(resources: Resources) -> dict: """Allocates ray trial resources based on available resources. - # Inputs - :param resources (dict) specifies all available GPUs, CPUs and associated - metadata of the machines (i.e. memory) + # Inputs :param resources (dict) specifies all available GPUs, CPUs and associated metadata of the machines + (i.e. memory) # Return :return: (dict) gpu and cpu resources per trial @@ -260,9 +259,7 @@ def get_dataset_info(df: Union[pd.DataFrame, dd.core.DataFrame]) -> DatasetInfo: inference. # Inputs - :param df: (Union[pd.DataFrame, dd.core.DataFrame]) Pandas or Dask dataframe. - - # Return + :param df: (Union[pd.DataFrame, dd.core.DataFrame]) Pandas or Dask dataframe. # Return :return: (DatasetInfo) Structure containing list of FieldInfo objects. """ source = wrap_data_source(df) @@ -297,9 +294,8 @@ def get_dataset_info_from_source(source: DataSource) -> DatasetInfo: inference. # Inputs - :param source: (DataSource) A wrapper around a data source, which may represent a pandas or Dask dataframe. - - # Return + :param source: (DataSource) A wrapper around a data source, which may represent a pandas or Dask dataframe. # + Return :return: (DatasetInfo) Structure containing list of FieldInfo objects. """ row_count = len(source) @@ -355,10 +351,8 @@ def get_features_config( # Inputs :param fields: (List[FieldInfo]) FieldInfo objects for all fields in dataset - :param row_count: (int) total number of entries in original dataset - :param target_name (str, List[str]) name of target feature - - # Return + :param row_count: (int) total number of entries in original dataset :param target_name (str, List[str]) name of + target feature # Return :return: (dict) section of auto_train config for input_features and output_features """ targets = convert_targets(target_name) @@ -379,10 +373,8 @@ def get_config_from_metadata(metadata: List[FieldMetadata], targets: Set[str] = """Builds input/output feature sections of auto-train config using field metadata. # Inputs - :param metadata: (List[FieldMetadata]) field descriptions - :param targets (Set[str]) names of target features - - # Return + :param metadata: (List[FieldMetadata]) field descriptions :param targets (Set[str]) names of target features # + Return :return: (dict) section of auto_train config for input_features and output_features """ config = { @@ -405,10 +397,8 @@ def get_field_metadata(fields: List[FieldInfo], row_count: int, targets: Set[str # Inputs :param fields: (List[FieldInfo]) FieldInfo objects for all fields in dataset - :param row_count: (int) total number of entries in original dataset - :param targets (Set[str]) names of target features - - # Return + :param row_count: (int) total number of entries in original dataset :param targets (Set[str]) names of target + features # Return :return: (List[FieldMetadata]) list of objects containing metadata for each field """ diff --git a/ludwig/backend/_ray210_compat.py b/ludwig/backend/_ray210_compat.py index a05c64f3e20..afe1b705940 100644 --- a/ludwig/backend/_ray210_compat.py +++ b/ludwig/backend/_ray210_compat.py @@ -19,8 +19,8 @@ class TunerRay210(Tuner): """HACK(geoffrey): This is a temporary fix to support Ray 2.1.0. - Specifically, this Tuner ensures that TunerInternalRay210 is called by the class. - For more details, see TunerInternalRay210. + Specifically, this Tuner ensures that TunerInternalRay210 is called by the class. For more details, see + TunerInternalRay210. """ def __init__( @@ -120,8 +120,9 @@ def restore( class TunerInternalRay210(TunerInternal): """HACK(geoffrey): This is a temporary fix to support Ray 2.1.0. - This TunerInternal ensures that a division by zero is avoided when running zero-CPU hyperopt trials. - This is fixed in ray>=2.2 (but not ray<=2.1) here: https://github.com/ray-project/ray/pull/30598 + This TunerInternal ensures that a division by zero is avoided when running zero-CPU hyperopt trials. This is fixed + in ray>=2.2 (but not ray<=2.1) here: + https://github.com/ray-project/ray/pull/30598 """ def _expected_utilization(self, cpus_per_trial, cpus_total): diff --git a/ludwig/backend/datasource.py b/ludwig/backend/datasource.py index 8b67032c321..aa965da8463 100644 --- a/ludwig/backend/datasource.py +++ b/ludwig/backend/datasource.py @@ -88,8 +88,8 @@ def _open_input_source( The default implementation opens the source path as a sequential input stream. - Implementations that do not support streaming reads (e.g. that require random - access) should override this method. + Implementations that do not support streaming reads (e.g. that require random access) should override this + method. """ if path is None or is_http(path): return contextlib.nullcontext() diff --git a/ludwig/backend/deepspeed.py b/ludwig/backend/deepspeed.py index 41ed3718863..b4661334ea6 100644 --- a/ludwig/backend/deepspeed.py +++ b/ludwig/backend/deepspeed.py @@ -17,7 +17,7 @@ def __init__( fp16: Optional[Dict[str, Any]] = None, bf16: Optional[Dict[str, Any]] = None, compression_training: Optional[Dict[str, Any]] = None, - **kwargs + **kwargs, ): super().__init__(**kwargs) self.zero_optimization = zero_optimization diff --git a/ludwig/callbacks.py b/ludwig/callbacks.py index b7c4673789d..3e08962e855 100644 --- a/ludwig/callbacks.py +++ b/ludwig/callbacks.py @@ -48,7 +48,7 @@ def on_preprocess_end(self, training_set, validation_set, test_set, training_set :param test_set: The test set. :type test_set: ludwig.dataset.base.Dataset :param training_set_metadata: Values inferred from the training set, including preprocessing settings, - vocabularies, feature statistics, etc. Same as training_set_metadata.json. + vocabularies, feature statistics, etc. Same as training_set_metadata.json. """ pass @@ -374,7 +374,6 @@ def prepare_ray_tune(self, train_fn: Callable, tune_config: Dict[str, Any], tune :param train_fn: The function which runs the experiment trial. :param tune_config: The ray tune configuration dictionary. :param tune_callbacks: List of callbacks (not used yet). - :returns: Tuple[Callable, Dict] The train_fn and tune_config, which will be passed to ray tune. """ return train_fn, tune_config diff --git a/ludwig/contrib.py b/ludwig/contrib.py index d69085c5356..3c30bf6116f 100644 --- a/ludwig/contrib.py +++ b/ludwig/contrib.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - """Module for handling contributed support.""" import argparse diff --git a/ludwig/contribs/__init__.py b/ludwig/contribs/__init__.py index dd823ed44e6..c8f7c2ebd68 100644 --- a/ludwig/contribs/__init__.py +++ b/ludwig/contribs/__init__.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - """All contrib classes must implement the `ludwig.callbacks.Callback` interface. If you don't want to handle the call, either provide an empty method with `pass`, or just don't implement the method. diff --git a/ludwig/data/preprocessing.py b/ludwig/data/preprocessing.py index 51887a7d6bf..3754f959bc5 100644 --- a/ludwig/data/preprocessing.py +++ b/ludwig/data/preprocessing.py @@ -2086,12 +2086,12 @@ def _preprocess_file_for_training( :param features: list of all features (input + output) :param dataset: path to the data - :param training_set: training data + :param training_set: training data :param validation_set: validation data :param test_set: test data :param training_set_metadata: train set metadata - :param skip_save_processed_input: if False, the pre-processed data is saved - as .hdf5 files in the same location as the csv files with the same names. + :param skip_save_processed_input: if False, the pre-processed data is saved as .hdf5 files in the same location as + the csv files with the same names. :param preprocessing_params: preprocessing parameters :param random_seed: random seed :return: training, test, validation datasets, training metadata diff --git a/ludwig/data/sampler.py b/ludwig/data/sampler.py index 08487f7fc13..e062e9b0f40 100644 --- a/ludwig/data/sampler.py +++ b/ludwig/data/sampler.py @@ -64,8 +64,8 @@ def __len__(self): def set_epoch(self, epoch): """Sets the epoch for this sampler. - When `shuffle=True`, this ensures all replicas use a different random ordering - for each epoch. Otherwise, the next iteration of this sampler will yield the same ordering. + When `shuffle=True`, this ensures all replicas use a different random ordering for each epoch. Otherwise, the + next iteration of this sampler will yield the same ordering. :param epoch: (int) epoch number """ diff --git a/ludwig/datasets/__init__.py b/ludwig/datasets/__init__.py index 16795366f41..dbe45169376 100644 --- a/ludwig/datasets/__init__.py +++ b/ludwig/datasets/__init__.py @@ -242,7 +242,7 @@ def get_datasets_output_features( :param include_competitions: (bool) whether to include the output features from kaggle competition datasets :param include_data_modalities: (bool) whether to include the data modalities associated with the prediction task :return: (dict) dictionary with the output features for each dataset or a dictionary with the output features for - the specified dataset + the specified dataset """ ordered_configs = OrderedDict(sorted(_get_dataset_configs().items())) competition_datasets = [] @@ -321,10 +321,8 @@ def _get_hf_dataset_and_subsample(dataset_name: str) -> Tuple[str, Optional[str] The dataset name should follow the format "{HF_PREFIX}{hf_id}--{hf_subsample}" - Examples (Dataset Name --> HF ID; HF subsample): - "hf://wikisql" --> "wikisql"; None - "hf://ColumbiaNLP/FLUTE" --> "ColumbiaNLP/FLUTE"; None - "hf://mstz/adult--income" --> "mstz/adult"; "income" + Examples (Dataset Name --> HF ID; HF subsample): "hf://wikisql" --> "wikisql"; None "hf://ColumbiaNLP/FLUTE" --> + "ColumbiaNLP/FLUTE"; None "hf://mstz/adult--income" --> "mstz/adult"; "income" """ dataset_name = dataset_name[len(HF_PREFIX) :] dataset_name = dataset_name.split("--") diff --git a/ludwig/datasets/loaders/mnist.py b/ludwig/datasets/loaders/mnist.py index 28bc1efae33..5da5cc1c245 100644 --- a/ludwig/datasets/loaders/mnist.py +++ b/ludwig/datasets/loaders/mnist.py @@ -58,11 +58,8 @@ def load_unprocessed_dataframe(self, file_paths: List[str]) -> pd.DataFrame: def read_source_dataset(self, dataset="training", path="."): """Create a directory for training and test and extract all the images and labels to this destination. - :args: - dataset (str) : the label for the dataset - path (str): the raw dataset path - :returns: - A tuple of the label for the image, the file array, the size and rows and columns for the image + :args: dataset (str) : the label for the dataset path (str): the raw dataset path + :returns: A tuple of the label for the image, the file array, the size and rows and columns for the image """ if dataset == "training": fname_img = os.path.join(path, "train-images-idx3-ubyte") @@ -87,13 +84,9 @@ def read_source_dataset(self, dataset="training", path="."): def write_output_dataset(self, labels, images, output_dir): """Create output directories where we write out the images. - :args: - labels (str) : the labels for the image - data (np.array) : the binary array corresponding to the image - output_dir (str) : the output directory that we need to write to - path (str): the raw dataset path - :returns: - A tuple of the label for the image, the file array, the size and rows and columns for the image + :args: labels (str) : the labels for the image data (np.array) : the binary array corresponding to the + image output_dir (str) : the output directory that we need to write to path (str): the raw dataset path + :returns: A tuple of the label for the image, the file array, the size and rows and columns for the image """ # create child image output directories output_dirs = [os.path.join(output_dir, str(i)) for i in range(NUM_LABELS)] diff --git a/ludwig/datasets/loaders/split_loaders.py b/ludwig/datasets/loaders/split_loaders.py index 19963c83cd3..f83f605ce15 100644 --- a/ludwig/datasets/loaders/split_loaders.py +++ b/ludwig/datasets/loaders/split_loaders.py @@ -21,10 +21,12 @@ class RandomSplitLoader(DatasetLoader): """Adds a random split column to the dataset, with fixed proportions of: - train: 70% + + train: 70% validation: 10% test: 20% - .""" + . + """ def transform_dataframe(self, dataframe: pd.DataFrame) -> pd.DataFrame: df = super().transform_dataframe(dataframe) diff --git a/ludwig/distributed/_ray_210_compat.py b/ludwig/distributed/_ray_210_compat.py index 59dd2962b5f..e0adfb5512f 100644 --- a/ludwig/distributed/_ray_210_compat.py +++ b/ludwig/distributed/_ray_210_compat.py @@ -8,8 +8,7 @@ class HorovodTrainerRay210(HorovodTrainer): """HACK(geoffrey): This is a temporary fix to support Ray 2.1.0. - Specifically, this Trainer ensures that TunerRay210 is called by the class. - For more details, see TunerRay210. + Specifically, this Trainer ensures that TunerRay210 is called by the class. For more details, see TunerRay210. """ def fit(self) -> Result: diff --git a/ludwig/distributed/deepspeed.py b/ludwig/distributed/deepspeed.py index 9b3a04b5135..a5677f66538 100644 --- a/ludwig/distributed/deepspeed.py +++ b/ludwig/distributed/deepspeed.py @@ -47,7 +47,7 @@ def __init__( fp16: Optional[Dict[str, Any]] = None, bf16: Optional[Dict[str, Any]] = None, compression_training: Optional[Dict[str, Any]] = None, - **kwargs + **kwargs, ): # If we're initializing from a `deepspeed` CLI command, deepspeed will have already been initialized, as # indicated by the presence of the LOCAL_RANK var. Otherwise, we're initializing from Ray / torchrun, and will diff --git a/ludwig/experiment.py b/ludwig/experiment.py index 73e5a3fafa0..7615a4f9783 100644 --- a/ludwig/experiment.py +++ b/ludwig/experiment.py @@ -257,15 +257,12 @@ def kfold_cross_validate_cli( # Inputs :param k_fold: (int) number of folds to create for the cross-validation - :param config: (Union[str, dict], default: None) a dictionary or file path - containing model configuration. Refer to the [User Guide] - (http://ludwig.ai/user_guide/#model-config) for details. + :param config: (Union[str, dict], default: None) a dictionary or file path containing model configuration. Refer to + the [User Guide] (http://ludwig.ai/user_guide/#model-config) for details. :param dataset: (string, default: None) :param output_directory: (string, default: 'results') :param random_seed: (int) Random seed used k-fold splits. - :param skip_save_k_fold_split_indices: (boolean, default: False) Disables - saving k-fold split indices - + :param skip_save_k_fold_split_indices: (boolean, default: False) Disables saving k-fold split indices :return: None """ diff --git a/ludwig/explain/captum.py b/ludwig/explain/captum.py index 081568e18f7..643d5d2ddc3 100644 --- a/ludwig/explain/captum.py +++ b/ludwig/explain/captum.py @@ -273,10 +273,7 @@ def get_input_tensors( # Inputs :param model: The LudwigModel to use for encoding. - :param input_set: The input data to encode of shape [batch size, num input features]. - - # Return - + :param input_set: The input data to encode of shape [batch size, num input features]. # Return :return: A list of variables, one for each input feature. Shape of each variable is [batch size, embedding size]. """ # Ignore sample_ratio and sample_size from the model config, since we want to explain all the data. diff --git a/ludwig/features/image_feature.py b/ludwig/features/image_feature.py index c9c8416b8b8..a1488440f41 100644 --- a/ludwig/features/image_feature.py +++ b/ludwig/features/image_feature.py @@ -465,27 +465,20 @@ def _read_image_if_bytes_obj_and_resize( standardize_image: str, channel_class_map: torch.Tensor, ) -> Optional[np.ndarray]: - """ - :param img_entry Union[bytes, torch.Tensor, np.ndarray, str]: if str file path to the - image else torch.Tensor of the image itself - :param img_width: expected width of the image - :param img_height: expected height of the image - :param should_resize: Should the image be resized? - :param resize_method: type of resizing method - :param num_channels: expected number of channels in the first image - :param user_specified_num_channels: did the user specify num channels? - :param standardize_image: specifies whether to standarize image with imagenet1k specifications - :param channel_class_map: A tensor mapping channel values to classes, where dim=0 is the class - :return: image object as a numpy array - - Helper method to read and resize an image according to model definition. - If the user doesn't specify a number of channels, we use the first image - in the dataset as the source of truth. If any image in the dataset - doesn't have the same number of channels as the first image, - raise an exception. - - If the user specifies a number of channels, we try to convert all the - images to the specifications by dropping channels/padding 0 channels + """:param img_entry Union[bytes, torch.Tensor, np.ndarray, str]: if str file path to the image else + torch.Tensor of the image itself :param img_width: expected width of the image :param img_height: expected + height of the image :param should_resize: Should the image be resized? :param resize_method: type of + resizing method :param num_channels: expected number of channels in the first image :param + user_specified_num_channels: did the user specify num channels? :param standardize_image: specifies whether + to standarize image with imagenet1k specifications :param channel_class_map: A tensor mapping channel + values to classes, where dim=0 is the class :return: image object as a numpy array. + + Helper method to read and resize an image according to model definition. If the user doesn't specify a number of + channels, we use the first image in the dataset as the source of truth. If any image in the dataset doesn't have + the same number of channels as the first image, raise an exception. + + If the user specifies a number of channels, we try to convert all the images to the specifications by dropping + channels/padding 0 channels """ if isinstance(img_entry, bytes): diff --git a/ludwig/models/llm.py b/ludwig/models/llm.py index 862ee55ff4a..3a970cd2013 100644 --- a/ludwig/models/llm.py +++ b/ludwig/models/llm.py @@ -364,7 +364,7 @@ def is_merge_and_unload_set(self) -> bool: # Return - :return (bool): whether merge_and_unload should be done. + :return (bool): whether merge_and_unload should be done. """ return ( self.config_obj.adapter is not None diff --git a/ludwig/preprocess.py b/ludwig/preprocess.py index f4b427bd076..4b268d6482e 100644 --- a/ludwig/preprocess.py +++ b/ludwig/preprocess.py @@ -45,11 +45,10 @@ def preprocess_cli( logging_level: int = logging.INFO, callbacks: List[Callback] = None, backend: Union[Backend, str] = None, - **kwargs + **kwargs, ) -> None: - """*train* defines the entire training procedure used by Ludwig's - internals. Requires most of the parameters that are taken into the model. - Builds a full ludwig model and performs the training. + """*train* defines the entire training procedure used by Ludwig's internals. Requires most of the parameters + that are taken into the model. Builds a full ludwig model and performs the training. :param preprocessing_config: (Union[str, dict]) in-memory representation of config or string path to a YAML config file. diff --git a/ludwig/schema/features/utils.py b/ludwig/schema/features/utils.py index 34abd2eee15..9ef5c67a368 100644 --- a/ludwig/schema/features/utils.py +++ b/ludwig/schema/features/utils.py @@ -18,13 +18,11 @@ input_mixin_registry = Registry() output_mixin_registry = Registry() - """ As of Ludwig v0.7, ECD models support the full range of feature parameters available in Ludwig, so any feature schema can be registered into it. See `BinaryDefaultsConfig` for an example. """ ecd_defaults_config_registry = Registry() - """ As of Ludwig v0.7, GBM models only support certain feature types and those features may only contain preprocessing parameters (in comparison, ECD features can specify encoders and other parameters). This is why the two model types have diff --git a/ludwig/schema/llms/model_parameters.py b/ludwig/schema/llms/model_parameters.py index 9c8ef70b02c..aad909eec56 100644 --- a/ludwig/schema/llms/model_parameters.py +++ b/ludwig/schema/llms/model_parameters.py @@ -12,10 +12,9 @@ class RoPEScalingConfig(schema_utils.BaseMarshmallowConfig): """Dynamic RoPE-scaling (rotary position embeddings) to extend the context length of LLM like LLaMA, GPT-NeoX, or Falcon. - This parameter is a dictionary containing the scaling configuration - for the RoPE embeddings. Currently supports three scaling strategies: linear and dynamic. Their - scaling factor must be an float greater than 1. The expected format is {'type': strategy name, - 'factor': scaling factor} + This parameter is a dictionary containing the scaling configuration for the RoPE embeddings. Currently supports + three scaling strategies: linear and dynamic. Their scaling factor must be an float greater than 1. The expected + format is {'type': strategy name, 'factor': scaling factor} """ def __post_init__(self): diff --git a/ludwig/schema/split.py b/ludwig/schema/split.py index 36410ad467e..25779be054d 100644 --- a/ludwig/schema/split.py +++ b/ludwig/schema/split.py @@ -122,8 +122,8 @@ class DateTimeSplitConfig(BaseSplitConfig): class HashSplitConfig(BaseSplitConfig): """This Dataclass generates a schema for the hash splitting config. - This is useful for deterministically splitting on a unique ID. Even when additional rows are added to the dataset - in the future, each ID will retain its original split assignment. + This is useful for deterministically splitting on a unique ID. Even when additional rows are added to the dataset in + the future, each ID will retain its original split assignment. This approach does not guarantee that the split proportions will be assigned exactly, but the larger the dataset, the more closely the assignment should match the given proportions. diff --git a/ludwig/train.py b/ludwig/train.py index fb0f28ff003..37e6bb6f618 100644 --- a/ludwig/train.py +++ b/ludwig/train.py @@ -59,11 +59,10 @@ def train_cli( backend: Union[Backend, str] = None, random_seed: int = default_random_seed, logging_level: int = logging.INFO, - **kwargs + **kwargs, ) -> None: - """*train* defines the entire training procedure used by Ludwig's - internals. Requires most of the parameters that are taken into the model. - Builds a full ludwig model and performs the training. + """*train* defines the entire training procedure used by Ludwig's internals. Requires most of the parameters + that are taken into the model. Builds a full ludwig model and performs the training. :param config: (Union[str, dict]) in-memory representation of config or string path to a YAML config file. diff --git a/ludwig/trainers/trainer_lightgbm.py b/ludwig/trainers/trainer_lightgbm.py index 74f0df1d98d..d15982d71a9 100644 --- a/ludwig/trainers/trainer_lightgbm.py +++ b/ludwig/trainers/trainer_lightgbm.py @@ -419,8 +419,7 @@ def check_progress_on_validation( ) -> bool: """Checks the history of validation scores. - Uses history of validation scores to decide whether training - should stop. + Uses history of validation scores to decide whether training should stop. Saves the model if scores have improved. """ diff --git a/ludwig/utils/automl/type_inference.py b/ludwig/utils/automl/type_inference.py index d28cfbd56e2..f55f96e782f 100644 --- a/ludwig/utils/automl/type_inference.py +++ b/ludwig/utils/automl/type_inference.py @@ -22,9 +22,7 @@ def infer_type(field: FieldInfo, missing_value_percent: float, row_count: int) - # Inputs :param field: (FieldInfo) object describing field :param missing_value_percent: (float) percent of missing values in the column - :param row_count: (int) total number of entries in original dataset - - # Return + :param row_count: (int) total number of entries in original dataset # Return :return: (str) feature type """ if field.dtype == DATE or field.dtype.startswith("datetime"): diff --git a/ludwig/utils/calibration.py b/ludwig/utils/calibration.py index 3ecdc099708..6b5458df5f3 100644 --- a/ludwig/utils/calibration.py +++ b/ludwig/utils/calibration.py @@ -304,7 +304,8 @@ def regularization_terms(self) -> torch.Tensor: """Off-Diagonal and Intercept Regularisation (ODIR). Described in "Beyond temperature scaling: Obtaining well-calibrated multiclass probabilities with Dirichlet - calibration" https://proceedings.neurips.cc/paper/2019/file/8ca01ea920679a0fe3728441494041b9-Paper.pdf + calibration" + https://proceedings.neurips.cc/paper/2019/file/8ca01ea920679a0fe3728441494041b9-Paper.pdf """ off_diagonal_entries = torch.masked_select(self.w, ~torch.eye(self.num_classes, dtype=bool)) weight_matrix_loss = self.off_diagonal_l2 * torch.linalg.vector_norm(off_diagonal_entries) diff --git a/ludwig/utils/data_utils.py b/ludwig/utils/data_utils.py index 471605915a8..b06f753a174 100644 --- a/ludwig/utils/data_utils.py +++ b/ludwig/utils/data_utils.py @@ -700,14 +700,12 @@ def class_counts(dataset, labels_field): def load_from_file(file_name, field=None, dtype=int, ground_truth_split=2): """Load experiment data from supported file formats. - Experiment data can be test/train statistics, model predictions, - probability, ground truth, ground truth metadata. + Experiment data can be test/train statistics, model predictions, probability, ground truth, ground truth metadata. :param file_name: Path to file to be loaded :param field: Target Prediction field. :param dtype: - :param ground_truth_split: Ground truth split filter where 0 is train 1 is - validation and 2 is test split. By default test split is used when loading - ground truth from hdf5. + :param ground_truth_split: Ground truth split filter where 0 is train 1 is validation and 2 is test split. By + default test split is used when loading ground truth from hdf5. :return: Experiment data as array """ if file_name.endswith(".hdf5") and field is not None: @@ -753,7 +751,7 @@ def add_sequence_feature_column(df, col_name, seq_length): delimited strings composed of preceding values of the same column up to seq_length. For example values of the i-th row of the new column will be a space-delimited string of df[col_name][i-seq_length]. - :param df: input dataframe + :param df: input dataframe :param col_name: column name containing sequential data :param seq_length: length of an array of preceeding column values to use """ diff --git a/ludwig/utils/horovod_utils.py b/ludwig/utils/horovod_utils.py index f4482596a00..a8f6106e8ef 100644 --- a/ludwig/utils/horovod_utils.py +++ b/ludwig/utils/horovod_utils.py @@ -45,14 +45,13 @@ def has_horovodrun(): def gather_all_tensors(result: torch.Tensor, group: Optional[Any] = None) -> List[torch.Tensor]: """Function to gather all tensors from several processes onto a list that is broadcast to all processes. - Works on tensors that have the same number of dimensions, but where each dimension may differ. In this case - tensors are padded, gathered and then trimmed to secure equal workload for all processes. + Works on tensors that have the same number of dimensions, but where each dimension may differ. In this case tensors + are padded, gathered and then trimmed to secure equal workload for all processes. :param result: the value to sync :param group: the process group to gather results from (not supported: always uses world) - - :return: list with size equal to the process group where gathered_result[i] - corresponds to result tensor from process i + :return: list with size equal to the process group where gathered_result[i] corresponds to result tensor from + process i """ if group is not None: raise ValueError("Horovod does not support allgather using a subcommunicator at this time. " "Unset `group`.") diff --git a/ludwig/utils/image_utils.py b/ludwig/utils/image_utils.py index 6395a2cbe17..a2fae951777 100644 --- a/ludwig/utils/image_utils.py +++ b/ludwig/utils/image_utils.py @@ -208,7 +208,7 @@ def pad( img: torch.Tensor, new_size: Union[int, Tuple[int, int]], ) -> torch.Tensor: - """torchscript-compatible implementation of pad. + """Torchscript-compatible implementation of pad. Args: img (torch.Tensor): image with shape [..., height, width] to pad @@ -231,7 +231,7 @@ def crop( img: torch.Tensor, new_size: Union[int, Tuple[int, int]], ) -> torch.Tensor: - """torchscript-compatible implementation of crop. + """Torchscript-compatible implementation of crop. Args: img (torch.Tensor): image with shape [..., height, width] to crop @@ -246,7 +246,7 @@ def crop( @DeveloperAPI def crop_or_pad(img: torch.Tensor, new_size: Union[int, Tuple[int, int]]): - """torchscript-compatible implementation of resize using constants.CROP_OR_PAD. + """Torchscript-compatible implementation of resize using constants.CROP_OR_PAD. Args: img (torch.Tensor): image with shape [..., height, width] to resize @@ -271,7 +271,7 @@ def resize_image( crop_or_pad_constant: str = CROP_OR_PAD, interpolate_constant: str = INTERPOLATE, ) -> torch.Tensor: - """torchscript-compatible implementation of resize. + """Torchscript-compatible implementation of resize. Args: img (torch.Tensor): image with shape [..., height, width] to resize @@ -442,9 +442,8 @@ def to_tuple(v: Union[int, Tuple[int, int]]) -> Tuple[int, int]: def to_np_tuple(prop: Union[int, Iterable]) -> np.ndarray: """Creates a np array of length 2 from a Conv2D property. - E.g., stride=(2, 3) gets converted into np.array([2, 3]), where the - height_stride = 2 and width_stride = 3. stride=2 gets converted into - np.array([2, 2]). + E.g., stride=(2, 3) gets converted into np.array([2, 3]), where the height_stride = 2 and width_stride = 3. stride=2 + gets converted into np.array([2, 2]). """ if type(prop) is int: return np.ones(2).astype(int) * prop diff --git a/ludwig/utils/torch_utils.py b/ludwig/utils/torch_utils.py index 10be7c762c8..abdcce0d0c5 100644 --- a/ludwig/utils/torch_utils.py +++ b/ludwig/utils/torch_utils.py @@ -60,9 +60,7 @@ def place_on_device(x, device): def sequence_length_2D(sequence: torch.Tensor) -> torch.Tensor: """Returns the number of non-padding elements per sequence in batch. - :param sequence: (torch.Tensor) A 2D tensor of shape [batch size x max sequence length]. - - # Return + :param sequence: (torch.Tensor) A 2D tensor of shape [batch size x max sequence length]. # Return :returns: (torch.Tensor) The count on non-zero elements per sequence. """ used = (sequence != SpecialSymbol.PADDING.value).type(torch.int32) @@ -74,9 +72,7 @@ def sequence_length_2D(sequence: torch.Tensor) -> torch.Tensor: def sequence_length_3D(sequence: torch.Tensor) -> torch.Tensor: """Returns the number of non-zero elements per sequence in batch. - :param sequence: (torch.Tensor) A 3D tensor of shape [batch size x max sequence length x hidden size]. - - # Return + :param sequence: (torch.Tensor) A 3D tensor of shape [batch size x max sequence length x hidden size]. # Return :returns: (torch.Tensor) The count on non-zero elements per sequence. """ used = torch.sign(torch.amax(torch.abs(sequence), dim=2)) @@ -92,9 +88,7 @@ def sequence_mask(lengths: torch.Tensor, maxlen: Optional[int] = None, dtype: to :param lengths: (torch.Tensor) A 1d integer tensor of shape [batch size]. :param maxlen: (Optional[int]) The maximum sequence length. If not specified, the max(lengths) is used. - :param dtype: (type) The type to output. - - # Return + :param dtype: (type) The type to output. # Return :returns: (torch.Tensor) A sequence mask tensor of shape (batch_size x maxlen). """ if maxlen is None: diff --git a/ludwig/utils/triton_utils.py b/ludwig/utils/triton_utils.py index b0082d317f1..3d81cdb1069 100644 --- a/ludwig/utils/triton_utils.py +++ b/ludwig/utils/triton_utils.py @@ -739,8 +739,7 @@ def export_triton( # Inputs :param model: (LudwigModel) A ludwig model. - :param data_example: (pd.DataFrame) an example from the dataset. - Used to get dimensions throughout the pipeline. + :param data_example: (pd.DataFrame) an example from the dataset. Used to get dimensions throughout the pipeline. :param output_path: (str) The output path for the model repository. :param model_name: (str) The optional model name. :param model_version: (Union[int,str]) The optional model verison. @@ -749,9 +748,7 @@ def export_triton( :param predictor_num_instances: (int) number of instances for the predictor. :param postprocessor_num_instances: (int) number of instances for the postprocessor (on CPU). :param predictor_max_batch_size: (int) max_batch_size parameter for the predictor Triton config. - :param max_queue_delay_microseconds: (int) max_queue_delay_microseconds for all Triton configs. - - # Return + :param max_queue_delay_microseconds: (int) max_queue_delay_microseconds for all Triton configs. # Return :return: (List[TritonArtifact]) list of TritonArtifacts that contains information about exported artifacts. """ diff --git a/ludwig/utils/upload_utils.py b/ludwig/utils/upload_utils.py index f3aed5f8bea..51a0fb87efd 100644 --- a/ludwig/utils/upload_utils.py +++ b/ludwig/utils/upload_utils.py @@ -188,8 +188,8 @@ def _validate_upload_parameters( ) trained_model_artifacts_path = os.path.join(model_path, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME) - """ - Make sure the model's saved artifacts either contain: + """Make sure the model's saved artifacts either contain: + 1. pytorch_model.bin -> regular model training, such as ECD or for LLMs 2. adapter_model.bin or adapter_model.safetensors -> LLM fine-tuning using PEFT diff --git a/ludwig/visualize.py b/ludwig/visualize.py index 61c41e0fbb4..feb97897072 100644 --- a/ludwig/visualize.py +++ b/ludwig/visualize.py @@ -60,7 +60,7 @@ def _convert_ground_truth(ground_truth, feature_metadata, ground_truth_apply_idx, positive_label): - """converts non-np.array representation to be np.array.""" + """Converts non-np.array representation to be np.array.""" if "str2idx" in feature_metadata: # categorical output feature as binary ground_truth = _vectorize_ground_truth(ground_truth, feature_metadata["str2idx"], ground_truth_apply_idx) @@ -102,8 +102,7 @@ def validate_conf_thresholds_and_probabilities_2d_3d(probabilities, threshold_ou """Ensure probabilities and threshold output_feature_names arrays have two members each. :param probabilities: List of probabilities per model - :param threshhold_output_feature_names: List of threshhold output_feature_names per model - :raise: RuntimeError + :param threshhold_output_feature_names: List of threshhold output_feature_names per model :raise: RuntimeError """ validation_mapping = { "probabilities": probabilities, @@ -122,9 +121,8 @@ def load_data_for_viz(load_type, model_file_statistics, dtype=int, ground_truth_ """Load JSON files (training stats, evaluation stats...) for a list of models. :param load_type: type of the data loader to be used. - :param model_file_statistics: JSON file or list of json files containing any - model experiment stats. - :return List of training statistics loaded as json objects. + :param model_file_statistics: JSON file or list of json files containing any model experiment stats. :return List of + training statistics loaded as json objects. """ supported_load_types = dict( load_json=load_json, @@ -145,9 +143,8 @@ def load_training_stats_for_viz(load_type, model_file_statistics, dtype=int, gro """Load model file data (specifically training stats) for a list of models. :param load_type: type of the data loader to be used. - :param model_file_statistics: JSON file or list of json files containing any - model experiment stats. - :return List of model statistics loaded as TrainingStats objects. + :param model_file_statistics: JSON file or list of json files containing any model experiment stats. :return List of + model statistics loaded as TrainingStats objects. """ stats_per_model = load_data_for_viz( load_type, model_file_statistics, dtype=dtype, ground_truth_split=ground_truth_split @@ -213,7 +210,7 @@ def _validate_output_feature_name_from_test_stats(output_feature_name, test_stat def _encode_categorical_feature(raw: np.array, str2idx: dict) -> np.array: - """encodes raw categorical string value to encoded numeric value. + """Encodes raw categorical string value to encoded numeric value. Args: :param raw: (np.array) string categorical representation @@ -326,10 +323,8 @@ def generate_filename_template_path(output_dir, filename_template): Create output directory if yet does exist. :param output_dir: Directory that will contain the filename_template file - :param filename_template: name of the file template to be appended to the - filename template path - :return: path to filename template inside the output dir or None if the - output dir is None + :param filename_template: name of the file template to be appended to the filename template path + :return: path to filename template inside the output dir or None if the output dir is None """ if output_dir: os.makedirs(output_dir, exist_ok=True) @@ -343,12 +338,8 @@ def compare_performance_cli(test_statistics: Union[str, List[str]], **kwargs: di # Inputs - :param test_statistics: (Union[str, List[str]]) path to experiment test - statistics file. - :param kwargs: (dict) parameters for the requested visualizations. - - # Return - + :param test_statistics: (Union[str, List[str]]) path to experiment test statistics file. + :param kwargs: (dict) parameters for the requested visualizations. # Return :return None: """ test_stats_per_model = load_data_for_viz("load_json", test_statistics) @@ -361,12 +352,8 @@ def learning_curves_cli(training_statistics: Union[str, List[str]], **kwargs: di # Inputs - :param training_statistics: (Union[str, List[str]]) path to experiment - training statistics file - :param kwargs: (dict) parameters for the requested visualizations. - - # Return - + :param training_statistics: (Union[str, List[str]]) path to experiment training statistics file + :param kwargs: (dict) parameters for the requested visualizations. # Return :return None: """ train_stats_per_model = load_training_stats_for_viz("load_json", training_statistics) @@ -588,13 +575,9 @@ def compare_classifiers_multiclass_multimetric_cli( # Inputs - :param test_statistics: (Union[str, List[str]]) path to experiment test - statistics file. + :param test_statistics: (Union[str, List[str]]) path to experiment test statistics file. :param ground_truth_metadata: (str) path to ground truth metadata file. - :param kwargs: (dict) parameters for the requested visualizations. - - # Return - + :param kwargs: (dict) parameters for the requested visualizations. # Return :return None: """ test_stats_per_model = load_data_for_viz("load_json", test_statistics) @@ -1175,12 +1158,8 @@ def roc_curves_from_test_statistics_cli(test_statistics: Union[str, List[str]], """Load model data from files to be shown by roc_curves_from_test_statistics_cli. # Inputs - :param test_statistics: (Union[str, List[str]]) path to experiment test - statistics file. - :param kwargs: (dict) parameters for the requested visualizations. - - # Return - + :param test_statistics: (Union[str, List[str]]) path to experiment test statistics file. + :param kwargs: (dict) parameters for the requested visualizations. # Return :return None: """ test_stats_per_model = load_data_for_viz("load_json", test_statistics) @@ -1325,13 +1304,9 @@ def confusion_matrix_cli(test_statistics: Union[str, List[str]], ground_truth_me # Inputs - :param test_statistics: (Union[str, List[str]]) path to experiment test - statistics file. + :param test_statistics: (Union[str, List[str]]) path to experiment test statistics file. :param ground_truth_metadata: (str) path to ground truth metadata file. - :param kwargs: (dict) parameters for the requested visualizations. - - # Return - + :param kwargs: (dict) parameters for the requested visualizations. # Return :return None: """ test_stats_per_model = load_data_for_viz("load_json", test_statistics) @@ -1345,13 +1320,9 @@ def frequency_vs_f1_cli(test_statistics: Union[str, List[str]], ground_truth_met # Inputs - :param test_statistics: (Union[str, List[str]]) path to experiment test - statistics file. + :param test_statistics: (Union[str, List[str]]) path to experiment test statistics file. :param ground_truth_metadata: (str) path to ground truth metadata file. - :param kwargs: (dict) parameters for the requested visualizations. - - # Return - + :param kwargs: (dict) parameters for the requested visualizations. # Return :return None: """ test_stats_per_model = load_data_for_viz("load_json", test_statistics) diff --git a/tests/integration_tests/scripts/run_train_horovod.py b/tests/integration_tests/scripts/run_train_horovod.py index a40beabe5ef..8cf7182f52d 100644 --- a/tests/integration_tests/scripts/run_train_horovod.py +++ b/tests/integration_tests/scripts/run_train_horovod.py @@ -82,5 +82,5 @@ def test_horovod_intent_classification(rel_path, input_features, output_features args.rel_path, json.loads(args.input_features), json.loads(args.output_features), - **json.loads(args.ludwig_kwargs) + **json.loads(args.ludwig_kwargs), ) diff --git a/tests/integration_tests/test_explain.py b/tests/integration_tests/test_explain.py index d0b183734c0..9e1541bb476 100644 --- a/tests/integration_tests/test_explain.py +++ b/tests/integration_tests/test_explain.py @@ -166,7 +166,7 @@ def run_test_explainer_api( tmpdir, input_features=None, batch_size=128, - **kwargs + **kwargs, ): image_dest_folder = os.path.join(tmpdir, "generated_images") diff --git a/tests/integration_tests/test_visualization.py b/tests/integration_tests/test_visualization.py index 35893f625c7..0060e322b90 100644 --- a/tests/integration_tests/test_visualization.py +++ b/tests/integration_tests/test_visualization.py @@ -80,8 +80,7 @@ def get_output_feature_name(experiment_dir, output_feature=0): :param experiment_dir: Path to the experiment directory :param output_feature: position of the output feature the description.json - :return output_feature_name: name of the first output feature name - from the experiment + :return output_feature_name: name of the first output feature name from the experiment """ description_file = os.path.join(experiment_dir, DESCRIPTION_FILE_NAME) with open(description_file, "rb") as f: @@ -179,8 +178,7 @@ def test_visualization_confusion_matrix_output_saved(csv_filename): def test_visualization_compare_performance_output_saved(csv_filename): """Ensure pdf and png figures from the experiments can be saved. - Compare performance between two models. To reduce test complexity - one model is compared to it self. + Compare performance between two models. To reduce test complexity one model is compared to it self. :param csv_filename: csv fixture from tests.conftest.csv_filename :return: None diff --git a/tests/integration_tests/test_visualization_api.py b/tests/integration_tests/test_visualization_api.py index fac28182be1..0f10bc5c922 100644 --- a/tests/integration_tests/test_visualization_api.py +++ b/tests/integration_tests/test_visualization_api.py @@ -118,9 +118,7 @@ def _create_model(self): def obtain_df_splits(data_csv): """Split input data csv file in to train, validation and test dataframes. - :param data_csv: Input data CSV file. - :return test_df, train_df, val_df: Train, validation and test dataframe - splits + :param data_csv: Input data CSV file. :return test_df, train_df, val_df: Train, validation and test dataframe splits """ data_df = read_csv(data_csv) # Obtain data split array mapping data rows to split type @@ -134,8 +132,7 @@ def obtain_df_splits(data_csv): def test_learning_curves_vis_api(experiment_to_use, training_only): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -158,8 +155,7 @@ def test_learning_curves_vis_api(experiment_to_use, training_only): def test_compare_performance_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -183,8 +179,7 @@ def test_compare_performance_vis_api(experiment_to_use): def test_compare_classifier_performance_from_prob_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -211,8 +206,7 @@ def test_compare_classifier_performance_from_prob_vis_api(experiment_to_use): def test_compare_classifier_performance_from_pred_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -238,8 +232,7 @@ def test_compare_classifier_performance_from_pred_vis_api(experiment_to_use): def test_compare_classifiers_performance_subset_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -267,8 +260,7 @@ def test_compare_classifiers_performance_subset_vis_api(experiment_to_use): def test_compare_classifiers_performance_changing_k_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -295,8 +287,7 @@ def test_compare_classifiers_performance_changing_k_vis_api(experiment_to_use): def test_compare_classifiers_multiclass_multimetric_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -322,8 +313,7 @@ def test_compare_classifiers_multiclass_multimetric_vis_api(experiment_to_use): def test_compare_classifiers_predictions_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -349,8 +339,7 @@ def test_compare_classifiers_predictions_vis_api(experiment_to_use): def test_compare_classifiers_predictions_distribution_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -376,8 +365,7 @@ def test_compare_classifiers_predictions_distribution_vis_api(experiment_to_use) def test_confidence_thresholding_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -403,8 +391,7 @@ def test_confidence_thresholding_vis_api(experiment_to_use): def test_confidence_thresholding_data_vs_acc_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -430,8 +417,7 @@ def test_confidence_thresholding_data_vs_acc_vis_api(experiment_to_use): def test_confidence_thresholding_data_vs_acc_subset_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -459,8 +445,7 @@ def test_confidence_thresholding_data_vs_acc_subset_vis_api(experiment_to_use): def test_confidence_thresholding_data_vs_acc_subset_per_class_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -635,8 +620,7 @@ def test_confidence_thresholding_2thresholds_3d_vis_api(csv_filename): def test_binary_threshold_vs_metric_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -665,8 +649,7 @@ def test_binary_threshold_vs_metric_vis_api(experiment_to_use): def test_precision_recall_curves_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -727,8 +710,7 @@ def test_precision_recall_curves_from_test_statistics_vis_api(csv_filename): def test_roc_curves_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -789,8 +771,7 @@ def test_roc_curves_from_test_statistics_vis_api(csv_filename): def test_calibration_1_vs_all_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -817,8 +798,7 @@ def test_calibration_1_vs_all_vis_api(experiment_to_use): def test_calibration_multiclass_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -844,8 +824,7 @@ def test_calibration_multiclass_vis_api(experiment_to_use): def test_confusion_matrix_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use @@ -872,8 +851,7 @@ def test_confusion_matrix_vis_api(experiment_to_use): def test_frequency_vs_f1_vis_api(experiment_to_use): """Ensure pdf and png figures can be saved via visualization API call. - :param experiment_to_use: Object containing trained model and results to - test visualization + :param experiment_to_use: Object containing trained model and results to test visualization :return: None """ experiment = experiment_to_use diff --git a/tests/integration_tests/utils.py b/tests/integration_tests/utils.py index a6591f28510..fd7886199c3 100644 --- a/tests/integration_tests/utils.py +++ b/tests/integration_tests/utils.py @@ -23,7 +23,8 @@ import tempfile import traceback import uuid -#from distutils.util import strtobool + +# from distutils.util import strtobool from typing import Any, Dict, List, Optional, Set, Tuple, TYPE_CHECKING, Union import cloudpickle @@ -123,14 +124,16 @@ def train(self, *args, save_path=MODEL_FILE_NAME, **kwargs): with tempfile.TemporaryDirectory() as tmpdir: return super().train(*args, save_path=tmpdir, **kwargs) + def str2bool(val): val = val.lower() - if val in ('y', 'yes', 't', 'true', 'on', '1'): + if val in ("y", "yes", "t", "true", "on", "1"): return 1 - elif val in ('n', 'no', 'f', 'false', 'off', '0'): + elif val in ("n", "no", "f", "false", "off", "0"): return 0 else: - raise ValueError("invalid truth value {!r}".format(val)) + raise ValueError(f"invalid truth value {val!r}") + def parse_flag_from_env(key, default=False): try: diff --git a/tests/ludwig/config_validation/test_checks.py b/tests/ludwig/config_validation/test_checks.py index c614a195191..7082cce0bce 100644 --- a/tests/ludwig/config_validation/test_checks.py +++ b/tests/ludwig/config_validation/test_checks.py @@ -2,9 +2,7 @@ Note that all testing should be done with the public API, rather than individual checks. -``` -ModelConfig.from_dict(config) -``` +``` ModelConfig.from_dict(config) ``` """ import contextlib diff --git a/tests/ludwig/marshmallow/test_marshmallow_misc.py b/tests/ludwig/marshmallow/test_marshmallow_misc.py index 42aecf769f6..78ba8326459 100644 --- a/tests/ludwig/marshmallow/test_marshmallow_misc.py +++ b/tests/ludwig/marshmallow/test_marshmallow_misc.py @@ -8,7 +8,7 @@ @dataclass class CustomTestSchema(BaseMarshmallowConfig): - """sample docstring.""" + """Sample docstring.""" foo: int = 5 "foo (default: 5)" diff --git a/tests/ludwig/utils/test_hyperopt_ray_utils.py b/tests/ludwig/utils/test_hyperopt_ray_utils.py index 5d37d0aa292..0b4f180f4e8 100644 --- a/tests/ludwig/utils/test_hyperopt_ray_utils.py +++ b/tests/ludwig/utils/test_hyperopt_ray_utils.py @@ -78,7 +78,7 @@ def test_grid_strategy(key): "minimize", "validation", search_alg={TYPE: "variant_generator"}, - **{"type": "ray", "num_samples": 2, "scheduler": {"type": "fifo"}} + **{"type": "ray", "num_samples": 2, "scheduler": {"type": "fifo"}}, ) search_space = hyperopt_executor.search_space From 5279f0cbf32a9e8a3c81b1aa520dbe916e9fd361 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Sun, 1 Dec 2024 15:15:28 -0800 Subject: [PATCH 33/67] updated wandb version --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8953743c5f7..8b21cba93c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,7 +95,8 @@ test = [ # Logging and experiment tracking "aim", - "wandb<0.12.11", + #"wandb<0.12.11", + "wandb", "comet_ml", "mlflow", "sqlalchemy<2", # Pinned for aimstack compatibility From 500b5fc0a676b5c778cfdbe1b9007d535ad4d89d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 2 Dec 2024 00:33:00 +0000 Subject: [PATCH 34/67] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ludwig/automl/base_config.py | 3 +-- ludwig/schema/features/utils.py | 14 ++++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/ludwig/automl/base_config.py b/ludwig/automl/base_config.py index 76e2e248281..2cf4265492e 100644 --- a/ludwig/automl/base_config.py +++ b/ludwig/automl/base_config.py @@ -294,8 +294,7 @@ def get_dataset_info_from_source(source: DataSource) -> DatasetInfo: inference. # Inputs - :param source: (DataSource) A wrapper around a data source, which may represent a pandas or Dask dataframe. # - Return + :param source: (DataSource) A wrapper around a data source, which may represent a pandas or Dask dataframe. # Return :return: (DatasetInfo) Structure containing list of FieldInfo objects. """ row_count = len(source) diff --git a/ludwig/schema/features/utils.py b/ludwig/schema/features/utils.py index 9ef5c67a368..0ce5ce20dbb 100644 --- a/ludwig/schema/features/utils.py +++ b/ludwig/schema/features/utils.py @@ -18,14 +18,16 @@ input_mixin_registry = Registry() output_mixin_registry = Registry() -""" -As of Ludwig v0.7, ECD models support the full range of feature parameters available in Ludwig, so any feature schema -can be registered into it. See `BinaryDefaultsConfig` for an example. +"""As of Ludwig v0.7, ECD models support the full range of feature parameters available in Ludwig, so any feature +schema can be registered into it. + +See `BinaryDefaultsConfig` for an example. """ ecd_defaults_config_registry = Registry() -""" -As of Ludwig v0.7, GBM models only support certain feature types and those features may only contain preprocessing -parameters (in comparison, ECD features can specify encoders and other parameters). This is why the two model types have +"""As of Ludwig v0.7, GBM models only support certain feature types and those features may only contain +preprocessing parameters (in comparison, ECD features can specify encoders and other parameters). + +This is why the two model types have separate defaults registries. See `BinaryInputFeatureConfigMixin` for an example of a schema pattern that is designed to be registered by this registry (whereas, conversely, `BinaryDefaultsConfig` is an example of one to be registered with the ECD defaults registry). From c1eb205822d826e8f31266ae722329945a464587 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Sun, 1 Dec 2024 21:16:02 -0800 Subject: [PATCH 35/67] updated matplotlib --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8b21cba93c7..34fe0c1afda 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -154,7 +154,8 @@ serve = [ tree = ["hummingbird-ml>=0.4.8", "lightgbm", "lightgbm-ray"] viz = [ "hiplot", - "matplotlib>3.4,<3.9.0; python_version > '3.6'", + matplotlib>3.4; python_version > '3.6'", + #"matplotlib>3.4,<3.9.0; python_version > '3.6'", "matplotlib>=3.0,<3.4; python_version <= '3.6'", "ptitprince", "seaborn>=0.7,<0.12", From 6f90a5f523c3b5e51aa28915022d23718c5eb13a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 2 Dec 2024 05:18:53 +0000 Subject: [PATCH 36/67] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ludwig/api.py | 88 ++++++------- ludwig/automl/auto_tune_config.py | 23 ++-- ludwig/automl/automl.py | 38 ++---- ludwig/automl/base_config.py | 22 +--- ludwig/backend/_ray210_compat.py | 2 +- ludwig/backend/base.py | 5 +- ludwig/backend/datasource.py | 14 +-- ludwig/backend/ray.py | 35 +++--- ludwig/benchmarking/benchmark.py | 17 +-- ludwig/benchmarking/profiler.py | 6 +- ludwig/benchmarking/reporting.py | 4 +- ludwig/benchmarking/summarize.py | 10 +- ludwig/benchmarking/summary_dataclasses.py | 3 +- ludwig/benchmarking/utils.py | 3 +- ludwig/callbacks.py | 3 +- ludwig/collect.py | 3 +- ludwig/combiners/combiners.py | 9 +- ludwig/config_sampling/explore_schema.py | 3 +- ludwig/config_validation/checks.py | 32 ++--- ludwig/config_validation/validation.py | 7 +- ludwig/contrib.py | 2 +- ludwig/contribs/mlflow/__init__.py | 6 +- ludwig/contribs/mlflow/model.py | 2 +- ludwig/data/cache/manager.py | 2 +- ludwig/data/cache/util.py | 3 +- ludwig/data/dataframe/dask.py | 3 +- ludwig/data/dataframe/modin.py | 3 +- ludwig/data/dataset/pandas.py | 5 +- ludwig/data/dataset/ray.py | 6 +- ludwig/data/dataset_synthesizer.py | 28 +---- ludwig/data/preprocessing.py | 118 ++++++------------ ludwig/data/prompt.py | 8 +- ludwig/data/split.py | 15 +-- ludwig/datasets/__init__.py | 4 +- ludwig/decoders/generic_decoders.py | 7 +- ludwig/decoders/image_decoders.py | 6 +- ludwig/decoders/llm_decoders.py | 3 +- ludwig/decoders/sequence_decoders.py | 6 +- ludwig/decoders/sequence_tagger.py | 6 +- ludwig/distributed/base.py | 2 +- ludwig/distributed/ddp.py | 3 +- ludwig/distributed/deepspeed.py | 6 +- ludwig/distributed/fsdp.py | 2 +- ludwig/distributed/horovod.py | 6 +- ludwig/encoders/category_encoders.py | 7 +- ludwig/encoders/date_encoders.py | 3 +- ludwig/encoders/generic_encoders.py | 6 +- ludwig/encoders/h3_encoders.py | 3 +- ludwig/encoders/image/base.py | 15 +-- ludwig/encoders/image/torchvision.py | 32 ++--- ludwig/encoders/sequence_encoders.py | 25 ++-- ludwig/encoders/text_encoders.py | 43 +++---- ludwig/experiment.py | 8 +- ludwig/explain/captum.py | 18 +-- ludwig/explain/captum_ray.py | 12 +- ludwig/export.py | 3 +- ludwig/features/audio_feature.py | 30 ++--- ludwig/features/bag_feature.py | 3 +- ludwig/features/base_feature.py | 29 ++--- ludwig/features/binary_feature.py | 31 ++--- ludwig/features/category_feature.py | 40 ++---- ludwig/features/date_feature.py | 13 +- ludwig/features/feature_registries.py | 59 +++++---- ludwig/features/feature_utils.py | 6 +- ludwig/features/h3_feature.py | 3 +- ludwig/features/image_feature.py | 84 +++++-------- ludwig/features/number_feature.py | 19 ++- ludwig/features/sequence_feature.py | 45 +++---- ludwig/features/set_feature.py | 24 ++-- ludwig/features/text_feature.py | 48 +++---- ludwig/features/timeseries_feature.py | 20 +-- ludwig/features/vector_feature.py | 19 ++- ludwig/hyperopt/execution.py | 16 ++- ludwig/hyperopt/run.py | 44 +++---- ludwig/hyperopt/utils.py | 39 ++---- ludwig/model_export/onnx_exporter.py | 3 +- ludwig/models/base.py | 11 +- ludwig/models/embedder.py | 5 +- ludwig/models/gbm.py | 3 +- ludwig/models/inference.py | 14 ++- ludwig/models/llm.py | 21 ++-- ludwig/models/predictor.py | 8 +- ludwig/models/retrieval.py | 3 +- ludwig/modules/attention_modules.py | 2 +- ludwig/modules/convolutional_modules.py | 2 +- ludwig/modules/embedding_modules.py | 2 +- ludwig/modules/fully_connected_modules.py | 3 +- ludwig/modules/loss_modules.py | 21 +--- ludwig/modules/lr_scheduler.py | 3 +- ludwig/modules/metric_modules.py | 97 +++++--------- ludwig/modules/metric_registry.py | 5 +- ludwig/modules/optimization_modules.py | 5 +- ludwig/schema/__init__.py | 6 +- ludwig/schema/combiners/sequence.py | 3 +- ludwig/schema/combiners/tab_transformer.py | 3 +- ludwig/schema/combiners/transformer.py | 3 +- ludwig/schema/combiners/utils.py | 3 +- ludwig/schema/decoders/base.py | 3 +- ludwig/schema/decoders/image_decoders.py | 5 +- ludwig/schema/decoders/utils.py | 2 +- ludwig/schema/defaults/ecd.py | 17 +-- ludwig/schema/defaults/utils.py | 2 +- ludwig/schema/encoders/base.py | 8 +- ludwig/schema/encoders/category_encoders.py | 5 +- ludwig/schema/encoders/image/base.py | 5 +- ludwig/schema/encoders/sequence_encoders.py | 5 +- ludwig/schema/encoders/text_encoders.py | 14 ++- ludwig/schema/encoders/utils.py | 2 +- ludwig/schema/features/audio_feature.py | 7 +- ludwig/schema/features/augmentation/image.py | 3 +- ludwig/schema/features/augmentation/utils.py | 2 +- ludwig/schema/features/bag_feature.py | 7 +- ludwig/schema/features/base.py | 42 ++----- ludwig/schema/features/binary_feature.py | 27 ++-- ludwig/schema/features/category_feature.py | 38 +++--- ludwig/schema/features/date_feature.py | 7 +- ludwig/schema/features/h3_feature.py | 7 +- ludwig/schema/features/image_feature.py | 24 ++-- ludwig/schema/features/loss/__init__.py | 4 +- ludwig/schema/features/loss/loss.py | 33 ++--- ludwig/schema/features/number_feature.py | 24 ++-- .../schema/features/preprocessing/__init__.py | 19 +-- ludwig/schema/features/preprocessing/audio.py | 3 +- ludwig/schema/features/preprocessing/bag.py | 3 +- .../schema/features/preprocessing/binary.py | 12 +- .../schema/features/preprocessing/category.py | 3 +- ludwig/schema/features/preprocessing/date.py | 3 +- ludwig/schema/features/preprocessing/h3.py | 3 +- ludwig/schema/features/preprocessing/image.py | 3 +- .../schema/features/preprocessing/number.py | 11 +- .../schema/features/preprocessing/sequence.py | 4 +- ludwig/schema/features/preprocessing/set.py | 4 +- ludwig/schema/features/preprocessing/text.py | 4 +- .../features/preprocessing/timeseries.py | 4 +- ludwig/schema/features/preprocessing/utils.py | 2 +- .../schema/features/preprocessing/vector.py | 4 +- ludwig/schema/features/sequence_feature.py | 21 ++-- ludwig/schema/features/set_feature.py | 18 +-- ludwig/schema/features/text_feature.py | 40 +++--- ludwig/schema/features/timeseries_feature.py | 21 ++-- ludwig/schema/features/vector_feature.py | 18 +-- ludwig/schema/hyperopt/__init__.py | 9 +- ludwig/schema/hyperopt/executor.py | 5 +- ludwig/schema/hyperopt/scheduler.py | 2 +- ludwig/schema/hyperopt/search_algorithm.py | 2 +- ludwig/schema/llms/base_model.py | 2 +- ludwig/schema/llms/peft.py | 2 +- ludwig/schema/lr_scheduler.py | 2 +- ludwig/schema/model_types/base.py | 35 ++---- ludwig/schema/model_types/ecd.py | 12 +- ludwig/schema/model_types/gbm.py | 12 +- ludwig/schema/model_types/llm.py | 21 ++-- ludwig/schema/model_types/utils.py | 28 ++--- ludwig/schema/optimizers.py | 5 +- ludwig/schema/profiler.py | 2 +- ludwig/schema/trainer.py | 30 ++--- ludwig/schema/utils.py | 14 ++- ludwig/train.py | 3 +- ludwig/trainers/trainer.py | 48 +++---- ludwig/trainers/trainer_lightgbm.py | 41 +++--- ludwig/trainers/trainer_llm.py | 15 +-- ludwig/upload.py | 3 +- ludwig/utils/automl/field_info.py | 2 +- ludwig/utils/automl/utils.py | 17 +-- ludwig/utils/backward_compatibility.py | 80 ++++-------- ludwig/utils/batch_size_tuner.py | 3 +- ludwig/utils/checkpoint_utils.py | 2 +- ludwig/utils/config_utils.py | 20 +-- ludwig/utils/data_utils.py | 10 +- ludwig/utils/date_utils.py | 2 +- ludwig/utils/defaults.py | 3 +- ludwig/utils/entmax/__init__.py | 20 ++- ludwig/utils/heuristics.py | 4 +- ludwig/utils/image_utils.py | 2 +- ludwig/utils/inference_utils.py | 23 +--- ludwig/utils/llm_utils.py | 13 +- ludwig/utils/misc_utils.py | 2 +- ludwig/utils/neuropod_utils.py | 3 +- ludwig/utils/strings_utils.py | 3 +- ludwig/utils/tokenizers.py | 16 +-- ludwig/utils/trainer_utils.py | 2 +- ludwig/utils/triton_utils.py | 34 ++--- ludwig/utils/upload_utils.py | 3 +- ludwig/visualize.py | 16 +-- tests/conftest.py | 17 +-- .../scripts/run_train_aim.py | 3 +- .../scripts/run_train_comet.py | 3 +- .../scripts/run_train_wandb.py | 4 +- tests/integration_tests/test_api.py | 14 +-- tests/integration_tests/test_automl.py | 22 ++-- tests/integration_tests/test_cache_manager.py | 5 +- .../test_cached_preprocessing.py | 7 +- tests/integration_tests/test_carton.py | 10 +- .../test_class_imbalance_feature.py | 3 +- tests/integration_tests/test_cli.py | 16 +-- tests/integration_tests/test_collect.py | 6 +- .../test_config_global_defaults.py | 24 +--- .../test_custom_components.py | 18 ++- tests/integration_tests/test_date_feature.py | 19 +-- tests/integration_tests/test_dependencies.py | 3 +- tests/integration_tests/test_experiment.py | 31 ++--- tests/integration_tests/test_explain.py | 23 ++-- tests/integration_tests/test_gbm.py | 6 +- .../integration_tests/test_graph_execution.py | 12 +- tests/integration_tests/test_horovod.py | 3 +- tests/integration_tests/test_hyperopt.py | 42 ++----- tests/integration_tests/test_hyperopt_ray.py | 9 +- .../test_hyperopt_ray_horovod.py | 12 +- .../test_input_feature_tied.py | 11 +- tests/integration_tests/test_kfold_cv.py | 13 +- tests/integration_tests/test_llm.py | 35 ++---- .../test_missing_value_strategy.py | 20 ++- tests/integration_tests/test_mlflow.py | 6 +- .../test_model_save_and_load.py | 28 ++--- .../test_model_training_options.py | 21 ++-- tests/integration_tests/test_neuropod.py | 10 +- tests/integration_tests/test_peft.py | 6 +- .../integration_tests/test_postprocessing.py | 11 +- tests/integration_tests/test_preprocessing.py | 43 ++----- tests/integration_tests/test_ray.py | 68 ++++------ tests/integration_tests/test_reducers.py | 3 +- tests/integration_tests/test_regularizers.py | 16 +-- tests/integration_tests/test_remote.py | 14 +-- .../test_sequence_decoders.py | 27 ++-- .../test_sequence_features.py | 3 +- tests/integration_tests/test_server.py | 13 +- .../integration_tests/test_simple_features.py | 17 +-- .../test_timeseries_feature.py | 6 +- tests/integration_tests/test_torchscript.py | 28 ++--- tests/integration_tests/test_trainer.py | 30 ++--- tests/integration_tests/test_triton.py | 22 ++-- tests/integration_tests/test_visualization.py | 18 ++- .../test_visualization_api.py | 19 ++- tests/integration_tests/utils.py | 39 ++---- tests/ludwig/accounting/test_used_tokens.py | 4 +- tests/ludwig/automl/test_base_config.py | 14 +-- tests/ludwig/combiners/test_combiners.py | 31 ++--- .../config_sampling/test_config_sampling.py | 6 +- .../test_validate_config_combiner.py | 3 +- .../test_validate_config_encoder.py | 14 +-- .../test_validate_config_features.py | 3 +- .../test_validate_config_hyperopt.py | 13 +- .../test_validate_config_misc.py | 71 ++++------- .../test_validate_config_preprocessing.py | 3 +- .../test_validate_config_trainer.py | 3 +- tests/ludwig/data/test_ray_data.py | 3 +- tests/ludwig/decoders/test_image_decoder.py | 6 +- tests/ludwig/decoders/test_llm_decoders.py | 3 +- .../ludwig/decoders/test_sequence_decoder.py | 14 +-- tests/ludwig/decoders/test_sequence_tagger.py | 3 +- tests/ludwig/encoders/test_bag_encoders.py | 3 +- .../ludwig/encoders/test_category_encoders.py | 6 +- tests/ludwig/encoders/test_date_encoders.py | 3 +- tests/ludwig/encoders/test_h3_encoders.py | 3 +- tests/ludwig/encoders/test_image_encoders.py | 45 ++++--- tests/ludwig/encoders/test_llm_encoders.py | 3 +- .../ludwig/encoders/test_sequence_encoders.py | 19 ++- tests/ludwig/encoders/test_set_encoders.py | 3 +- tests/ludwig/encoders/test_text_encoders.py | 18 ++- tests/ludwig/explain/test_util.py | 6 +- tests/ludwig/features/test_audio_feature.py | 3 +- tests/ludwig/features/test_binary_feature.py | 6 +- .../ludwig/features/test_category_feature.py | 3 +- tests/ludwig/features/test_date_feature.py | 3 +- tests/ludwig/features/test_image_feature.py | 19 ++- tests/ludwig/features/test_number_feature.py | 2 +- .../ludwig/features/test_sequence_features.py | 13 +- tests/ludwig/features/test_text_feature.py | 3 +- .../features/test_timeseries_feature.py | 3 +- tests/ludwig/hyperopt/test_hyperopt.py | 3 +- tests/ludwig/marshmallow/test_fields_misc.py | 3 +- .../marshmallow/test_fields_optimization.py | 3 +- .../marshmallow/test_fields_preprocessing.py | 9 +- .../marshmallow/test_marshmallow_misc.py | 4 +- .../models/test_trainable_image_layers.py | 2 +- .../models/test_training_determinism.py | 23 ++-- tests/ludwig/models/test_training_success.py | 3 +- tests/ludwig/modules/test_attention.py | 13 +- .../modules/test_convolutional_modules.py | 24 ++-- .../ludwig/modules/test_embedding_modules.py | 4 +- tests/ludwig/modules/test_encoder.py | 15 +-- tests/ludwig/modules/test_loss_modules.py | 21 ++-- tests/ludwig/modules/test_lr_scheduler.py | 6 +- tests/ludwig/modules/test_metric_modules.py | 8 +- .../ludwig/modules/test_mlp_mixer_modules.py | 2 +- tests/ludwig/modules/test_regex_freezing.py | 18 +-- tests/ludwig/modules/test_tabnet_modules.py | 6 +- .../ludwig/schema/hyperopt/test_scheduler.py | 5 +- .../schema/hyperopt/test_search_algorithm.py | 5 +- tests/ludwig/schema/test_model_config.py | 45 ++----- tests/ludwig/utils/entmax/test_losses.py | 3 +- tests/ludwig/utils/entmax/test_topk.py | 9 +- .../utils/test_backward_compatibility.py | 36 ++---- tests/ludwig/utils/test_config_utils.py | 17 +-- tests/ludwig/utils/test_data_utils.py | 19 +-- tests/ludwig/utils/test_dataframe_utils.py | 2 +- tests/ludwig/utils/test_defaults.py | 49 ++------ tests/ludwig/utils/test_fs_utils.py | 4 +- tests/ludwig/utils/test_hf_utils.py | 8 +- tests/ludwig/utils/test_image_utils.py | 20 +-- tests/ludwig/utils/test_llm_utils.py | 13 +- tests/ludwig/utils/test_model_utils.py | 10 +- tests/ludwig/utils/test_normalization.py | 3 +- tests/ludwig/utils/test_tokenizers.py | 9 +- tests/ludwig/utils/test_torch_utils.py | 11 +- tests/ludwig/utils/test_trainer_utils.py | 3 +- tests/ludwig/utils/test_upload_utils.py | 3 +- .../utils/test_version_transformation.py | 3 +- .../automl/scripts/update_golden_types.py | 4 +- .../automl/test_auto_type_inference.py | 4 +- tests/training_success/configs.py | 6 +- .../training_success/test_training_success.py | 14 +-- 312 files changed, 1727 insertions(+), 2560 deletions(-) diff --git a/ludwig/api.py b/ludwig/api.py index 063853a7104..05b201025c2 100644 --- a/ludwig/api.py +++ b/ludwig/api.py @@ -38,75 +38,57 @@ from tabulate import tabulate from ludwig.api_annotations import PublicAPI -from ludwig.backend import Backend, initialize_backend, provision_preprocessing_workers +from ludwig.backend import (Backend, initialize_backend, + provision_preprocessing_workers) from ludwig.callbacks import Callback -from ludwig.constants import ( - AUTO, - BATCH_SIZE, - EVAL_BATCH_SIZE, - FALLBACK_BATCH_SIZE, - FULL, - HYPEROPT, - HYPEROPT_WARNING, - MIN_DATASET_SPLIT_ROWS, - MODEL_ECD, - MODEL_LLM, - TEST, - TIMESERIES, - TRAINING, - VALIDATION, -) +from ludwig.constants import (AUTO, BATCH_SIZE, EVAL_BATCH_SIZE, + FALLBACK_BATCH_SIZE, FULL, HYPEROPT, + HYPEROPT_WARNING, MIN_DATASET_SPLIT_ROWS, + MODEL_ECD, MODEL_LLM, TEST, TIMESERIES, TRAINING, + VALIDATION) from ludwig.data.cache.types import CacheableDataset from ludwig.data.dataset.base import Dataset from ludwig.data.postprocessing import convert_predictions, postprocess -from ludwig.data.preprocessing import load_metadata, preprocess_for_prediction, preprocess_for_training +from ludwig.data.preprocessing import (load_metadata, + preprocess_for_prediction, + preprocess_for_training) from ludwig.datasets import load_dataset_uris -from ludwig.features.feature_registries import update_config_with_metadata, update_config_with_model -from ludwig.globals import ( - LUDWIG_VERSION, - MODEL_FILE_NAME, - MODEL_HYPERPARAMETERS_FILE_NAME, - MODEL_WEIGHTS_FILE_NAME, - set_disable_progressbar, - TRAIN_SET_METADATA_FILE_NAME, - TRAINING_CHECKPOINTS_DIR_PATH, -) +from ludwig.features.feature_registries import (update_config_with_metadata, + update_config_with_model) +from ludwig.globals import (LUDWIG_VERSION, MODEL_FILE_NAME, + MODEL_HYPERPARAMETERS_FILE_NAME, + MODEL_WEIGHTS_FILE_NAME, + TRAIN_SET_METADATA_FILE_NAME, + TRAINING_CHECKPOINTS_DIR_PATH, + set_disable_progressbar) from ludwig.models.base import BaseModel from ludwig.models.calibrator import Calibrator -from ludwig.models.inference import InferenceModule, save_ludwig_model_for_inference -from ludwig.models.predictor import ( - calculate_overall_stats, - print_evaluation_stats, - save_evaluation_stats, - save_prediction_outputs, -) +from ludwig.models.inference import (InferenceModule, + save_ludwig_model_for_inference) +from ludwig.models.predictor import (calculate_overall_stats, + print_evaluation_stats, + save_evaluation_stats, + save_prediction_outputs) from ludwig.models.registry import model_type_registry from ludwig.schema.model_config import ModelConfig from ludwig.types import ModelConfigDict, TrainingSetMetadataDict from ludwig.upload import get_upload_registry from ludwig.utils import metric_utils -from ludwig.utils.backward_compatibility import upgrade_config_dict_to_latest_version +from ludwig.utils.backward_compatibility import \ + upgrade_config_dict_to_latest_version from ludwig.utils.config_utils import get_preprocessing_params -from ludwig.utils.data_utils import ( - figure_data_format, - generate_kfold_splits, - load_dataset, - load_json, - load_yaml, - save_json, -) +from ludwig.utils.data_utils import (figure_data_format, generate_kfold_splits, + load_dataset, load_json, load_yaml, + save_json) from ludwig.utils.dataset_utils import generate_dataset_statistics from ludwig.utils.defaults import default_random_seed -from ludwig.utils.fs_utils import makedirs, path_exists, upload_output_directory +from ludwig.utils.fs_utils import (makedirs, path_exists, + upload_output_directory) from ludwig.utils.heuristics import get_auto_learning_rate -from ludwig.utils.llm_utils import create_text_streamer, TextStreamer -from ludwig.utils.misc_utils import ( - get_commit_hash, - get_file_names, - get_from_registry, - get_output_directory, - set_saved_weights_in_checkpoint_flag, -) +from ludwig.utils.llm_utils import TextStreamer, create_text_streamer +from ludwig.utils.misc_utils import (get_commit_hash, get_file_names, + get_from_registry, get_output_directory, + set_saved_weights_in_checkpoint_flag) from ludwig.utils.print_utils import print_boxed from ludwig.utils.tokenizers import HFTokenizer from ludwig.utils.torch_utils import DEVICE diff --git a/ludwig/automl/auto_tune_config.py b/ludwig/automl/auto_tune_config.py index fc4056e7698..5b1e4746164 100644 --- a/ludwig/automl/auto_tune_config.py +++ b/ludwig/automl/auto_tune_config.py @@ -13,21 +13,14 @@ from ludwig.api import LudwigModel from ludwig.backend import initialize_backend -from ludwig.constants import ( - AUTO, - AUTOML_DEFAULT_TEXT_ENCODER, - AUTOML_LARGE_TEXT_DATASET, - AUTOML_MAX_ROWS_PER_CHECKPOINT, - AUTOML_SMALLER_TEXT_ENCODER, - AUTOML_SMALLER_TEXT_LENGTH, - AUTOML_TEXT_ENCODER_MAX_TOKEN_LEN, - HYPEROPT, - MINIMUM_BATCH_SIZE, - PREPROCESSING, - SPACE, - TEXT, - TRAINER, -) +from ludwig.constants import (AUTO, AUTOML_DEFAULT_TEXT_ENCODER, + AUTOML_LARGE_TEXT_DATASET, + AUTOML_MAX_ROWS_PER_CHECKPOINT, + AUTOML_SMALLER_TEXT_ENCODER, + AUTOML_SMALLER_TEXT_LENGTH, + AUTOML_TEXT_ENCODER_MAX_TOKEN_LEN, HYPEROPT, + MINIMUM_BATCH_SIZE, PREPROCESSING, SPACE, TEXT, + TRAINER) from ludwig.data.preprocessing import preprocess_for_training from ludwig.features.feature_registries import update_config_with_metadata from ludwig.schema.model_config import ModelConfig diff --git a/ludwig/automl/automl.py b/ludwig/automl/automl.py index 3b0c878c073..03222395e83 100644 --- a/ludwig/automl/automl.py +++ b/ludwig/automl/automl.py @@ -20,33 +20,16 @@ from ludwig.api import LudwigModel from ludwig.api_annotations import PublicAPI -from ludwig.automl.base_config import ( - create_default_config, - DatasetInfo, - get_dataset_info, - get_features_config, - get_reference_configs, -) +from ludwig.automl.base_config import (DatasetInfo, create_default_config, + get_dataset_info, get_features_config, + get_reference_configs) from ludwig.backend import Backend, initialize_backend -from ludwig.constants import ( - AUTO, - AUTOML_DEFAULT_IMAGE_ENCODER, - AUTOML_DEFAULT_TABULAR_MODEL, - AUTOML_DEFAULT_TEXT_ENCODER, - BINARY, - CATEGORY, - ENCODER, - HYPEROPT, - IMAGE, - INPUT_FEATURES, - NAME, - NUMBER, - OUTPUT_FEATURES, - TABULAR, - TEXT, - TRAINER, - TYPE, -) +from ludwig.constants import (AUTO, AUTOML_DEFAULT_IMAGE_ENCODER, + AUTOML_DEFAULT_TABULAR_MODEL, + AUTOML_DEFAULT_TEXT_ENCODER, BINARY, CATEGORY, + ENCODER, HYPEROPT, IMAGE, INPUT_FEATURES, NAME, + NUMBER, OUTPUT_FEATURES, TABULAR, TEXT, TRAINER, + TYPE) from ludwig.contrib import add_contrib_callback_args from ludwig.data.cache.types import CacheableDataset from ludwig.datasets import load_dataset_uris @@ -55,7 +38,8 @@ from ludwig.schema.model_config import ModelConfig from ludwig.types import ModelConfigDict from ludwig.utils.automl.ray_utils import _ray_init -from ludwig.utils.automl.utils import _add_transfer_config, get_model_type, set_output_feature_metric +from ludwig.utils.automl.utils import (_add_transfer_config, get_model_type, + set_output_feature_metric) from ludwig.utils.data_utils import load_dataset, use_credentials from ludwig.utils.defaults import default_random_seed from ludwig.utils.fs_utils import open_file diff --git a/ludwig/automl/base_config.py b/ludwig/automl/base_config.py index 2cf4265492e..55633024ccd 100644 --- a/ludwig/automl/base_config.py +++ b/ludwig/automl/base_config.py @@ -20,28 +20,18 @@ import numpy as np import pandas as pd import yaml -from dataclasses_json import dataclass_json, LetterCase +from dataclasses_json import LetterCase, dataclass_json from tqdm import tqdm from ludwig.api_annotations import DeveloperAPI from ludwig.backend import Backend -from ludwig.constants import ( - COLUMN, - COMBINER, - ENCODER, - EXECUTOR, - HYPEROPT, - INPUT_FEATURES, - PREPROCESSING, - SCHEDULER, - SEARCH_ALG, - SPLIT, - TEXT, - TYPE, -) +from ludwig.constants import (COLUMN, COMBINER, ENCODER, EXECUTOR, HYPEROPT, + INPUT_FEATURES, PREPROCESSING, SCHEDULER, + SEARCH_ALG, SPLIT, TEXT, TYPE) from ludwig.types import ModelConfigDict from ludwig.utils.automl.data_source import DataSource, wrap_data_source -from ludwig.utils.automl.field_info import FieldConfig, FieldInfo, FieldMetadata +from ludwig.utils.automl.field_info import (FieldConfig, FieldInfo, + FieldMetadata) from ludwig.utils.automl.type_inference import infer_type, should_exclude from ludwig.utils.data_utils import load_yaml from ludwig.utils.misc_utils import merge_dict diff --git a/ludwig/backend/_ray210_compat.py b/ludwig/backend/_ray210_compat.py index afe1b705940..b4222000dc6 100644 --- a/ludwig/backend/_ray210_compat.py +++ b/ludwig/backend/_ray210_compat.py @@ -1,7 +1,7 @@ # Implements https://github.com/ray-project/ray/pull/30598 ahead of Ray 2.2 release. import math -from typing import Any, Callable, Dict, Optional, Type, TYPE_CHECKING, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Type, Union import ray from ray.air.config import RunConfig diff --git a/ludwig/backend/base.py b/ludwig/backend/base.py index f586074af64..3ef5b7fdd93 100644 --- a/ludwig/backend/base.py +++ b/ludwig/backend/base.py @@ -20,7 +20,7 @@ from abc import ABC, abstractmethod from concurrent.futures import ThreadPoolExecutor from contextlib import contextmanager -from typing import Any, Callable, Generator, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Callable, Generator import numpy as np import pandas as pd @@ -282,7 +282,8 @@ def create_trainer( model: BaseModel, **kwargs, ) -> BaseTrainer: # type: ignore[override] - from ludwig.trainers.registry import get_llm_trainers_registry, get_trainers_registry + from ludwig.trainers.registry import (get_llm_trainers_registry, + get_trainers_registry) trainer_cls: type if model.type() == MODEL_LLM: diff --git a/ludwig/backend/datasource.py b/ludwig/backend/datasource.py index aa965da8463..f184f68ec2e 100644 --- a/ludwig/backend/datasource.py +++ b/ludwig/backend/datasource.py @@ -1,6 +1,7 @@ import contextlib import logging -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, TYPE_CHECKING, Union +from typing import (TYPE_CHECKING, Any, Callable, Dict, Iterable, List, + Optional, Tuple, Union) import ray import urllib3 @@ -10,14 +11,9 @@ from ray.data.datasource.binary_datasource import BinaryDatasource from ray.data.datasource.datasource import Datasource, ReadTask from ray.data.datasource.file_based_datasource import ( - _check_pyarrow_version, - _resolve_paths_and_filesystem, - _S3FileSystemWrapper, - _wrap_s3_serialization_workaround, - BaseFileMetadataProvider, - BlockOutputBuffer, - DefaultFileMetadataProvider, -) + BaseFileMetadataProvider, BlockOutputBuffer, DefaultFileMetadataProvider, + _check_pyarrow_version, _resolve_paths_and_filesystem, + _S3FileSystemWrapper, _wrap_s3_serialization_workaround) from ludwig.utils.fs_utils import get_bytes_obj_from_http_path, is_http diff --git a/ludwig/backend/ray.py b/ludwig/backend/ray.py index 01915e42fc2..dcf3bef937f 100644 --- a/ludwig/backend/ray.py +++ b/ludwig/backend/ray.py @@ -41,29 +41,28 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.backend.base import Backend, RemoteTrainingMixin -from ludwig.constants import CPU_RESOURCES_PER_TRIAL, EXECUTOR, MODEL_ECD, MODEL_LLM, NAME, PROC_COLUMN +from ludwig.constants import (CPU_RESOURCES_PER_TRIAL, EXECUTOR, MODEL_ECD, + MODEL_LLM, NAME, PROC_COLUMN) from ludwig.data.dataframe.base import DataFrameEngine from ludwig.data.dataframe.dask import tensor_extension_casting -from ludwig.data.dataset.ray import RayDataset, RayDatasetManager, RayDatasetShard -from ludwig.distributed import ( - DistributedStrategy, - get_default_strategy_name, - get_dist_strategy, - init_dist_strategy, - LocalStrategy, -) +from ludwig.data.dataset.ray import (RayDataset, RayDatasetManager, + RayDatasetShard) +from ludwig.distributed import (DistributedStrategy, LocalStrategy, + get_default_strategy_name, get_dist_strategy, + init_dist_strategy) from ludwig.models.base import BaseModel -from ludwig.models.predictor import BasePredictor, get_output_columns, get_predictor_cls +from ludwig.models.predictor import (BasePredictor, get_output_columns, + get_predictor_cls) from ludwig.schema.trainer import ECDTrainerConfig, FineTuneTrainerConfig -from ludwig.trainers.registry import ( - get_llm_ray_trainers_registry, - get_ray_trainers_registry, - register_llm_ray_trainer, - register_ray_trainer, -) +from ludwig.trainers.registry import (get_llm_ray_trainers_registry, + get_ray_trainers_registry, + register_llm_ray_trainer, + register_ray_trainer) from ludwig.trainers.trainer import BaseTrainer, RemoteTrainer, Trainer -from ludwig.trainers.trainer_llm import RemoteLLMFineTuneTrainer, RemoteLLMTrainer -from ludwig.types import HyperoptConfigDict, ModelConfigDict, TrainerConfigDict, TrainingSetMetadataDict +from ludwig.trainers.trainer_llm import (RemoteLLMFineTuneTrainer, + RemoteLLMTrainer) +from ludwig.types import (HyperoptConfigDict, ModelConfigDict, + TrainerConfigDict, TrainingSetMetadataDict) from ludwig.utils.batch_size_tuner import BatchSizeEvaluator from ludwig.utils.dataframe_utils import is_dask_series_or_df, set_index_name from ludwig.utils.fs_utils import get_fs_and_path diff --git a/ludwig/benchmarking/benchmark.py b/ludwig/benchmarking/benchmark.py index 71644362e89..c1d04a4bf42 100644 --- a/ludwig/benchmarking/benchmark.py +++ b/ludwig/benchmarking/benchmark.py @@ -7,19 +7,14 @@ import ludwig.datasets from ludwig.api import LudwigModel -from ludwig.benchmarking.artifacts import BenchmarkingResult, build_benchmarking_result +from ludwig.benchmarking.artifacts import (BenchmarkingResult, + build_benchmarking_result) from ludwig.benchmarking.profiler_callbacks import LudwigProfilerCallback from ludwig.benchmarking.utils import ( - create_default_config, - delete_hyperopt_outputs, - delete_model_checkpoints, - export_artifacts, - load_from_module, - populate_benchmarking_config_with_defaults, - propagate_global_parameters, - save_yaml, - validate_benchmarking_config, -) + create_default_config, delete_hyperopt_outputs, delete_model_checkpoints, + export_artifacts, load_from_module, + populate_benchmarking_config_with_defaults, propagate_global_parameters, + save_yaml, validate_benchmarking_config) from ludwig.contrib import add_contrib_callback_args from ludwig.hyperopt.run import hyperopt from ludwig.utils.data_utils import load_yaml diff --git a/ludwig/benchmarking/profiler.py b/ludwig/benchmarking/profiler.py index f0712314a67..039be94d4bb 100644 --- a/ludwig/benchmarking/profiler.py +++ b/ludwig/benchmarking/profiler.py @@ -16,8 +16,10 @@ from cpuinfo import get_cpu_info from gpustat.core import GPUStatCollection -from ludwig.benchmarking.profiler_dataclasses import profiler_dataclass_to_flat_dict, TorchProfilerMetrics -from ludwig.benchmarking.reporting import get_metrics_from_system_usage_profiler, get_metrics_from_torch_profiler +from ludwig.benchmarking.profiler_dataclasses import ( + TorchProfilerMetrics, profiler_dataclass_to_flat_dict) +from ludwig.benchmarking.reporting import ( + get_metrics_from_system_usage_profiler, get_metrics_from_torch_profiler) from ludwig.constants import LUDWIG_TAG from ludwig.globals import LUDWIG_VERSION from ludwig.utils.data_utils import save_json diff --git a/ludwig/benchmarking/reporting.py b/ludwig/benchmarking/reporting.py index 589461eafd2..226dcc188d4 100644 --- a/ludwig/benchmarking/reporting.py +++ b/ludwig/benchmarking/reporting.py @@ -6,7 +6,9 @@ from torch._C._autograd import _KinetoEvent from torch.autograd import DeviceType, profiler_util -from ludwig.benchmarking.profiler_dataclasses import DeviceUsageMetrics, SystemResourceMetrics, TorchProfilerMetrics +from ludwig.benchmarking.profiler_dataclasses import (DeviceUsageMetrics, + SystemResourceMetrics, + TorchProfilerMetrics) from ludwig.constants import LUDWIG_TAG diff --git a/ludwig/benchmarking/summarize.py b/ludwig/benchmarking/summarize.py index 25d49b54af4..b944579e1e7 100644 --- a/ludwig/benchmarking/summarize.py +++ b/ludwig/benchmarking/summarize.py @@ -5,13 +5,9 @@ from typing import List, Tuple from ludwig.benchmarking.summary_dataclasses import ( - build_metrics_diff, - build_resource_usage_diff, - export_metrics_diff_to_csv, - export_resource_usage_diff_to_csv, - MetricsDiff, - ResourceUsageDiff, -) + MetricsDiff, ResourceUsageDiff, build_metrics_diff, + build_resource_usage_diff, export_metrics_diff_to_csv, + export_resource_usage_diff_to_csv) from ludwig.benchmarking.utils import download_artifacts logger = logging.getLogger() diff --git a/ludwig/benchmarking/summary_dataclasses.py b/ludwig/benchmarking/summary_dataclasses.py index af18e5bc80d..f173c5a77c0 100644 --- a/ludwig/benchmarking/summary_dataclasses.py +++ b/ludwig/benchmarking/summary_dataclasses.py @@ -8,7 +8,8 @@ import ludwig.modules.metric_modules # noqa: F401 from ludwig.benchmarking.utils import format_memory, format_time from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME -from ludwig.modules.metric_registry import get_metric_classes, metric_feature_type_registry # noqa: F401 +from ludwig.modules.metric_registry import (get_metric_classes, # noqa: F401 + metric_feature_type_registry) from ludwig.types import ModelConfigDict from ludwig.utils.data_utils import load_json diff --git a/ludwig/benchmarking/utils.py b/ludwig/benchmarking/utils.py index 87fbe0d2cb4..3707dee9960 100644 --- a/ludwig/benchmarking/utils.py +++ b/ludwig/benchmarking/utils.py @@ -16,7 +16,8 @@ from ludwig.constants import BINARY, CATEGORY from ludwig.datasets import model_configs_for_dataset from ludwig.datasets.loaders.dataset_loader import DatasetLoader -from ludwig.globals import CONFIG_YAML, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME +from ludwig.globals import (CONFIG_YAML, MODEL_FILE_NAME, + MODEL_WEIGHTS_FILE_NAME) from ludwig.utils.data_utils import load_yaml from ludwig.utils.dataset_utils import get_repeatable_train_val_test_split from ludwig.utils.defaults import default_random_seed diff --git a/ludwig/callbacks.py b/ludwig/callbacks.py index 3e08962e855..9e1e0ba1db7 100644 --- a/ludwig/callbacks.py +++ b/ludwig/callbacks.py @@ -18,7 +18,8 @@ from typing import Any, Callable, Dict, List, Union from ludwig.api_annotations import PublicAPI -from ludwig.types import HyperoptConfigDict, ModelConfigDict, TrainingSetMetadataDict +from ludwig.types import (HyperoptConfigDict, ModelConfigDict, + TrainingSetMetadataDict) @PublicAPI diff --git a/ludwig/collect.py b/ludwig/collect.py index 066edcd191c..834c18d9d70 100644 --- a/ludwig/collect.py +++ b/ludwig/collect.py @@ -30,7 +30,8 @@ from ludwig.constants import FULL, TEST, TRAINING, VALIDATION from ludwig.contrib import add_contrib_callback_args from ludwig.globals import LUDWIG_VERSION -from ludwig.utils.print_utils import get_logging_level_registry, print_boxed, print_ludwig +from ludwig.utils.print_utils import (get_logging_level_registry, print_boxed, + print_ludwig) from ludwig.utils.strings_utils import make_safe_filename logger = logging.getLogger(__name__) diff --git a/ludwig/combiners/combiners.py b/ludwig/combiners/combiners.py index 06cb61a873d..0c73cfd1b6c 100644 --- a/ludwig/combiners/combiners.py +++ b/ludwig/combiners/combiners.py @@ -34,10 +34,13 @@ from ludwig.schema.combiners.base import BaseCombinerConfig from ludwig.schema.combiners.comparator import ComparatorCombinerConfig from ludwig.schema.combiners.concat import ConcatCombinerConfig -from ludwig.schema.combiners.project_aggregate import ProjectAggregateCombinerConfig +from ludwig.schema.combiners.project_aggregate import \ + ProjectAggregateCombinerConfig from ludwig.schema.combiners.sequence import SequenceCombinerConfig -from ludwig.schema.combiners.sequence_concat import SequenceConcatCombinerConfig -from ludwig.schema.combiners.tab_transformer import TabTransformerCombinerConfig +from ludwig.schema.combiners.sequence_concat import \ + SequenceConcatCombinerConfig +from ludwig.schema.combiners.tab_transformer import \ + TabTransformerCombinerConfig from ludwig.schema.combiners.tabnet import TabNetCombinerConfig from ludwig.schema.combiners.transformer import TransformerCombinerConfig from ludwig.utils.misc_utils import get_from_registry diff --git a/ludwig/config_sampling/explore_schema.py b/ludwig/config_sampling/explore_schema.py index 2f6e96649b3..61b857a3782 100644 --- a/ludwig/config_sampling/explore_schema.py +++ b/ludwig/config_sampling/explore_schema.py @@ -5,7 +5,8 @@ import pandas as pd -from ludwig.config_sampling.parameter_sampling import handle_property_type, ParameterBaseTypes +from ludwig.config_sampling.parameter_sampling import (ParameterBaseTypes, + handle_property_type) from ludwig.constants import SEQUENCE, TEXT, TIMESERIES from ludwig.data.dataset_synthesizer import build_synthetic_dataset_df from ludwig.schema.model_types.base import ModelConfig diff --git a/ludwig/config_validation/checks.py b/ludwig/config_validation/checks.py index a15b44c4a45..31cbb267c1c 100644 --- a/ludwig/config_validation/checks.py +++ b/ludwig/config_validation/checks.py @@ -2,30 +2,18 @@ from abc import ABC, abstractmethod from re import findall -from typing import Callable, TYPE_CHECKING +from typing import TYPE_CHECKING, Callable from transformers import AutoConfig from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ( - AUDIO, - BINARY, - CATEGORY, - IMAGE, - IN_MEMORY, - MIN_QUANTIZATION_BITS_FOR_MERGE_AND_UNLOAD, - MODEL_ECD, - MODEL_GBM, - MODEL_LLM, - NUMBER, - SEQUENCE, - SET, - TEXT, - TIMESERIES, - VECTOR, -) +from ludwig.constants import (AUDIO, BINARY, CATEGORY, IMAGE, IN_MEMORY, + MIN_QUANTIZATION_BITS_FOR_MERGE_AND_UNLOAD, + MODEL_ECD, MODEL_GBM, MODEL_LLM, NUMBER, + SEQUENCE, SET, TEXT, TIMESERIES, VECTOR) from ludwig.error import ConfigValidationError -from ludwig.utils.metric_utils import get_feature_to_metric_names_map_from_feature_collection +from ludwig.utils.metric_utils import \ + get_feature_to_metric_names_map_from_feature_collection from ludwig.utils.misc_utils import merge_dict if TYPE_CHECKING: @@ -358,7 +346,8 @@ def check_hyperopt_parameter_dicts(config: "ModelConfig") -> None: # noqa: F821 if config.hyperopt is None: return - from ludwig.schema.hyperopt.utils import get_parameter_cls, parameter_config_registry # noqa: F401 + from ludwig.schema.hyperopt.utils import (get_parameter_cls, # noqa: F401 + parameter_config_registry) for parameter, space in config.hyperopt.parameters.items(): # skip nested hyperopt parameters @@ -554,7 +543,8 @@ def check_llm_finetuning_adalora_config(config: "ModelConfig"): if config.adapter.type != "adalora": return - from peft.utils import TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING + from peft.utils import \ + TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING model_config = _get_llm_model_config(config.base_model) if model_config.model_type not in TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING: diff --git a/ludwig/config_validation/validation.py b/ludwig/config_validation/validation.py index 30725d7e49b..141cb01b975 100644 --- a/ludwig/config_validation/validation.py +++ b/ludwig/config_validation/validation.py @@ -8,12 +8,13 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.constants import BASE_MODEL, MODEL_ECD, MODEL_LLM, MODEL_TYPE from ludwig.error import ConfigValidationError - # TODO(travis): figure out why we need these imports to avoid circular import error from ludwig.schema.combiners.utils import get_combiner_jsonschema # noqa -from ludwig.schema.features.utils import get_input_feature_jsonschema, get_output_feature_jsonschema # noqa +from ludwig.schema.features.utils import (get_input_feature_jsonschema, # noqa + get_output_feature_jsonschema) from ludwig.schema.hyperopt import get_hyperopt_jsonschema # noqa -from ludwig.schema.trainer import get_model_type_jsonschema, get_trainer_jsonschema # noqa +from ludwig.schema.trainer import (get_model_type_jsonschema, # noqa + get_trainer_jsonschema) from ludwig.schema.utils import unload_jsonschema_from_marshmallow_class VALIDATION_LOCK = Lock() diff --git a/ludwig/contrib.py b/ludwig/contrib.py index 3c30bf6116f..50dca8ccad8 100644 --- a/ludwig/contrib.py +++ b/ludwig/contrib.py @@ -16,7 +16,7 @@ import argparse -from ludwig.contribs import contrib_registry, ContribLoader +from ludwig.contribs import ContribLoader, contrib_registry def create_load_action(contrib_loader: ContribLoader) -> argparse.Action: diff --git a/ludwig/contribs/mlflow/__init__.py b/ludwig/contribs/mlflow/__init__.py index 55c51a9ac88..d4fe3e1db3c 100644 --- a/ludwig/contribs/mlflow/__init__.py +++ b/ludwig/contribs/mlflow/__init__.py @@ -6,9 +6,11 @@ from ludwig.api_annotations import DeveloperAPI, PublicAPI from ludwig.callbacks import Callback from ludwig.constants import TRAINER -from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, TRAIN_SET_METADATA_FILE_NAME +from ludwig.globals import (MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, + TRAIN_SET_METADATA_FILE_NAME) from ludwig.types import TrainingSetMetadataDict -from ludwig.utils.data_utils import chunk_dict, flatten_dict, save_json, to_json_dict +from ludwig.utils.data_utils import (chunk_dict, flatten_dict, save_json, + to_json_dict) from ludwig.utils.package_utils import LazyLoader mlflow = LazyLoader("mlflow", globals(), "mlflow") diff --git a/ludwig/contribs/mlflow/model.py b/ludwig/contribs/mlflow/model.py index 16403c7afdd..2243014881a 100644 --- a/ludwig/contribs/mlflow/model.py +++ b/ludwig/contribs/mlflow/model.py @@ -9,7 +9,7 @@ from mlflow.models import Model from mlflow.models.model import MLMODEL_FILE_NAME from mlflow.models.signature import ModelSignature -from mlflow.models.utils import _save_example, ModelInputExample +from mlflow.models.utils import ModelInputExample, _save_example from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS from mlflow.tracking.artifact_utils import _download_artifact_from_uri from mlflow.utils.environment import _mlflow_conda_env diff --git a/ludwig/data/cache/manager.py b/ludwig/data/cache/manager.py index bc87065a7be..f345f0e3b61 100644 --- a/ludwig/data/cache/manager.py +++ b/ludwig/data/cache/manager.py @@ -3,7 +3,7 @@ from typing import Optional from ludwig.constants import CHECKSUM, META, TEST, TRAINING, VALIDATION -from ludwig.data.cache.types import alphanum, CacheableDataset +from ludwig.data.cache.types import CacheableDataset, alphanum from ludwig.data.cache.util import calculate_checksum from ludwig.data.dataset.base import DatasetManager from ludwig.utils import data_utils diff --git a/ludwig/data/cache/util.py b/ludwig/data/cache/util.py index 14b53c78396..121ffbac44c 100644 --- a/ludwig/data/cache/util.py +++ b/ludwig/data/cache/util.py @@ -1,5 +1,6 @@ import ludwig -from ludwig.constants import DEFAULTS, INPUT_FEATURES, OUTPUT_FEATURES, PREPROCESSING, PROC_COLUMN, TYPE +from ludwig.constants import (DEFAULTS, INPUT_FEATURES, OUTPUT_FEATURES, + PREPROCESSING, PROC_COLUMN, TYPE) from ludwig.data.cache.types import CacheableDataset from ludwig.types import ModelConfigDict from ludwig.utils.data_utils import hash_dict diff --git a/ludwig/data/dataframe/dask.py b/ludwig/data/dataframe/dask.py index 5f292eeabb6..a45aacfa84d 100644 --- a/ludwig/data/dataframe/dask.py +++ b/ludwig/data/dataframe/dask.py @@ -30,7 +30,8 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.data.dataframe.base import DataFrameEngine -from ludwig.utils.data_utils import get_pa_schema, get_parquet_filename, split_by_slices +from ludwig.utils.data_utils import (get_pa_schema, get_parquet_filename, + split_by_slices) from ludwig.utils.dataframe_utils import set_index_name from ludwig.utils.fs_utils import get_fs_and_path diff --git a/ludwig/data/dataframe/modin.py b/ludwig/data/dataframe/modin.py index a0057979f1c..a7ccce3497d 100644 --- a/ludwig/data/dataframe/modin.py +++ b/ludwig/data/dataframe/modin.py @@ -21,7 +21,8 @@ from ludwig.data.dataframe.base import DataFrameEngine from ludwig.globals import PREDICTIONS_SHAPES_FILE_NAME -from ludwig.utils.data_utils import get_pa_schema, load_json, save_json, split_by_slices +from ludwig.utils.data_utils import (get_pa_schema, load_json, save_json, + split_by_slices) from ludwig.utils.dataframe_utils import flatten_df, unflatten_df diff --git a/ludwig/data/dataset/pandas.py b/ludwig/data/dataset/pandas.py index 70b2b4ae8a9..52db45eda8c 100644 --- a/ludwig/data/dataset/pandas.py +++ b/ludwig/data/dataset/pandas.py @@ -17,7 +17,7 @@ from __future__ import annotations import contextlib -from typing import Iterable, TYPE_CHECKING +from typing import TYPE_CHECKING, Iterable import numpy as np from pandas import DataFrame @@ -30,7 +30,8 @@ from ludwig.distributed import DistributedStrategy from ludwig.features.base_feature import BaseFeature from ludwig.utils.data_utils import DATA_TRAIN_HDF5_FP, load_hdf5, save_hdf5 -from ludwig.utils.dataframe_utils import from_numpy_dataset, to_numpy_dataset, to_scalar_df +from ludwig.utils.dataframe_utils import (from_numpy_dataset, to_numpy_dataset, + to_scalar_df) from ludwig.utils.defaults import default_random_seed from ludwig.utils.fs_utils import download_h5 from ludwig.utils.misc_utils import get_proc_features diff --git a/ludwig/data/dataset/ray.py b/ludwig/data/dataset/ray.py index 5ad083fa715..3d0323fd100 100644 --- a/ludwig/data/dataset/ray.py +++ b/ludwig/data/dataset/ray.py @@ -38,8 +38,10 @@ from ludwig.data.dataset.base import Dataset, DatasetManager from ludwig.distributed import DistributedStrategy from ludwig.features.base_feature import BaseFeature -from ludwig.types import FeatureConfigDict, ModelConfigDict, TrainingSetMetadataDict -from ludwig.utils.data_utils import DATA_TRAIN_HDF5_FP, DATA_TRAIN_PARQUET_FP, from_numpy_dataset, to_numpy_dataset +from ludwig.types import (FeatureConfigDict, ModelConfigDict, + TrainingSetMetadataDict) +from ludwig.utils.data_utils import (DATA_TRAIN_HDF5_FP, DATA_TRAIN_PARQUET_FP, + from_numpy_dataset, to_numpy_dataset) from ludwig.utils.dataframe_utils import to_scalar_df from ludwig.utils.defaults import default_random_seed from ludwig.utils.error_handling_utils import default_retry diff --git a/ludwig/data/dataset_synthesizer.py b/ludwig/data/dataset_synthesizer.py index 8d32e87c190..d298da4e57b 100644 --- a/ludwig/data/dataset_synthesizer.py +++ b/ludwig/data/dataset_synthesizer.py @@ -30,29 +30,11 @@ from packaging import version from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ( - AUDIO, - BAG, - BINARY, - CATEGORY, - CATEGORY_DISTRIBUTION, - DATE, - DECODER, - ENCODER, - H3, - IMAGE, - INPUT_FEATURES, - NAME, - NUMBER, - OUTPUT_FEATURES, - PREPROCESSING, - SEQUENCE, - SET, - TEXT, - TIMESERIES, - TYPE, - VECTOR, -) +from ludwig.constants import (AUDIO, BAG, BINARY, CATEGORY, + CATEGORY_DISTRIBUTION, DATE, DECODER, ENCODER, + H3, IMAGE, INPUT_FEATURES, NAME, NUMBER, + OUTPUT_FEATURES, PREPROCESSING, SEQUENCE, SET, + TEXT, TIMESERIES, TYPE, VECTOR) from ludwig.contrib import add_contrib_callback_args from ludwig.globals import LUDWIG_VERSION from ludwig.types import ModelConfigDict diff --git a/ludwig/data/preprocessing.py b/ludwig/data/preprocessing.py index 3754f959bc5..8947ce6bc4a 100644 --- a/ludwig/data/preprocessing.py +++ b/ludwig/data/preprocessing.py @@ -24,99 +24,55 @@ import torch from ludwig.api_annotations import DeveloperAPI -from ludwig.backend import Backend, LOCAL_BACKEND -from ludwig.config_validation.preprocessing import check_global_max_sequence_length_fits_prompt_template -from ludwig.constants import ( - BFILL, - CHECKSUM, - COLUMN, - DEFAULTS, - DROP_ROW, - ENCODER, - FFILL, - FILL_WITH_CONST, - FILL_WITH_FALSE, - FILL_WITH_MEAN, - FILL_WITH_MODE, - FILL_WITH_TRUE, - FULL, - META, - MIN_DATASET_SPLIT_ROWS, - MODEL_ECD, - NAME, - NUMBER, - PREPROCESSING, - PROC_COLUMN, - SPLIT, - SRC, - TEST, - TEXT, - TRAINING, - TYPE, - VALIDATION, -) +from ludwig.backend import LOCAL_BACKEND, Backend +from ludwig.config_validation.preprocessing import \ + check_global_max_sequence_length_fits_prompt_template +from ludwig.constants import (BFILL, CHECKSUM, COLUMN, DEFAULTS, DROP_ROW, + ENCODER, FFILL, FILL_WITH_CONST, FILL_WITH_FALSE, + FILL_WITH_MEAN, FILL_WITH_MODE, FILL_WITH_TRUE, + FULL, META, MIN_DATASET_SPLIT_ROWS, MODEL_ECD, + NAME, NUMBER, PREPROCESSING, PROC_COLUMN, SPLIT, + SRC, TEST, TEXT, TRAINING, TYPE, VALIDATION) from ludwig.data.cache.manager import DatasetCache from ludwig.data.cache.types import wrap -from ludwig.data.concatenate_datasets import concatenate_df, concatenate_files, concatenate_splits +from ludwig.data.concatenate_datasets import (concatenate_df, + concatenate_files, + concatenate_splits) from ludwig.data.dataset.base import Dataset from ludwig.data.prompt import format_input_with_prompt, index_column from ludwig.data.split import get_splitter, split_dataset from ludwig.data.utils import get_input_and_output_features, set_fixed_split from ludwig.datasets import load_dataset_uris from ludwig.features.feature_registries import get_base_type_registry -from ludwig.models.embedder import create_embed_batch_size_evaluator, create_embed_transform_fn +from ludwig.models.embedder import (create_embed_batch_size_evaluator, + create_embed_transform_fn) from ludwig.schema.encoders.utils import get_encoder_cls -from ludwig.types import FeatureConfigDict, ModelConfigDict, PreprocessingConfigDict, TrainingSetMetadataDict +from ludwig.types import (FeatureConfigDict, ModelConfigDict, + PreprocessingConfigDict, TrainingSetMetadataDict) from ludwig.utils import data_utils, strings_utils from ludwig.utils.backward_compatibility import upgrade_metadata -from ludwig.utils.data_utils import ( - CACHEABLE_FORMATS, - CSV_FORMATS, - DATA_TEST_PARQUET_FP, - DATA_TRAIN_HDF5_FP, - DATA_TRAIN_PARQUET_FP, - DATA_VALIDATION_PARQUET_FP, - DATAFRAME_FORMATS, - DICT_FORMATS, - EXCEL_FORMATS, - FEATHER_FORMATS, - figure_data_format, - FWF_FORMATS, - get_split_path, - HDF5_FORMATS, - HTML_FORMATS, - JSON_FORMATS, - JSONL_FORMATS, - ORC_FORMATS, - override_in_memory_flag, - PARQUET_FORMATS, - PICKLE_FORMATS, - read_csv, - read_excel, - read_feather, - read_fwf, - read_html, - read_json, - read_jsonl, - read_orc, - read_parquet, - read_pickle, - read_sas, - read_spss, - read_stata, - read_tsv, - sanitize_column_names, - SAS_FORMATS, - SPSS_FORMATS, - STATA_FORMATS, - TSV_FORMATS, -) +from ludwig.utils.data_utils import (CACHEABLE_FORMATS, CSV_FORMATS, + DATA_TEST_PARQUET_FP, DATA_TRAIN_HDF5_FP, + DATA_TRAIN_PARQUET_FP, + DATA_VALIDATION_PARQUET_FP, + DATAFRAME_FORMATS, DICT_FORMATS, + EXCEL_FORMATS, FEATHER_FORMATS, + FWF_FORMATS, HDF5_FORMATS, HTML_FORMATS, + JSON_FORMATS, JSONL_FORMATS, ORC_FORMATS, + PARQUET_FORMATS, PICKLE_FORMATS, + SAS_FORMATS, SPSS_FORMATS, STATA_FORMATS, + TSV_FORMATS, figure_data_format, + get_split_path, override_in_memory_flag, + read_csv, read_excel, read_feather, + read_fwf, read_html, read_json, + read_jsonl, read_orc, read_parquet, + read_pickle, read_sas, read_spss, + read_stata, read_tsv, + sanitize_column_names) from ludwig.utils.dataframe_utils import is_dask_series_or_df -from ludwig.utils.defaults import ( - default_prediction_preprocessing_parameters, - default_random_seed, - default_training_preprocessing_parameters, -) +from ludwig.utils.defaults import (default_prediction_preprocessing_parameters, + default_random_seed, + default_training_preprocessing_parameters) from ludwig.utils.fs_utils import file_lock, path_exists from ludwig.utils.misc_utils import get_from_registry, merge_dict from ludwig.utils.types import DataFrame, Series diff --git a/ludwig/data/prompt.py b/ludwig/data/prompt.py index 205d6706fb1..a22b462a428 100644 --- a/ludwig/data/prompt.py +++ b/ludwig/data/prompt.py @@ -2,15 +2,17 @@ import logging import os import string -from typing import Any, Dict, List, Optional, Set, Tuple, Type, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Type import pandas as pd if TYPE_CHECKING: from ludwig.backend.base import Backend -from ludwig.models.retrieval import df_checksum, get_retrieval_model, RetrievalModel -from ludwig.utils.fs_utils import get_default_cache_location, makedirs, path_exists +from ludwig.models.retrieval import (RetrievalModel, df_checksum, + get_retrieval_model) +from ludwig.utils.fs_utils import (get_default_cache_location, makedirs, + path_exists) from ludwig.utils.types import DataFrame, Series logger = logging.getLogger(__name__) diff --git a/ludwig/data/split.py b/ludwig/data/split.py index ce8cea95c2b..364bbe5b48a 100644 --- a/ludwig/data/split.py +++ b/ludwig/data/split.py @@ -15,7 +15,7 @@ import logging from abc import ABC, abstractmethod -from typing import List, Optional, Tuple, TYPE_CHECKING +from typing import TYPE_CHECKING, List, Optional, Tuple from zlib import crc32 import numpy as np @@ -23,15 +23,12 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.backend.base import Backend -from ludwig.constants import BINARY, CATEGORY, DATE, MIN_DATASET_SPLIT_ROWS, SPLIT +from ludwig.constants import (BINARY, CATEGORY, DATE, MIN_DATASET_SPLIT_ROWS, + SPLIT) from ludwig.error import ConfigValidationError -from ludwig.schema.split import ( - DateTimeSplitConfig, - FixedSplitConfig, - HashSplitConfig, - RandomSplitConfig, - StratifySplitConfig, -) +from ludwig.schema.split import (DateTimeSplitConfig, FixedSplitConfig, + HashSplitConfig, RandomSplitConfig, + StratifySplitConfig) from ludwig.types import ModelConfigDict, PreprocessingConfigDict from ludwig.utils.data_utils import hash_dict, split_dataset_ttv from ludwig.utils.defaults import default_random_seed diff --git a/ludwig/datasets/__init__.py b/ludwig/datasets/__init__.py index dbe45169376..12867c1838c 100644 --- a/ludwig/datasets/__init__.py +++ b/ludwig/datasets/__init__.py @@ -11,12 +11,12 @@ from ludwig.api_annotations import DeveloperAPI, PublicAPI from ludwig.backend.base import Backend -from ludwig.constants import AUDIO, BINARY, CATEGORY, IMAGE, NUMBER, TEST, TEXT, TRAIN, TYPE, VALIDATION +from ludwig.constants import (AUDIO, BINARY, CATEGORY, IMAGE, NUMBER, TEST, + TEXT, TRAIN, TYPE, VALIDATION) from ludwig.data.cache.types import CacheableDataframe from ludwig.datasets import configs from ludwig.datasets.dataset_config import DatasetConfig from ludwig.datasets.loaders.dataset_loader import DatasetLoader - # PublicAPI from ludwig.datasets.utils import model_configs_for_dataset # noqa from ludwig.globals import LUDWIG_VERSION diff --git a/ludwig/decoders/generic_decoders.py b/ludwig/decoders/generic_decoders.py index ac5e971a8fe..7dfc3b7ea94 100644 --- a/ludwig/decoders/generic_decoders.py +++ b/ludwig/decoders/generic_decoders.py @@ -19,10 +19,13 @@ import torch from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import BINARY, CATEGORY, CATEGORY_DISTRIBUTION, LOSS, NUMBER, SET, TIMESERIES, TYPE, VECTOR +from ludwig.constants import (BINARY, CATEGORY, CATEGORY_DISTRIBUTION, LOSS, + NUMBER, SET, TIMESERIES, TYPE, VECTOR) from ludwig.decoders.base import Decoder from ludwig.decoders.registry import register_decoder -from ludwig.schema.decoders.base import ClassifierConfig, PassthroughDecoderConfig, ProjectorConfig, RegressorConfig +from ludwig.schema.decoders.base import (ClassifierConfig, + PassthroughDecoderConfig, + ProjectorConfig, RegressorConfig) from ludwig.utils.torch_utils import Dense, get_activation logger = logging.getLogger(__name__) diff --git a/ludwig/decoders/image_decoders.py b/ludwig/decoders/image_decoders.py index aad1f2dd613..73e59594fa6 100644 --- a/ludwig/decoders/image_decoders.py +++ b/ludwig/decoders/image_decoders.py @@ -19,11 +19,13 @@ import torch from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ENCODER_OUTPUT_STATE, HIDDEN, IMAGE, LOGITS, PREDICTIONS +from ludwig.constants import (ENCODER_OUTPUT_STATE, HIDDEN, IMAGE, LOGITS, + PREDICTIONS) from ludwig.decoders.base import Decoder from ludwig.decoders.registry import register_decoder from ludwig.modules.convolutional_modules import UNetUpStack -from ludwig.schema.decoders.image_decoders import ImageDecoderConfig, UNetDecoderConfig +from ludwig.schema.decoders.image_decoders import (ImageDecoderConfig, + UNetDecoderConfig) logger = logging.getLogger(__name__) diff --git a/ludwig/decoders/llm_decoders.py b/ludwig/decoders/llm_decoders.py index 5763f5a5868..cac584a04b2 100644 --- a/ludwig/decoders/llm_decoders.py +++ b/ludwig/decoders/llm_decoders.py @@ -10,7 +10,8 @@ from ludwig.decoders.base import Decoder from ludwig.decoders.registry import register_decoder from ludwig.decoders.utils import extract_generated_tokens -from ludwig.schema.decoders.llm_decoders import CategoryExtractorDecoderConfig, TextExtractorDecoderConfig +from ludwig.schema.decoders.llm_decoders import ( + CategoryExtractorDecoderConfig, TextExtractorDecoderConfig) from ludwig.utils.strings_utils import get_tokenizer logger = logging.getLogger(__name__) diff --git a/ludwig/decoders/sequence_decoders.py b/ludwig/decoders/sequence_decoders.py index 33fc8b40141..01be9b7bd29 100644 --- a/ludwig/decoders/sequence_decoders.py +++ b/ludwig/decoders/sequence_decoders.py @@ -22,9 +22,11 @@ from ludwig.constants import LOGITS, PREDICTIONS, PROBABILITIES, SEQUENCE, TEXT from ludwig.decoders.base import Decoder from ludwig.decoders.registry import register_decoder -from ludwig.decoders.sequence_decoder_utils import get_lstm_init_state, get_rnn_init_state +from ludwig.decoders.sequence_decoder_utils import (get_lstm_init_state, + get_rnn_init_state) from ludwig.modules.reduction_modules import SequenceReducer -from ludwig.schema.decoders.sequence_decoders import SequenceGeneratorDecoderConfig +from ludwig.schema.decoders.sequence_decoders import \ + SequenceGeneratorDecoderConfig from ludwig.utils import strings_utils logger = logging.getLogger(__name__) diff --git a/ludwig/decoders/sequence_tagger.py b/ludwig/decoders/sequence_tagger.py index 78fabbe9e4c..96bbe53bb84 100644 --- a/ludwig/decoders/sequence_tagger.py +++ b/ludwig/decoders/sequence_tagger.py @@ -4,11 +4,13 @@ import torch from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import HIDDEN, LOGITS, PREDICTIONS, PROBABILITIES, SEQUENCE, TEXT +from ludwig.constants import (HIDDEN, LOGITS, PREDICTIONS, PROBABILITIES, + SEQUENCE, TEXT) from ludwig.decoders.base import Decoder from ludwig.decoders.registry import register_decoder from ludwig.modules.attention_modules import MultiHeadSelfAttention -from ludwig.schema.decoders.sequence_decoders import SequenceTaggerDecoderConfig +from ludwig.schema.decoders.sequence_decoders import \ + SequenceTaggerDecoderConfig from ludwig.utils.torch_utils import Dense logger = logging.getLogger(__name__) diff --git a/ludwig/distributed/base.py b/ludwig/distributed/base.py index 3649c9e30c0..915b41b34fb 100644 --- a/ludwig/distributed/base.py +++ b/ludwig/distributed/base.py @@ -2,7 +2,7 @@ import contextlib from abc import ABC, abstractmethod -from typing import Any, Callable, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Callable import torch from torch import nn diff --git a/ludwig/distributed/ddp.py b/ludwig/distributed/ddp.py index a70d308fea3..6f83d7abf8b 100644 --- a/ludwig/distributed/ddp.py +++ b/ludwig/distributed/ddp.py @@ -2,7 +2,8 @@ import logging import os import socket -from typing import Any, Callable, Dict, Optional, Tuple, Type, TYPE_CHECKING, Union +from typing import (TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Type, + Union) import torch import torch.distributed as dist diff --git a/ludwig/distributed/deepspeed.py b/ludwig/distributed/deepspeed.py index a5677f66538..7717d6f19b1 100644 --- a/ludwig/distributed/deepspeed.py +++ b/ludwig/distributed/deepspeed.py @@ -1,12 +1,14 @@ import logging import os import warnings -from typing import Any, Dict, List, Mapping, Optional, Tuple, TYPE_CHECKING, Union +from typing import (TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple, + Union) import deepspeed import deepspeed.comm import torch -from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint +from deepspeed.utils.zero_to_fp32 import \ + get_fp32_state_dict_from_zero_checkpoint from packaging import version from torch import nn from torch.optim.optimizer import Optimizer diff --git a/ludwig/distributed/fsdp.py b/ludwig/distributed/fsdp.py index 368e22df7fc..d078a34f343 100644 --- a/ludwig/distributed/fsdp.py +++ b/ludwig/distributed/fsdp.py @@ -1,5 +1,5 @@ import logging -from typing import Optional, Tuple, TYPE_CHECKING +from typing import TYPE_CHECKING, Optional, Tuple import torch from torch import nn diff --git a/ludwig/distributed/horovod.py b/ludwig/distributed/horovod.py index 80ea4f784cc..847d08dc083 100644 --- a/ludwig/distributed/horovod.py +++ b/ludwig/distributed/horovod.py @@ -1,6 +1,7 @@ import contextlib import logging -from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TYPE_CHECKING +from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, + Type) import horovod.torch as hvd import ray @@ -16,7 +17,8 @@ from ludwig.constants import AUTO from ludwig.distributed.base import DistributedStrategy from ludwig.modules.optimization_modules import create_optimizer -from ludwig.utils.horovod_utils import gather_all_tensors, is_distributed_available +from ludwig.utils.horovod_utils import (gather_all_tensors, + is_distributed_available) if TYPE_CHECKING: from ludwig.schema.trainer import ECDTrainerConfig diff --git a/ludwig/encoders/category_encoders.py b/ludwig/encoders/category_encoders.py index 2a41ccfce5d..43a6d8ecae8 100644 --- a/ludwig/encoders/category_encoders.py +++ b/ludwig/encoders/category_encoders.py @@ -27,11 +27,8 @@ from ludwig.modules.embedding_modules import Embed from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.category_encoders import ( - CategoricalEmbedConfig, - CategoricalOneHotEncoderConfig, - CategoricalPassthroughEncoderConfig, - CategoricalSparseConfig, -) + CategoricalEmbedConfig, CategoricalOneHotEncoderConfig, + CategoricalPassthroughEncoderConfig, CategoricalSparseConfig) logger = logging.getLogger(__name__) diff --git a/ludwig/encoders/date_encoders.py b/ludwig/encoders/date_encoders.py index fa5b6329454..de985638416 100644 --- a/ludwig/encoders/date_encoders.py +++ b/ludwig/encoders/date_encoders.py @@ -26,7 +26,8 @@ from ludwig.modules.embedding_modules import Embed from ludwig.modules.fully_connected_modules import FCStack from ludwig.schema.encoders.base import BaseEncoderConfig -from ludwig.schema.encoders.date_encoders import DateEmbedConfig, DateWaveConfig +from ludwig.schema.encoders.date_encoders import (DateEmbedConfig, + DateWaveConfig) from ludwig.utils import torch_utils logger = logging.getLogger(__name__) diff --git a/ludwig/encoders/generic_encoders.py b/ludwig/encoders/generic_encoders.py index a92e2580463..41b300d506b 100644 --- a/ludwig/encoders/generic_encoders.py +++ b/ludwig/encoders/generic_encoders.py @@ -19,12 +19,14 @@ import torch from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import BINARY, ENCODER_OUTPUT, NUMBER, TEXT, TIMESERIES, VECTOR +from ludwig.constants import (BINARY, ENCODER_OUTPUT, NUMBER, TEXT, TIMESERIES, + VECTOR) from ludwig.encoders.base import Encoder from ludwig.encoders.registry import register_encoder from ludwig.encoders.types import EncoderOutputDict from ludwig.modules.fully_connected_modules import FCStack -from ludwig.schema.encoders.base import BaseEncoderConfig, DenseEncoderConfig, PassthroughEncoderConfig +from ludwig.schema.encoders.base import (BaseEncoderConfig, DenseEncoderConfig, + PassthroughEncoderConfig) logger = logging.getLogger(__name__) diff --git a/ludwig/encoders/h3_encoders.py b/ludwig/encoders/h3_encoders.py index 4eb766c512d..6c5965f4149 100644 --- a/ludwig/encoders/h3_encoders.py +++ b/ludwig/encoders/h3_encoders.py @@ -29,7 +29,8 @@ from ludwig.modules.recurrent_modules import RecurrentStack from ludwig.modules.reduction_modules import SequenceReducer from ludwig.schema.encoders.base import BaseEncoderConfig -from ludwig.schema.encoders.h3_encoders import H3EmbedConfig, H3RNNConfig, H3WeightedSumConfig +from ludwig.schema.encoders.h3_encoders import (H3EmbedConfig, H3RNNConfig, + H3WeightedSumConfig) from ludwig.utils import torch_utils logger = logging.getLogger(__name__) diff --git a/ludwig/encoders/image/base.py b/ludwig/encoders/image/base.py index 1e3bc5ca580..27a9a7e37ed 100644 --- a/ludwig/encoders/image/base.py +++ b/ludwig/encoders/image/base.py @@ -23,17 +23,14 @@ from ludwig.encoders.base import Encoder from ludwig.encoders.registry import register_encoder from ludwig.encoders.types import EncoderOutputDict -from ludwig.modules.convolutional_modules import Conv2DStack, ResNet, UNetDownStack +from ludwig.modules.convolutional_modules import (Conv2DStack, ResNet, + UNetDownStack) from ludwig.modules.fully_connected_modules import FCStack from ludwig.modules.mlp_mixer_modules import MLPMixer -from ludwig.schema.encoders.image.base import ( - ImageEncoderConfig, - MLPMixerConfig, - ResNetConfig, - Stacked2DCNNConfig, - UNetEncoderConfig, - ViTConfig, -) +from ludwig.schema.encoders.image.base import (ImageEncoderConfig, + MLPMixerConfig, ResNetConfig, + Stacked2DCNNConfig, + UNetEncoderConfig, ViTConfig) from ludwig.utils.torch_utils import FreezeModule logger = logging.getLogger(__name__) diff --git a/ludwig/encoders/image/torchvision.py b/ludwig/encoders/image/torchvision.py index dbebff2f6ec..b8cfbf7ffbf 100644 --- a/ludwig/encoders/image/torchvision.py +++ b/ludwig/encoders/image/torchvision.py @@ -13,27 +13,17 @@ from ludwig.encoders.types import EncoderOutputDict from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.image.torchvision import ( - TVAlexNetEncoderConfig, - TVConvNeXtEncoderConfig, - TVDenseNetEncoderConfig, - TVEfficientNetEncoderConfig, - TVGoogLeNetEncoderConfig, - TVInceptionV3EncoderConfig, - TVMaxVitEncoderConfig, - TVMNASNetEncoderConfig, - TVMobileNetV2EncoderConfig, - TVMobileNetV3EncoderConfig, - TVRegNetEncoderConfig, - TVResNetEncoderConfig, - TVResNeXtEncoderConfig, - TVShuffleNetV2EncoderConfig, - TVSqueezeNetEncoderConfig, - TVSwinTransformerEncoderConfig, - TVVGGEncoderConfig, - TVViTEncoderConfig, - TVWideResNetEncoderConfig, -) -from ludwig.utils.image_utils import register_torchvision_model_variants, torchvision_model_registry, TVModelVariant + TVAlexNetEncoderConfig, TVConvNeXtEncoderConfig, TVDenseNetEncoderConfig, + TVEfficientNetEncoderConfig, TVGoogLeNetEncoderConfig, + TVInceptionV3EncoderConfig, TVMaxVitEncoderConfig, TVMNASNetEncoderConfig, + TVMobileNetV2EncoderConfig, TVMobileNetV3EncoderConfig, + TVRegNetEncoderConfig, TVResNetEncoderConfig, TVResNeXtEncoderConfig, + TVShuffleNetV2EncoderConfig, TVSqueezeNetEncoderConfig, + TVSwinTransformerEncoderConfig, TVVGGEncoderConfig, TVViTEncoderConfig, + TVWideResNetEncoderConfig) +from ludwig.utils.image_utils import (TVModelVariant, + register_torchvision_model_variants, + torchvision_model_registry) logger = logging.getLogger(__name__) diff --git a/ludwig/encoders/sequence_encoders.py b/ludwig/encoders/sequence_encoders.py index b5de6177412..225cbb3f79c 100644 --- a/ludwig/encoders/sequence_encoders.py +++ b/ludwig/encoders/sequence_encoders.py @@ -20,27 +20,24 @@ from torch import nn from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import AUDIO, ENCODER_OUTPUT, ENCODER_OUTPUT_STATE, SEQUENCE, TEXT, TIMESERIES +from ludwig.constants import (AUDIO, ENCODER_OUTPUT, ENCODER_OUTPUT_STATE, + SEQUENCE, TEXT, TIMESERIES) from ludwig.encoders.base import Encoder -from ludwig.encoders.registry import register_encoder, register_sequence_encoder +from ludwig.encoders.registry import (register_encoder, + register_sequence_encoder) from ludwig.encoders.types import EncoderOutputDict from ludwig.modules.attention_modules import TransformerStack -from ludwig.modules.convolutional_modules import Conv1DStack, ParallelConv1D, ParallelConv1DStack -from ludwig.modules.embedding_modules import EmbedSequence, TokenAndPositionEmbedding +from ludwig.modules.convolutional_modules import (Conv1DStack, ParallelConv1D, + ParallelConv1DStack) +from ludwig.modules.embedding_modules import (EmbedSequence, + TokenAndPositionEmbedding) from ludwig.modules.fully_connected_modules import FCStack from ludwig.modules.recurrent_modules import RecurrentStack from ludwig.modules.reduction_modules import SequenceReducer from ludwig.schema.encoders.sequence_encoders import ( - ParallelCNNConfig, - SequenceEmbedConfig, - SequenceEncoderConfig, - SequencePassthroughConfig, - StackedCNNConfig, - StackedCNNRNNConfig, - StackedParallelCNNConfig, - StackedRNNConfig, - StackedTransformerConfig, -) + ParallelCNNConfig, SequenceEmbedConfig, SequenceEncoderConfig, + SequencePassthroughConfig, StackedCNNConfig, StackedCNNRNNConfig, + StackedParallelCNNConfig, StackedRNNConfig, StackedTransformerConfig) logger = logging.getLogger(__name__) diff --git a/ludwig/encoders/text_encoders.py b/ludwig/encoders/text_encoders.py index f34e500b237..ab8189d1922 100644 --- a/ludwig/encoders/text_encoders.py +++ b/ludwig/encoders/text_encoders.py @@ -16,7 +16,8 @@ import contextlib import inspect import logging -from typing import Any, Callable, Dict, List, Optional, Type, TYPE_CHECKING, TypeVar, Union +from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Optional, Type, + TypeVar, Union) import numpy as np import torch @@ -31,33 +32,25 @@ from ludwig.modules.reduction_modules import SequenceReducer from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.sequence_encoders import SequenceEncoderConfig -from ludwig.schema.encoders.text_encoders import ( - ALBERTConfig, - AutoTransformerConfig, - BERTConfig, - CamemBERTConfig, - CTRLConfig, - DebertaV2Config, - DistilBERTConfig, - ELECTRAConfig, - FlauBERTConfig, - GPT2Config, - GPTConfig, - LLMEncoderConfig, - LongformerConfig, - MT5Config, - RoBERTaConfig, - T5Config, - TfIdfEncoderConfig, - TransformerXLConfig, - XLMConfig, - XLMRoBERTaConfig, - XLNetConfig, -) +from ludwig.schema.encoders.text_encoders import (ALBERTConfig, + AutoTransformerConfig, + BERTConfig, CamemBERTConfig, + CTRLConfig, DebertaV2Config, + DistilBERTConfig, + ELECTRAConfig, + FlauBERTConfig, GPT2Config, + GPTConfig, LLMEncoderConfig, + LongformerConfig, MT5Config, + RoBERTaConfig, T5Config, + TfIdfEncoderConfig, + TransformerXLConfig, + XLMConfig, XLMRoBERTaConfig, + XLNetConfig) from ludwig.schema.llms.peft import BaseAdapterConfig from ludwig.utils.data_utils import clear_data_cache from ludwig.utils.hf_utils import load_pretrained_hf_model_with_hub_fallback -from ludwig.utils.llm_utils import get_context_len, initialize_adapter, load_pretrained_from_config +from ludwig.utils.llm_utils import (get_context_len, initialize_adapter, + load_pretrained_from_config) from ludwig.utils.tokenizers import HFTokenizer from ludwig.utils.torch_utils import FreezeModule diff --git a/ludwig/experiment.py b/ludwig/experiment.py index 7615a4f9783..0c0be84e156 100644 --- a/ludwig/experiment.py +++ b/ludwig/experiment.py @@ -21,15 +21,17 @@ import pandas as pd -from ludwig.api import kfold_cross_validate, LudwigModel +from ludwig.api import LudwigModel, kfold_cross_validate from ludwig.backend import ALL_BACKENDS, Backend, initialize_backend from ludwig.callbacks import Callback -from ludwig.constants import CONTINUE_PROMPT, FULL, HYPEROPT, HYPEROPT_WARNING, TEST, TRAINING, VALIDATION +from ludwig.constants import (CONTINUE_PROMPT, FULL, HYPEROPT, + HYPEROPT_WARNING, TEST, TRAINING, VALIDATION) from ludwig.contrib import add_contrib_callback_args from ludwig.globals import LUDWIG_VERSION from ludwig.utils.data_utils import load_config_from_str, load_yaml, save_json from ludwig.utils.defaults import default_random_seed -from ludwig.utils.print_utils import get_logging_level_registry, print_ludwig, query_yes_no +from ludwig.utils.print_utils import (get_logging_level_registry, print_ludwig, + query_yes_no) logger = logging.getLogger(__name__) diff --git a/ludwig/explain/captum.py b/ludwig/explain/captum.py index 643d5d2ddc3..3aa5b29d81e 100644 --- a/ludwig/explain/captum.py +++ b/ludwig/explain/captum.py @@ -16,21 +16,9 @@ from ludwig.api import LudwigModel from ludwig.api_annotations import PublicAPI -from ludwig.constants import ( - BINARY, - CATEGORY, - DATE, - IMAGE, - INPUT_FEATURES, - MINIMUM_BATCH_SIZE, - NAME, - NUMBER, - PREPROCESSING, - SEQUENCE, - SET, - TEXT, - UNKNOWN_SYMBOL, -) +from ludwig.constants import (BINARY, CATEGORY, DATE, IMAGE, INPUT_FEATURES, + MINIMUM_BATCH_SIZE, NAME, NUMBER, PREPROCESSING, + SEQUENCE, SET, TEXT, UNKNOWN_SYMBOL) from ludwig.data.preprocessing import preprocess_for_prediction from ludwig.explain.explainer import Explainer from ludwig.explain.explanation import ExplanationsResult diff --git a/ludwig/explain/captum_ray.py b/ludwig/explain/captum_ray.py index 21d15db3815..639ee74305f 100644 --- a/ludwig/explain/captum_ray.py +++ b/ludwig/explain/captum_ray.py @@ -9,14 +9,10 @@ from ludwig.api import LudwigModel from ludwig.api_annotations import PublicAPI -from ludwig.explain.captum import ( - ExplanationRunConfig, - get_baseline, - get_input_tensors, - get_total_attribution, - IntegratedGradientsExplainer, - retry_with_halved_batch_size, -) +from ludwig.explain.captum import (ExplanationRunConfig, + IntegratedGradientsExplainer, get_baseline, + get_input_tensors, get_total_attribution, + retry_with_halved_batch_size) from ludwig.explain.explanation import ExplanationsResult from ludwig.features.feature_utils import LudwigFeatureDict from ludwig.utils.torch_utils import get_torch_device diff --git a/ludwig/export.py b/ludwig/export.py index 60622650a1f..a8633094633 100644 --- a/ludwig/export.py +++ b/ludwig/export.py @@ -23,7 +23,8 @@ from ludwig.contrib import add_contrib_callback_args from ludwig.globals import LUDWIG_VERSION from ludwig.utils.carton_utils import export_carton as utils_export_carton -from ludwig.utils.neuropod_utils import export_neuropod as utils_export_neuropod +from ludwig.utils.neuropod_utils import \ + export_neuropod as utils_export_neuropod from ludwig.utils.print_utils import get_logging_level_registry, print_ludwig from ludwig.utils.triton_utils import export_triton as utils_export_triton diff --git a/ludwig/features/audio_feature.py b/ludwig/features/audio_feature.py index a9ce7a2fd0a..fdce28a7635 100644 --- a/ludwig/features/audio_feature.py +++ b/ludwig/features/audio_feature.py @@ -22,26 +22,22 @@ import torchaudio from packaging import version -from ludwig.constants import AUDIO, AUDIO_FEATURE_KEYS, COLUMN, NAME, PREPROCESSING, PROC_COLUMN, SRC, TYPE +from ludwig.constants import (AUDIO, AUDIO_FEATURE_KEYS, COLUMN, NAME, + PREPROCESSING, PROC_COLUMN, SRC, TYPE) from ludwig.features.base_feature import BaseFeatureMixin from ludwig.features.sequence_feature import SequenceInputFeature from ludwig.schema.features.audio_feature import AudioInputFeatureConfig -from ludwig.types import FeatureMetadataDict, ModelConfigDict, PreprocessingConfigDict, TrainingSetMetadataDict -from ludwig.utils.audio_utils import ( - calculate_mean, - calculate_var, - get_default_audio, - get_fbank, - get_group_delay, - get_length_in_samp, - get_max_length_stft_based, - get_non_symmetric_length, - get_phase_stft_magnitude, - get_stft_magnitude, - is_torch_audio_tuple, - read_audio_from_bytes_obj, - read_audio_from_path, -) +from ludwig.types import (FeatureMetadataDict, ModelConfigDict, + PreprocessingConfigDict, TrainingSetMetadataDict) +from ludwig.utils.audio_utils import (calculate_mean, calculate_var, + get_default_audio, get_fbank, + get_group_delay, get_length_in_samp, + get_max_length_stft_based, + get_non_symmetric_length, + get_phase_stft_magnitude, + get_stft_magnitude, is_torch_audio_tuple, + read_audio_from_bytes_obj, + read_audio_from_path) from ludwig.utils.data_utils import get_abs_path from ludwig.utils.fs_utils import has_remote_protocol from ludwig.utils.misc_utils import set_default_value diff --git a/ludwig/features/bag_feature.py b/ludwig/features/bag_feature.py index 2f963a46fb2..91c8d70e111 100644 --- a/ludwig/features/bag_feature.py +++ b/ludwig/features/bag_feature.py @@ -24,7 +24,8 @@ from ludwig.features.feature_utils import set_str_to_idx from ludwig.features.set_feature import _SetPreprocessing from ludwig.schema.features.bag_feature import BagInputFeatureConfig -from ludwig.types import FeatureMetadataDict, ModelConfigDict, PreprocessingConfigDict, TrainingSetMetadataDict +from ludwig.types import (FeatureMetadataDict, ModelConfigDict, + PreprocessingConfigDict, TrainingSetMetadataDict) from ludwig.utils.strings_utils import create_vocabulary logger = logging.getLogger(__name__) diff --git a/ludwig/features/base_feature.py b/ludwig/features/base_feature.py index 94b29b558a1..1d210582442 100644 --- a/ludwig/features/base_feature.py +++ b/ludwig/features/base_feature.py @@ -20,32 +20,23 @@ import torch from torch import Tensor -from ludwig.constants import ( - ENCODER_OUTPUT, - ENCODER_OUTPUT_STATE, - HIDDEN, - LENGTHS, - LOGITS, - LOSS, - PREDICTIONS, - PROBABILITIES, -) +from ludwig.constants import (ENCODER_OUTPUT, ENCODER_OUTPUT_STATE, HIDDEN, + LENGTHS, LOGITS, LOSS, PREDICTIONS, + PROBABILITIES) from ludwig.decoders.registry import get_decoder_cls from ludwig.encoders.registry import get_encoder_cls from ludwig.features.feature_utils import get_input_size_with_dependencies from ludwig.modules.fully_connected_modules import FCStack from ludwig.modules.loss_modules import create_loss from ludwig.modules.metric_modules import LossMetric, LudwigMetric, MeanMetric -from ludwig.modules.metric_registry import get_metric_classes, get_metric_cls, get_metric_tensor_input +from ludwig.modules.metric_registry import (get_metric_classes, get_metric_cls, + get_metric_tensor_input) from ludwig.modules.reduction_modules import SequenceReducer -from ludwig.schema.features.base import BaseFeatureConfig, BaseOutputFeatureConfig -from ludwig.types import ( - FeatureConfigDict, - FeatureMetadataDict, - ModelConfigDict, - PreprocessingConfigDict, - TrainingSetMetadataDict, -) +from ludwig.schema.features.base import (BaseFeatureConfig, + BaseOutputFeatureConfig) +from ludwig.types import (FeatureConfigDict, FeatureMetadataDict, + ModelConfigDict, PreprocessingConfigDict, + TrainingSetMetadataDict) from ludwig.utils import output_feature_utils from ludwig.utils.calibration import CalibrationModule from ludwig.utils.torch_utils import LudwigModule diff --git a/ludwig/features/binary_feature.py b/ludwig/features/binary_feature.py index ed6c8264ef1..f445ef84291 100644 --- a/ludwig/features/binary_feature.py +++ b/ludwig/features/binary_feature.py @@ -19,26 +19,21 @@ import numpy as np import torch -from ludwig.constants import BINARY, COLUMN, HIDDEN, LOGITS, NAME, PREDICTIONS, PROBABILITIES, PROBABILITY, PROC_COLUMN +from ludwig.constants import (BINARY, COLUMN, HIDDEN, LOGITS, NAME, + PREDICTIONS, PROBABILITIES, PROBABILITY, + PROC_COLUMN) from ludwig.error import InputDataError -from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule -from ludwig.schema.features.binary_feature import BinaryInputFeatureConfig, BinaryOutputFeatureConfig -from ludwig.types import ( - FeatureConfigDict, - FeatureMetadataDict, - FeaturePostProcessingOutputDict, - ModelConfigDict, - PreprocessingConfigDict, - TrainingSetMetadataDict, -) +from ludwig.features.base_feature import (BaseFeatureMixin, InputFeature, + OutputFeature, PredictModule) +from ludwig.schema.features.binary_feature import (BinaryInputFeatureConfig, + BinaryOutputFeatureConfig) +from ludwig.types import (FeatureConfigDict, FeatureMetadataDict, + FeaturePostProcessingOutputDict, ModelConfigDict, + PreprocessingConfigDict, TrainingSetMetadataDict) from ludwig.utils import calibration, output_feature_utils, strings_utils -from ludwig.utils.eval_utils import ( - average_precision_score, - ConfusionMatrix, - precision_recall_curve, - roc_auc_score, - roc_curve, -) +from ludwig.utils.eval_utils import (ConfusionMatrix, average_precision_score, + precision_recall_curve, roc_auc_score, + roc_curve) from ludwig.utils.types import DataFrame, TorchscriptPreprocessingInput logger = logging.getLogger(__name__) diff --git a/ludwig/features/category_feature.py b/ludwig/features/category_feature.py index b1b912c0e2a..9e2da1e1885 100644 --- a/ludwig/features/category_feature.py +++ b/ludwig/features/category_feature.py @@ -19,40 +19,26 @@ import numpy as np import torch -from ludwig.constants import ( - CATEGORY, - CATEGORY_DISTRIBUTION, - COLUMN, - HIDDEN, - LOGITS, - NAME, - PREDICTIONS, - PREPROCESSING, - PROBABILITIES, - PROBABILITY, - PROC_COLUMN, - PROJECTION_INPUT, -) +from ludwig.constants import (CATEGORY, CATEGORY_DISTRIBUTION, COLUMN, HIDDEN, + LOGITS, NAME, PREDICTIONS, PREPROCESSING, + PROBABILITIES, PROBABILITY, PROC_COLUMN, + PROJECTION_INPUT) from ludwig.error import InputDataError -from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule +from ludwig.features.base_feature import (BaseFeatureMixin, InputFeature, + OutputFeature, PredictModule) from ludwig.features.vector_feature import VectorFeatureMixin from ludwig.schema.features.category_feature import ( - CategoryDistributionOutputFeatureConfig, - CategoryInputFeatureConfig, - CategoryOutputFeatureConfig, -) + CategoryDistributionOutputFeatureConfig, CategoryInputFeatureConfig, + CategoryOutputFeatureConfig) from ludwig.schema.features.loss.loss import CORNLossConfig -from ludwig.types import ( - FeatureMetadataDict, - FeaturePostProcessingOutputDict, - ModelConfigDict, - PreprocessingConfigDict, - TrainingSetMetadataDict, -) +from ludwig.types import (FeatureMetadataDict, FeaturePostProcessingOutputDict, + ModelConfigDict, PreprocessingConfigDict, + TrainingSetMetadataDict) from ludwig.utils import calibration, output_feature_utils from ludwig.utils.eval_utils import ConfusionMatrix from ludwig.utils.math_utils import int_type, softmax -from ludwig.utils.strings_utils import create_vocabulary_single_token, UNKNOWN_SYMBOL +from ludwig.utils.strings_utils import (UNKNOWN_SYMBOL, + create_vocabulary_single_token) from ludwig.utils.types import TorchscriptPreprocessingInput logger = logging.getLogger(__name__) diff --git a/ludwig/features/date_feature.py b/ludwig/features/date_feature.py index aa6712992f5..a3f837cc062 100644 --- a/ludwig/features/date_feature.py +++ b/ludwig/features/date_feature.py @@ -23,14 +23,11 @@ from ludwig.constants import COLUMN, DATE, PROC_COLUMN from ludwig.features.base_feature import BaseFeatureMixin, InputFeature from ludwig.schema.features.date_feature import DateInputFeatureConfig -from ludwig.types import ( - FeatureConfigDict, - FeatureMetadataDict, - ModelConfigDict, - PreprocessingConfigDict, - TrainingSetMetadataDict, -) -from ludwig.utils.date_utils import create_vector_from_datetime_obj, parse_datetime +from ludwig.types import (FeatureConfigDict, FeatureMetadataDict, + ModelConfigDict, PreprocessingConfigDict, + TrainingSetMetadataDict) +from ludwig.utils.date_utils import (create_vector_from_datetime_obj, + parse_datetime) from ludwig.utils.types import DataFrame, TorchscriptPreprocessingInput logger = logging.getLogger(__name__) diff --git a/ludwig/features/feature_registries.py b/ludwig/features/feature_registries.py index 2a738a2979f..e7842720bf7 100644 --- a/ludwig/features/feature_registries.py +++ b/ludwig/features/feature_registries.py @@ -12,44 +12,41 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -from typing import Any, Dict, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Dict from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ( - AUDIO, - BAG, - BINARY, - CATEGORY, - CATEGORY_DISTRIBUTION, - DATE, - H3, - IMAGE, - NUMBER, - SEQUENCE, - SET, - TEXT, - TIMESERIES, - VECTOR, -) +from ludwig.constants import (AUDIO, BAG, BINARY, CATEGORY, + CATEGORY_DISTRIBUTION, DATE, H3, IMAGE, NUMBER, + SEQUENCE, SET, TEXT, TIMESERIES, VECTOR) from ludwig.features.audio_feature import AudioFeatureMixin, AudioInputFeature from ludwig.features.bag_feature import BagFeatureMixin, BagInputFeature -from ludwig.features.binary_feature import BinaryFeatureMixin, BinaryInputFeature, BinaryOutputFeature +from ludwig.features.binary_feature import (BinaryFeatureMixin, + BinaryInputFeature, + BinaryOutputFeature) from ludwig.features.category_feature import ( - CategoryDistributionFeatureMixin, - CategoryDistributionOutputFeature, - CategoryFeatureMixin, - CategoryInputFeature, - CategoryOutputFeature, -) + CategoryDistributionFeatureMixin, CategoryDistributionOutputFeature, + CategoryFeatureMixin, CategoryInputFeature, CategoryOutputFeature) from ludwig.features.date_feature import DateFeatureMixin, DateInputFeature from ludwig.features.h3_feature import H3FeatureMixin, H3InputFeature -from ludwig.features.image_feature import ImageFeatureMixin, ImageInputFeature, ImageOutputFeature -from ludwig.features.number_feature import NumberFeatureMixin, NumberInputFeature, NumberOutputFeature -from ludwig.features.sequence_feature import SequenceFeatureMixin, SequenceInputFeature, SequenceOutputFeature -from ludwig.features.set_feature import SetFeatureMixin, SetInputFeature, SetOutputFeature -from ludwig.features.text_feature import TextFeatureMixin, TextInputFeature, TextOutputFeature -from ludwig.features.timeseries_feature import TimeseriesFeatureMixin, TimeseriesInputFeature, TimeseriesOutputFeature -from ludwig.features.vector_feature import VectorFeatureMixin, VectorInputFeature, VectorOutputFeature +from ludwig.features.image_feature import (ImageFeatureMixin, + ImageInputFeature, + ImageOutputFeature) +from ludwig.features.number_feature import (NumberFeatureMixin, + NumberInputFeature, + NumberOutputFeature) +from ludwig.features.sequence_feature import (SequenceFeatureMixin, + SequenceInputFeature, + SequenceOutputFeature) +from ludwig.features.set_feature import (SetFeatureMixin, SetInputFeature, + SetOutputFeature) +from ludwig.features.text_feature import (TextFeatureMixin, TextInputFeature, + TextOutputFeature) +from ludwig.features.timeseries_feature import (TimeseriesFeatureMixin, + TimeseriesInputFeature, + TimeseriesOutputFeature) +from ludwig.features.vector_feature import (VectorFeatureMixin, + VectorInputFeature, + VectorOutputFeature) from ludwig.utils.misc_utils import get_from_registry if TYPE_CHECKING: diff --git a/ludwig/features/feature_utils.py b/ludwig/features/feature_utils.py index 50834a89cfc..0d730b57df8 100644 --- a/ludwig/features/feature_utils.py +++ b/ludwig/features/feature_utils.py @@ -19,9 +19,11 @@ import numpy as np import torch -from ludwig.constants import NAME, PREPROCESSING, SEQUENCE, TEXT, TIMESERIES, TYPE +from ludwig.constants import (NAME, PREPROCESSING, SEQUENCE, TEXT, TIMESERIES, + TYPE) from ludwig.utils.data_utils import hash_dict -from ludwig.utils.strings_utils import get_tokenizer_from_registry, UNKNOWN_SYMBOL +from ludwig.utils.strings_utils import (UNKNOWN_SYMBOL, + get_tokenizer_from_registry) SEQUENCE_TYPES = {SEQUENCE, TEXT, TIMESERIES} FEATURE_NAME_SUFFIX = "__ludwig" diff --git a/ludwig/features/h3_feature.py b/ludwig/features/h3_feature.py index ab58de22042..9cf90a12b90 100644 --- a/ludwig/features/h3_feature.py +++ b/ludwig/features/h3_feature.py @@ -21,7 +21,8 @@ from ludwig.constants import COLUMN, H3, PROC_COLUMN from ludwig.features.base_feature import BaseFeatureMixin, InputFeature from ludwig.schema.features.h3_feature import H3InputFeatureConfig -from ludwig.types import FeatureMetadataDict, ModelConfigDict, PreprocessingConfigDict, TrainingSetMetadataDict +from ludwig.types import (FeatureMetadataDict, ModelConfigDict, + PreprocessingConfigDict, TrainingSetMetadataDict) from ludwig.utils.h3_util import h3_to_components from ludwig.utils.types import TorchscriptPreprocessingInput diff --git a/ludwig/features/image_feature.py b/ludwig/features/image_feature.py index a1488440f41..756dea36517 100644 --- a/ludwig/features/image_feature.py +++ b/ludwig/features/image_feature.py @@ -27,70 +27,44 @@ from torchvision.transforms import functional as F from torchvision.transforms.functional import normalize -from ludwig.constants import ( - CHECKSUM, - COLUMN, - ENCODER, - HEIGHT, - IMAGE, - IMAGENET1K, - INFER_IMAGE_DIMENSIONS, - INFER_IMAGE_MAX_HEIGHT, - INFER_IMAGE_MAX_WIDTH, - INFER_IMAGE_NUM_CLASSES, - INFER_IMAGE_SAMPLE_SIZE, - LOGITS, - NAME, - NUM_CHANNELS, - PREDICTIONS, - PREPROCESSING, - PROC_COLUMN, - REQUIRES_EQUAL_DIMENSIONS, - SRC, - TRAINING, - TYPE, - WIDTH, -) +from ludwig.constants import (CHECKSUM, COLUMN, ENCODER, HEIGHT, IMAGE, + IMAGENET1K, INFER_IMAGE_DIMENSIONS, + INFER_IMAGE_MAX_HEIGHT, INFER_IMAGE_MAX_WIDTH, + INFER_IMAGE_NUM_CLASSES, INFER_IMAGE_SAMPLE_SIZE, + LOGITS, NAME, NUM_CHANNELS, PREDICTIONS, + PREPROCESSING, PROC_COLUMN, + REQUIRES_EQUAL_DIMENSIONS, SRC, TRAINING, TYPE, + WIDTH) from ludwig.data.cache.types import wrap from ludwig.encoders.base import Encoder from ludwig.encoders.image.torchvision import TVModelVariant -from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule +from ludwig.features.base_feature import (BaseFeatureMixin, InputFeature, + OutputFeature, PredictModule) from ludwig.schema.features.augmentation.base import BaseAugmentationConfig from ludwig.schema.features.augmentation.image import ( - AutoAugmentationConfig, - RandomBlurConfig, - RandomBrightnessConfig, - RandomContrastConfig, - RandomHorizontalFlipConfig, - RandomRotateConfig, - RandomVerticalFlipConfig, -) -from ludwig.schema.features.image_feature import ImageInputFeatureConfig, ImageOutputFeatureConfig -from ludwig.types import ( - FeatureMetadataDict, - FeaturePostProcessingOutputDict, - ModelConfigDict, - PreprocessingConfigDict, - TrainingSetMetadataDict, -) + AutoAugmentationConfig, RandomBlurConfig, RandomBrightnessConfig, + RandomContrastConfig, RandomHorizontalFlipConfig, RandomRotateConfig, + RandomVerticalFlipConfig) +from ludwig.schema.features.image_feature import (ImageInputFeatureConfig, + ImageOutputFeatureConfig) +from ludwig.types import (FeatureMetadataDict, FeaturePostProcessingOutputDict, + ModelConfigDict, PreprocessingConfigDict, + TrainingSetMetadataDict) from ludwig.utils import output_feature_utils -from ludwig.utils.augmentation_utils import get_augmentation_op, register_augmentation_op +from ludwig.utils.augmentation_utils import (get_augmentation_op, + register_augmentation_op) from ludwig.utils.data_utils import get_abs_path from ludwig.utils.dataframe_utils import is_dask_series_or_df from ludwig.utils.fs_utils import has_remote_protocol, upload_h5 -from ludwig.utils.image_utils import ( - get_class_mask_from_image, - get_gray_default_image, - get_image_from_class_mask, - get_unique_channels, - grayscale, - num_channels_in_image, - read_image_from_bytes_obj, - read_image_from_path, - resize_image, - ResizeChannels, - torchvision_model_registry, -) +from ludwig.utils.image_utils import (ResizeChannels, + get_class_mask_from_image, + get_gray_default_image, + get_image_from_class_mask, + get_unique_channels, grayscale, + num_channels_in_image, + read_image_from_bytes_obj, + read_image_from_path, resize_image, + torchvision_model_registry) from ludwig.utils.misc_utils import set_default_value from ludwig.utils.types import Series, TorchscriptPreprocessingInput diff --git a/ludwig/features/number_feature.py b/ludwig/features/number_feature.py index 1b76d700570..d2d883f34c1 100644 --- a/ludwig/features/number_feature.py +++ b/ludwig/features/number_feature.py @@ -23,16 +23,15 @@ import torch from torch import nn -from ludwig.constants import COLUMN, HIDDEN, LOGITS, NAME, NUMBER, PREDICTIONS, PROC_COLUMN -from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule -from ludwig.schema.features.number_feature import NumberInputFeatureConfig, NumberOutputFeatureConfig -from ludwig.types import ( - FeatureMetadataDict, - FeaturePostProcessingOutputDict, - ModelConfigDict, - PreprocessingConfigDict, - TrainingSetMetadataDict, -) +from ludwig.constants import (COLUMN, HIDDEN, LOGITS, NAME, NUMBER, + PREDICTIONS, PROC_COLUMN) +from ludwig.features.base_feature import (BaseFeatureMixin, InputFeature, + OutputFeature, PredictModule) +from ludwig.schema.features.number_feature import (NumberInputFeatureConfig, + NumberOutputFeatureConfig) +from ludwig.types import (FeatureMetadataDict, FeaturePostProcessingOutputDict, + ModelConfigDict, PreprocessingConfigDict, + TrainingSetMetadataDict) from ludwig.utils import output_feature_utils from ludwig.utils.misc_utils import get_from_registry from ludwig.utils.types import TorchscriptPreprocessingInput diff --git a/ludwig/features/sequence_feature.py b/ludwig/features/sequence_feature.py index 6b62b37eb14..56c72b78153 100644 --- a/ludwig/features/sequence_feature.py +++ b/ludwig/features/sequence_feature.py @@ -21,37 +21,24 @@ import numpy as np import torch -from ludwig.constants import ( - COLUMN, - LAST_PREDICTIONS, - LENGTHS, - NAME, - PREDICTIONS, - PROBABILITIES, - PROBABILITY, - PROC_COLUMN, - SEQUENCE, -) -from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule -from ludwig.features.feature_utils import compute_sequence_probability, compute_token_probabilities -from ludwig.schema.features.sequence_feature import SequenceInputFeatureConfig, SequenceOutputFeatureConfig -from ludwig.types import ( - FeatureMetadataDict, - FeaturePostProcessingOutputDict, - ModelConfigDict, - PreprocessingConfigDict, - TrainingSetMetadataDict, -) +from ludwig.constants import (COLUMN, LAST_PREDICTIONS, LENGTHS, NAME, + PREDICTIONS, PROBABILITIES, PROBABILITY, + PROC_COLUMN, SEQUENCE) +from ludwig.features.base_feature import (BaseFeatureMixin, InputFeature, + OutputFeature, PredictModule) +from ludwig.features.feature_utils import (compute_sequence_probability, + compute_token_probabilities) +from ludwig.schema.features.sequence_feature import ( + SequenceInputFeatureConfig, SequenceOutputFeatureConfig) +from ludwig.types import (FeatureMetadataDict, FeaturePostProcessingOutputDict, + ModelConfigDict, PreprocessingConfigDict, + TrainingSetMetadataDict) from ludwig.utils import output_feature_utils from ludwig.utils.math_utils import softmax -from ludwig.utils.strings_utils import ( - build_sequence_matrix, - create_vocabulary, - SpecialSymbol, - START_SYMBOL, - STOP_SYMBOL, - UNKNOWN_SYMBOL, -) +from ludwig.utils.strings_utils import (START_SYMBOL, STOP_SYMBOL, + UNKNOWN_SYMBOL, SpecialSymbol, + build_sequence_matrix, + create_vocabulary) from ludwig.utils.tokenizers import get_tokenizer_from_registry from ludwig.utils.types import TorchscriptPreprocessingInput diff --git a/ludwig/features/set_feature.py b/ludwig/features/set_feature.py index 80bdf910313..3ea9806a697 100644 --- a/ludwig/features/set_feature.py +++ b/ludwig/features/set_feature.py @@ -19,20 +19,20 @@ import numpy as np import torch -from ludwig.constants import COLUMN, HIDDEN, LOGITS, NAME, PREDICTIONS, PROBABILITIES, PROC_COLUMN, SET -from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule +from ludwig.constants import (COLUMN, HIDDEN, LOGITS, NAME, PREDICTIONS, + PROBABILITIES, PROC_COLUMN, SET) +from ludwig.features.base_feature import (BaseFeatureMixin, InputFeature, + OutputFeature, PredictModule) from ludwig.features.feature_utils import set_str_to_idx -from ludwig.schema.features.set_feature import SetInputFeatureConfig, SetOutputFeatureConfig -from ludwig.types import ( - FeatureMetadataDict, - FeaturePostProcessingOutputDict, - ModelConfigDict, - PreprocessingConfigDict, - TrainingSetMetadataDict, -) +from ludwig.schema.features.set_feature import (SetInputFeatureConfig, + SetOutputFeatureConfig) +from ludwig.types import (FeatureMetadataDict, FeaturePostProcessingOutputDict, + ModelConfigDict, PreprocessingConfigDict, + TrainingSetMetadataDict) from ludwig.utils import output_feature_utils -from ludwig.utils.strings_utils import create_vocabulary, UNKNOWN_SYMBOL -from ludwig.utils.tokenizers import get_tokenizer_from_registry, TORCHSCRIPT_COMPATIBLE_TOKENIZERS +from ludwig.utils.strings_utils import UNKNOWN_SYMBOL, create_vocabulary +from ludwig.utils.tokenizers import (TORCHSCRIPT_COMPATIBLE_TOKENIZERS, + get_tokenizer_from_registry) from ludwig.utils.types import TorchscriptPreprocessingInput logger = logging.getLogger(__name__) diff --git a/ludwig/features/text_feature.py b/ludwig/features/text_feature.py index 1056ae820c1..5b3e3238e5c 100644 --- a/ludwig/features/text_feature.py +++ b/ludwig/features/text_feature.py @@ -22,40 +22,26 @@ from torch import Tensor from transformers import PreTrainedTokenizer -from ludwig.constants import ( - COLUMN, - IGNORE_INDEX_TOKEN_ID, - LAST_PREDICTIONS, - LENGTHS, - NAME, - PREDICTIONS, - PREPROCESSING, - PROBABILITIES, - PROBABILITY, - PROC_COLUMN, - RESPONSE, - TEXT, -) +from ludwig.constants import (COLUMN, IGNORE_INDEX_TOKEN_ID, LAST_PREDICTIONS, + LENGTHS, NAME, PREDICTIONS, PREPROCESSING, + PROBABILITIES, PROBABILITY, PROC_COLUMN, + RESPONSE, TEXT) from ludwig.features.base_feature import BaseFeatureMixin, OutputFeature -from ludwig.features.feature_utils import compute_sequence_probability, compute_token_probabilities -from ludwig.features.sequence_feature import ( - _SequencePostprocessing, - _SequencePreprocessing, - SequenceInputFeature, - SequenceOutputFeature, -) +from ludwig.features.feature_utils import (compute_sequence_probability, + compute_token_probabilities) +from ludwig.features.sequence_feature import (SequenceInputFeature, + SequenceOutputFeature, + _SequencePostprocessing, + _SequencePreprocessing) from ludwig.modules.metric_registry import get_metric_tensor_input -from ludwig.schema.features.text_feature import TextInputFeatureConfig, TextOutputFeatureConfig -from ludwig.types import FeatureMetadataDict, ModelConfigDict, PreprocessingConfigDict, TrainingSetMetadataDict +from ludwig.schema.features.text_feature import (TextInputFeatureConfig, + TextOutputFeatureConfig) +from ludwig.types import (FeatureMetadataDict, ModelConfigDict, + PreprocessingConfigDict, TrainingSetMetadataDict) from ludwig.utils.math_utils import softmax -from ludwig.utils.strings_utils import ( - build_sequence_matrix, - create_vocabulary, - get_tokenizer, - SpecialSymbol, - UNKNOWN_SYMBOL, - Vocabulary, -) +from ludwig.utils.strings_utils import (UNKNOWN_SYMBOL, SpecialSymbol, + Vocabulary, build_sequence_matrix, + create_vocabulary, get_tokenizer) logger = logging.getLogger(__name__) diff --git a/ludwig/features/timeseries_feature.py b/ludwig/features/timeseries_feature.py index 45708c95dea..64b0aaf4342 100644 --- a/ludwig/features/timeseries_feature.py +++ b/ludwig/features/timeseries_feature.py @@ -14,18 +14,24 @@ # limitations under the License. # ============================================================================== import logging -from typing import Dict, List, TYPE_CHECKING, Union +from typing import TYPE_CHECKING, Dict, List, Union import numpy as np import torch -from ludwig.constants import COLUMN, HIDDEN, LOGITS, NAME, PREDICTIONS, PROC_COLUMN, TIMESERIES -from ludwig.features.base_feature import BaseFeatureMixin, OutputFeature, PredictModule +from ludwig.constants import (COLUMN, HIDDEN, LOGITS, NAME, PREDICTIONS, + PROC_COLUMN, TIMESERIES) +from ludwig.features.base_feature import (BaseFeatureMixin, OutputFeature, + PredictModule) from ludwig.features.sequence_feature import SequenceInputFeature -from ludwig.features.vector_feature import _VectorPostprocessing, _VectorPredict -from ludwig.schema.features.timeseries_feature import TimeseriesInputFeatureConfig, TimeseriesOutputFeatureConfig -from ludwig.types import FeatureMetadataDict, ModelConfigDict, PreprocessingConfigDict, TrainingSetMetadataDict -from ludwig.utils.tokenizers import get_tokenizer_from_registry, TORCHSCRIPT_COMPATIBLE_TOKENIZERS +from ludwig.features.vector_feature import (_VectorPostprocessing, + _VectorPredict) +from ludwig.schema.features.timeseries_feature import ( + TimeseriesInputFeatureConfig, TimeseriesOutputFeatureConfig) +from ludwig.types import (FeatureMetadataDict, ModelConfigDict, + PreprocessingConfigDict, TrainingSetMetadataDict) +from ludwig.utils.tokenizers import (TORCHSCRIPT_COMPATIBLE_TOKENIZERS, + get_tokenizer_from_registry) from ludwig.utils.types import Series, TorchscriptPreprocessingInput if TYPE_CHECKING: diff --git a/ludwig/features/vector_feature.py b/ludwig/features/vector_feature.py index 06ad7ef0fc8..34fa2d1d6a2 100644 --- a/ludwig/features/vector_feature.py +++ b/ludwig/features/vector_feature.py @@ -19,16 +19,15 @@ import numpy as np import torch -from ludwig.constants import COLUMN, HIDDEN, LOGITS, NAME, PREDICTIONS, PROC_COLUMN, VECTOR -from ludwig.features.base_feature import InputFeature, OutputFeature, PredictModule -from ludwig.schema.features.vector_feature import VectorInputFeatureConfig, VectorOutputFeatureConfig -from ludwig.types import ( - FeatureMetadataDict, - FeaturePostProcessingOutputDict, - ModelConfigDict, - PreprocessingConfigDict, - TrainingSetMetadataDict, -) +from ludwig.constants import (COLUMN, HIDDEN, LOGITS, NAME, PREDICTIONS, + PROC_COLUMN, VECTOR) +from ludwig.features.base_feature import (InputFeature, OutputFeature, + PredictModule) +from ludwig.schema.features.vector_feature import (VectorInputFeatureConfig, + VectorOutputFeatureConfig) +from ludwig.types import (FeatureMetadataDict, FeaturePostProcessingOutputDict, + ModelConfigDict, PreprocessingConfigDict, + TrainingSetMetadataDict) from ludwig.utils import output_feature_utils from ludwig.utils.types import TorchscriptPreprocessingInput diff --git a/ludwig/hyperopt/execution.py b/ludwig/hyperopt/execution.py index c1ad226a116..a427350fd45 100644 --- a/ludwig/hyperopt/execution.py +++ b/ludwig/hyperopt/execution.py @@ -21,9 +21,11 @@ from ray import tune from ray.air import Checkpoint from ray.air.config import CheckpointConfig, FailureConfig, RunConfig -from ray.tune import ExperimentAnalysis, register_trainable, Stopper, TuneConfig +from ray.tune import (ExperimentAnalysis, Stopper, TuneConfig, + register_trainable) from ray.tune.execution.placement_groups import PlacementGroupFactory -from ray.tune.schedulers.resource_changing_scheduler import DistributeResources, ResourceChangingScheduler +from ray.tune.schedulers.resource_changing_scheduler import ( + DistributeResources, ResourceChangingScheduler) from ray.tune.search import BasicVariantGenerator, ConcurrencyLimiter from ray.tune.tuner import Tuner from ray.tune.utils import wait_for_gpu @@ -31,10 +33,11 @@ from ludwig.api import LudwigModel from ludwig.api_annotations import PublicAPI -from ludwig.backend import initialize_backend, RAY +from ludwig.backend import RAY, initialize_backend from ludwig.backend.ray import initialize_ray from ludwig.callbacks import Callback -from ludwig.constants import MAXIMIZE, TEST, TRAINER, TRAINING, TYPE, VALIDATION +from ludwig.constants import (MAXIMIZE, TEST, TRAINER, TRAINING, TYPE, + VALIDATION) from ludwig.globals import MODEL_FILE_NAME from ludwig.hyperopt.results import HyperoptResults, TrialResults from ludwig.hyperopt.syncer import RemoteSyncer @@ -44,7 +47,7 @@ from ludwig.schema.model_config import ModelConfig from ludwig.types import ModelConfigDict from ludwig.utils import fs_utils, metric_utils -from ludwig.utils.data_utils import hash_dict, NumpyEncoder, use_credentials +from ludwig.utils.data_utils import NumpyEncoder, hash_dict, use_credentials from ludwig.utils.defaults import default_random_seed from ludwig.utils.error_handling_utils import default_retry from ludwig.utils.fs_utils import has_remote_protocol, safe_move_file @@ -803,7 +806,8 @@ def run_experiment_trial(config, local_hyperopt_dict, checkpoint_dir=None): self.sync_config = tune.SyncConfig(upload_dir=output_directory, syncer=self.sync_client) output_directory = None elif self.kubernetes_namespace: - from ray.tune.integration.kubernetes import KubernetesSyncClient, NamespacedKubernetesSyncer + from ray.tune.integration.kubernetes import ( + KubernetesSyncClient, NamespacedKubernetesSyncer) self.sync_config = tune.SyncConfig(sync_to_driver=NamespacedKubernetesSyncer(self.kubernetes_namespace)) self.sync_client = KubernetesSyncClient(self.kubernetes_namespace) diff --git a/ludwig/hyperopt/run.py b/ludwig/hyperopt/run.py index 3c53e089b8b..a5e76813527 100644 --- a/ludwig/hyperopt/run.py +++ b/ludwig/hyperopt/run.py @@ -9,39 +9,22 @@ from tabulate import tabulate from ludwig.api import LudwigModel -from ludwig.backend import Backend, initialize_backend, LocalBackend +from ludwig.backend import Backend, LocalBackend, initialize_backend from ludwig.callbacks import Callback -from ludwig.constants import ( - AUTO, - COMBINED, - EXECUTOR, - GOAL, - HYPEROPT, - LOSS, - MAX_CONCURRENT_TRIALS, - METRIC, - NAME, - OUTPUT_FEATURES, - PARAMETERS, - PREPROCESSING, - SEARCH_ALG, - SPLIT, - TEST, - TRAINING, - TYPE, - VALIDATION, -) +from ludwig.constants import (AUTO, COMBINED, EXECUTOR, GOAL, HYPEROPT, LOSS, + MAX_CONCURRENT_TRIALS, METRIC, NAME, + OUTPUT_FEATURES, PARAMETERS, PREPROCESSING, + SEARCH_ALG, SPLIT, TEST, TRAINING, TYPE, + VALIDATION) from ludwig.data.split import get_splitter from ludwig.hyperopt.results import HyperoptResults -from ludwig.hyperopt.utils import ( - log_warning_if_all_grid_type_parameters, - print_hyperopt_results, - save_hyperopt_stats, - should_tune_preprocessing, - update_hyperopt_params_with_defaults, -) +from ludwig.hyperopt.utils import (log_warning_if_all_grid_type_parameters, + print_hyperopt_results, save_hyperopt_stats, + should_tune_preprocessing, + update_hyperopt_params_with_defaults) from ludwig.schema.model_config import ModelConfig -from ludwig.utils.backward_compatibility import upgrade_config_dict_to_latest_version +from ludwig.utils.backward_compatibility import \ + upgrade_config_dict_to_latest_version from ludwig.utils.dataset_utils import generate_dataset_statistics from ludwig.utils.defaults import default_random_seed from ludwig.utils.fs_utils import makedirs, open_file @@ -196,7 +179,8 @@ def hyperopt( :return: (List[dict]) List of results for each trial, ordered by descending performance on the target metric. """ - from ludwig.hyperopt.execution import get_build_hyperopt_executor, RayTuneExecutor + from ludwig.hyperopt.execution import (RayTuneExecutor, + get_build_hyperopt_executor) # check if config is a path or a dict if isinstance(config, str): # assume path diff --git a/ludwig/hyperopt/utils.py b/ludwig/hyperopt/utils.py index 08c3e8f54b4..c894271a108 100644 --- a/ludwig/hyperopt/utils.py +++ b/ludwig/hyperopt/utils.py @@ -7,40 +7,19 @@ from typing import Any, Dict from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ( - AUTO, - COMBINED, - EXECUTOR, - GOAL, - GRID_SEARCH, - HYPEROPT, - INPUT_FEATURES, - LOSS, - MAX_CONCURRENT_TRIALS, - METRIC, - MINIMIZE, - NAME, - NUM_SAMPLES, - OUTPUT_FEATURES, - PARAMETERS, - PREPROCESSING, - RAY, - SPACE, - SPLIT, - TYPE, - VALIDATION, -) +from ludwig.constants import (AUTO, COMBINED, EXECUTOR, GOAL, GRID_SEARCH, + HYPEROPT, INPUT_FEATURES, LOSS, + MAX_CONCURRENT_TRIALS, METRIC, MINIMIZE, NAME, + NUM_SAMPLES, OUTPUT_FEATURES, PARAMETERS, + PREPROCESSING, RAY, SPACE, SPLIT, TYPE, + VALIDATION) from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME from ludwig.hyperopt.results import HyperoptResults, TrialResults from ludwig.types import HyperoptConfigDict, ModelConfigDict from ludwig.utils.data_utils import save_json -from ludwig.utils.misc_utils import ( - get_class_attributes, - get_from_registry, - merge_dict, - set_default_value, - set_default_values, -) +from ludwig.utils.misc_utils import (get_class_attributes, get_from_registry, + merge_dict, set_default_value, + set_default_values) from ludwig.utils.print_utils import print_boxed logger = logging.getLogger(__name__) diff --git a/ludwig/model_export/onnx_exporter.py b/ludwig/model_export/onnx_exporter.py index 1659a2c197a..767f6b5d99e 100644 --- a/ludwig/model_export/onnx_exporter.py +++ b/ludwig/model_export/onnx_exporter.py @@ -4,7 +4,8 @@ import torch from ludwig.api import LudwigModel -from ludwig.model_export.base_model_exporter import BaseModelExporter, LudwigTorchWrapper +from ludwig.model_export.base_model_exporter import (BaseModelExporter, + LudwigTorchWrapper) # Copyright (c) 2023 Predibase, Inc., 2019 Uber Technologies, Inc. diff --git a/ludwig/models/base.py b/ludwig/models/base.py index c173af824b6..989121b904c 100644 --- a/ludwig/models/base.py +++ b/ludwig/models/base.py @@ -11,12 +11,17 @@ from ludwig.combiners.combiners import Combiner from ludwig.constants import COMBINED, LOSS, NAME from ludwig.encoders.base import Encoder -from ludwig.features.base_feature import create_passthrough_input_feature, InputFeature, ModuleWrapper, OutputFeature -from ludwig.features.feature_registries import get_input_type_registry, get_output_type_registry +from ludwig.features.base_feature import (InputFeature, ModuleWrapper, + OutputFeature, + create_passthrough_input_feature) +from ludwig.features.feature_registries import (get_input_type_registry, + get_output_type_registry) from ludwig.features.feature_utils import LudwigFeatureDict from ludwig.modules.metric_modules import LudwigMetric from ludwig.modules.training_hooks import TrainingHook -from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig, FeatureCollection +from ludwig.schema.features.base import (BaseInputFeatureConfig, + BaseOutputFeatureConfig, + FeatureCollection) from ludwig.utils.algorithms_utils import topological_sort_feature_dependencies from ludwig.utils.metric_utils import get_scalar_from_ludwig_metric from ludwig.utils.misc_utils import get_from_registry diff --git a/ludwig/models/embedder.py b/ludwig/models/embedder.py index a5a152e7c93..aa4c48c57a4 100644 --- a/ludwig/models/embedder.py +++ b/ludwig/models/embedder.py @@ -9,13 +9,14 @@ from ludwig.features.feature_registries import get_input_type_registry from ludwig.features.feature_utils import LudwigFeatureDict from ludwig.models.base import BaseModel -from ludwig.schema.features.base import BaseInputFeatureConfig, FeatureCollection +from ludwig.schema.features.base import (BaseInputFeatureConfig, + FeatureCollection) from ludwig.schema.features.utils import get_input_feature_cls from ludwig.types import FeatureConfigDict, TrainingSetMetadataDict from ludwig.utils.batch_size_tuner import BatchSizeEvaluator from ludwig.utils.dataframe_utils import from_numpy_dataset from ludwig.utils.misc_utils import get_from_registry -from ludwig.utils.torch_utils import get_torch_device, LudwigModule +from ludwig.utils.torch_utils import LudwigModule, get_torch_device @DeveloperAPI diff --git a/ludwig/models/gbm.py b/ludwig/models/gbm.py index b249fce2cb6..adf5ffd149b 100644 --- a/ludwig/models/gbm.py +++ b/ludwig/models/gbm.py @@ -12,7 +12,8 @@ from ludwig.features.base_feature import OutputFeature from ludwig.globals import MODEL_WEIGHTS_FILE_NAME from ludwig.models.base import BaseModel -from ludwig.schema.features.base import BaseOutputFeatureConfig, FeatureCollection +from ludwig.schema.features.base import (BaseOutputFeatureConfig, + FeatureCollection) from ludwig.schema.model_config import ModelConfig from ludwig.utils import output_feature_utils from ludwig.utils.fs_utils import path_exists diff --git a/ludwig/models/inference.py b/ludwig/models/inference.py index 82890572979..5e279753292 100644 --- a/ludwig/models/inference.py +++ b/ludwig/models/inference.py @@ -1,6 +1,6 @@ import logging import os -from typing import Any, Dict, Optional, TYPE_CHECKING, Union +from typing import TYPE_CHECKING, Any, Dict, Optional, Union import pandas as pd import torch @@ -10,14 +10,18 @@ from ludwig.data.postprocessing import convert_dict_to_df from ludwig.data.preprocessing import load_metadata from ludwig.features.feature_registries import get_input_type_registry -from ludwig.features.feature_utils import get_module_dict_key_from_name, get_name_from_module_dict_key -from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME, TRAIN_SET_METADATA_FILE_NAME +from ludwig.features.feature_utils import (get_module_dict_key_from_name, + get_name_from_module_dict_key) +from ludwig.globals import (MODEL_HYPERPARAMETERS_FILE_NAME, + TRAIN_SET_METADATA_FILE_NAME) from ludwig.types import ModelConfigDict, TrainingSetMetadataDict from ludwig.utils import output_feature_utils from ludwig.utils.data_utils import load_json, save_json -from ludwig.utils.inference_utils import get_filename_from_stage, to_inference_module_input_from_dataframe +from ludwig.utils.inference_utils import ( + get_filename_from_stage, to_inference_module_input_from_dataframe) from ludwig.utils.misc_utils import get_from_registry -from ludwig.utils.output_feature_utils import get_feature_name_from_concat_name, get_tensor_name_from_concat_name +from ludwig.utils.output_feature_utils import ( + get_feature_name_from_concat_name, get_tensor_name_from_concat_name) from ludwig.utils.torch_utils import DEVICE from ludwig.utils.types import TorchDevice, TorchscriptPreprocessingInput diff --git a/ludwig/models/llm.py b/ludwig/models/llm.py index 3a970cd2013..d7105955278 100644 --- a/ludwig/models/llm.py +++ b/ludwig/models/llm.py @@ -8,29 +8,26 @@ from transformers import AutoConfig, GenerationConfig from ludwig.accounting.used_tokens import get_used_tokens_for_llm -from ludwig.constants import IGNORE_INDEX_TOKEN_ID, LOGITS, MODEL_LLM, PREDICTIONS, TEXT, USED_TOKENS +from ludwig.constants import (IGNORE_INDEX_TOKEN_ID, LOGITS, MODEL_LLM, + PREDICTIONS, TEXT, USED_TOKENS) from ludwig.features.base_feature import ModuleWrapper, OutputFeature from ludwig.features.feature_utils import LudwigFeatureDict from ludwig.features.text_feature import TextOutputFeature from ludwig.globals import MODEL_WEIGHTS_FILE_NAME from ludwig.models.base import BaseModel from ludwig.modules.training_hooks import NEFTuneHook -from ludwig.schema.features.base import BaseOutputFeatureConfig, FeatureCollection +from ludwig.schema.features.base import (BaseOutputFeatureConfig, + FeatureCollection) from ludwig.schema.model_types.llm import LLMModelConfig from ludwig.utils.augmentation_utils import AugmentationPipelines from ludwig.utils.data_utils import clear_data_cache -from ludwig.utils.llm_quantization_utils import convert_quantized_linear_to_linear +from ludwig.utils.llm_quantization_utils import \ + convert_quantized_linear_to_linear from ludwig.utils.llm_utils import ( - add_left_padding, - generate_merged_ids, - get_context_len, + add_left_padding, generate_merged_ids, get_context_len, get_realigned_target_and_prediction_tensors_for_inference, - initialize_adapter, - load_pretrained_from_config, - pad_target_tensor_for_fine_tuning, - remove_left_padding, - to_device, -) + initialize_adapter, load_pretrained_from_config, + pad_target_tensor_for_fine_tuning, remove_left_padding, to_device) from ludwig.utils.logging_utils import log_once from ludwig.utils.output_feature_utils import set_output_feature_tensor from ludwig.utils.tokenizers import HFTokenizer diff --git a/ludwig/models/predictor.py b/ludwig/models/predictor.py index 977fcb0a263..01b10ae7106 100644 --- a/ludwig/models/predictor.py +++ b/ludwig/models/predictor.py @@ -2,7 +2,7 @@ import os import sys from abc import ABC, abstractmethod -from collections import defaultdict, OrderedDict +from collections import OrderedDict, defaultdict from pprint import pformat from typing import Dict, List, Optional, Type @@ -12,11 +12,13 @@ import torch from torch import nn -from ludwig.constants import COMBINED, LAST_HIDDEN, LOGITS, MODEL_ECD, MODEL_GBM, MODEL_LLM +from ludwig.constants import (COMBINED, LAST_HIDDEN, LOGITS, MODEL_ECD, + MODEL_GBM, MODEL_LLM) from ludwig.data.dataset.base import Dataset from ludwig.data.utils import convert_to_dict from ludwig.distributed.base import DistributedStrategy, LocalStrategy -from ludwig.globals import is_progressbar_disabled, PREDICTIONS_PARQUET_FILE_NAME, TEST_STATISTICS_FILE_NAME +from ludwig.globals import (PREDICTIONS_PARQUET_FILE_NAME, + TEST_STATISTICS_FILE_NAME, is_progressbar_disabled) from ludwig.models.base import BaseModel from ludwig.progress_bar import LudwigProgressBar from ludwig.utils.data_utils import save_csv, save_json diff --git a/ludwig/models/retrieval.py b/ludwig/models/retrieval.py index 553fe85f3a6..fc499baea62 100644 --- a/ludwig/models/retrieval.py +++ b/ludwig/models/retrieval.py @@ -2,7 +2,8 @@ import json import os from abc import ABC, abstractmethod -from typing import Any, Callable, Dict, List, Optional, Type, TYPE_CHECKING, Union +from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Optional, Type, + Union) import numpy as np import pandas as pd diff --git a/ludwig/modules/attention_modules.py b/ludwig/modules/attention_modules.py index 064997d093d..c599aff871b 100644 --- a/ludwig/modules/attention_modules.py +++ b/ludwig/modules/attention_modules.py @@ -18,7 +18,7 @@ from torch import nn from torch.nn import functional as F -from ludwig.utils.torch_utils import get_activation, LudwigModule +from ludwig.utils.torch_utils import LudwigModule, get_activation logger = logging.getLogger(__name__) diff --git a/ludwig/modules/convolutional_modules.py b/ludwig/modules/convolutional_modules.py index 198d9d1eae9..7d3cbc0c378 100644 --- a/ludwig/modules/convolutional_modules.py +++ b/ludwig/modules/convolutional_modules.py @@ -20,7 +20,7 @@ import torch.nn as nn from ludwig.utils.image_utils import get_img_output_shape -from ludwig.utils.torch_utils import get_activation, LudwigModule +from ludwig.utils.torch_utils import LudwigModule, get_activation logger = logging.getLogger(__name__) diff --git a/ludwig/modules/embedding_modules.py b/ludwig/modules/embedding_modules.py index d53769f242a..169e074f55b 100644 --- a/ludwig/modules/embedding_modules.py +++ b/ludwig/modules/embedding_modules.py @@ -21,7 +21,7 @@ from ludwig.constants import TYPE from ludwig.modules.initializer_modules import get_initializer from ludwig.utils.data_utils import load_pretrained_embeddings -from ludwig.utils.torch_utils import get_torch_device, LudwigModule +from ludwig.utils.torch_utils import LudwigModule, get_torch_device logger = logging.getLogger(__name__) diff --git a/ludwig/modules/fully_connected_modules.py b/ludwig/modules/fully_connected_modules.py index 61b0f23ade6..92c8888f29f 100644 --- a/ludwig/modules/fully_connected_modules.py +++ b/ludwig/modules/fully_connected_modules.py @@ -20,7 +20,8 @@ from torch.nn import Dropout, Linear, ModuleList from ludwig.modules.normalization_modules import create_norm_layer -from ludwig.utils.torch_utils import activations, initializer_registry, LudwigModule +from ludwig.utils.torch_utils import (LudwigModule, activations, + initializer_registry) logger = logging.getLogger(__name__) diff --git a/ludwig/modules/loss_modules.py b/ludwig/modules/loss_modules.py index a239def352d..26c704dee0e 100644 --- a/ludwig/modules/loss_modules.py +++ b/ludwig/modules/loss_modules.py @@ -17,7 +17,7 @@ from typing import Type import torch -from torch import nn, Tensor +from torch import Tensor, nn from torch.nn import HuberLoss as _HuberLoss from torch.nn import L1Loss from torch.nn import MSELoss as _MSELoss @@ -27,20 +27,11 @@ from ludwig.constants import LOGITS from ludwig.modules.loss_implementations.corn import corn_loss from ludwig.schema.features.loss.loss import ( - BaseLossConfig, - BWCEWLossConfig, - CORNLossConfig, - HuberLossConfig, - MAELossConfig, - MAPELossConfig, - MSELossConfig, - NextTokenSoftmaxCrossEntropyLossConfig, - RMSELossConfig, - RMSPELossConfig, - SequenceSoftmaxCrossEntropyLossConfig, - SigmoidCrossEntropyLossConfig, - SoftmaxCrossEntropyLossConfig, -) + BaseLossConfig, BWCEWLossConfig, CORNLossConfig, HuberLossConfig, + MAELossConfig, MAPELossConfig, MSELossConfig, + NextTokenSoftmaxCrossEntropyLossConfig, RMSELossConfig, RMSPELossConfig, + SequenceSoftmaxCrossEntropyLossConfig, SigmoidCrossEntropyLossConfig, + SoftmaxCrossEntropyLossConfig) from ludwig.utils import strings_utils from ludwig.utils.registry import Registry diff --git a/ludwig/modules/lr_scheduler.py b/ludwig/modules/lr_scheduler.py index a796a73d410..64bb3848eb1 100644 --- a/ludwig/modules/lr_scheduler.py +++ b/ludwig/modules/lr_scheduler.py @@ -3,7 +3,8 @@ from typing import Any, Callable, Dict from torch.optim import Optimizer -from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, LambdaLR, ReduceLROnPlateau, SequentialLR +from torch.optim.lr_scheduler import (CosineAnnealingWarmRestarts, LambdaLR, + ReduceLROnPlateau, SequentialLR) from ludwig.constants import MINIMIZE, TRAINING, VALIDATION from ludwig.modules.metric_registry import get_metric_objective diff --git a/ludwig/modules/metric_modules.py b/ludwig/modules/metric_modules.py index a126b81ad7b..c44d831dd32 100644 --- a/ludwig/modules/metric_modules.py +++ b/ludwig/modules/metric_modules.py @@ -22,80 +22,43 @@ from torchmetrics import MeanAbsoluteError, MeanAbsolutePercentageError from torchmetrics import MeanMetric as _MeanMetric from torchmetrics import MeanSquaredError, Metric -from torchmetrics.classification import ( - BinaryAccuracy, - BinaryAUROC, - BinaryPrecision, - BinaryRecall, - BinarySpecificity, - MulticlassAccuracy, - MulticlassAUROC, -) -from torchmetrics.functional.regression.r2 import _r2_score_compute, _r2_score_update +from torchmetrics.classification import (BinaryAccuracy, BinaryAUROC, + BinaryPrecision, BinaryRecall, + BinarySpecificity, MulticlassAccuracy, + MulticlassAUROC) +from torchmetrics.functional.regression.r2 import (_r2_score_compute, + _r2_score_update) from torchmetrics.metric import jit_distributed_available from torchmetrics.text import BLEUScore, CharErrorRate, WordErrorRate from torchmetrics.text.perplexity import Perplexity from torchmetrics.text.rouge import ROUGEScore -from ludwig.constants import ( # RESPONSE, - ACCURACY, - ACCURACY_MICRO, - BINARY, - BINARY_WEIGHTED_CROSS_ENTROPY, - CATEGORY, - CATEGORY_DISTRIBUTION, - CORN, - HITS_AT_K, - HUBER, - IGNORE_INDEX_TOKEN_ID, - IMAGE, - JACCARD, - LOGITS, - LOSS, - MAXIMIZE, - MEAN_ABSOLUTE_ERROR, - MEAN_ABSOLUTE_PERCENTAGE_ERROR, - MEAN_SQUARED_ERROR, - MINIMIZE, - NEXT_TOKEN_PERPLEXITY, - NUMBER, - PERPLEXITY, - PRECISION, - PREDICTIONS, - PROBABILITIES, - R2, - RECALL, - ROC_AUC, - ROOT_MEAN_SQUARED_ERROR, - ROOT_MEAN_SQUARED_PERCENTAGE_ERROR, - SEQUENCE, - SEQUENCE_ACCURACY, - SET, - SPECIFICITY, - TEXT, - TIMESERIES, - TOKEN_ACCURACY, - VECTOR, -) +from ludwig.constants import (ACCURACY, ACCURACY_MICRO, BINARY, # RESPONSE, + BINARY_WEIGHTED_CROSS_ENTROPY, CATEGORY, + CATEGORY_DISTRIBUTION, CORN, HITS_AT_K, HUBER, + IGNORE_INDEX_TOKEN_ID, IMAGE, JACCARD, LOGITS, + LOSS, MAXIMIZE, MEAN_ABSOLUTE_ERROR, + MEAN_ABSOLUTE_PERCENTAGE_ERROR, + MEAN_SQUARED_ERROR, MINIMIZE, + NEXT_TOKEN_PERPLEXITY, NUMBER, PERPLEXITY, + PRECISION, PREDICTIONS, PROBABILITIES, R2, + RECALL, ROC_AUC, ROOT_MEAN_SQUARED_ERROR, + ROOT_MEAN_SQUARED_PERCENTAGE_ERROR, SEQUENCE, + SEQUENCE_ACCURACY, SET, SPECIFICITY, TEXT, + TIMESERIES, TOKEN_ACCURACY, VECTOR) from ludwig.distributed import get_current_dist_strategy -from ludwig.modules.loss_modules import ( - BWCEWLoss, - CORNLoss, - HuberLoss, - NextTokenSoftmaxCrossEntropyLoss, - SequenceSoftmaxCrossEntropyLoss, - SigmoidCrossEntropyLoss, - SoftmaxCrossEntropyLoss, -) -from ludwig.modules.metric_registry import get_metric_objective, get_metric_registry, register_metric +from ludwig.modules.loss_modules import (BWCEWLoss, CORNLoss, HuberLoss, + NextTokenSoftmaxCrossEntropyLoss, + SequenceSoftmaxCrossEntropyLoss, + SigmoidCrossEntropyLoss, + SoftmaxCrossEntropyLoss) +from ludwig.modules.metric_registry import (get_metric_objective, + get_metric_registry, + register_metric) from ludwig.schema.features.loss.loss import ( - BWCEWLossConfig, - CORNLossConfig, - HuberLossConfig, - SequenceSoftmaxCrossEntropyLossConfig, - SigmoidCrossEntropyLossConfig, - SoftmaxCrossEntropyLossConfig, -) + BWCEWLossConfig, CORNLossConfig, HuberLossConfig, + SequenceSoftmaxCrossEntropyLossConfig, SigmoidCrossEntropyLossConfig, + SoftmaxCrossEntropyLossConfig) from ludwig.utils.loss_utils import rmspe_loss from ludwig.utils.metric_utils import masked_correct_predictions from ludwig.utils.torch_utils import sequence_length_2D diff --git a/ludwig/modules/metric_registry.py b/ludwig/modules/metric_registry.py index d9a06134a54..7837564f9d5 100644 --- a/ludwig/modules/metric_registry.py +++ b/ludwig/modules/metric_registry.py @@ -1,7 +1,8 @@ -from typing import Dict, List, Literal, TYPE_CHECKING, Union +from typing import TYPE_CHECKING, Dict, List, Literal, Union from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import LOGITS, MAXIMIZE, MINIMIZE, PREDICTIONS, PROBABILITIES, RESPONSE +from ludwig.constants import (LOGITS, MAXIMIZE, MINIMIZE, PREDICTIONS, + PROBABILITIES, RESPONSE) from ludwig.utils.registry import Registry if TYPE_CHECKING: diff --git a/ludwig/modules/optimization_modules.py b/ludwig/modules/optimization_modules.py index b762f23e53d..b2d6efd4081 100644 --- a/ludwig/modules/optimization_modules.py +++ b/ludwig/modules/optimization_modules.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== from dataclasses import asdict -from typing import Dict, Optional, Tuple, Type, TYPE_CHECKING +from typing import TYPE_CHECKING, Dict, Optional, Tuple, Type import torch @@ -21,7 +21,8 @@ from ludwig.utils.torch_utils import LudwigModule if TYPE_CHECKING: - from ludwig.schema.optimizers import BaseOptimizerConfig, GradientClippingConfig + from ludwig.schema.optimizers import (BaseOptimizerConfig, + GradientClippingConfig) def create_clipper(gradient_clipping_config: Optional["GradientClippingConfig"]): diff --git a/ludwig/schema/__init__.py b/ludwig/schema/__init__.py index 77ecd60c32c..8c190cc6f8d 100644 --- a/ludwig/schema/__init__.py +++ b/ludwig/schema/__init__.py @@ -1,5 +1,7 @@ # TODO(travis): figure out why we need these imports to avoid circular import error from ludwig.schema.combiners.utils import get_combiner_jsonschema # noqa -from ludwig.schema.features.utils import get_input_feature_jsonschema, get_output_feature_jsonschema # noqa +from ludwig.schema.features.utils import (get_input_feature_jsonschema, # noqa + get_output_feature_jsonschema) from ludwig.schema.hyperopt import get_hyperopt_jsonschema # noqa -from ludwig.schema.trainer import get_model_type_jsonschema, get_trainer_jsonschema # noqa +from ludwig.schema.trainer import (get_model_type_jsonschema, # noqa + get_trainer_jsonschema) diff --git a/ludwig/schema/combiners/sequence.py b/ludwig/schema/combiners/sequence.py index 33907ab37d2..c28881fc172 100644 --- a/ludwig/schema/combiners/sequence.py +++ b/ludwig/schema/combiners/sequence.py @@ -4,7 +4,8 @@ from ludwig.constants import MODEL_ECD, SEQUENCE from ludwig.schema import utils as schema_utils from ludwig.schema.combiners.base import BaseCombinerConfig -from ludwig.schema.combiners.sequence_concat import MAIN_SEQUENCE_FEATURE_DESCRIPTION +from ludwig.schema.combiners.sequence_concat import \ + MAIN_SEQUENCE_FEATURE_DESCRIPTION from ludwig.schema.combiners.utils import register_combiner_config from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField diff --git a/ludwig/schema/combiners/tab_transformer.py b/ludwig/schema/combiners/tab_transformer.py index 7d0cfe90123..ee50ffed2ff 100644 --- a/ludwig/schema/combiners/tab_transformer.py +++ b/ludwig/schema/combiners/tab_transformer.py @@ -3,7 +3,8 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.schema import utils as schema_utils from ludwig.schema.combiners.base import BaseCombinerConfig -from ludwig.schema.combiners.common_transformer_options import CommonTransformerConfig +from ludwig.schema.combiners.common_transformer_options import \ + CommonTransformerConfig from ludwig.schema.combiners.utils import register_combiner_config from ludwig.schema.metadata import COMBINER_METADATA from ludwig.schema.utils import ludwig_dataclass diff --git a/ludwig/schema/combiners/transformer.py b/ludwig/schema/combiners/transformer.py index 780bd4342f5..6a026b1bf4d 100644 --- a/ludwig/schema/combiners/transformer.py +++ b/ludwig/schema/combiners/transformer.py @@ -3,7 +3,8 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.schema import utils as schema_utils from ludwig.schema.combiners.base import BaseCombinerConfig -from ludwig.schema.combiners.common_transformer_options import CommonTransformerConfig +from ludwig.schema.combiners.common_transformer_options import \ + CommonTransformerConfig from ludwig.schema.combiners.utils import register_combiner_config from ludwig.schema.metadata import COMBINER_METADATA from ludwig.schema.utils import ludwig_dataclass diff --git a/ludwig/schema/combiners/utils.py b/ludwig/schema/combiners/utils.py index dfad5c83f46..3e28fa0d529 100644 --- a/ludwig/schema/combiners/utils.py +++ b/ludwig/schema/combiners/utils.py @@ -5,7 +5,8 @@ from ludwig.schema import utils as schema_utils from ludwig.schema.combiners.base import BaseCombinerConfig from ludwig.schema.metadata import COMBINER_METADATA -from ludwig.schema.metadata.parameter_metadata import convert_metadata_to_json, ParameterMetadata +from ludwig.schema.metadata.parameter_metadata import ( + ParameterMetadata, convert_metadata_to_json) from ludwig.utils.registry import Registry DEFAULT_VALUE = "concat" diff --git a/ludwig/schema/decoders/base.py b/ludwig/schema/decoders/base.py index f1e27833fd8..98bdadf3179 100644 --- a/ludwig/schema/decoders/base.py +++ b/ludwig/schema/decoders/base.py @@ -2,7 +2,8 @@ from typing import Dict, List, Tuple, Union from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import BINARY, CATEGORY, MODEL_ECD, MODEL_GBM, MODEL_LLM, NUMBER, SET, TIMESERIES, VECTOR +from ludwig.constants import (BINARY, CATEGORY, MODEL_ECD, MODEL_GBM, + MODEL_LLM, NUMBER, SET, TIMESERIES, VECTOR) from ludwig.schema import common_fields from ludwig.schema import utils as schema_utils from ludwig.schema.decoders.utils import register_decoder_config diff --git a/ludwig/schema/decoders/image_decoders.py b/ludwig/schema/decoders/image_decoders.py index 1adfb4b343f..bd591181abb 100644 --- a/ludwig/schema/decoders/image_decoders.py +++ b/ludwig/schema/decoders/image_decoders.py @@ -1,4 +1,4 @@ -from typing import Optional, TYPE_CHECKING +from typing import TYPE_CHECKING, Optional from ludwig.api_annotations import DeveloperAPI from ludwig.constants import IMAGE, MODEL_ECD @@ -9,7 +9,8 @@ from ludwig.schema.utils import ludwig_dataclass if TYPE_CHECKING: - from ludwig.schema.features.preprocessing.image import ImagePreprocessingConfig + from ludwig.schema.features.preprocessing.image import \ + ImagePreprocessingConfig class ImageDecoderConfig(BaseDecoderConfig): diff --git a/ludwig/schema/decoders/utils.py b/ludwig/schema/decoders/utils.py index 1a1fec77552..fe6105fe916 100644 --- a/ludwig/schema/decoders/utils.py +++ b/ludwig/schema/decoders/utils.py @@ -1,5 +1,5 @@ from dataclasses import Field -from typing import Any, Dict, List, Optional, Type, TYPE_CHECKING, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type, Union from ludwig.api_annotations import DeveloperAPI from ludwig.constants import MODEL_ECD, TYPE diff --git a/ludwig/schema/defaults/ecd.py b/ludwig/schema/defaults/ecd.py index ccf41e8c4ed..622e9cdd310 100644 --- a/ludwig/schema/defaults/ecd.py +++ b/ludwig/schema/defaults/ecd.py @@ -1,19 +1,6 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ( - AUDIO, - BAG, - BINARY, - CATEGORY, - DATE, - H3, - IMAGE, - NUMBER, - SEQUENCE, - SET, - TEXT, - TIMESERIES, - VECTOR, -) +from ludwig.constants import (AUDIO, BAG, BINARY, CATEGORY, DATE, H3, IMAGE, + NUMBER, SEQUENCE, SET, TEXT, TIMESERIES, VECTOR) from ludwig.schema import utils as schema_utils from ludwig.schema.defaults.base import BaseDefaultsConfig from ludwig.schema.defaults.utils import DefaultsDataclassField diff --git a/ludwig/schema/defaults/utils.py b/ludwig/schema/defaults/utils.py index a7c4560002b..0ee0b2e8f69 100644 --- a/ludwig/schema/defaults/utils.py +++ b/ludwig/schema/defaults/utils.py @@ -1,6 +1,6 @@ from dataclasses import field -from marshmallow import fields, ValidationError +from marshmallow import ValidationError, fields import ludwig.schema.utils as schema_utils from ludwig.api_annotations import DeveloperAPI diff --git a/ludwig/schema/encoders/base.py b/ludwig/schema/encoders/base.py index 5b2010dce6c..fff788c2d6b 100644 --- a/ludwig/schema/encoders/base.py +++ b/ludwig/schema/encoders/base.py @@ -1,8 +1,9 @@ from abc import ABC -from typing import List, TYPE_CHECKING, Union +from typing import TYPE_CHECKING, List, Union from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import BINARY, MODEL_ECD, MODEL_GBM, MODEL_LLM, NUMBER, TEXT, TIMESERIES, VECTOR +from ludwig.constants import (BINARY, MODEL_ECD, MODEL_GBM, MODEL_LLM, NUMBER, + TEXT, TIMESERIES, VECTOR) from ludwig.schema import common_fields from ludwig.schema import utils as schema_utils from ludwig.schema.encoders.utils import register_encoder_config @@ -10,7 +11,8 @@ from ludwig.schema.utils import ludwig_dataclass if TYPE_CHECKING: - from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig + from ludwig.schema.features.preprocessing.base import \ + BasePreprocessingConfig @DeveloperAPI diff --git a/ludwig/schema/encoders/category_encoders.py b/ludwig/schema/encoders/category_encoders.py index c54cc8be24b..52397cfa292 100644 --- a/ludwig/schema/encoders/category_encoders.py +++ b/ludwig/schema/encoders/category_encoders.py @@ -1,4 +1,4 @@ -from typing import List, TYPE_CHECKING +from typing import TYPE_CHECKING, List from ludwig.api_annotations import DeveloperAPI from ludwig.constants import CATEGORY, MODEL_ECD, MODEL_GBM @@ -10,7 +10,8 @@ from ludwig.schema.utils import ludwig_dataclass if TYPE_CHECKING: - from ludwig.schema.features.preprocessing.category import CategoryPreprocessingConfig + from ludwig.schema.features.preprocessing.category import \ + CategoryPreprocessingConfig @DeveloperAPI diff --git a/ludwig/schema/encoders/image/base.py b/ludwig/schema/encoders/image/base.py index c0feeecb3b8..53d5c7e94ed 100644 --- a/ludwig/schema/encoders/image/base.py +++ b/ludwig/schema/encoders/image/base.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union from ludwig.api_annotations import DeveloperAPI from ludwig.constants import IMAGE @@ -10,7 +10,8 @@ from ludwig.utils.torch_utils import initializer_registry if TYPE_CHECKING: - from ludwig.schema.features.preprocessing.image import ImagePreprocessingConfig + from ludwig.schema.features.preprocessing.image import \ + ImagePreprocessingConfig class ImageEncoderConfig(BaseEncoderConfig): diff --git a/ludwig/schema/encoders/sequence_encoders.py b/ludwig/schema/encoders/sequence_encoders.py index c6ef4c746e6..b3d9d7251a7 100644 --- a/ludwig/schema/encoders/sequence_encoders.py +++ b/ludwig/schema/encoders/sequence_encoders.py @@ -1,5 +1,5 @@ from dataclasses import Field -from typing import List, Optional, TYPE_CHECKING +from typing import TYPE_CHECKING, List, Optional from ludwig.api_annotations import DeveloperAPI from ludwig.constants import AUDIO, SEQUENCE, TEXT, TIMESERIES @@ -11,7 +11,8 @@ from ludwig.schema.utils import ludwig_dataclass if TYPE_CHECKING: - from ludwig.schema.features.preprocessing.sequence import SequencePreprocessingConfig + from ludwig.schema.features.preprocessing.sequence import \ + SequencePreprocessingConfig CONV_LAYERS_DESCRIPTION = """ A list of dictionaries containing the parameters of all the convolutional layers. diff --git a/ludwig/schema/encoders/text_encoders.py b/ludwig/schema/encoders/text_encoders.py index 72420c55ae3..9097da33aff 100644 --- a/ludwig/schema/encoders/text_encoders.py +++ b/ludwig/schema/encoders/text_encoders.py @@ -1,4 +1,4 @@ -from typing import Callable, Dict, List, Optional, TYPE_CHECKING, Union +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Union from ludwig.api_annotations import DeveloperAPI from ludwig.constants import MODEL_ECD, MODEL_GBM, TEXT @@ -8,15 +8,19 @@ from ludwig.schema.encoders.text.hf_model_params import DebertaModelParams from ludwig.schema.encoders.utils import register_encoder_config from ludwig.schema.llms.base_model import BaseModelDataclassField -from ludwig.schema.llms.model_parameters import ModelParametersConfig, ModelParametersConfigField +from ludwig.schema.llms.model_parameters import (ModelParametersConfig, + ModelParametersConfigField) from ludwig.schema.llms.peft import AdapterDataclassField, BaseAdapterConfig -from ludwig.schema.llms.quantization import QuantizationConfig, QuantizationConfigField +from ludwig.schema.llms.quantization import (QuantizationConfig, + QuantizationConfigField) from ludwig.schema.metadata import ENCODER_METADATA -from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY, ParameterMetadata +from ludwig.schema.metadata.parameter_metadata import (INTERNAL_ONLY, + ParameterMetadata) from ludwig.schema.utils import ludwig_dataclass if TYPE_CHECKING: - from ludwig.schema.features.preprocessing.text import TextPreprocessingConfig + from ludwig.schema.features.preprocessing.text import \ + TextPreprocessingConfig class HFEncoderConfig(SequenceEncoderConfig): diff --git a/ludwig/schema/encoders/utils.py b/ludwig/schema/encoders/utils.py index f2d7bfcea1a..bbf665d482d 100644 --- a/ludwig/schema/encoders/utils.py +++ b/ludwig/schema/encoders/utils.py @@ -1,5 +1,5 @@ from dataclasses import Field -from typing import Any, Dict, List, Optional, Type, TYPE_CHECKING, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type, Union from ludwig.api_annotations import DeveloperAPI from ludwig.constants import MODEL_ECD, TYPE diff --git a/ludwig/schema/features/audio_feature.py b/ludwig/schema/features/audio_feature.py index dbac117706c..2153049119c 100644 --- a/ludwig/schema/features/audio_feature.py +++ b/ludwig/schema/features/audio_feature.py @@ -5,8 +5,11 @@ from ludwig.schema.encoders.utils import EncoderDataclassField from ludwig.schema.features.base import BaseInputFeatureConfig from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField -from ludwig.schema.features.utils import ecd_defaults_config_registry, ecd_input_config_registry, input_mixin_registry +from ludwig.schema.features.preprocessing.utils import \ + PreprocessingDataclassField +from ludwig.schema.features.utils import (ecd_defaults_config_registry, + ecd_input_config_registry, + input_mixin_registry) from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/augmentation/image.py b/ludwig/schema/features/augmentation/image.py index 74dc1d2673e..db5671c7258 100644 --- a/ludwig/schema/features/augmentation/image.py +++ b/ludwig/schema/features/augmentation/image.py @@ -2,7 +2,8 @@ from ludwig.constants import AUGMENTATION, IMAGE, TYPE from ludwig.schema import utils as schema_utils from ludwig.schema.features.augmentation.base import BaseAugmentationConfig -from ludwig.schema.features.augmentation.utils import register_augmentation_config +from ludwig.schema.features.augmentation.utils import \ + register_augmentation_config from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.utils import ludwig_dataclass diff --git a/ludwig/schema/features/augmentation/utils.py b/ludwig/schema/features/augmentation/utils.py index e24afecee7e..d1807af0555 100644 --- a/ludwig/schema/features/augmentation/utils.py +++ b/ludwig/schema/features/augmentation/utils.py @@ -2,7 +2,7 @@ from dataclasses import field from typing import Any, Dict, List, Optional, Union -from marshmallow import fields, ValidationError +from marshmallow import ValidationError, fields from ludwig.api_annotations import DeveloperAPI from ludwig.constants import TYPE diff --git a/ludwig/schema/features/bag_feature.py b/ludwig/schema/features/bag_feature.py index 3d627a8b4f8..6e27641eff7 100644 --- a/ludwig/schema/features/bag_feature.py +++ b/ludwig/schema/features/bag_feature.py @@ -5,8 +5,11 @@ from ludwig.schema.encoders.utils import EncoderDataclassField from ludwig.schema.features.base import BaseInputFeatureConfig from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField -from ludwig.schema.features.utils import ecd_defaults_config_registry, ecd_input_config_registry, input_mixin_registry +from ludwig.schema.features.preprocessing.utils import \ + PreprocessingDataclassField +from ludwig.schema.features.utils import (ecd_defaults_config_registry, + ecd_input_config_registry, + input_mixin_registry) from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/base.py b/ludwig/schema/features/base.py index 2d2a857a03e..1cceed290fb 100644 --- a/ludwig/schema/features/base.py +++ b/ludwig/schema/features/base.py @@ -6,37 +6,21 @@ from rich.console import Console from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ( - AUDIO, - BAG, - BINARY, - CATEGORY, - DATE, - H3, - IMAGE, - MODEL_ECD, - MODEL_GBM, - MODEL_LLM, - NUMBER, - SEQUENCE, - SET, - TEXT, - TIMESERIES, - VECTOR, -) +from ludwig.constants import (AUDIO, BAG, BINARY, CATEGORY, DATE, H3, IMAGE, + MODEL_ECD, MODEL_GBM, MODEL_LLM, NUMBER, + SEQUENCE, SET, TEXT, TIMESERIES, VECTOR) from ludwig.error import ConfigValidationError from ludwig.schema import utils as schema_utils -from ludwig.schema.features.utils import ( - ecd_input_config_registry, - ecd_output_config_registry, - gbm_input_config_registry, - gbm_output_config_registry, - get_input_feature_jsonschema, - get_output_feature_jsonschema, - llm_input_config_registry, - llm_output_config_registry, -) -from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY, ParameterMetadata +from ludwig.schema.features.utils import (ecd_input_config_registry, + ecd_output_config_registry, + gbm_input_config_registry, + gbm_output_config_registry, + get_input_feature_jsonschema, + get_output_feature_jsonschema, + llm_input_config_registry, + llm_output_config_registry) +from ludwig.schema.metadata.parameter_metadata import (INTERNAL_ONLY, + ParameterMetadata) from ludwig.schema.utils import ludwig_dataclass logger = logging.getLogger(__name__) diff --git a/ludwig/schema/features/binary_feature.py b/ludwig/schema/features/binary_feature.py index 81929866722..02b2dc0c490 100644 --- a/ludwig/schema/features/binary_feature.py +++ b/ludwig/schema/features/binary_feature.py @@ -1,25 +1,26 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import BINARY, BINARY_WEIGHTED_CROSS_ENTROPY, MODEL_ECD, MODEL_GBM, ROC_AUC +from ludwig.constants import (BINARY, BINARY_WEIGHTED_CROSS_ENTROPY, MODEL_ECD, + MODEL_GBM, ROC_AUC) from ludwig.schema import utils as schema_utils from ludwig.schema.decoders.base import BaseDecoderConfig from ludwig.schema.decoders.utils import DecoderDataclassField from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField -from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig +from ludwig.schema.features.base import (BaseInputFeatureConfig, + BaseOutputFeatureConfig) from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField -from ludwig.schema.features.utils import ( - ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - gbm_defaults_config_registry, - gbm_input_config_registry, - gbm_output_config_registry, - input_mixin_registry, - output_mixin_registry, -) +from ludwig.schema.features.preprocessing.utils import \ + PreprocessingDataclassField +from ludwig.schema.features.utils import (ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + gbm_defaults_config_registry, + gbm_input_config_registry, + gbm_output_config_registry, + input_mixin_registry, + output_mixin_registry) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/category_feature.py b/ludwig/schema/features/category_feature.py index c39ee4eabf6..75ecdbbe2bc 100644 --- a/ludwig/schema/features/category_feature.py +++ b/ludwig/schema/features/category_feature.py @@ -1,34 +1,28 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ( - ACCURACY, - CATEGORY, - CATEGORY_DISTRIBUTION, - MODEL_ECD, - MODEL_GBM, - MODEL_LLM, - SOFTMAX_CROSS_ENTROPY, -) +from ludwig.constants import (ACCURACY, CATEGORY, CATEGORY_DISTRIBUTION, + MODEL_ECD, MODEL_GBM, MODEL_LLM, + SOFTMAX_CROSS_ENTROPY) from ludwig.schema import utils as schema_utils from ludwig.schema.decoders.base import BaseDecoderConfig from ludwig.schema.decoders.utils import DecoderDataclassField from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField -from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig +from ludwig.schema.features.base import (BaseInputFeatureConfig, + BaseOutputFeatureConfig) from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField -from ludwig.schema.features.utils import ( - ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - gbm_defaults_config_registry, - gbm_input_config_registry, - gbm_output_config_registry, - input_mixin_registry, - llm_output_config_registry, - output_mixin_registry, -) +from ludwig.schema.features.preprocessing.utils import \ + PreprocessingDataclassField +from ludwig.schema.features.utils import (ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + gbm_defaults_config_registry, + gbm_input_config_registry, + gbm_output_config_registry, + input_mixin_registry, + llm_output_config_registry, + output_mixin_registry) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/date_feature.py b/ludwig/schema/features/date_feature.py index 55b2408df9c..4032ee6e575 100644 --- a/ludwig/schema/features/date_feature.py +++ b/ludwig/schema/features/date_feature.py @@ -5,8 +5,11 @@ from ludwig.schema.encoders.utils import EncoderDataclassField from ludwig.schema.features.base import BaseInputFeatureConfig from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField -from ludwig.schema.features.utils import ecd_defaults_config_registry, ecd_input_config_registry, input_mixin_registry +from ludwig.schema.features.preprocessing.utils import \ + PreprocessingDataclassField +from ludwig.schema.features.utils import (ecd_defaults_config_registry, + ecd_input_config_registry, + input_mixin_registry) from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/h3_feature.py b/ludwig/schema/features/h3_feature.py index 1c712b0ae39..d0f79f701fb 100644 --- a/ludwig/schema/features/h3_feature.py +++ b/ludwig/schema/features/h3_feature.py @@ -5,8 +5,11 @@ from ludwig.schema.encoders.utils import EncoderDataclassField from ludwig.schema.features.base import BaseInputFeatureConfig from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField -from ludwig.schema.features.utils import ecd_defaults_config_registry, ecd_input_config_registry, input_mixin_registry +from ludwig.schema.features.preprocessing.utils import \ + PreprocessingDataclassField +from ludwig.schema.features.utils import (ecd_defaults_config_registry, + ecd_input_config_registry, + input_mixin_registry) from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/image_feature.py b/ludwig/schema/features/image_feature.py index 9322ee253cb..85564eb7435 100644 --- a/ludwig/schema/features/image_feature.py +++ b/ludwig/schema/features/image_feature.py @@ -8,20 +8,22 @@ from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField from ludwig.schema.features.augmentation.base import BaseAugmentationConfig -from ludwig.schema.features.augmentation.image import RandomHorizontalFlipConfig, RandomRotateConfig -from ludwig.schema.features.augmentation.utils import AugmentationDataclassField -from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig +from ludwig.schema.features.augmentation.image import ( + RandomHorizontalFlipConfig, RandomRotateConfig) +from ludwig.schema.features.augmentation.utils import \ + AugmentationDataclassField +from ludwig.schema.features.base import (BaseInputFeatureConfig, + BaseOutputFeatureConfig) from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField -from ludwig.schema.features.utils import ( - ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - input_mixin_registry, - output_mixin_registry, -) +from ludwig.schema.features.preprocessing.utils import \ + PreprocessingDataclassField +from ludwig.schema.features.utils import (ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + input_mixin_registry, + output_mixin_registry) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/loss/__init__.py b/ludwig/schema/features/loss/__init__.py index e7ee0534df8..b47ad206099 100644 --- a/ludwig/schema/features/loss/__init__.py +++ b/ludwig/schema/features/loss/__init__.py @@ -1 +1,3 @@ -from ludwig.schema.features.loss.loss import get_loss_classes, get_loss_cls, get_loss_schema_registry # noqa +from ludwig.schema.features.loss.loss import (get_loss_classes, # noqa + get_loss_cls, + get_loss_schema_registry) diff --git a/ludwig/schema/features/loss/loss.py b/ludwig/schema/features/loss/loss.py index 2dffbe34293..2b8c5e83f71 100644 --- a/ludwig/schema/features/loss/loss.py +++ b/ludwig/schema/features/loss/loss.py @@ -1,29 +1,16 @@ from typing import Dict, List, Type, Union from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ( - BINARY, - BINARY_WEIGHTED_CROSS_ENTROPY, - CATEGORY, - CORN, - HUBER, - IMAGE, - MEAN_ABSOLUTE_ERROR, - MEAN_ABSOLUTE_PERCENTAGE_ERROR, - MEAN_SQUARED_ERROR, - NEXT_TOKEN_SOFTMAX_CROSS_ENTROPY, - NUMBER, - ROOT_MEAN_SQUARED_ERROR, - ROOT_MEAN_SQUARED_PERCENTAGE_ERROR, - SEQUENCE, - SEQUENCE_SOFTMAX_CROSS_ENTROPY, - SET, - SIGMOID_CROSS_ENTROPY, - SOFTMAX_CROSS_ENTROPY, - TEXT, - TIMESERIES, - VECTOR, -) +from ludwig.constants import (BINARY, BINARY_WEIGHTED_CROSS_ENTROPY, CATEGORY, + CORN, HUBER, IMAGE, MEAN_ABSOLUTE_ERROR, + MEAN_ABSOLUTE_PERCENTAGE_ERROR, + MEAN_SQUARED_ERROR, + NEXT_TOKEN_SOFTMAX_CROSS_ENTROPY, NUMBER, + ROOT_MEAN_SQUARED_ERROR, + ROOT_MEAN_SQUARED_PERCENTAGE_ERROR, SEQUENCE, + SEQUENCE_SOFTMAX_CROSS_ENTROPY, SET, + SIGMOID_CROSS_ENTROPY, SOFTMAX_CROSS_ENTROPY, + TEXT, TIMESERIES, VECTOR) from ludwig.schema import utils as schema_utils from ludwig.schema.metadata import LOSS_METADATA from ludwig.schema.utils import ludwig_dataclass diff --git a/ludwig/schema/features/number_feature.py b/ludwig/schema/features/number_feature.py index 97ea49123c6..0e8562cb456 100644 --- a/ludwig/schema/features/number_feature.py +++ b/ludwig/schema/features/number_feature.py @@ -7,21 +7,21 @@ from ludwig.schema.decoders.utils import DecoderDataclassField from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField -from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig +from ludwig.schema.features.base import (BaseInputFeatureConfig, + BaseOutputFeatureConfig) from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField -from ludwig.schema.features.utils import ( - ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - gbm_defaults_config_registry, - gbm_input_config_registry, - gbm_output_config_registry, - input_mixin_registry, - output_mixin_registry, -) +from ludwig.schema.features.preprocessing.utils import \ + PreprocessingDataclassField +from ludwig.schema.features.utils import (ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + gbm_defaults_config_registry, + gbm_input_config_registry, + gbm_output_config_registry, + input_mixin_registry, + output_mixin_registry) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/preprocessing/__init__.py b/ludwig/schema/features/preprocessing/__init__.py index 90440ec1c4e..3cbd2a1ffa2 100644 --- a/ludwig/schema/features/preprocessing/__init__.py +++ b/ludwig/schema/features/preprocessing/__init__.py @@ -1,16 +1,5 @@ # Register all preprocessors -from ludwig.schema.features.preprocessing import ( # noqa - audio, - bag, - binary, - category, - date, - h3, - image, - number, - sequence, - set, - text, - timeseries, - vector, -) +from ludwig.schema.features.preprocessing import (audio, bag, binary, # noqa + category, date, h3, image, + number, sequence, set, text, + timeseries, vector) diff --git a/ludwig/schema/features/preprocessing/audio.py b/ludwig/schema/features/preprocessing/audio.py index 3eba94e9d76..c88ae0c002b 100644 --- a/ludwig/schema/features/preprocessing/audio.py +++ b/ludwig/schema/features/preprocessing/audio.py @@ -1,5 +1,6 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import AUDIO, BFILL, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING +from ludwig.constants import (AUDIO, BFILL, MISSING_VALUE_STRATEGY_OPTIONS, + PREPROCESSING) from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/bag.py b/ludwig/schema/features/preprocessing/bag.py index 6f6402b95c7..2ef9bbc8d63 100644 --- a/ludwig/schema/features/preprocessing/bag.py +++ b/ludwig/schema/features/preprocessing/bag.py @@ -1,5 +1,6 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import BAG, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING +from ludwig.constants import (BAG, FILL_WITH_CONST, + MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING) from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/binary.py b/ludwig/schema/features/preprocessing/binary.py index 6641d8b0c45..f4f709525d2 100644 --- a/ludwig/schema/features/preprocessing/binary.py +++ b/ludwig/schema/features/preprocessing/binary.py @@ -1,16 +1,8 @@ from typing import Union from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ( - BFILL, - BINARY, - DROP_ROW, - FFILL, - FILL_WITH_FALSE, - FILL_WITH_MODE, - FILL_WITH_TRUE, - PREPROCESSING, -) +from ludwig.constants import (BFILL, BINARY, DROP_ROW, FFILL, FILL_WITH_FALSE, + FILL_WITH_MODE, FILL_WITH_TRUE, PREPROCESSING) from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/category.py b/ludwig/schema/features/preprocessing/category.py index 540cd654185..3ceb596889e 100644 --- a/ludwig/schema/features/preprocessing/category.py +++ b/ludwig/schema/features/preprocessing/category.py @@ -1,7 +1,8 @@ from typing import List from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import CATEGORY, DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING +from ludwig.constants import (CATEGORY, DROP_ROW, FILL_WITH_CONST, + MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING) from ludwig.error import ConfigValidationError from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig diff --git a/ludwig/schema/features/preprocessing/date.py b/ludwig/schema/features/preprocessing/date.py index 597ea8d53be..99ff06698bb 100644 --- a/ludwig/schema/features/preprocessing/date.py +++ b/ludwig/schema/features/preprocessing/date.py @@ -1,5 +1,6 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import BFILL, DATE, DROP_ROW, FFILL, FILL_WITH_CONST, PREPROCESSING +from ludwig.constants import (BFILL, DATE, DROP_ROW, FFILL, FILL_WITH_CONST, + PREPROCESSING) from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/h3.py b/ludwig/schema/features/preprocessing/h3.py index 51b57ee3984..4abb0a3e413 100644 --- a/ludwig/schema/features/preprocessing/h3.py +++ b/ludwig/schema/features/preprocessing/h3.py @@ -1,5 +1,6 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import FILL_WITH_CONST, H3, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING +from ludwig.constants import (FILL_WITH_CONST, H3, + MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING) from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/image.py b/ludwig/schema/features/preprocessing/image.py index dd8caa51b53..a78663e0dff 100644 --- a/ludwig/schema/features/preprocessing/image.py +++ b/ludwig/schema/features/preprocessing/image.py @@ -1,7 +1,8 @@ from typing import Optional, Union from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import BFILL, DROP_ROW, IMAGE, IMAGENET1K, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING +from ludwig.constants import (BFILL, DROP_ROW, IMAGE, IMAGENET1K, + MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING) from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/number.py b/ludwig/schema/features/preprocessing/number.py index 17a4ef408d2..c1b34329b4f 100644 --- a/ludwig/schema/features/preprocessing/number.py +++ b/ludwig/schema/features/preprocessing/number.py @@ -1,14 +1,9 @@ from typing import Optional from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ( - DROP_ROW, - FILL_WITH_CONST, - FILL_WITH_MEAN, - MISSING_VALUE_STRATEGY_OPTIONS, - NUMBER, - PREPROCESSING, -) +from ludwig.constants import (DROP_ROW, FILL_WITH_CONST, FILL_WITH_MEAN, + MISSING_VALUE_STRATEGY_OPTIONS, NUMBER, + PREPROCESSING) from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/sequence.py b/ludwig/schema/features/preprocessing/sequence.py index 6d504d0fecc..fb737590736 100644 --- a/ludwig/schema/features/preprocessing/sequence.py +++ b/ludwig/schema/features/preprocessing/sequence.py @@ -1,5 +1,7 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, SEQUENCE +from ludwig.constants import (DROP_ROW, FILL_WITH_CONST, + MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, + SEQUENCE) from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/set.py b/ludwig/schema/features/preprocessing/set.py index 9d9ef513dcd..5c6bfcf4db2 100644 --- a/ludwig/schema/features/preprocessing/set.py +++ b/ludwig/schema/features/preprocessing/set.py @@ -1,5 +1,7 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, SET +from ludwig.constants import (DROP_ROW, FILL_WITH_CONST, + MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, + SET) from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/text.py b/ludwig/schema/features/preprocessing/text.py index ec4230fd098..e887b053089 100644 --- a/ludwig/schema/features/preprocessing/text.py +++ b/ludwig/schema/features/preprocessing/text.py @@ -1,5 +1,7 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, TEXT +from ludwig.constants import (DROP_ROW, FILL_WITH_CONST, + MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, + TEXT) from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/timeseries.py b/ludwig/schema/features/preprocessing/timeseries.py index 647dd9a63c5..48e49d22870 100644 --- a/ludwig/schema/features/preprocessing/timeseries.py +++ b/ludwig/schema/features/preprocessing/timeseries.py @@ -1,5 +1,7 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, TIMESERIES +from ludwig.constants import (DROP_ROW, FILL_WITH_CONST, + MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, + TIMESERIES) from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/utils.py b/ludwig/schema/features/preprocessing/utils.py index 48bac3160fc..5e69d638b37 100644 --- a/ludwig/schema/features/preprocessing/utils.py +++ b/ludwig/schema/features/preprocessing/utils.py @@ -1,6 +1,6 @@ from dataclasses import field -from marshmallow import fields, ValidationError +from marshmallow import ValidationError, fields from ludwig.api_annotations import DeveloperAPI from ludwig.schema import utils as schema_utils diff --git a/ludwig/schema/features/preprocessing/vector.py b/ludwig/schema/features/preprocessing/vector.py index ca83daa14c3..d93ab7f4d14 100644 --- a/ludwig/schema/features/preprocessing/vector.py +++ b/ludwig/schema/features/preprocessing/vector.py @@ -1,5 +1,7 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, VECTOR +from ludwig.constants import (DROP_ROW, FILL_WITH_CONST, + MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, + VECTOR) from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/sequence_feature.py b/ludwig/schema/features/sequence_feature.py index df2b2fa0f61..7bd7933a5f4 100644 --- a/ludwig/schema/features/sequence_feature.py +++ b/ludwig/schema/features/sequence_feature.py @@ -1,22 +1,23 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import LOSS, MODEL_ECD, SEQUENCE, SEQUENCE_SOFTMAX_CROSS_ENTROPY +from ludwig.constants import (LOSS, MODEL_ECD, SEQUENCE, + SEQUENCE_SOFTMAX_CROSS_ENTROPY) from ludwig.schema import utils as schema_utils from ludwig.schema.decoders.base import BaseDecoderConfig from ludwig.schema.decoders.utils import DecoderDataclassField from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField -from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig +from ludwig.schema.features.base import (BaseInputFeatureConfig, + BaseOutputFeatureConfig) from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField -from ludwig.schema.features.utils import ( - ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - input_mixin_registry, - output_mixin_registry, -) +from ludwig.schema.features.preprocessing.utils import \ + PreprocessingDataclassField +from ludwig.schema.features.utils import (ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + input_mixin_registry, + output_mixin_registry) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/set_feature.py b/ludwig/schema/features/set_feature.py index 96cc3f68b7e..547964dcf18 100644 --- a/ludwig/schema/features/set_feature.py +++ b/ludwig/schema/features/set_feature.py @@ -5,18 +5,18 @@ from ludwig.schema.decoders.utils import DecoderDataclassField from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField -from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig +from ludwig.schema.features.base import (BaseInputFeatureConfig, + BaseOutputFeatureConfig) from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField -from ludwig.schema.features.utils import ( - ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - input_mixin_registry, - output_mixin_registry, -) +from ludwig.schema.features.preprocessing.utils import \ + PreprocessingDataclassField +from ludwig.schema.features.utils import (ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + input_mixin_registry, + output_mixin_registry) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/text_feature.py b/ludwig/schema/features/text_feature.py index 8c9984a6016..883ad44a2ff 100644 --- a/ludwig/schema/features/text_feature.py +++ b/ludwig/schema/features/text_feature.py @@ -1,35 +1,29 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ( - LOSS, - MODEL_ECD, - MODEL_GBM, - MODEL_LLM, - NEXT_TOKEN_SOFTMAX_CROSS_ENTROPY, - SEQUENCE_SOFTMAX_CROSS_ENTROPY, - TEXT, -) +from ludwig.constants import (LOSS, MODEL_ECD, MODEL_GBM, MODEL_LLM, + NEXT_TOKEN_SOFTMAX_CROSS_ENTROPY, + SEQUENCE_SOFTMAX_CROSS_ENTROPY, TEXT) from ludwig.schema import utils as schema_utils from ludwig.schema.decoders.base import BaseDecoderConfig from ludwig.schema.decoders.utils import DecoderDataclassField from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField -from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig +from ludwig.schema.features.base import (BaseInputFeatureConfig, + BaseOutputFeatureConfig) from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField -from ludwig.schema.features.utils import ( - ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - gbm_defaults_config_registry, - gbm_input_config_registry, - input_mixin_registry, - llm_defaults_config_registry, - llm_input_config_registry, - llm_output_config_registry, - output_mixin_registry, -) +from ludwig.schema.features.preprocessing.utils import \ + PreprocessingDataclassField +from ludwig.schema.features.utils import (ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + gbm_defaults_config_registry, + gbm_input_config_registry, + input_mixin_registry, + llm_defaults_config_registry, + llm_input_config_registry, + llm_output_config_registry, + output_mixin_registry) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/timeseries_feature.py b/ludwig/schema/features/timeseries_feature.py index a5eaeb05a7f..a9f6e23c61a 100644 --- a/ludwig/schema/features/timeseries_feature.py +++ b/ludwig/schema/features/timeseries_feature.py @@ -1,22 +1,23 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import HUBER, MEAN_SQUARED_ERROR, MODEL_ECD, TIMESERIES, VECTOR +from ludwig.constants import (HUBER, MEAN_SQUARED_ERROR, MODEL_ECD, TIMESERIES, + VECTOR) from ludwig.schema import utils as schema_utils from ludwig.schema.decoders.base import BaseDecoderConfig from ludwig.schema.decoders.utils import DecoderDataclassField from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField -from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig +from ludwig.schema.features.base import (BaseInputFeatureConfig, + BaseOutputFeatureConfig) from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField -from ludwig.schema.features.utils import ( - ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - input_mixin_registry, - output_mixin_registry, -) +from ludwig.schema.features.preprocessing.utils import \ + PreprocessingDataclassField +from ludwig.schema.features.utils import (ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + input_mixin_registry, + output_mixin_registry) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/vector_feature.py b/ludwig/schema/features/vector_feature.py index e7cc7c99e4a..524e8e10f2c 100644 --- a/ludwig/schema/features/vector_feature.py +++ b/ludwig/schema/features/vector_feature.py @@ -5,18 +5,18 @@ from ludwig.schema.decoders.utils import DecoderDataclassField from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField -from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig +from ludwig.schema.features.base import (BaseInputFeatureConfig, + BaseOutputFeatureConfig) from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField -from ludwig.schema.features.utils import ( - ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - input_mixin_registry, - output_mixin_registry, -) +from ludwig.schema.features.preprocessing.utils import \ + PreprocessingDataclassField +from ludwig.schema.features.utils import (ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + input_mixin_registry, + output_mixin_registry) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/hyperopt/__init__.py b/ludwig/schema/hyperopt/__init__.py index 6a81d8eaebe..74c92cb9f1d 100644 --- a/ludwig/schema/hyperopt/__init__.py +++ b/ludwig/schema/hyperopt/__init__.py @@ -6,11 +6,14 @@ import ludwig.schema.hyperopt.parameter # noqa: F401 from ludwig.api_annotations import DeveloperAPI from ludwig.constants import LOSS, TEST, TRAIN, VALIDATION -from ludwig.modules import metric_modules # noqa: Needed to ensure that the metric registry is populated. +from ludwig.modules import \ + metric_modules # noqa: Needed to ensure that the metric registry is populated. from ludwig.modules.metric_registry import get_metric_registry from ludwig.schema import utils as schema_utils -from ludwig.schema.hyperopt.executor import ExecutorConfig, ExecutorDataclassField -from ludwig.schema.hyperopt.search_algorithm import BaseSearchAlgorithmConfig, SearchAlgorithmDataclassField +from ludwig.schema.hyperopt.executor import (ExecutorConfig, + ExecutorDataclassField) +from ludwig.schema.hyperopt.search_algorithm import ( + BaseSearchAlgorithmConfig, SearchAlgorithmDataclassField) @DeveloperAPI diff --git a/ludwig/schema/hyperopt/executor.py b/ludwig/schema/hyperopt/executor.py index 3adfe36c1b6..cc69cf870c2 100644 --- a/ludwig/schema/hyperopt/executor.py +++ b/ludwig/schema/hyperopt/executor.py @@ -1,12 +1,13 @@ from dataclasses import field from typing import Dict, Optional, Union -from marshmallow import fields, ValidationError +from marshmallow import ValidationError, fields from ludwig.api_annotations import DeveloperAPI from ludwig.constants import RAY from ludwig.schema import utils as schema_utils -from ludwig.schema.hyperopt.scheduler import BaseSchedulerConfig, SchedulerDataclassField +from ludwig.schema.hyperopt.scheduler import (BaseSchedulerConfig, + SchedulerDataclassField) from ludwig.schema.utils import ludwig_dataclass diff --git a/ludwig/schema/hyperopt/scheduler.py b/ludwig/schema/hyperopt/scheduler.py index 2641c04f6bc..547be79d508 100644 --- a/ludwig/schema/hyperopt/scheduler.py +++ b/ludwig/schema/hyperopt/scheduler.py @@ -3,7 +3,7 @@ from importlib import import_module from typing import Callable, Dict, Optional, Tuple, Union -from marshmallow import fields, ValidationError +from marshmallow import ValidationError, fields from ludwig.api_annotations import DeveloperAPI from ludwig.schema import utils as schema_utils diff --git a/ludwig/schema/hyperopt/search_algorithm.py b/ludwig/schema/hyperopt/search_algorithm.py index 17dc942c557..45a3dc635bd 100644 --- a/ludwig/schema/hyperopt/search_algorithm.py +++ b/ludwig/schema/hyperopt/search_algorithm.py @@ -2,7 +2,7 @@ from importlib import import_module from typing import Dict, List, Optional -from marshmallow import fields, ValidationError +from marshmallow import ValidationError, fields from ludwig.api_annotations import DeveloperAPI from ludwig.schema import utils as schema_utils diff --git a/ludwig/schema/llms/base_model.py b/ludwig/schema/llms/base_model.py index fe77291bfc5..d72172ccf64 100644 --- a/ludwig/schema/llms/base_model.py +++ b/ludwig/schema/llms/base_model.py @@ -2,7 +2,7 @@ import os from dataclasses import field -from marshmallow import fields, ValidationError +from marshmallow import ValidationError, fields from transformers import AutoConfig from ludwig.api_annotations import DeveloperAPI diff --git a/ludwig/schema/llms/peft.py b/ludwig/schema/llms/peft.py index 104a316179c..98c8c5f968d 100644 --- a/ludwig/schema/llms/peft.py +++ b/ludwig/schema/llms/peft.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import List, Optional, Type, TYPE_CHECKING +from typing import TYPE_CHECKING, List, Optional, Type from ludwig.api_annotations import DeveloperAPI from ludwig.error import ConfigValidationError diff --git a/ludwig/schema/lr_scheduler.py b/ludwig/schema/lr_scheduler.py index 3bfedab82bf..bb782cad4be 100644 --- a/ludwig/schema/lr_scheduler.py +++ b/ludwig/schema/lr_scheduler.py @@ -2,7 +2,7 @@ from dataclasses import field from typing import Dict -from marshmallow import fields, ValidationError +from marshmallow import ValidationError, fields import ludwig.schema.utils as schema_utils from ludwig.api_annotations import DeveloperAPI diff --git a/ludwig/schema/model_types/base.py b/ludwig/schema/model_types/base.py index 410aa5c454e..e681c98bbe8 100644 --- a/ludwig/schema/model_types/base.py +++ b/ludwig/schema/model_types/base.py @@ -7,39 +7,28 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.config_validation.checks import get_config_check_registry from ludwig.config_validation.validation import check_schema -from ludwig.constants import ( - BACKEND, - COLUMN, - DEPENDENCIES, - ENCODER, - INPUT_FEATURES, - MODEL_ECD, - NAME, - OUTPUT_FEATURES, - TIED, -) +from ludwig.constants import (BACKEND, COLUMN, DEPENDENCIES, ENCODER, + INPUT_FEATURES, MODEL_ECD, NAME, OUTPUT_FEATURES, + TIED) from ludwig.error import ConfigValidationError from ludwig.globals import LUDWIG_VERSION from ludwig.schema import utils as schema_utils from ludwig.schema.defaults.base import BaseDefaultsConfig -from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig, FeatureCollection +from ludwig.schema.features.base import (BaseInputFeatureConfig, + BaseOutputFeatureConfig, + FeatureCollection) from ludwig.schema.hyperopt import HyperoptConfig from ludwig.schema.model_types.utils import ( - merge_fixed_preprocessing_params, - merge_with_defaults, - sanitize_and_filter_combiner_entities_, - set_derived_feature_columns_, - set_hyperopt_defaults_, - set_llm_parameters, - set_preprocessing_parameters, - set_tagger_decoder_parameters, - set_validation_parameters, -) + merge_fixed_preprocessing_params, merge_with_defaults, + sanitize_and_filter_combiner_entities_, set_derived_feature_columns_, + set_hyperopt_defaults_, set_llm_parameters, set_preprocessing_parameters, + set_tagger_decoder_parameters, set_validation_parameters) from ludwig.schema.preprocessing import PreprocessingConfig from ludwig.schema.trainer import BaseTrainerConfig from ludwig.schema.utils import ludwig_dataclass from ludwig.types import ModelConfigDict -from ludwig.utils.backward_compatibility import upgrade_config_dict_to_latest_version +from ludwig.utils.backward_compatibility import \ + upgrade_config_dict_to_latest_version from ludwig.utils.data_utils import get_sanitized_feature_name, load_yaml from ludwig.utils.registry import Registry diff --git a/ludwig/schema/model_types/ecd.py b/ludwig/schema/model_types/ecd.py index 967d12ae143..c1d07d1ccb1 100644 --- a/ludwig/schema/model_types/ecd.py +++ b/ludwig/schema/model_types/ecd.py @@ -5,13 +5,11 @@ from ludwig.schema.combiners.base import BaseCombinerConfig from ludwig.schema.combiners.utils import CombinerSelection from ludwig.schema.defaults.ecd import ECDDefaultsConfig, ECDDefaultsField -from ludwig.schema.features.base import ( - BaseInputFeatureConfig, - BaseOutputFeatureConfig, - ECDInputFeatureSelection, - ECDOutputFeatureSelection, - FeatureCollection, -) +from ludwig.schema.features.base import (BaseInputFeatureConfig, + BaseOutputFeatureConfig, + ECDInputFeatureSelection, + ECDOutputFeatureSelection, + FeatureCollection) from ludwig.schema.hyperopt import HyperoptConfig, HyperoptField from ludwig.schema.model_types.base import ModelConfig, register_model_type from ludwig.schema.preprocessing import PreprocessingConfig, PreprocessingField diff --git a/ludwig/schema/model_types/gbm.py b/ludwig/schema/model_types/gbm.py index 9fda9294d92..a19a8c0db4c 100644 --- a/ludwig/schema/model_types/gbm.py +++ b/ludwig/schema/model_types/gbm.py @@ -3,13 +3,11 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.schema import utils as schema_utils from ludwig.schema.defaults.gbm import GBMDefaultsConfig, GBMDefaultsField -from ludwig.schema.features.base import ( - BaseInputFeatureConfig, - BaseOutputFeatureConfig, - FeatureCollection, - GBMInputFeatureSelection, - GBMOutputFeatureSelection, -) +from ludwig.schema.features.base import (BaseInputFeatureConfig, + BaseOutputFeatureConfig, + FeatureCollection, + GBMInputFeatureSelection, + GBMOutputFeatureSelection) from ludwig.schema.hyperopt import HyperoptConfig, HyperoptField from ludwig.schema.model_types.base import ModelConfig, register_model_type from ludwig.schema.preprocessing import PreprocessingConfig, PreprocessingField diff --git a/ludwig/schema/model_types/llm.py b/ludwig/schema/model_types/llm.py index 95ddb29bc69..a56a35968a4 100644 --- a/ludwig/schema/model_types/llm.py +++ b/ludwig/schema/model_types/llm.py @@ -3,20 +3,21 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.schema import utils as schema_utils from ludwig.schema.defaults.llm import LLMDefaultsConfig, LLMDefaultsField -from ludwig.schema.features.base import ( - BaseInputFeatureConfig, - BaseOutputFeatureConfig, - FeatureCollection, - LLMInputFeatureSelection, - LLMOutputFeatureSelection, -) +from ludwig.schema.features.base import (BaseInputFeatureConfig, + BaseOutputFeatureConfig, + FeatureCollection, + LLMInputFeatureSelection, + LLMOutputFeatureSelection) from ludwig.schema.hyperopt import HyperoptConfig, HyperoptField from ludwig.schema.llms.base_model import BaseModelDataclassField -from ludwig.schema.llms.generation import LLMGenerationConfig, LLMGenerationConfigField -from ludwig.schema.llms.model_parameters import ModelParametersConfig, ModelParametersConfigField +from ludwig.schema.llms.generation import (LLMGenerationConfig, + LLMGenerationConfigField) +from ludwig.schema.llms.model_parameters import (ModelParametersConfig, + ModelParametersConfigField) from ludwig.schema.llms.peft import AdapterDataclassField, BaseAdapterConfig from ludwig.schema.llms.prompt import PromptConfig, PromptConfigField -from ludwig.schema.llms.quantization import QuantizationConfig, QuantizationConfigField +from ludwig.schema.llms.quantization import (QuantizationConfig, + QuantizationConfigField) from ludwig.schema.model_types.base import ModelConfig, register_model_type from ludwig.schema.preprocessing import PreprocessingConfig, PreprocessingField from ludwig.schema.trainer import LLMTrainerConfig, LLMTrainerDataclassField diff --git a/ludwig/schema/model_types/utils.py b/ludwig/schema/model_types/utils.py index d229214118f..067849955c2 100644 --- a/ludwig/schema/model_types/utils.py +++ b/ludwig/schema/model_types/utils.py @@ -2,31 +2,16 @@ import logging import sys import warnings -from typing import Any, Dict, List, Mapping, Set, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Set from marshmallow import ValidationError from transformers import AutoConfig from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ( - CATEGORY, - COMBINED, - DECODER, - DEFAULTS, - ENCODER, - GRID_SEARCH, - INPUT_FEATURES, - LOSS, - MODEL_ECD, - MODEL_LLM, - OUTPUT_FEATURES, - PARAMETERS, - PREPROCESSING, - SEQUENCE, - SPACE, - TEXT, - TYPE, -) +from ludwig.constants import (CATEGORY, COMBINED, DECODER, DEFAULTS, ENCODER, + GRID_SEARCH, INPUT_FEATURES, LOSS, MODEL_ECD, + MODEL_LLM, OUTPUT_FEATURES, PARAMETERS, + PREPROCESSING, SEQUENCE, SPACE, TEXT, TYPE) from ludwig.features.feature_utils import compute_feature_hash from ludwig.schema.features.utils import output_config_registry from ludwig.schema.hyperopt.scheduler import BaseHyperbandSchedulerConfig @@ -111,7 +96,8 @@ def set_validation_parameters(config: "ModelConfig"): else: # Determine the proper validation field for the user, like if the user specifies "accuracy" but forgets to # change the validation field from "combined" to the name of the feature that produces accuracy metrics. - from ludwig.utils.metric_utils import get_feature_to_metric_names_map + from ludwig.utils.metric_utils import \ + get_feature_to_metric_names_map feature_to_metric_names_map = get_feature_to_metric_names_map(config.output_features.to_list()) validation_field = None diff --git a/ludwig/schema/optimizers.py b/ludwig/schema/optimizers.py index b7d6d0a8268..3b9a8e8d5be 100644 --- a/ludwig/schema/optimizers.py +++ b/ludwig/schema/optimizers.py @@ -4,12 +4,13 @@ import bitsandbytes as bnb import torch -from marshmallow import fields, ValidationError +from marshmallow import ValidationError, fields import ludwig.schema.utils as schema_utils from ludwig.api_annotations import DeveloperAPI from ludwig.schema.metadata import OPTIMIZER_METADATA -from ludwig.schema.metadata.parameter_metadata import convert_metadata_to_json, ParameterMetadata +from ludwig.schema.metadata.parameter_metadata import ( + ParameterMetadata, convert_metadata_to_json) from ludwig.schema.utils import ludwig_dataclass from ludwig.utils.registry import Registry diff --git a/ludwig/schema/profiler.py b/ludwig/schema/profiler.py index b857df62439..c41ffa7910f 100644 --- a/ludwig/schema/profiler.py +++ b/ludwig/schema/profiler.py @@ -1,7 +1,7 @@ from dataclasses import field from typing import Dict -from marshmallow import fields, ValidationError +from marshmallow import ValidationError, fields import ludwig.schema.utils as schema_utils from ludwig.api_annotations import DeveloperAPI diff --git a/ludwig/schema/trainer.py b/ludwig/schema/trainer.py index a164e86fed5..ff54a3e03d9 100644 --- a/ludwig/schema/trainer.py +++ b/ludwig/schema/trainer.py @@ -8,27 +8,18 @@ from packaging.version import parse as parse_version from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ( - AUTO, - EFFECTIVE_BATCH_SIZE, - LOSS, - MAX_BATCH_SIZE, - MAX_POSSIBLE_BATCH_SIZE, - MODEL_ECD, - MODEL_GBM, - MODEL_LLM, - TRAINING, -) +from ludwig.constants import (AUTO, EFFECTIVE_BATCH_SIZE, LOSS, MAX_BATCH_SIZE, + MAX_POSSIBLE_BATCH_SIZE, MODEL_ECD, MODEL_GBM, + MODEL_LLM, TRAINING) from ludwig.error import ConfigValidationError from ludwig.schema import utils as schema_utils -from ludwig.schema.lr_scheduler import LRSchedulerConfig, LRSchedulerDataclassField +from ludwig.schema.lr_scheduler import (LRSchedulerConfig, + LRSchedulerDataclassField) from ludwig.schema.metadata import TRAINER_METADATA -from ludwig.schema.optimizers import ( - BaseOptimizerConfig, - GradientClippingConfig, - GradientClippingDataclassField, - OptimizerDataclassField, -) +from ludwig.schema.optimizers import (BaseOptimizerConfig, + GradientClippingConfig, + GradientClippingDataclassField, + OptimizerDataclassField) from ludwig.schema.profiler import ProfilerConfig, ProfilerDataclassField from ludwig.schema.utils import ludwig_dataclass from ludwig.utils.registry import Registry @@ -470,7 +461,8 @@ def __post_init__(self): ) def update_batch_size_grad_accum(self, num_workers: int): - from ludwig.utils.trainer_utils import get_rendered_batch_size_grad_accum + from ludwig.utils.trainer_utils import \ + get_rendered_batch_size_grad_accum self.batch_size, self.gradient_accumulation_steps = get_rendered_batch_size_grad_accum(self, num_workers) diff --git a/ludwig/schema/utils.py b/ludwig/schema/utils.py index dc876c98490..729ce555dad 100644 --- a/ludwig/schema/utils.py +++ b/ludwig/schema/utils.py @@ -2,7 +2,7 @@ import os import warnings from abc import ABC, abstractmethod -from dataclasses import field, Field +from dataclasses import Field, field from functools import lru_cache from typing import Any from typing import Dict as TDict @@ -11,16 +11,19 @@ import marshmallow_dataclass import yaml -from marshmallow import EXCLUDE, fields, pre_load, schema, validate, ValidationError +from marshmallow import (EXCLUDE, ValidationError, fields, pre_load, schema, + validate) from marshmallow.utils import missing from marshmallow_dataclass import dataclass as m_dataclass from marshmallow_jsonschema import JSONSchema as js from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ACTIVE, COLUMN, LUDWIG_SCHEMA_VALIDATION_POLICY, NAME, PROC_COLUMN, TYPE +from ludwig.constants import (ACTIVE, COLUMN, LUDWIG_SCHEMA_VALIDATION_POLICY, + NAME, PROC_COLUMN, TYPE) from ludwig.modules.reduction_modules import reduce_mode_registry from ludwig.schema.metadata import COMMON_METADATA -from ludwig.schema.metadata.parameter_metadata import convert_metadata_to_json, ParameterMetadata +from ludwig.schema.metadata.parameter_metadata import ( + ParameterMetadata, convert_metadata_to_json) from ludwig.utils.misc_utils import scrub_creds from ludwig.utils.registry import Registry from ludwig.utils.torch_utils import activations, initializer_registry @@ -53,7 +56,8 @@ def load_trainer_with_kwargs( otherwise passes all other parameters through without change. """ from ludwig.constants import MODEL_ECD, MODEL_GBM, MODEL_LLM - from ludwig.schema.trainer import ECDTrainerConfig, GBMTrainerConfig, LLMTrainerConfig + from ludwig.schema.trainer import (ECDTrainerConfig, GBMTrainerConfig, + LLMTrainerConfig) # TODO: use registry pattern for trainers if model_type == MODEL_ECD: diff --git a/ludwig/train.py b/ludwig/train.py index 37e6bb6f618..424d4e23cab 100644 --- a/ludwig/train.py +++ b/ludwig/train.py @@ -28,7 +28,8 @@ from ludwig.globals import LUDWIG_VERSION from ludwig.utils.data_utils import load_config_from_str, load_yaml from ludwig.utils.defaults import default_random_seed -from ludwig.utils.print_utils import get_logging_level_registry, print_ludwig, query_yes_no +from ludwig.utils.print_utils import (get_logging_level_registry, print_ludwig, + query_yes_no) logger = logging.getLogger(__name__) diff --git a/ludwig/trainers/trainer.py b/ludwig/trainers/trainer.py index 74bbdd5885b..0ced212ee53 100644 --- a/ludwig/trainers/trainer.py +++ b/ludwig/trainers/trainer.py @@ -34,32 +34,21 @@ import torch from torch.utils.tensorboard import SummaryWriter -from ludwig.constants import ( - AUTO, - LOSS, - MAX_CPU_BATCH_SIZE, - MINIMIZE, - MODEL_ECD, - MODEL_LLM, - TEST, - TRAINING, - USED_TOKENS, - VALIDATION, -) +from ludwig.constants import (AUTO, LOSS, MAX_CPU_BATCH_SIZE, MINIMIZE, + MODEL_ECD, MODEL_LLM, TEST, TRAINING, + USED_TOKENS, VALIDATION) from ludwig.data.dataset.base import Dataset from ludwig.distributed.base import DistributedStrategy, LocalStrategy -from ludwig.globals import ( - is_progressbar_disabled, - MODEL_FILE_NAME, - MODEL_HYPERPARAMETERS_FILE_NAME, - TRAINING_CHECKPOINTS_DIR_PATH, - TRAINING_PROGRESS_TRACKER_FILE_NAME, -) +from ludwig.globals import (MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, + TRAINING_CHECKPOINTS_DIR_PATH, + TRAINING_PROGRESS_TRACKER_FILE_NAME, + is_progressbar_disabled) from ludwig.models.ecd import ECD from ludwig.models.llm import LLM from ludwig.models.predictor import Predictor from ludwig.modules.lr_scheduler import LRScheduler -from ludwig.modules.metric_modules import get_improved_fn, get_initial_validation_value +from ludwig.modules.metric_modules import (get_improved_fn, + get_initial_validation_value) from ludwig.modules.metric_registry import get_metric_objective from ludwig.modules.optimization_modules import create_clipper from ludwig.progress_bar import LudwigProgressBar @@ -75,21 +64,18 @@ from ludwig.utils.defaults import default_random_seed from ludwig.utils.fs_utils import path_exists from ludwig.utils.llm_utils import update_embedding_layer -from ludwig.utils.metric_utils import get_metric_names, TrainerMetric +from ludwig.utils.metric_utils import TrainerMetric, get_metric_names from ludwig.utils.metrics_printed_table import print_metrics_table from ludwig.utils.misc_utils import set_random_seed from ludwig.utils.model_utils import contains_nan_or_inf_tensors from ludwig.utils.torch_utils import get_torch_device -from ludwig.utils.trainer_utils import ( - append_metrics, - freeze_layers_regex, - get_final_steps_per_checkpoint, - get_latest_metrics_dict, - get_new_progress_tracker, - get_total_expected_checkpoints, - get_total_steps, - ProgressTracker, -) +from ludwig.utils.trainer_utils import (ProgressTracker, append_metrics, + freeze_layers_regex, + get_final_steps_per_checkpoint, + get_latest_metrics_dict, + get_new_progress_tracker, + get_total_expected_checkpoints, + get_total_steps) logger = logging.getLogger(__name__) diff --git a/ludwig/trainers/trainer_lightgbm.py b/ludwig/trainers/trainer_lightgbm.py index d15982d71a9..9a1642a13c5 100644 --- a/ludwig/trainers/trainer_lightgbm.py +++ b/ludwig/trainers/trainer_lightgbm.py @@ -11,19 +11,18 @@ import torch from torch.utils.tensorboard import SummaryWriter -from ludwig.constants import BINARY, CATEGORY, MINIMIZE, MODEL_GBM, NUMBER, TEST, TRAINING, VALIDATION +from ludwig.constants import (BINARY, CATEGORY, MINIMIZE, MODEL_GBM, NUMBER, + TEST, TRAINING, VALIDATION) from ludwig.distributed import init_dist_strategy from ludwig.distributed.base import DistributedStrategy, LocalStrategy from ludwig.features.feature_utils import LudwigFeatureDict -from ludwig.globals import ( - is_progressbar_disabled, - MODEL_FILE_NAME, - TRAINING_CHECKPOINTS_DIR_PATH, - TRAINING_PROGRESS_TRACKER_FILE_NAME, -) +from ludwig.globals import (MODEL_FILE_NAME, TRAINING_CHECKPOINTS_DIR_PATH, + TRAINING_PROGRESS_TRACKER_FILE_NAME, + is_progressbar_disabled) from ludwig.models.gbm import GBM from ludwig.models.predictor import Predictor -from ludwig.modules.metric_modules import get_improved_fn, get_initial_validation_value +from ludwig.modules.metric_modules import (get_improved_fn, + get_initial_validation_value) from ludwig.modules.metric_registry import get_metric_objective from ludwig.progress_bar import LudwigProgressBar from ludwig.schema.trainer import BaseTrainerConfig, GBMTrainerConfig @@ -33,25 +32,17 @@ from ludwig.utils import time_utils from ludwig.utils.checkpoint_utils import CheckpointManager from ludwig.utils.defaults import default_random_seed -from ludwig.utils.gbm_utils import ( - get_single_output_feature, - get_targets, - log_loss_objective, - logits_to_predictions, - multiclass_objective, - store_predictions, - store_predictions_ray, - TrainLogits, -) -from ludwig.utils.metric_utils import get_metric_names, TrainerMetric +from ludwig.utils.gbm_utils import (TrainLogits, get_single_output_feature, + get_targets, log_loss_objective, + logits_to_predictions, + multiclass_objective, store_predictions, + store_predictions_ray) +from ludwig.utils.metric_utils import TrainerMetric, get_metric_names from ludwig.utils.metrics_printed_table import print_metrics_table from ludwig.utils.misc_utils import set_random_seed -from ludwig.utils.trainer_utils import ( - append_metrics, - get_latest_metrics_dict, - get_new_progress_tracker, - ProgressTracker, -) +from ludwig.utils.trainer_utils import (ProgressTracker, append_metrics, + get_latest_metrics_dict, + get_new_progress_tracker) try: import ray diff --git a/ludwig/trainers/trainer_llm.py b/ludwig/trainers/trainer_llm.py index 727257c1375..44ed92f59b8 100644 --- a/ludwig/trainers/trainer_llm.py +++ b/ludwig/trainers/trainer_llm.py @@ -13,23 +13,24 @@ from ludwig.models.llm import LLM from ludwig.models.predictor import LlmFineTunePredictor, LlmPredictor from ludwig.modules.metric_modules import get_initial_validation_value -from ludwig.schema.trainer import BaseTrainerConfig, FineTuneTrainerConfig, NoneTrainerConfig +from ludwig.schema.trainer import (BaseTrainerConfig, FineTuneTrainerConfig, + NoneTrainerConfig) from ludwig.trainers.base import BaseTrainer -from ludwig.trainers.registry import register_llm_ray_trainer, register_llm_trainer +from ludwig.trainers.registry import (register_llm_ray_trainer, + register_llm_trainer) from ludwig.trainers.trainer import Trainer from ludwig.types import ModelConfigDict from ludwig.utils import time_utils from ludwig.utils.batch_size_tuner import ( - BatchSizeEvaluator, - LLMFinetunePredictBatchSizeEvaluator, - LLMFinetuneTrainerBatchSizeEvaluator, -) + BatchSizeEvaluator, LLMFinetunePredictBatchSizeEvaluator, + LLMFinetuneTrainerBatchSizeEvaluator) from ludwig.utils.defaults import default_random_seed from ludwig.utils.metric_utils import TrainerMetric from ludwig.utils.metrics_printed_table import print_metrics_table from ludwig.utils.misc_utils import set_random_seed from ludwig.utils.torch_utils import get_torch_device -from ludwig.utils.trainer_utils import append_metrics, get_new_progress_tracker, ProgressTracker +from ludwig.utils.trainer_utils import (ProgressTracker, append_metrics, + get_new_progress_tracker) logger = logging.getLogger(__name__) diff --git a/ludwig/upload.py b/ludwig/upload.py index acb325046f0..c4e323ff0a3 100644 --- a/ludwig/upload.py +++ b/ludwig/upload.py @@ -4,7 +4,8 @@ import sys from typing import Optional -from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, MODEL_WEIGHTS_FILE_NAME +from ludwig.globals import (MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, + MODEL_WEIGHTS_FILE_NAME) from ludwig.utils.print_utils import get_logging_level_registry from ludwig.utils.upload_utils import HuggingFaceHub, Predibase diff --git a/ludwig/utils/automl/field_info.py b/ludwig/utils/automl/field_info.py index bab4518be43..633076a4a15 100644 --- a/ludwig/utils/automl/field_info.py +++ b/ludwig/utils/automl/field_info.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from typing import List -from dataclasses_json import dataclass_json, LetterCase +from dataclasses_json import LetterCase, dataclass_json from ludwig.api_annotations import DeveloperAPI diff --git a/ludwig/utils/automl/utils.py b/ludwig/utils/automl/utils.py index 30c57511538..0c2b6502daf 100644 --- a/ludwig/utils/automl/utils.py +++ b/ludwig/utils/automl/utils.py @@ -6,20 +6,9 @@ from pandas import Series from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ( - BINARY, - CATEGORY, - COMBINER, - CONFIG, - HYPEROPT, - IMBALANCE_DETECTION_RATIO, - NAME, - NUMBER, - PARAMETERS, - SEARCH_ALG, - TRAINER, - TYPE, -) +from ludwig.constants import (BINARY, CATEGORY, COMBINER, CONFIG, HYPEROPT, + IMBALANCE_DETECTION_RATIO, NAME, NUMBER, + PARAMETERS, SEARCH_ALG, TRAINER, TYPE) from ludwig.features.feature_registries import get_output_type_registry from ludwig.modules.metric_registry import get_metric_objective from ludwig.schema.combiners.utils import get_combiner_jsonschema diff --git a/ludwig/utils/backward_compatibility.py b/ludwig/utils/backward_compatibility.py index d52fc343500..52e0e942499 100644 --- a/ludwig/utils/backward_compatibility.py +++ b/ludwig/utils/backward_compatibility.py @@ -19,70 +19,32 @@ from typing import Any, Callable, Dict, List, Union from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ( - AUDIO, - BIAS, - CLASS_WEIGHTS, - COLUMN, - CONV_BIAS, - CONV_USE_BIAS, - DECODER, - DEFAULT_BIAS, - DEFAULT_USE_BIAS, - DEFAULTS, - ENCODER, - EVAL_BATCH_SIZE, - EXECUTOR, - FORCE_SPLIT, - HEIGHT, - HYPEROPT, - IMAGE, - INPUT_FEATURES, - LOSS, - MISSING_VALUE_STRATEGY, - MODEL_ECD, - MODEL_GBM, - MODEL_TYPE, - NAME, - NUM_SAMPLES, - NUMBER, - OUTPUT_FEATURES, - PARAMETERS, - PREPROCESSING, - PROBABILITIES, - RANDOM, - RAY, - SAMPLER, - SCHEDULER, - SEARCH_ALG, - SEQUENCE, - SPLIT, - SPLIT_PROBABILITIES, - STRATIFY, - TEXT, - TIMESERIES, - TRAINER, - TRAINING, - TYPE, - USE_BIAS, - WIDTH, -) -from ludwig.features.feature_registries import get_base_type_registry, get_input_type_registry, get_output_type_registry +from ludwig.constants import (AUDIO, BIAS, CLASS_WEIGHTS, COLUMN, CONV_BIAS, + CONV_USE_BIAS, DECODER, DEFAULT_BIAS, + DEFAULT_USE_BIAS, DEFAULTS, ENCODER, + EVAL_BATCH_SIZE, EXECUTOR, FORCE_SPLIT, HEIGHT, + HYPEROPT, IMAGE, INPUT_FEATURES, LOSS, + MISSING_VALUE_STRATEGY, MODEL_ECD, MODEL_GBM, + MODEL_TYPE, NAME, NUM_SAMPLES, NUMBER, + OUTPUT_FEATURES, PARAMETERS, PREPROCESSING, + PROBABILITIES, RANDOM, RAY, SAMPLER, SCHEDULER, + SEARCH_ALG, SEQUENCE, SPLIT, SPLIT_PROBABILITIES, + STRATIFY, TEXT, TIMESERIES, TRAINER, TRAINING, + TYPE, USE_BIAS, WIDTH) +from ludwig.features.feature_registries import (get_base_type_registry, + get_input_type_registry, + get_output_type_registry) from ludwig.globals import LUDWIG_VERSION from ludwig.schema.defaults.gbm import GBMDefaultsConfig from ludwig.schema.encoders.utils import get_encoder_cls -from ludwig.types import ( - FeatureConfigDict, - FeatureTypeDefaultsDict, - HyperoptConfigDict, - ModelConfigDict, - PreprocessingConfigDict, - TrainerConfigDict, - TrainingSetMetadataDict, -) +from ludwig.types import (FeatureConfigDict, FeatureTypeDefaultsDict, + HyperoptConfigDict, ModelConfigDict, + PreprocessingConfigDict, TrainerConfigDict, + TrainingSetMetadataDict) from ludwig.utils.metric_utils import TrainerMetric from ludwig.utils.misc_utils import get_from_registry, merge_dict -from ludwig.utils.version_transformation import VersionTransformation, VersionTransformationRegistry +from ludwig.utils.version_transformation import (VersionTransformation, + VersionTransformationRegistry) config_transformation_registry = VersionTransformationRegistry() diff --git a/ludwig/utils/batch_size_tuner.py b/ludwig/utils/batch_size_tuner.py index 0e9568a850d..033310f9218 100644 --- a/ludwig/utils/batch_size_tuner.py +++ b/ludwig/utils/batch_size_tuner.py @@ -8,7 +8,8 @@ import torch from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import MAX_BATCH_SIZE_DATASET_FRACTION, MIN_POSSIBLE_BATCH_SIZE +from ludwig.constants import (MAX_BATCH_SIZE_DATASET_FRACTION, + MIN_POSSIBLE_BATCH_SIZE) logger = logging.getLogger(__name__) diff --git a/ludwig/utils/checkpoint_utils.py b/ludwig/utils/checkpoint_utils.py index d117a053443..0953538c03a 100644 --- a/ludwig/utils/checkpoint_utils.py +++ b/ludwig/utils/checkpoint_utils.py @@ -13,7 +13,7 @@ import uuid from abc import ABC, abstractmethod from glob import glob -from typing import Any, Dict, Mapping, Optional, Tuple, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional, Tuple import torch from torch.optim import Optimizer diff --git a/ludwig/utils/config_utils.py b/ludwig/utils/config_utils.py index 6c4970c255e..bcd2d643b25 100644 --- a/ludwig/utils/config_utils.py +++ b/ludwig/utils/config_utils.py @@ -1,23 +1,13 @@ from typing import Any, Dict, List, Set, Union from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ( - DECODER, - ENCODER, - IMAGE, - INPUT_FEATURES, - MODEL_ECD, - MODEL_LLM, - MODEL_TYPE, - PREPROCESSING, - SEQUENCE, - TEXT, - TIMESERIES, - TYPE, -) +from ludwig.constants import (DECODER, ENCODER, IMAGE, INPUT_FEATURES, + MODEL_ECD, MODEL_LLM, MODEL_TYPE, PREPROCESSING, + SEQUENCE, TEXT, TIMESERIES, TYPE) from ludwig.features.feature_registries import get_input_type_registry from ludwig.schema.model_config import ModelConfig -from ludwig.types import FeatureConfigDict, FeatureTypeDefaultsDict, PreprocessingConfigDict +from ludwig.types import (FeatureConfigDict, FeatureTypeDefaultsDict, + PreprocessingConfigDict) @DeveloperAPI diff --git a/ludwig/utils/data_utils.py b/ludwig/utils/data_utils.py index b06f753a174..efdd9d1720c 100644 --- a/ludwig/utils/data_utils.py +++ b/ludwig/utils/data_utils.py @@ -43,9 +43,13 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.constants import PREPROCESSING, SPLIT from ludwig.data.cache.types import CacheableDataset -from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME, MODEL_WEIGHTS_FILE_NAME, TRAIN_SET_METADATA_FILE_NAME -from ludwig.utils.dataframe_utils import from_numpy_dataset, is_dask_lib, to_numpy_dataset -from ludwig.utils.fs_utils import download_h5, has_remote_protocol, open_file, upload_h5 +from ludwig.globals import (MODEL_HYPERPARAMETERS_FILE_NAME, + MODEL_WEIGHTS_FILE_NAME, + TRAIN_SET_METADATA_FILE_NAME) +from ludwig.utils.dataframe_utils import (from_numpy_dataset, is_dask_lib, + to_numpy_dataset) +from ludwig.utils.fs_utils import (download_h5, has_remote_protocol, open_file, + upload_h5) from ludwig.utils.math_utils import cumsum from ludwig.utils.misc_utils import get_from_registry from ludwig.utils.types import DataFrame diff --git a/ludwig/utils/date_utils.py b/ludwig/utils/date_utils.py index 3ab9babf9c2..932ccaf85ce 100644 --- a/ludwig/utils/date_utils.py +++ b/ludwig/utils/date_utils.py @@ -18,7 +18,7 @@ from typing import Union import numpy as np -from dateutil.parser import parse, ParserError +from dateutil.parser import ParserError, parse from ludwig.api_annotations import DeveloperAPI diff --git a/ludwig/utils/defaults.py b/ludwig/utils/defaults.py index a623011a991..53239a9bed0 100644 --- a/ludwig/utils/defaults.py +++ b/ludwig/utils/defaults.py @@ -25,7 +25,8 @@ from ludwig.globals import LUDWIG_VERSION from ludwig.schema.model_config import ModelConfig from ludwig.schema.preprocessing import PreprocessingConfig -from ludwig.utils.backward_compatibility import upgrade_config_dict_to_latest_version +from ludwig.utils.backward_compatibility import \ + upgrade_config_dict_to_latest_version from ludwig.utils.data_utils import load_config_from_str, load_yaml from ludwig.utils.fs_utils import open_file from ludwig.utils.print_utils import print_ludwig diff --git a/ludwig/utils/entmax/__init__.py b/ludwig/utils/entmax/__init__.py index 7eb4162ff95..2730034a0c1 100644 --- a/ludwig/utils/entmax/__init__.py +++ b/ludwig/utils/entmax/__init__.py @@ -1,17 +1,13 @@ __version__ = "1.1.dev0" -from ludwig.utils.entmax.activations import entmax15, Entmax15, sparsemax, Sparsemax -from ludwig.utils.entmax.losses import ( - entmax15_loss, - Entmax15Loss, - entmax_bisect_loss, - EntmaxBisectLoss, - sparsemax_bisect_loss, - sparsemax_loss, - SparsemaxBisectLoss, - SparsemaxLoss, -) -from ludwig.utils.entmax.root_finding import entmax_bisect, EntmaxBisect, sparsemax_bisect, SparsemaxBisect +from ludwig.utils.entmax.activations import (Entmax15, Sparsemax, entmax15, + sparsemax) +from ludwig.utils.entmax.losses import (Entmax15Loss, EntmaxBisectLoss, + SparsemaxBisectLoss, SparsemaxLoss, + entmax15_loss, entmax_bisect_loss, + sparsemax_bisect_loss, sparsemax_loss) +from ludwig.utils.entmax.root_finding import (EntmaxBisect, SparsemaxBisect, + entmax_bisect, sparsemax_bisect) __all__ = [ "entmax15", diff --git a/ludwig/utils/heuristics.py b/ludwig/utils/heuristics.py index dd44a10e4b4..a7b525395a0 100644 --- a/ludwig/utils/heuristics.py +++ b/ludwig/utils/heuristics.py @@ -1,5 +1,7 @@ from ludwig.schema.model_config import ModelConfig -from ludwig.utils.config_utils import has_pretrained_encoder, has_trainable_encoder, has_unstructured_input_feature +from ludwig.utils.config_utils import (has_pretrained_encoder, + has_trainable_encoder, + has_unstructured_input_feature) def get_auto_learning_rate(config: ModelConfig) -> float: diff --git a/ludwig/utils/image_utils.py b/ludwig/utils/image_utils.py index a2fae951777..fd5a3259759 100644 --- a/ludwig/utils/image_utils.py +++ b/ludwig/utils/image_utils.py @@ -24,7 +24,7 @@ import tifffile import torch import torchvision.transforms.functional as F -from torchvision.io import decode_image, ImageReadMode +from torchvision.io import ImageReadMode, decode_image from torchvision.models._api import WeightsEnum from ludwig.api_annotations import DeveloperAPI diff --git a/ludwig/utils/inference_utils.py b/ludwig/utils/inference_utils.py index ae5d966488b..d471d78501b 100644 --- a/ludwig/utils/inference_utils.py +++ b/ludwig/utils/inference_utils.py @@ -4,25 +4,10 @@ import pandas as pd import torch -from ludwig.constants import ( - AUDIO, - BAG, - BINARY, - CATEGORY, - COLUMN, - DATE, - IMAGE, - NAME, - POSTPROCESSOR, - PREDICTOR, - PREPROCESSOR, - SEQUENCE, - SET, - TEXT, - TIMESERIES, - TYPE, - VECTOR, -) +from ludwig.constants import (AUDIO, BAG, BINARY, CATEGORY, COLUMN, DATE, + IMAGE, NAME, POSTPROCESSOR, PREDICTOR, + PREPROCESSOR, SEQUENCE, SET, TEXT, TIMESERIES, + TYPE, VECTOR) from ludwig.types import FeatureConfigDict, ModelConfigDict from ludwig.utils.audio_utils import read_audio_from_path from ludwig.utils.date_utils import create_vector_from_datetime_obj diff --git a/ludwig/utils/llm_utils.py b/ludwig/utils/llm_utils.py index 29452237e33..4875e82ded8 100644 --- a/ludwig/utils/llm_utils.py +++ b/ludwig/utils/llm_utils.py @@ -1,16 +1,18 @@ import copy import logging import tempfile -from typing import Dict, Optional, Tuple, TYPE_CHECKING, Union +from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union import torch import torch.nn.functional as F import transformers from bitsandbytes.nn.modules import Embedding from packaging import version -from transformers import AutoConfig, AutoModelForCausalLM, PreTrainedModel, PreTrainedTokenizer, TextStreamer +from transformers import (AutoConfig, AutoModelForCausalLM, PreTrainedModel, + PreTrainedTokenizer, TextStreamer) -from ludwig.constants import IGNORE_INDEX_TOKEN_ID, LOGITS, PREDICTIONS, PROBABILITIES +from ludwig.constants import (IGNORE_INDEX_TOKEN_ID, LOGITS, PREDICTIONS, + PROBABILITIES) from ludwig.schema.trainer import LLMTrainerConfig from ludwig.utils.error_handling_utils import default_retry from ludwig.utils.logging_utils import log_once @@ -158,7 +160,8 @@ def initialize_adapter( logger.info(f"Using pretrained adapter weights: {config_obj.adapter.pretrained_adapter_weights}") # Leave this import inline to support a minimal install of Ludwig - from peft import MODEL_TYPE_TO_PEFT_MODEL_MAPPING, PeftConfig # noqa + from peft import (MODEL_TYPE_TO_PEFT_MODEL_MAPPING, # noqa + PeftConfig) peft_config = PeftConfig.from_pretrained(config_obj.adapter.pretrained_adapter_weights) @@ -167,7 +170,7 @@ def initialize_adapter( ) else: # Leave this import inline to support a minimal install of Ludwig - from peft import get_peft_model, TaskType # noqa + from peft import TaskType, get_peft_model # noqa # If no pretrained adapter is provided, we want to load untrained weights into the model peft_config = config_obj.adapter.to_config( diff --git a/ludwig/utils/misc_utils.py b/ludwig/utils/misc_utils.py index 6949d1bdf1a..907e9f3c0d5 100644 --- a/ludwig/utils/misc_utils.py +++ b/ludwig/utils/misc_utils.py @@ -21,7 +21,7 @@ import weakref from collections import OrderedDict from collections.abc import Mapping -from typing import Any, Dict, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Dict import numpy import torch diff --git a/ludwig/utils/neuropod_utils.py b/ludwig/utils/neuropod_utils.py index e3e3f504338..e0e58613f07 100644 --- a/ludwig/utils/neuropod_utils.py +++ b/ludwig/utils/neuropod_utils.py @@ -107,7 +107,8 @@ def _get_output_spec(model: LudwigModel) -> List[Dict[str, Any]]: @DeveloperAPI def export_neuropod(model: LudwigModel, neuropod_path: str, neuropod_model_name="ludwig_model"): try: - from neuropod.backends.torchscript.packager import create_torchscript_neuropod + from neuropod.backends.torchscript.packager import \ + create_torchscript_neuropod except ImportError: raise RuntimeError('The "neuropod" package is not installed in your environment.') diff --git a/ludwig/utils/strings_utils.py b/ludwig/utils/strings_utils.py index 16aaa9a7252..eb6ce7791a8 100644 --- a/ludwig/utils/strings_utils.py +++ b/ludwig/utils/strings_utils.py @@ -24,7 +24,8 @@ import numpy as np from dateutil.parser import parse as parse_datetime -from ludwig.constants import PADDING_SYMBOL, START_SYMBOL, STOP_SYMBOL, UNKNOWN_SYMBOL +from ludwig.constants import (PADDING_SYMBOL, START_SYMBOL, STOP_SYMBOL, + UNKNOWN_SYMBOL) from ludwig.data.dataframe.base import DataFrameEngine from ludwig.data.dataframe.pandas import PANDAS from ludwig.utils.fs_utils import open_file diff --git a/ludwig/utils/tokenizers.py b/ludwig/utils/tokenizers.py index 99cde68d51a..7d53572316c 100644 --- a/ludwig/utils/tokenizers.py +++ b/ludwig/utils/tokenizers.py @@ -855,17 +855,11 @@ def _set_pad_token(self) -> None: # CodeGenTokenizer Used by Phi-2 # GPTNeoXTokenizerFast Used by Pythia - from transformers import ( - CodeGenTokenizer, - CodeGenTokenizerFast, - CodeLlamaTokenizer, - CodeLlamaTokenizerFast, - GPT2Tokenizer, - GPT2TokenizerFast, - GPTNeoXTokenizerFast, - LlamaTokenizer, - LlamaTokenizerFast, - ) + from transformers import (CodeGenTokenizer, CodeGenTokenizerFast, + CodeLlamaTokenizer, CodeLlamaTokenizerFast, + GPT2Tokenizer, GPT2TokenizerFast, + GPTNeoXTokenizerFast, LlamaTokenizer, + LlamaTokenizerFast) # Tokenizers might have the pad token id attribute since they tend to use the same base class, but # it can be set to None so we check for this explicitly. diff --git a/ludwig/utils/trainer_utils.py b/ludwig/utils/trainer_utils.py index 8a9fd779d4f..9a505023524 100644 --- a/ludwig/utils/trainer_utils.py +++ b/ludwig/utils/trainer_utils.py @@ -1,7 +1,7 @@ import logging import re from collections import defaultdict -from typing import Dict, List, Tuple, TYPE_CHECKING, Union +from typing import TYPE_CHECKING, Dict, List, Tuple, Union try: from typing import Literal diff --git a/ludwig/utils/triton_utils.py b/ludwig/utils/triton_utils.py index 3d81cdb1069..07178542038 100644 --- a/ludwig/utils/triton_utils.py +++ b/ludwig/utils/triton_utils.py @@ -11,33 +11,17 @@ from ludwig.api import LudwigModel from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import ( - AUDIO, - BAG, - BINARY, - CATEGORY, - DATE, - IMAGE, - INPUT_FEATURES, - POSTPROCESSOR, - PREDICTOR, - PREPROCESSOR, - SEQUENCE, - SET, - TEXT, - TIMESERIES, - TYPE, - VECTOR, -) +from ludwig.constants import (AUDIO, BAG, BINARY, CATEGORY, DATE, IMAGE, + INPUT_FEATURES, POSTPROCESSOR, PREDICTOR, + PREPROCESSOR, SEQUENCE, SET, TEXT, TIMESERIES, + TYPE, VECTOR) from ludwig.data.dataset_synthesizer import build_synthetic_dataset -from ludwig.models.inference import ( - _InferencePostprocessor, - _InferencePredictor, - _InferencePreprocessor, - InferenceModule, -) +from ludwig.models.inference import (InferenceModule, _InferencePostprocessor, + _InferencePredictor, + _InferencePreprocessor) from ludwig.types import ModelConfigDict -from ludwig.utils.inference_utils import to_inference_module_input_from_dataframe +from ludwig.utils.inference_utils import \ + to_inference_module_input_from_dataframe from ludwig.utils.misc_utils import remove_empty_lines from ludwig.utils.torch_utils import model_size, place_on_device from ludwig.utils.types import TorchAudioTuple, TorchscriptPreprocessingInput diff --git a/ludwig/utils/upload_utils.py b/ludwig/utils/upload_utils.py index 51a0fb87efd..7991ba8982a 100644 --- a/ludwig/utils/upload_utils.py +++ b/ludwig/utils/upload_utils.py @@ -7,7 +7,8 @@ from huggingface_hub import HfApi, login from huggingface_hub.hf_api import CommitInfo -from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, MODEL_WEIGHTS_FILE_NAME +from ludwig.globals import (MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, + MODEL_WEIGHTS_FILE_NAME) logger = logging.getLogger(__name__) diff --git a/ludwig/visualize.py b/ludwig/visualize.py index feb97897072..1bbc5aff346 100644 --- a/ludwig/visualize.py +++ b/ludwig/visualize.py @@ -33,18 +33,14 @@ from ludwig.api_annotations import DeveloperAPI, PublicAPI from ludwig.backend import LOCAL_BACKEND from ludwig.callbacks import Callback -from ludwig.constants import ACCURACY, EDIT_DISTANCE, HITS_AT_K, LOSS, PREDICTIONS, SPACE, SPLIT +from ludwig.constants import (ACCURACY, EDIT_DISTANCE, HITS_AT_K, LOSS, + PREDICTIONS, SPACE, SPLIT) from ludwig.contrib import add_contrib_callback_args from ludwig.utils import visualization_utils -from ludwig.utils.data_utils import ( - CACHEABLE_FORMATS, - data_reader_registry, - figure_data_format_dataset, - load_array, - load_from_file, - load_json, - replace_file_extension, -) +from ludwig.utils.data_utils import (CACHEABLE_FORMATS, data_reader_registry, + figure_data_format_dataset, load_array, + load_from_file, load_json, + replace_file_extension) from ludwig.utils.dataframe_utils import to_numpy_dataset, unflatten_df from ludwig.utils.fs_utils import path_exists from ludwig.utils.misc_utils import get_from_registry diff --git a/tests/conftest.py b/tests/conftest.py index 9dae92e2e65..f7642dd5d06 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,19 +21,12 @@ import pytest -from ludwig.constants import ( - BATCH_SIZE, - COMBINER, - EPOCHS, - HYPEROPT, - INPUT_FEATURES, - NAME, - OUTPUT_FEATURES, - TRAINER, - TYPE, -) +from ludwig.constants import (BATCH_SIZE, COMBINER, EPOCHS, HYPEROPT, + INPUT_FEATURES, NAME, OUTPUT_FEATURES, TRAINER, + TYPE) from ludwig.hyperopt.run import hyperopt -from tests.integration_tests.utils import category_feature, generate_data, text_feature +from tests.integration_tests.utils import (category_feature, generate_data, + text_feature) TEST_SUITE_TIMEOUT_S = int(os.environ.get("LUDWIG_TEST_SUITE_TIMEOUT_S", 3600)) diff --git a/tests/integration_tests/scripts/run_train_aim.py b/tests/integration_tests/scripts/run_train_aim.py index 659f5119347..a67abbe9dbe 100644 --- a/tests/integration_tests/scripts/run_train_aim.py +++ b/tests/integration_tests/scripts/run_train_aim.py @@ -8,7 +8,8 @@ import aim # noqa from ludwig.contribs.aim import AimCallback -from tests.integration_tests.utils import category_feature, generate_data, image_feature, run_experiment +from tests.integration_tests.utils import (category_feature, generate_data, + image_feature, run_experiment) PATH_HERE = os.path.abspath(os.path.dirname(__file__)) PATH_ROOT = os.path.join(PATH_HERE, "..", "..", "..") diff --git a/tests/integration_tests/scripts/run_train_comet.py b/tests/integration_tests/scripts/run_train_comet.py index 52842b40200..8beb71774c0 100644 --- a/tests/integration_tests/scripts/run_train_comet.py +++ b/tests/integration_tests/scripts/run_train_comet.py @@ -28,7 +28,8 @@ PATH_ROOT = os.path.join(PATH_HERE, "..", "..", "..") sys.path.insert(0, os.path.abspath(PATH_ROOT)) -from tests.integration_tests.utils import category_feature, generate_data, image_feature # noqa +from tests.integration_tests.utils import (category_feature, # noqa + generate_data, image_feature) parser = argparse.ArgumentParser() parser.add_argument("--csv-filename", required=True) diff --git a/tests/integration_tests/scripts/run_train_wandb.py b/tests/integration_tests/scripts/run_train_wandb.py index 5496a4ebdb4..e386eb2feda 100644 --- a/tests/integration_tests/scripts/run_train_wandb.py +++ b/tests/integration_tests/scripts/run_train_wandb.py @@ -18,7 +18,9 @@ PATH_ROOT = os.path.join(PATH_HERE, "..", "..", "..") sys.path.insert(0, os.path.abspath(PATH_ROOT)) -from tests.integration_tests.utils import category_feature, generate_data, image_feature, run_experiment # noqa +from tests.integration_tests.utils import (category_feature, # noqa + generate_data, image_feature, + run_experiment) parser = argparse.ArgumentParser() parser.add_argument("--csv-filename", required=True) diff --git a/tests/integration_tests/test_api.py b/tests/integration_tests/test_api.py index 21e81f08202..01994119e42 100644 --- a/tests/integration_tests/test_api.py +++ b/tests/integration_tests/test_api.py @@ -28,16 +28,10 @@ from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME from ludwig.models.inference import InferenceModule from ludwig.utils.data_utils import read_csv -from tests.integration_tests.utils import ( - category_feature, - ENCODERS, - generate_data, - get_weights, - image_feature, - run_api_experiment, - sequence_feature, - text_feature, -) +from tests.integration_tests.utils import (ENCODERS, category_feature, + generate_data, get_weights, + image_feature, run_api_experiment, + sequence_feature, text_feature) def run_api_experiment_separated_datasets(input_features, output_features, data_csv): diff --git a/tests/integration_tests/test_automl.py b/tests/integration_tests/test_automl.py index 4ed11229aaf..e5e78b190a8 100644 --- a/tests/integration_tests/test_automl.py +++ b/tests/integration_tests/test_automl.py @@ -8,28 +8,24 @@ import pytest from ludwig.api import LudwigModel -from ludwig.constants import COLUMN, ENCODER, INPUT_FEATURES, NAME, OUTPUT_FEATURES, PREPROCESSING, SPLIT, TYPE +from ludwig.constants import (COLUMN, ENCODER, INPUT_FEATURES, NAME, + OUTPUT_FEATURES, PREPROCESSING, SPLIT, TYPE) from ludwig.schema.model_types.base import ModelConfig from ludwig.types import FeatureConfigDict, ModelConfigDict from ludwig.utils.misc_utils import merge_dict -from tests.integration_tests.utils import ( - binary_feature, - category_feature, - generate_data, - image_feature, - minio_test_creds, - number_feature, - private_param, - remote_tmpdir, - text_feature, -) +from tests.integration_tests.utils import (binary_feature, category_feature, + generate_data, image_feature, + minio_test_creds, number_feature, + private_param, remote_tmpdir, + text_feature) ray = pytest.importorskip("ray") import dask.dataframe as dd # noqa E402 from ray.tune.experiment.trial import Trial # noqa E402 -from ludwig.automl import auto_train, create_auto_config, train_with_config # noqa E402 +from ludwig.automl import (auto_train, create_auto_config, # noqa E402 + train_with_config) from ludwig.automl.automl import OUTPUT_DIR # noqa E402 from ludwig.hyperopt.execution import RayTuneExecutor # noqa E402 diff --git a/tests/integration_tests/test_cache_manager.py b/tests/integration_tests/test_cache_manager.py index 747fe400eed..b4241cde51b 100644 --- a/tests/integration_tests/test_cache_manager.py +++ b/tests/integration_tests/test_cache_manager.py @@ -5,11 +5,12 @@ import pytest from ludwig.constants import CHECKSUM, META, TEST, TRAINING, VALIDATION -from ludwig.data.cache.manager import alphanum, CacheManager +from ludwig.data.cache.manager import CacheManager, alphanum from ludwig.data.cache.types import CacheableDataframe, wrap from ludwig.data.dataset.pandas import PandasDatasetManager from ludwig.globals import TRAINING_PREPROC_FILE_NAME -from tests.integration_tests.utils import category_feature, LocalTestBackend, sequence_feature +from tests.integration_tests.utils import (LocalTestBackend, category_feature, + sequence_feature) @pytest.fixture diff --git a/tests/integration_tests/test_cached_preprocessing.py b/tests/integration_tests/test_cached_preprocessing.py index d035180dde6..61973993bf8 100644 --- a/tests/integration_tests/test_cached_preprocessing.py +++ b/tests/integration_tests/test_cached_preprocessing.py @@ -4,9 +4,12 @@ import pytest from ludwig.api import LudwigModel -from ludwig.constants import MODEL_ECD, MODEL_GBM, PREPROCESSING, PROC_COLUMN, TRAINER +from ludwig.constants import (MODEL_ECD, MODEL_GBM, PREPROCESSING, PROC_COLUMN, + TRAINER) from tests.integration_tests.test_gbm import category_feature -from tests.integration_tests.utils import binary_feature, generate_data, number_feature, run_test_suite, text_feature +from tests.integration_tests.utils import (binary_feature, generate_data, + number_feature, run_test_suite, + text_feature) @pytest.mark.slow diff --git a/tests/integration_tests/test_carton.py b/tests/integration_tests/test_carton.py index 37fb0b4e389..e44376bf247 100644 --- a/tests/integration_tests/test_carton.py +++ b/tests/integration_tests/test_carton.py @@ -25,13 +25,9 @@ from ludwig.api import LudwigModel from ludwig.constants import BATCH_SIZE, NAME, PREDICTIONS, TRAINER from ludwig.utils.carton_utils import export_carton -from tests.integration_tests.utils import ( - binary_feature, - category_feature, - generate_data, - LocalTestBackend, - number_feature, -) +from tests.integration_tests.utils import (LocalTestBackend, binary_feature, + category_feature, generate_data, + number_feature) @pytest.mark.skipif(platform.system() == "Windows", reason="Carton is not supported on Windows") diff --git a/tests/integration_tests/test_class_imbalance_feature.py b/tests/integration_tests/test_class_imbalance_feature.py index 82b31514c49..e52a9e30952 100644 --- a/tests/integration_tests/test_class_imbalance_feature.py +++ b/tests/integration_tests/test_class_imbalance_feature.py @@ -8,7 +8,8 @@ from ludwig.api import LudwigModel from ludwig.backend import LocalBackend -from tests.integration_tests.utils import create_data_set_to_use, RAY_BACKEND_CONFIG, spawn +from tests.integration_tests.utils import (RAY_BACKEND_CONFIG, + create_data_set_to_use, spawn) try: import ray diff --git a/tests/integration_tests/test_cli.py b/tests/integration_tests/test_cli.py index 0c0302a2e5c..dd865d20107 100644 --- a/tests/integration_tests/test_cli.py +++ b/tests/integration_tests/test_cli.py @@ -24,20 +24,14 @@ import pytest import yaml -from ludwig.constants import ( - BATCH_SIZE, - COMBINER, - EVAL_BATCH_SIZE, - INPUT_FEATURES, - NAME, - OUTPUT_FEATURES, - PREPROCESSING, - TRAINER, -) +from ludwig.constants import (BATCH_SIZE, COMBINER, EVAL_BATCH_SIZE, + INPUT_FEATURES, NAME, OUTPUT_FEATURES, + PREPROCESSING, TRAINER) from ludwig.globals import MODEL_FILE_NAME from ludwig.types import FeatureConfigDict from ludwig.utils.data_utils import load_yaml -from tests.integration_tests.utils import category_feature, generate_data, number_feature, sequence_feature +from tests.integration_tests.utils import (category_feature, generate_data, + number_feature, sequence_feature) pytestmark = pytest.mark.integration_tests_b diff --git a/tests/integration_tests/test_collect.py b/tests/integration_tests/test_collect.py index a3d4574152f..acef2001b78 100644 --- a/tests/integration_tests/test_collect.py +++ b/tests/integration_tests/test_collect.py @@ -19,11 +19,13 @@ import torch from ludwig.api import LudwigModel -from ludwig.collect import collect_activations, collect_weights, print_model_summary +from ludwig.collect import (collect_activations, collect_weights, + print_model_summary) from ludwig.constants import BATCH_SIZE, ENCODER, TRAINER, TYPE from ludwig.globals import MODEL_FILE_NAME from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.utils import category_feature, ENCODERS, generate_data, sequence_feature +from tests.integration_tests.utils import (ENCODERS, category_feature, + generate_data, sequence_feature) DEVICE = get_torch_device() diff --git a/tests/integration_tests/test_config_global_defaults.py b/tests/integration_tests/test_config_global_defaults.py index 57e90634589..32288f21918 100644 --- a/tests/integration_tests/test_config_global_defaults.py +++ b/tests/integration_tests/test_config_global_defaults.py @@ -1,25 +1,13 @@ import logging from typing import Dict, Tuple -from ludwig.constants import ( - BATCH_SIZE, - CATEGORY, - COMBINER, - DECODER, - DEFAULTS, - ENCODER, - EPOCHS, - FILL_WITH_CONST, - INPUT_FEATURES, - LOSS, - OUTPUT_FEATURES, - PREPROCESSING, - TEXT, - TRAINER, - TYPE, -) +from ludwig.constants import (BATCH_SIZE, CATEGORY, COMBINER, DECODER, + DEFAULTS, ENCODER, EPOCHS, FILL_WITH_CONST, + INPUT_FEATURES, LOSS, OUTPUT_FEATURES, + PREPROCESSING, TEXT, TRAINER, TYPE) from ludwig.schema.model_config import ModelConfig -from tests.integration_tests.utils import category_feature, generate_data, run_experiment, text_feature +from tests.integration_tests.utils import (category_feature, generate_data, + run_experiment, text_feature) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) diff --git a/tests/integration_tests/test_custom_components.py b/tests/integration_tests/test_custom_components.py index 98f9b01da3d..99c95e892ff 100644 --- a/tests/integration_tests/test_custom_components.py +++ b/tests/integration_tests/test_custom_components.py @@ -4,11 +4,12 @@ import torch from marshmallow_dataclass import dataclass -from torch import nn, Tensor +from torch import Tensor, nn from ludwig.api import LudwigModel from ludwig.combiners.combiners import Combiner, register_combiner -from ludwig.constants import BATCH_SIZE, ENCODER_OUTPUT, LOGITS, MINIMIZE, NUMBER, TRAINER +from ludwig.constants import (BATCH_SIZE, ENCODER_OUTPUT, LOGITS, MINIMIZE, + NUMBER, TRAINER) from ludwig.decoders.base import Decoder from ludwig.decoders.registry import register_decoder from ludwig.encoders.base import Encoder @@ -23,14 +24,11 @@ from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import register_encoder_config from ludwig.schema.features.loss.loss import BaseLossConfig -from ludwig.schema.features.loss.loss import register_loss as register_loss_schema -from tests.integration_tests.utils import ( - category_feature, - generate_data, - LocalTestBackend, - number_feature, - sequence_feature, -) +from ludwig.schema.features.loss.loss import \ + register_loss as register_loss_schema +from tests.integration_tests.utils import (LocalTestBackend, category_feature, + generate_data, number_feature, + sequence_feature) @register_encoder_config("custom_number_encoder", NUMBER) diff --git a/tests/integration_tests/test_date_feature.py b/tests/integration_tests/test_date_feature.py index ea04edff36c..e4364e8bcdb 100644 --- a/tests/integration_tests/test_date_feature.py +++ b/tests/integration_tests/test_date_feature.py @@ -6,21 +6,10 @@ from dateutil.parser import parse from ludwig.api import LudwigModel -from ludwig.constants import ( - BACKEND, - BINARY, - DATE, - EPOCHS, - FILL_WITH_CONST, - INPUT_FEATURES, - MISSING_VALUE_STRATEGY, - NAME, - OUTPUT_FEATURES, - PREPROCESSING, - RAY, - TRAINER, - TYPE, -) +from ludwig.constants import (BACKEND, BINARY, DATE, EPOCHS, FILL_WITH_CONST, + INPUT_FEATURES, MISSING_VALUE_STRATEGY, NAME, + OUTPUT_FEATURES, PREPROCESSING, RAY, TRAINER, + TYPE) from ludwig.utils.date_utils import create_vector_from_datetime_obj ray = pytest.importorskip("ray") diff --git a/tests/integration_tests/test_dependencies.py b/tests/integration_tests/test_dependencies.py index 5ccb96a1731..6a94f136c7c 100644 --- a/tests/integration_tests/test_dependencies.py +++ b/tests/integration_tests/test_dependencies.py @@ -9,7 +9,8 @@ from ludwig.modules.reduction_modules import SequenceReducer from ludwig.schema.model_config import ModelConfig from ludwig.utils import output_feature_utils -from tests.integration_tests.utils import generate_output_features_with_dependencies, number_feature +from tests.integration_tests.utils import ( + generate_output_features_with_dependencies, number_feature) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) diff --git a/tests/integration_tests/test_experiment.py b/tests/integration_tests/test_experiment.py index cf46c1672cc..9b9a8c8ca3a 100644 --- a/tests/integration_tests/test_experiment.py +++ b/tests/integration_tests/test_experiment.py @@ -28,7 +28,8 @@ from ludwig.api import LudwigModel from ludwig.backend import LOCAL_BACKEND from ludwig.callbacks import Callback -from ludwig.constants import BATCH_SIZE, COLUMN, ENCODER, H3, NAME, PREPROCESSING, TRAINER, TYPE +from ludwig.constants import (BATCH_SIZE, COLUMN, ENCODER, H3, NAME, + PREPROCESSING, TRAINER, TYPE) from ludwig.data.concatenate_datasets import concatenate_df from ludwig.data.dataset_synthesizer import build_synthetic_dataset_df from ludwig.data.preprocessing import preprocess_for_training @@ -40,29 +41,13 @@ from ludwig.utils.data_utils import read_csv from ludwig.utils.defaults import default_random_seed from tests.integration_tests.utils import ( - audio_feature, - bag_feature, - binary_feature, - category_distribution_feature, - category_feature, - create_data_set_to_use, - date_feature, - ENCODERS, - generate_data, + ENCODERS, TEXT_ENCODERS, LocalTestBackend, audio_feature, bag_feature, + binary_feature, category_distribution_feature, category_feature, + create_data_set_to_use, date_feature, generate_data, generate_output_features_with_dependencies, - generate_output_features_with_dependencies_complex, - h3_feature, - image_feature, - LocalTestBackend, - number_feature, - run_experiment, - sequence_feature, - set_feature, - TEXT_ENCODERS, - text_feature, - timeseries_feature, - vector_feature, -) + generate_output_features_with_dependencies_complex, h3_feature, + image_feature, number_feature, run_experiment, sequence_feature, + set_feature, text_feature, timeseries_feature, vector_feature) pytestmark = pytest.mark.integration_tests_d diff --git a/tests/integration_tests/test_explain.py b/tests/integration_tests/test_explain.py index 9e1541bb476..a436313509e 100644 --- a/tests/integration_tests/test_explain.py +++ b/tests/integration_tests/test_explain.py @@ -6,25 +6,18 @@ import pytest from ludwig.api import LudwigModel -from ludwig.constants import BATCH_SIZE, BINARY, CATEGORY, MINIMUM_BATCH_SIZE, MODEL_ECD, MODEL_GBM, TYPE +from ludwig.constants import (BATCH_SIZE, BINARY, CATEGORY, MINIMUM_BATCH_SIZE, + MODEL_ECD, MODEL_GBM, TYPE) from ludwig.explain.captum import IntegratedGradientsExplainer from ludwig.explain.explainer import Explainer from ludwig.explain.explanation import Explanation from ludwig.explain.gbm import GBMExplainer -from tests.integration_tests.utils import ( - binary_feature, - category_feature, - date_feature, - generate_data, - image_feature, - LocalTestBackend, - number_feature, - sequence_feature, - set_feature, - text_feature, - timeseries_feature, - vector_feature, -) +from tests.integration_tests.utils import (LocalTestBackend, binary_feature, + category_feature, date_feature, + generate_data, image_feature, + number_feature, sequence_feature, + set_feature, text_feature, + timeseries_feature, vector_feature) try: from ludwig.explain.captum_ray import RayIntegratedGradientsExplainer diff --git a/tests/integration_tests/test_gbm.py b/tests/integration_tests/test_gbm.py index bad65f375a7..1aeab03b436 100644 --- a/tests/integration_tests/test_gbm.py +++ b/tests/integration_tests/test_gbm.py @@ -4,14 +4,16 @@ import pytest from ludwig.api import LudwigModel -from ludwig.constants import INPUT_FEATURES, MODEL_TYPE, OUTPUT_FEATURES, TRAINER +from ludwig.constants import (INPUT_FEATURES, MODEL_TYPE, OUTPUT_FEATURES, + TRAINER) from ludwig.error import ConfigValidationError from ludwig.globals import MODEL_FILE_NAME from ludwig.schema.model_types.base import ModelConfig from tests.integration_tests import synthetic_test_data from tests.integration_tests.utils import binary_feature from tests.integration_tests.utils import category_feature as _category_feature -from tests.integration_tests.utils import generate_data, number_feature, text_feature +from tests.integration_tests.utils import (generate_data, number_feature, + text_feature) pytestmark = pytest.mark.integration_tests_b diff --git a/tests/integration_tests/test_graph_execution.py b/tests/integration_tests/test_graph_execution.py index 495175ed3b5..7a07e004a97 100644 --- a/tests/integration_tests/test_graph_execution.py +++ b/tests/integration_tests/test_graph_execution.py @@ -15,15 +15,9 @@ import pytest from tests.integration_tests.utils import ( - category_feature, - generate_data, - generate_output_features_with_dependencies, - number_feature, - run_experiment, - sequence_feature, - set_feature, - text_feature, -) + category_feature, generate_data, + generate_output_features_with_dependencies, number_feature, run_experiment, + sequence_feature, set_feature, text_feature) @pytest.mark.parametrize( diff --git a/tests/integration_tests/test_horovod.py b/tests/integration_tests/test_horovod.py index ef624e20269..4b0fc2d8729 100644 --- a/tests/integration_tests/test_horovod.py +++ b/tests/integration_tests/test_horovod.py @@ -30,7 +30,8 @@ HOROVOD_AVAILABLE = True from ludwig.constants import ENCODER, TYPE -from tests.integration_tests.utils import category_feature, ENCODERS, generate_data, sequence_feature +from tests.integration_tests.utils import (ENCODERS, category_feature, + generate_data, sequence_feature) # This script will run the actual test model training in parallel TEST_SCRIPT = os.path.join(os.path.dirname(__file__), "scripts", "run_train_horovod.py") diff --git a/tests/integration_tests/test_hyperopt.py b/tests/integration_tests/test_hyperopt.py index 42f4e73c645..de6317af407 100644 --- a/tests/integration_tests/test_hyperopt.py +++ b/tests/integration_tests/test_hyperopt.py @@ -21,29 +21,11 @@ import pytest from ludwig.backend import initialize_backend -from ludwig.constants import ( - ACCURACY, - AUTO, - BATCH_SIZE, - CATEGORY, - COMBINER, - EXECUTOR, - HYPEROPT, - INPUT_FEATURES, - MAX_CONCURRENT_TRIALS, - MODEL_ECD, - MODEL_GBM, - MODEL_TYPE, - NAME, - OUTPUT_FEATURES, - RAY, - TEST, - TEXT, - TRAINER, - TRAINING, - TYPE, - VALIDATION, -) +from ludwig.constants import (ACCURACY, AUTO, BATCH_SIZE, CATEGORY, COMBINER, + EXECUTOR, HYPEROPT, INPUT_FEATURES, + MAX_CONCURRENT_TRIALS, MODEL_ECD, MODEL_GBM, + MODEL_TYPE, NAME, OUTPUT_FEATURES, RAY, TEST, + TEXT, TRAINER, TRAINING, TYPE, VALIDATION) from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME, MODEL_FILE_NAME from ludwig.hyperopt.results import HyperoptResults from ludwig.hyperopt.run import hyperopt @@ -51,18 +33,14 @@ from ludwig.schema.model_config import ModelConfig from ludwig.utils import fs_utils from ludwig.utils.data_utils import load_json, use_credentials -from tests.integration_tests.utils import ( - category_feature, - generate_data, - minio_test_creds, - private_param, - remote_tmpdir, - text_feature, -) +from tests.integration_tests.utils import (category_feature, generate_data, + minio_test_creds, private_param, + remote_tmpdir, text_feature) ray = pytest.importorskip("ray") -from ludwig.hyperopt.execution import get_build_hyperopt_executor, RayTuneExecutor # noqa +from ludwig.hyperopt.execution import (RayTuneExecutor, # noqa + get_build_hyperopt_executor) pytestmark = [pytest.mark.distributed, pytest.mark.integration_tests_a] diff --git a/tests/integration_tests/test_hyperopt_ray.py b/tests/integration_tests/test_hyperopt_ray.py index 07e3374d1c6..fcd99911438 100644 --- a/tests/integration_tests/test_hyperopt_ray.py +++ b/tests/integration_tests/test_hyperopt_ray.py @@ -24,15 +24,18 @@ from ludwig.backend import initialize_backend from ludwig.callbacks import Callback -from ludwig.constants import ACCURACY, AUTO, BATCH_SIZE, EXECUTOR, MAX_CONCURRENT_TRIALS, TRAINER +from ludwig.constants import (ACCURACY, AUTO, BATCH_SIZE, EXECUTOR, + MAX_CONCURRENT_TRIALS, TRAINER) from ludwig.contribs.mlflow import MlflowCallback -from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME, MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME +from ludwig.globals import (HYPEROPT_STATISTICS_FILE_NAME, MODEL_FILE_NAME, + MODEL_HYPERPARAMETERS_FILE_NAME) from ludwig.hyperopt.results import HyperoptResults from ludwig.hyperopt.run import hyperopt from ludwig.hyperopt.utils import update_hyperopt_params_with_defaults from ludwig.schema.model_config import ModelConfig from ludwig.utils.automl.utils import get_model_type -from tests.integration_tests.utils import category_feature, generate_data, text_feature +from tests.integration_tests.utils import (category_feature, generate_data, + text_feature) try: import ray diff --git a/tests/integration_tests/test_hyperopt_ray_horovod.py b/tests/integration_tests/test_hyperopt_ray_horovod.py index 0b9bb513e0b..8e57667a03e 100644 --- a/tests/integration_tests/test_hyperopt_ray_horovod.py +++ b/tests/integration_tests/test_hyperopt_ray_horovod.py @@ -21,20 +21,24 @@ from ludwig.api import LudwigModel from ludwig.callbacks import Callback -from ludwig.constants import ACCURACY, AUTO, BATCH_SIZE, EXECUTOR, MAX_CONCURRENT_TRIALS, TRAINER +from ludwig.constants import (ACCURACY, AUTO, BATCH_SIZE, EXECUTOR, + MAX_CONCURRENT_TRIALS, TRAINER) from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME from ludwig.hyperopt.results import HyperoptResults from ludwig.hyperopt.run import hyperopt from ludwig.hyperopt.utils import update_hyperopt_params_with_defaults from ludwig.schema.model_config import ModelConfig -from tests.integration_tests.utils import binary_feature, create_data_set_to_use, generate_data, number_feature +from tests.integration_tests.utils import (binary_feature, + create_data_set_to_use, + generate_data, number_feature) try: import ray - from ray.tune.syncer import get_node_to_storage_syncer, SyncConfig + from ray.tune.syncer import SyncConfig, get_node_to_storage_syncer from ludwig.backend.ray import RayBackend - from ludwig.hyperopt.execution import _get_relative_checkpoints_dir_parts, RayTuneExecutor + from ludwig.hyperopt.execution import (RayTuneExecutor, + _get_relative_checkpoints_dir_parts) except ImportError: ray = None RayTuneExecutor = object diff --git a/tests/integration_tests/test_input_feature_tied.py b/tests/integration_tests/test_input_feature_tied.py index 906c0459388..542ea36e57c 100644 --- a/tests/integration_tests/test_input_feature_tied.py +++ b/tests/integration_tests/test_input_feature_tied.py @@ -4,14 +4,9 @@ from ludwig.models.base import BaseModel from ludwig.schema.model_config import ModelConfig -from tests.integration_tests.utils import ( - category_feature, - generate_data, - number_feature, - run_experiment, - sequence_feature, - text_feature, -) +from tests.integration_tests.utils import (category_feature, generate_data, + number_feature, run_experiment, + sequence_feature, text_feature) # InputFeatureOptions namedtuple structure: # feature_type: input feature type, e.g., number, category, etc. diff --git a/tests/integration_tests/test_kfold_cv.py b/tests/integration_tests/test_kfold_cv.py index a987f07018a..1f4b954b128 100644 --- a/tests/integration_tests/test_kfold_cv.py +++ b/tests/integration_tests/test_kfold_cv.py @@ -10,15 +10,10 @@ from ludwig.constants import BATCH_SIZE, TRAINER from ludwig.experiment import kfold_cross_validate_cli from ludwig.utils.data_utils import load_json -from tests.integration_tests.utils import ( - binary_feature, - category_feature, - create_data_set_to_use, - generate_data, - number_feature, - sequence_feature, - text_feature, -) +from tests.integration_tests.utils import (binary_feature, category_feature, + create_data_set_to_use, + generate_data, number_feature, + sequence_feature, text_feature) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) diff --git a/tests/integration_tests/test_llm.py b/tests/integration_tests/test_llm.py index ed377b48ca3..573082c0ec0 100644 --- a/tests/integration_tests/test_llm.py +++ b/tests/integration_tests/test_llm.py @@ -14,37 +14,20 @@ import ludwig.error as ludwig_error from ludwig.api import LudwigModel -from ludwig.constants import ( - ADAPTER, - BACKEND, - BASE_MODEL, - BATCH_SIZE, - COMBINER, - EPOCHS, - EVAL_BATCH_SIZE, - GENERATION, - INPUT_FEATURES, - MERGE_ADAPTER_INTO_BASE_MODEL, - MODEL_ECD, - MODEL_LLM, - MODEL_TYPE, - OUTPUT_FEATURES, - POSTPROCESSOR, - PREPROCESSING, - PRETRAINED_ADAPTER_WEIGHTS, - PROGRESSBAR, - PROMPT, - QUANTIZATION, - TARGET_MODULES, - TRAINER, - TYPE, -) +from ludwig.constants import (ADAPTER, BACKEND, BASE_MODEL, BATCH_SIZE, + COMBINER, EPOCHS, EVAL_BATCH_SIZE, GENERATION, + INPUT_FEATURES, MERGE_ADAPTER_INTO_BASE_MODEL, + MODEL_ECD, MODEL_LLM, MODEL_TYPE, + OUTPUT_FEATURES, POSTPROCESSOR, PREPROCESSING, + PRETRAINED_ADAPTER_WEIGHTS, PROGRESSBAR, PROMPT, + QUANTIZATION, TARGET_MODULES, TRAINER, TYPE) from ludwig.globals import MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME from ludwig.models.llm import LLM from ludwig.schema.model_types.base import ModelConfig from ludwig.utils.fs_utils import list_file_names_in_directory from ludwig.utils.types import DataFrame -from tests.integration_tests.utils import category_feature, generate_data, text_feature +from tests.integration_tests.utils import (category_feature, generate_data, + text_feature) pytestmark = pytest.mark.llm diff --git a/tests/integration_tests/test_missing_value_strategy.py b/tests/integration_tests/test_missing_value_strategy.py index fd32b396603..4d5757387ed 100644 --- a/tests/integration_tests/test_missing_value_strategy.py +++ b/tests/integration_tests/test_missing_value_strategy.py @@ -20,20 +20,14 @@ import pytest from ludwig.api import LudwigModel -from ludwig.constants import BATCH_SIZE, COLUMN, DROP_ROW, FILL_WITH_MEAN, PREPROCESSING, PROC_COLUMN, TRAINER +from ludwig.constants import (BATCH_SIZE, COLUMN, DROP_ROW, FILL_WITH_MEAN, + PREPROCESSING, PROC_COLUMN, TRAINER) from ludwig.globals import MODEL_FILE_NAME -from tests.integration_tests.utils import ( - binary_feature, - category_feature, - generate_data, - LocalTestBackend, - number_feature, - read_csv_with_nan, - sequence_feature, - set_feature, - text_feature, - vector_feature, -) +from tests.integration_tests.utils import (LocalTestBackend, binary_feature, + category_feature, generate_data, + number_feature, read_csv_with_nan, + sequence_feature, set_feature, + text_feature, vector_feature) def test_missing_value_prediction(tmpdir, csv_filename): diff --git a/tests/integration_tests/test_mlflow.py b/tests/integration_tests/test_mlflow.py index bac996a781a..0ba5b28f6cd 100644 --- a/tests/integration_tests/test_mlflow.py +++ b/tests/integration_tests/test_mlflow.py @@ -14,8 +14,10 @@ from ludwig.contribs.mlflow import MlflowCallback from ludwig.export import export_mlflow from ludwig.globals import MODEL_FILE_NAME -from ludwig.utils.backward_compatibility import upgrade_config_dict_to_latest_version -from tests.integration_tests.utils import category_feature, FakeRemoteBackend, generate_data, sequence_feature +from ludwig.utils.backward_compatibility import \ + upgrade_config_dict_to_latest_version +from tests.integration_tests.utils import (FakeRemoteBackend, category_feature, + generate_data, sequence_feature) def run_mlflow_callback_test(mlflow_client, config, training_data, val_data, test_data, tmpdir, exp_name=None): diff --git a/tests/integration_tests/test_model_save_and_load.py b/tests/integration_tests/test_model_save_and_load.py index ffba4ab72ff..9788e63acf0 100644 --- a/tests/integration_tests/test_model_save_and_load.py +++ b/tests/integration_tests/test_model_save_and_load.py @@ -8,29 +8,21 @@ import torch from ludwig.api import LudwigModel -from ludwig.constants import BATCH_SIZE, ENCODER, LOSS, NAME, PREPROCESSING, TRAINER, TRAINING, TYPE +from ludwig.constants import (BATCH_SIZE, ENCODER, LOSS, NAME, PREPROCESSING, + TRAINER, TRAINING, TYPE) from ludwig.data.split import get_splitter from ludwig.globals import MODEL_FILE_NAME from ludwig.modules.loss_modules import MSELoss from ludwig.schema.features.loss.loss import MSELossConfig from ludwig.utils.data_utils import read_csv -from tests.integration_tests.utils import ( - audio_feature, - bag_feature, - binary_feature, - category_feature, - date_feature, - generate_data, - h3_feature, - image_feature, - LocalTestBackend, - number_feature, - sequence_feature, - set_feature, - text_feature, - timeseries_feature, - vector_feature, -) +from tests.integration_tests.utils import (LocalTestBackend, audio_feature, + bag_feature, binary_feature, + category_feature, date_feature, + generate_data, h3_feature, + image_feature, number_feature, + sequence_feature, set_feature, + text_feature, timeseries_feature, + vector_feature) def test_model_load_from_checkpoint(tmpdir, csv_filename, tmp_path): diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index 9df49906c2a..e59d1175ba8 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -11,27 +11,22 @@ from ludwig import globals as global_vars from ludwig.api import LudwigModel from ludwig.backend import LOCAL_BACKEND -from ludwig.constants import ( - BATCH_SIZE, - CATEGORY, - DEFAULTS, - EPOCHS, - INPUT_FEATURES, - OUTPUT_FEATURES, - PREPROCESSING, - TRAINER, - TRAINING, -) +from ludwig.constants import (BATCH_SIZE, CATEGORY, DEFAULTS, EPOCHS, + INPUT_FEATURES, OUTPUT_FEATURES, PREPROCESSING, + TRAINER, TRAINING) from ludwig.contribs.mlflow import MlflowCallback from ludwig.experiment import experiment_cli from ludwig.features.number_feature import numeric_transformation_registry -from ludwig.globals import DESCRIPTION_FILE_NAME, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME, TRAINING_PREPROC_FILE_NAME +from ludwig.globals import (DESCRIPTION_FILE_NAME, MODEL_FILE_NAME, + MODEL_WEIGHTS_FILE_NAME, + TRAINING_PREPROC_FILE_NAME) from ludwig.schema.optimizers import optimizer_registry from ludwig.utils.data_utils import load_json, replace_file_extension from ludwig.utils.misc_utils import get_from_registry from ludwig.utils.package_utils import LazyLoader from tests.integration_tests import synthetic_test_data -from tests.integration_tests.utils import category_feature, generate_data, LocalTestBackend +from tests.integration_tests.utils import (LocalTestBackend, category_feature, + generate_data) mlflow = LazyLoader("mlflow", globals(), "mlflow") diff --git a/tests/integration_tests/test_neuropod.py b/tests/integration_tests/test_neuropod.py index 40b8629c1b3..bb1cb1e657a 100644 --- a/tests/integration_tests/test_neuropod.py +++ b/tests/integration_tests/test_neuropod.py @@ -26,13 +26,9 @@ from ludwig.api import LudwigModel from ludwig.constants import BATCH_SIZE, NAME, PREDICTIONS, TRAINER from ludwig.utils.neuropod_utils import export_neuropod -from tests.integration_tests.utils import ( - binary_feature, - category_feature, - generate_data, - LocalTestBackend, - number_feature, -) +from tests.integration_tests.utils import (LocalTestBackend, binary_feature, + category_feature, generate_data, + number_feature) @pytest.mark.skipif(platform.system() == "Windows", reason="Neuropod is not supported on Windows") diff --git a/tests/integration_tests/test_peft.py b/tests/integration_tests/test_peft.py index 9e6fcb2a865..f3ad8e67a46 100644 --- a/tests/integration_tests/test_peft.py +++ b/tests/integration_tests/test_peft.py @@ -2,8 +2,10 @@ import pytest -from ludwig.constants import COMBINER, EPOCHS, INPUT_FEATURES, OUTPUT_FEATURES, TRAINER, TYPE -from tests.integration_tests.utils import binary_feature, generate_data, run_test_suite, text_feature +from ludwig.constants import (COMBINER, EPOCHS, INPUT_FEATURES, + OUTPUT_FEATURES, TRAINER, TYPE) +from tests.integration_tests.utils import (binary_feature, generate_data, + run_test_suite, text_feature) @pytest.mark.integration_tests_e diff --git a/tests/integration_tests/test_postprocessing.py b/tests/integration_tests/test_postprocessing.py index 3990c44b831..3047d2a29b2 100644 --- a/tests/integration_tests/test_postprocessing.py +++ b/tests/integration_tests/test_postprocessing.py @@ -25,14 +25,9 @@ from ludwig.api import LudwigModel from ludwig.constants import BATCH_SIZE, DECODER, NAME, TRAINER from ludwig.globals import MODEL_FILE_NAME -from tests.integration_tests.utils import ( - binary_feature, - category_feature, - generate_data, - RAY_BACKEND_CONFIG, - set_feature, - text_feature, -) +from tests.integration_tests.utils import (RAY_BACKEND_CONFIG, binary_feature, + category_feature, generate_data, + set_feature, text_feature) def random_binary_logits(*args, num_predict_samples, **kwargs): diff --git a/tests/integration_tests/test_preprocessing.py b/tests/integration_tests/test_preprocessing.py index b8a330a88ea..bdd68d7bccd 100644 --- a/tests/integration_tests/test_preprocessing.py +++ b/tests/integration_tests/test_preprocessing.py @@ -16,43 +16,20 @@ from ludwig.api import LudwigModel from ludwig.backend import initialize_backend from ludwig.callbacks import Callback -from ludwig.constants import ( - BASE_MODEL, - BATCH_SIZE, - COLUMN, - DECODER, - EPOCHS, - FULL, - INPUT_FEATURES, - MODEL_ECD, - MODEL_LLM, - MODEL_TYPE, - NAME, - OUTPUT_FEATURES, - PREPROCESSING, - PROC_COLUMN, - PROMPT, - SPLIT, - TRAINER, - TYPE, -) +from ludwig.constants import (BASE_MODEL, BATCH_SIZE, COLUMN, DECODER, EPOCHS, + FULL, INPUT_FEATURES, MODEL_ECD, MODEL_LLM, + MODEL_TYPE, NAME, OUTPUT_FEATURES, PREPROCESSING, + PROC_COLUMN, PROMPT, SPLIT, TRAINER, TYPE) from ludwig.data.concatenate_datasets import concatenate_df -from ludwig.data.preprocessing import handle_features_with_prompt_config, preprocess_for_prediction +from ludwig.data.preprocessing import (handle_features_with_prompt_config, + preprocess_for_prediction) from ludwig.schema.llms.prompt import PromptConfig from ludwig.schema.model_types.base import ModelConfig from tests.integration_tests.utils import ( - assert_preprocessed_dataset_shape_and_dtype_for_feature, - audio_feature, - binary_feature, - category_feature, - generate_data, - generate_data_as_dataframe, - image_feature, - LocalTestBackend, - number_feature, - sequence_feature, - text_feature, -) + LocalTestBackend, assert_preprocessed_dataset_shape_and_dtype_for_feature, + audio_feature, binary_feature, category_feature, generate_data, + generate_data_as_dataframe, image_feature, number_feature, + sequence_feature, text_feature) NUM_EXAMPLES = 20 diff --git a/tests/integration_tests/test_ray.py b/tests/integration_tests/test_ray.py index 02407e318ff..06a56584521 100644 --- a/tests/integration_tests/test_ray.py +++ b/tests/integration_tests/test_ray.py @@ -23,56 +23,29 @@ import torch from ludwig.api import LudwigModel -from ludwig.backend import create_ray_backend, initialize_backend, LOCAL_BACKEND -from ludwig.constants import ( - AUDIO, - BAG, - BALANCE_PERCENTAGE_TOLERANCE, - BATCH_SIZE, - BFILL, - BINARY, - CATEGORY, - COLUMN, - DATE, - H3, - IMAGE, - MAX_BATCH_SIZE_DATASET_FRACTION, - NAME, - NUMBER, - PREPROCESSING, - SEQUENCE, - SET, - SPLIT, - TEXT, - TIMESERIES, - TRAINER, - VECTOR, -) +from ludwig.backend import (LOCAL_BACKEND, create_ray_backend, + initialize_backend) +from ludwig.constants import (AUDIO, BAG, BALANCE_PERCENTAGE_TOLERANCE, + BATCH_SIZE, BFILL, BINARY, CATEGORY, COLUMN, + DATE, H3, IMAGE, MAX_BATCH_SIZE_DATASET_FRACTION, + NAME, NUMBER, PREPROCESSING, SEQUENCE, SET, + SPLIT, TEXT, TIMESERIES, TRAINER, VECTOR) from ludwig.data.preprocessing import balance_data from ludwig.data.split import DEFAULT_PROBABILITIES from ludwig.globals import MODEL_FILE_NAME from ludwig.utils.data_utils import read_parquet from ludwig.utils.misc_utils import merge_dict -from tests.integration_tests.utils import ( - audio_feature, - augment_dataset_with_none, - bag_feature, - binary_feature, - category_feature, - create_data_set_to_use, - date_feature, - generate_data, - h3_feature, - image_feature, - number_feature, - RAY_BACKEND_CONFIG, - sequence_feature, - set_feature, - text_feature, - timeseries_feature, - train_with_backend, - vector_feature, -) +from tests.integration_tests.utils import (RAY_BACKEND_CONFIG, audio_feature, + augment_dataset_with_none, + bag_feature, binary_feature, + category_feature, + create_data_set_to_use, + date_feature, generate_data, + h3_feature, image_feature, + number_feature, sequence_feature, + set_feature, text_feature, + timeseries_feature, + train_with_backend, vector_feature) ray = pytest.importorskip("ray") # noqa @@ -84,9 +57,10 @@ import ray.exceptions # noqa: E402 from ray.air.config import DatasetConfig # noqa: E402 from ray.data import Dataset, DatasetPipeline # noqa: E402 -from ray.train._internal.dataset_spec import DataParallelIngestSpec # noqa: E402 +from ray.train._internal.dataset_spec import \ + DataParallelIngestSpec # noqa: E402 -from ludwig.backend.ray import get_trainer_kwargs, RayBackend # noqa: E402 +from ludwig.backend.ray import RayBackend, get_trainer_kwargs # noqa: E402 from ludwig.data.dataframe.dask import DaskEngine # noqa: E402 try: diff --git a/tests/integration_tests/test_reducers.py b/tests/integration_tests/test_reducers.py index c1ce56e1156..cf56a66dd0a 100644 --- a/tests/integration_tests/test_reducers.py +++ b/tests/integration_tests/test_reducers.py @@ -1,7 +1,8 @@ import pytest from ludwig.modules.reduction_modules import reduce_mode_registry -from tests.integration_tests.utils import category_feature, generate_data, run_experiment, sequence_feature +from tests.integration_tests.utils import (category_feature, generate_data, + run_experiment, sequence_feature) @pytest.mark.parametrize("reduce_output", reduce_mode_registry) diff --git a/tests/integration_tests/test_regularizers.py b/tests/integration_tests/test_regularizers.py index 2e5c3c2bca0..6f24e3f29e1 100644 --- a/tests/integration_tests/test_regularizers.py +++ b/tests/integration_tests/test_regularizers.py @@ -10,17 +10,11 @@ from ludwig.data.preprocessing import preprocess_for_training from ludwig.utils.data_utils import read_csv from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.utils import ( - binary_feature, - category_feature, - date_feature, - generate_data, - image_feature, - LocalTestBackend, - number_feature, - sequence_feature, - set_feature, -) +from tests.integration_tests.utils import (LocalTestBackend, binary_feature, + category_feature, date_feature, + generate_data, image_feature, + number_feature, sequence_feature, + set_feature) DEVICE = get_torch_device() BATCH_SIZE = 32 diff --git a/tests/integration_tests/test_remote.py b/tests/integration_tests/test_remote.py index e9f38e101a8..aff93961422 100644 --- a/tests/integration_tests/test_remote.py +++ b/tests/integration_tests/test_remote.py @@ -6,17 +6,13 @@ from ludwig.api import LudwigModel from ludwig.backend import initialize_backend from ludwig.constants import BATCH_SIZE, TRAINER -from ludwig.globals import DESCRIPTION_FILE_NAME, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME +from ludwig.globals import (DESCRIPTION_FILE_NAME, MODEL_FILE_NAME, + MODEL_WEIGHTS_FILE_NAME) from ludwig.utils import fs_utils from ludwig.utils.data_utils import use_credentials -from tests.integration_tests.utils import ( - category_feature, - generate_data, - minio_test_creds, - private_param, - remote_tmpdir, - sequence_feature, -) +from tests.integration_tests.utils import (category_feature, generate_data, + minio_test_creds, private_param, + remote_tmpdir, sequence_feature) pytestmark = pytest.mark.integration_tests_b diff --git a/tests/integration_tests/test_sequence_decoders.py b/tests/integration_tests/test_sequence_decoders.py index 00645f0267a..fcf0264a9fa 100644 --- a/tests/integration_tests/test_sequence_decoders.py +++ b/tests/integration_tests/test_sequence_decoders.py @@ -2,26 +2,13 @@ import pytest -from ludwig.constants import ( - BATCH_SIZE, - DECODER, - ENCODER, - EPOCHS, - INPUT_FEATURES, - OUTPUT_FEATURES, - SEQUENCE, - TEXT, - TRAINER, - TYPE, -) -from tests.integration_tests.utils import ( - create_data_set_to_use, - generate_data, - RAY_BACKEND_CONFIG, - sequence_feature, - text_feature, - train_with_backend, -) +from ludwig.constants import (BATCH_SIZE, DECODER, ENCODER, EPOCHS, + INPUT_FEATURES, OUTPUT_FEATURES, SEQUENCE, TEXT, + TRAINER, TYPE) +from tests.integration_tests.utils import (RAY_BACKEND_CONFIG, + create_data_set_to_use, + generate_data, sequence_feature, + text_feature, train_with_backend) pytestmark = pytest.mark.integration_tests_c diff --git a/tests/integration_tests/test_sequence_features.py b/tests/integration_tests/test_sequence_features.py index eb1a3f45c96..9252cdddbda 100644 --- a/tests/integration_tests/test_sequence_features.py +++ b/tests/integration_tests/test_sequence_features.py @@ -11,7 +11,8 @@ from ludwig.data.dataset_synthesizer import build_synthetic_dataset from ludwig.data.preprocessing import preprocess_for_training from ludwig.features.feature_registries import update_config_with_metadata -from tests.integration_tests.utils import generate_data, run_experiment, sequence_feature +from tests.integration_tests.utils import (generate_data, run_experiment, + sequence_feature) # # this test is focused on testing input sequence features with all encoders diff --git a/tests/integration_tests/test_server.py b/tests/integration_tests/test_server.py index b4e1d374068..6757f759764 100644 --- a/tests/integration_tests/test_server.py +++ b/tests/integration_tests/test_server.py @@ -24,15 +24,10 @@ from ludwig.constants import BATCH_SIZE, DECODER, TRAINER from ludwig.serve import server from ludwig.utils.data_utils import read_csv -from tests.integration_tests.utils import ( - audio_feature, - category_feature, - generate_data, - image_feature, - LocalTestBackend, - number_feature, - text_feature, -) +from tests.integration_tests.utils import (LocalTestBackend, audio_feature, + category_feature, generate_data, + image_feature, number_feature, + text_feature) logger = logging.getLogger(__name__) diff --git a/tests/integration_tests/test_simple_features.py b/tests/integration_tests/test_simple_features.py index 6a18c03d7bd..20aad8d5109 100644 --- a/tests/integration_tests/test_simple_features.py +++ b/tests/integration_tests/test_simple_features.py @@ -19,18 +19,11 @@ import pytest from ludwig.constants import NAME -from tests.integration_tests.utils import ( - bag_feature, - binary_feature, - category_feature, - generate_data, - number_feature, - run_experiment, - sequence_feature, - set_feature, - text_feature, - vector_feature, -) +from tests.integration_tests.utils import (bag_feature, binary_feature, + category_feature, generate_data, + number_feature, run_experiment, + sequence_feature, set_feature, + text_feature, vector_feature) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) diff --git a/tests/integration_tests/test_timeseries_feature.py b/tests/integration_tests/test_timeseries_feature.py index c6d2d316cee..fdea52331ee 100644 --- a/tests/integration_tests/test_timeseries_feature.py +++ b/tests/integration_tests/test_timeseries_feature.py @@ -4,9 +4,11 @@ import torch from ludwig.api import LudwigModel -from ludwig.constants import COLUMN, ENCODER_OUTPUT, INPUT_FEATURES, OUTPUT_FEATURES +from ludwig.constants import (COLUMN, ENCODER_OUTPUT, INPUT_FEATURES, + OUTPUT_FEATURES) from ludwig.features.timeseries_feature import TimeseriesInputFeature -from ludwig.schema.features.timeseries_feature import TimeseriesInputFeatureConfig +from ludwig.schema.features.timeseries_feature import \ + TimeseriesInputFeatureConfig from ludwig.schema.utils import load_config_with_kwargs from tests.integration_tests.utils import number_feature, timeseries_feature diff --git a/tests/integration_tests/test_torchscript.py b/tests/integration_tests/test_torchscript.py index bb06371f733..fb37d40c7fa 100644 --- a/tests/integration_tests/test_torchscript.py +++ b/tests/integration_tests/test_torchscript.py @@ -25,7 +25,8 @@ from ludwig.api import LudwigModel from ludwig.backend import RAY -from ludwig.constants import BATCH_SIZE, COMBINER, EVAL_BATCH_SIZE, LOGITS, NAME, PREDICTIONS, PROBABILITIES, TRAINER +from ludwig.constants import (BATCH_SIZE, COMBINER, EVAL_BATCH_SIZE, LOGITS, + NAME, PREDICTIONS, PROBABILITIES, TRAINER) from ludwig.data.preprocessing import preprocess_for_prediction from ludwig.features.number_feature import numeric_transformation_registry from ludwig.globals import TRAIN_SET_METADATA_FILE_NAME @@ -33,23 +34,14 @@ from ludwig.utils import output_feature_utils from ludwig.utils.tokenizers import TORCHSCRIPT_COMPATIBLE_TOKENIZERS from tests.integration_tests import utils -from tests.integration_tests.utils import ( - audio_feature, - bag_feature, - binary_feature, - category_feature, - date_feature, - generate_data, - h3_feature, - image_feature, - LocalTestBackend, - number_feature, - sequence_feature, - set_feature, - text_feature, - timeseries_feature, - vector_feature, -) +from tests.integration_tests.utils import (LocalTestBackend, audio_feature, + bag_feature, binary_feature, + category_feature, date_feature, + generate_data, h3_feature, + image_feature, number_feature, + sequence_feature, set_feature, + text_feature, timeseries_feature, + vector_feature) @pytest.mark.integration_tests_e diff --git a/tests/integration_tests/test_trainer.py b/tests/integration_tests/test_trainer.py index 7de2dfd7c7a..bf36f9d397f 100644 --- a/tests/integration_tests/test_trainer.py +++ b/tests/integration_tests/test_trainer.py @@ -11,29 +11,17 @@ from ludwig.api import LudwigModel from ludwig.callbacks import Callback -from ludwig.constants import ( - BATCH_SIZE, - EFFECTIVE_BATCH_SIZE, - EPOCHS, - EVAL_BATCH_SIZE, - INPUT_FEATURES, - MAX_BATCH_SIZE_DATASET_FRACTION, - OUTPUT_FEATURES, - TRAINER, -) +from ludwig.constants import (BATCH_SIZE, EFFECTIVE_BATCH_SIZE, EPOCHS, + EVAL_BATCH_SIZE, INPUT_FEATURES, + MAX_BATCH_SIZE_DATASET_FRACTION, OUTPUT_FEATURES, + TRAINER) from ludwig.distributed import init_dist_strategy from ludwig.globals import MODEL_FILE_NAME -from tests.integration_tests.utils import ( - binary_feature, - category_feature, - generate_data, - LocalTestBackend, - number_feature, - RAY_BACKEND_CONFIG, - sequence_feature, - text_feature, - vector_feature, -) +from tests.integration_tests.utils import (RAY_BACKEND_CONFIG, + LocalTestBackend, binary_feature, + category_feature, generate_data, + number_feature, sequence_feature, + text_feature, vector_feature) try: from ludwig.backend.horovod import HorovodBackend diff --git a/tests/integration_tests/test_triton.py b/tests/integration_tests/test_triton.py index bbbc92234eb..5a15051b59a 100644 --- a/tests/integration_tests/test_triton.py +++ b/tests/integration_tests/test_triton.py @@ -23,19 +23,15 @@ from ludwig.constants import BATCH_SIZE, TRAINER from ludwig.data.dataset_synthesizer import build_synthetic_dataset_df from ludwig.utils.data_utils import load_yaml -from ludwig.utils.inference_utils import to_inference_module_input_from_dataframe -from ludwig.utils.triton_utils import export_triton, get_inference_modules, POSTPROCESSOR, PREDICTOR, PREPROCESSOR -from tests.integration_tests.utils import ( - binary_feature, - category_feature, - generate_data, - LocalTestBackend, - number_feature, - sequence_feature, - set_feature, - text_feature, - vector_feature, -) +from ludwig.utils.inference_utils import \ + to_inference_module_input_from_dataframe +from ludwig.utils.triton_utils import (POSTPROCESSOR, PREDICTOR, PREPROCESSOR, + export_triton, get_inference_modules) +from tests.integration_tests.utils import (LocalTestBackend, binary_feature, + category_feature, generate_data, + number_feature, sequence_feature, + set_feature, text_feature, + vector_feature) def test_triton_torchscript(csv_filename, tmpdir): diff --git a/tests/integration_tests/test_visualization.py b/tests/integration_tests/test_visualization.py index 0060e322b90..f2712325a28 100644 --- a/tests/integration_tests/test_visualization.py +++ b/tests/integration_tests/test_visualization.py @@ -29,20 +29,16 @@ from ludwig.constants import BATCH_SIZE, ENCODER, TRAINER, TYPE from ludwig.experiment import experiment_cli -from ludwig.globals import DESCRIPTION_FILE_NAME, PREDICTIONS_PARQUET_FILE_NAME, TEST_STATISTICS_FILE_NAME +from ludwig.globals import (DESCRIPTION_FILE_NAME, + PREDICTIONS_PARQUET_FILE_NAME, + TEST_STATISTICS_FILE_NAME) from ludwig.utils.data_utils import get_split_path from ludwig.visualize import _extract_ground_truth_values from tests.integration_tests.test_visualization_api import obtain_df_splits -from tests.integration_tests.utils import ( - bag_feature, - binary_feature, - category_feature, - generate_data, - number_feature, - sequence_feature, - set_feature, - text_feature, -) +from tests.integration_tests.utils import (bag_feature, binary_feature, + category_feature, generate_data, + number_feature, sequence_feature, + set_feature, text_feature) pytestmark = pytest.mark.integration_tests_c diff --git a/tests/integration_tests/test_visualization_api.py b/tests/integration_tests/test_visualization_api.py index 0f10bc5c922..cbf6ca2e46e 100644 --- a/tests/integration_tests/test_visualization_api.py +++ b/tests/integration_tests/test_visualization_api.py @@ -22,21 +22,16 @@ from ludwig import visualize from ludwig.api import LudwigModel, TrainingStats -from ludwig.constants import BATCH_SIZE, ENCODER, NAME, PREDICTIONS, PROBABILITIES, PROBABILITY, TRAINER, TYPE +from ludwig.constants import (BATCH_SIZE, ENCODER, NAME, PREDICTIONS, + PROBABILITIES, PROBABILITY, TRAINER, TYPE) from ludwig.data.split import get_splitter from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME from ludwig.utils.data_utils import read_csv -from tests.integration_tests.utils import ( - bag_feature, - binary_feature, - category_feature, - generate_data, - LocalTestBackend, - number_feature, - sequence_feature, - set_feature, - text_feature, -) +from tests.integration_tests.utils import (LocalTestBackend, bag_feature, + binary_feature, category_feature, + generate_data, number_feature, + sequence_feature, set_feature, + text_feature) pytestmark = pytest.mark.integration_tests_c diff --git a/tests/integration_tests/utils.py b/tests/integration_tests/utils.py index fd7886199c3..b347524a6e7 100644 --- a/tests/integration_tests/utils.py +++ b/tests/integration_tests/utils.py @@ -23,9 +23,8 @@ import tempfile import traceback import uuid - # from distutils.util import strtobool -from typing import Any, Dict, List, Optional, Set, Tuple, TYPE_CHECKING, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union import cloudpickle import numpy as np @@ -37,32 +36,13 @@ from ludwig.api import LudwigModel from ludwig.backend import LocalBackend -from ludwig.constants import ( - AUDIO, - BAG, - BATCH_SIZE, - BINARY, - CATEGORY, - CATEGORY_DISTRIBUTION, - COLUMN, - DATE, - DECODER, - ENCODER, - H3, - IMAGE, - MODEL_ECD, - NAME, - NUMBER, - PROC_COLUMN, - SEQUENCE, - SET, - SPLIT, - TEXT, - TIMESERIES, - TRAINER, - VECTOR, -) -from ludwig.data.dataset_synthesizer import build_synthetic_dataset, DATETIME_FORMATS +from ludwig.constants import (AUDIO, BAG, BATCH_SIZE, BINARY, CATEGORY, + CATEGORY_DISTRIBUTION, COLUMN, DATE, DECODER, + ENCODER, H3, IMAGE, MODEL_ECD, NAME, NUMBER, + PROC_COLUMN, SEQUENCE, SET, SPLIT, TEXT, + TIMESERIES, TRAINER, VECTOR) +from ludwig.data.dataset_synthesizer import (DATETIME_FORMATS, + build_synthetic_dataset) from ludwig.experiment import experiment_cli from ludwig.features.feature_utils import compute_feature_hash from ludwig.globals import MODEL_FILE_NAME, PREDICTIONS_PARQUET_FILE_NAME @@ -70,7 +50,8 @@ from ludwig.schema.encoders.utils import get_encoder_classes from ludwig.trainers.trainer import Trainer from ludwig.utils import fs_utils -from ludwig.utils.data_utils import read_csv, replace_file_extension, use_credentials +from ludwig.utils.data_utils import (read_csv, replace_file_extension, + use_credentials) if TYPE_CHECKING: from ludwig.data.dataset.base import Dataset diff --git a/tests/ludwig/accounting/test_used_tokens.py b/tests/ludwig/accounting/test_used_tokens.py index f760dbd6d44..d0d52f5427d 100644 --- a/tests/ludwig/accounting/test_used_tokens.py +++ b/tests/ludwig/accounting/test_used_tokens.py @@ -1,6 +1,8 @@ import torch -from ludwig.accounting.used_tokens import get_used_tokens_for_ecd, get_used_tokens_for_gbm, get_used_tokens_for_llm +from ludwig.accounting.used_tokens import (get_used_tokens_for_ecd, + get_used_tokens_for_gbm, + get_used_tokens_for_llm) def test_get_used_tokens_for_gbm(): diff --git a/tests/ludwig/automl/test_base_config.py b/tests/ludwig/automl/test_base_config.py index ad42c9deed5..89f4ee8d3ad 100644 --- a/tests/ludwig/automl/test_base_config.py +++ b/tests/ludwig/automl/test_base_config.py @@ -8,17 +8,15 @@ ray = pytest.importorskip("ray") # noqa -from ludwig.automl.base_config import ( # noqa - get_dataset_info, - get_dataset_info_from_source, - get_field_metadata, - get_reference_configs, - is_field_boolean, -) +from ludwig.automl.base_config import (get_dataset_info, # noqa + get_dataset_info_from_source, + get_field_metadata, + get_reference_configs, is_field_boolean) from ludwig.data.dataframe.dask import DaskEngine # noqa from ludwig.data.dataframe.pandas import PandasEngine # noqa from ludwig.schema.model_types.base import ModelConfig # noqa -from ludwig.utils.automl.data_source import DataframeSource, wrap_data_source # noqa +from ludwig.utils.automl.data_source import (DataframeSource, # noqa + wrap_data_source) pytestmark = pytest.mark.distributed diff --git a/tests/ludwig/combiners/test_combiners.py b/tests/ludwig/combiners/test_combiners.py index 645d5afec71..6c517f97489 100644 --- a/tests/ludwig/combiners/test_combiners.py +++ b/tests/ludwig/combiners/test_combiners.py @@ -6,30 +6,31 @@ import pytest import torch -from ludwig.combiners.combiners import ( - ComparatorCombiner, - ConcatCombiner, - ProjectAggregateCombiner, - SequenceCombiner, - SequenceConcatCombiner, - TabNetCombiner, - TabTransformerCombiner, - TransformerCombiner, -) -from ludwig.constants import CATEGORY, ENCODER_OUTPUT, ENCODER_OUTPUT_STATE, TYPE +from ludwig.combiners.combiners import (ComparatorCombiner, ConcatCombiner, + ProjectAggregateCombiner, + SequenceCombiner, + SequenceConcatCombiner, TabNetCombiner, + TabTransformerCombiner, + TransformerCombiner) +from ludwig.constants import (CATEGORY, ENCODER_OUTPUT, ENCODER_OUTPUT_STATE, + TYPE) from ludwig.encoders.registry import get_sequence_encoder_registry from ludwig.schema.combiners.comparator import ComparatorCombinerConfig from ludwig.schema.combiners.concat import ConcatCombinerConfig -from ludwig.schema.combiners.project_aggregate import ProjectAggregateCombinerConfig +from ludwig.schema.combiners.project_aggregate import \ + ProjectAggregateCombinerConfig from ludwig.schema.combiners.sequence import SequenceCombinerConfig -from ludwig.schema.combiners.sequence_concat import SequenceConcatCombinerConfig -from ludwig.schema.combiners.tab_transformer import TabTransformerCombinerConfig +from ludwig.schema.combiners.sequence_concat import \ + SequenceConcatCombinerConfig +from ludwig.schema.combiners.tab_transformer import \ + TabTransformerCombinerConfig from ludwig.schema.combiners.tabnet import TabNetCombinerConfig from ludwig.schema.combiners.transformer import TransformerCombinerConfig from ludwig.schema.utils import load_config from ludwig.utils.misc_utils import set_random_seed from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import \ + check_module_parameters_updated logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) diff --git a/tests/ludwig/config_sampling/test_config_sampling.py b/tests/ludwig/config_sampling/test_config_sampling.py index e4f74f1db57..2133bf2f809 100644 --- a/tests/ludwig/config_sampling/test_config_sampling.py +++ b/tests/ludwig/config_sampling/test_config_sampling.py @@ -2,10 +2,8 @@ from ludwig.utils.data_utils import load_json from tests.training_success.test_training_success import ( - combiner_config_generator, - defaults_config_generator, - ecd_trainer_config_generator, -) + combiner_config_generator, defaults_config_generator, + ecd_trainer_config_generator) def full_config_generator(generator_fn, *args): diff --git a/tests/ludwig/config_validation/test_validate_config_combiner.py b/tests/ludwig/config_validation/test_validate_config_combiner.py index 2ef8a97a22e..46460ae3556 100644 --- a/tests/ludwig/config_validation/test_validate_config_combiner.py +++ b/tests/ludwig/config_validation/test_validate_config_combiner.py @@ -3,7 +3,8 @@ from ludwig.config_validation.validation import check_schema, get_schema from ludwig.constants import MODEL_ECD, TRAINER from ludwig.error import ConfigValidationError -from tests.integration_tests.utils import binary_feature, category_feature, number_feature +from tests.integration_tests.utils import (binary_feature, category_feature, + number_feature) def test_combiner_schema_is_not_empty_for_ECD(): diff --git a/tests/ludwig/config_validation/test_validate_config_encoder.py b/tests/ludwig/config_validation/test_validate_config_encoder.py index 2fb94bf6737..f0bc9cf9f3d 100644 --- a/tests/ludwig/config_validation/test_validate_config_encoder.py +++ b/tests/ludwig/config_validation/test_validate_config_encoder.py @@ -1,15 +1,13 @@ import pytest -from ludwig.constants import DEFAULTS, ENCODER, INPUT_FEATURES, NAME, OUTPUT_FEATURES, SEQUENCE, TEXT, TIMESERIES, TYPE +from ludwig.constants import (DEFAULTS, ENCODER, INPUT_FEATURES, NAME, + OUTPUT_FEATURES, SEQUENCE, TEXT, TIMESERIES, + TYPE) from ludwig.error import ConfigValidationError from ludwig.schema.model_config import ModelConfig -from tests.integration_tests.utils import ( - binary_feature, - number_feature, - sequence_feature, - text_feature, - timeseries_feature, -) +from tests.integration_tests.utils import (binary_feature, number_feature, + sequence_feature, text_feature, + timeseries_feature) @pytest.mark.parametrize("feature_type", [SEQUENCE, TEXT, TIMESERIES]) diff --git a/tests/ludwig/config_validation/test_validate_config_features.py b/tests/ludwig/config_validation/test_validate_config_features.py index e034c655347..d699f42cf56 100644 --- a/tests/ludwig/config_validation/test_validate_config_features.py +++ b/tests/ludwig/config_validation/test_validate_config_features.py @@ -2,7 +2,8 @@ from ludwig.config_validation.validation import check_schema from ludwig.error import ConfigValidationError -from tests.integration_tests.utils import binary_feature, category_feature, number_feature, text_feature +from tests.integration_tests.utils import (binary_feature, category_feature, + number_feature, text_feature) def test_config_input_output_features(): diff --git a/tests/ludwig/config_validation/test_validate_config_hyperopt.py b/tests/ludwig/config_validation/test_validate_config_hyperopt.py index 87dfbbd917e..e83e632b9ea 100644 --- a/tests/ludwig/config_validation/test_validate_config_hyperopt.py +++ b/tests/ludwig/config_validation/test_validate_config_hyperopt.py @@ -7,16 +7,9 @@ import ludwig.schema.hyperopt.parameter # noqa: F401 import ludwig.schema.hyperopt.scheduler # noqa: F401 import ludwig.schema.hyperopt.search_algorithm # noqa: F401 -from ludwig.constants import ( - EXECUTOR, - HYPEROPT, - INPUT_FEATURES, - OUTPUT_FEATURES, - PARAMETERS, - SCHEDULER, - SEARCH_ALG, - TYPE, -) +from ludwig.constants import (EXECUTOR, HYPEROPT, INPUT_FEATURES, + OUTPUT_FEATURES, PARAMETERS, SCHEDULER, + SEARCH_ALG, TYPE) from ludwig.error import ConfigValidationError from ludwig.schema.hyperopt import utils from ludwig.schema.model_types.base import ModelConfig diff --git a/tests/ludwig/config_validation/test_validate_config_misc.py b/tests/ludwig/config_validation/test_validate_config_misc.py index d8d0220f17c..28a7f7571c1 100644 --- a/tests/ludwig/config_validation/test_validate_config_misc.py +++ b/tests/ludwig/config_validation/test_validate_config_misc.py @@ -1,26 +1,10 @@ import pytest from ludwig.config_validation.validation import check_schema, get_schema -from ludwig.constants import ( - ACTIVE, - AUDIO, - BACKEND, - CATEGORY, - COLUMN, - DECODER, - DEFAULTS, - ENCODER, - LOSS, - MODEL_ECD, - MODEL_GBM, - MODEL_LLM, - MODEL_TYPE, - NAME, - PREPROCESSING, - PROC_COLUMN, - TRAINER, - TYPE, -) +from ludwig.constants import (ACTIVE, AUDIO, BACKEND, CATEGORY, COLUMN, + DECODER, DEFAULTS, ENCODER, LOSS, MODEL_ECD, + MODEL_GBM, MODEL_LLM, MODEL_TYPE, NAME, + PREPROCESSING, PROC_COLUMN, TRAINER, TYPE) from ludwig.error import ConfigValidationError from ludwig.features.feature_registries import get_output_type_registry from ludwig.schema import utils as schema_utils @@ -29,37 +13,36 @@ from ludwig.schema.defaults.gbm import GBMDefaultsConfig from ludwig.schema.features.preprocessing.audio import AudioPreprocessingConfig from ludwig.schema.features.preprocessing.bag import BagPreprocessingConfig -from ludwig.schema.features.preprocessing.binary import BinaryPreprocessingConfig -from ludwig.schema.features.preprocessing.category import CategoryPreprocessingConfig +from ludwig.schema.features.preprocessing.binary import \ + BinaryPreprocessingConfig +from ludwig.schema.features.preprocessing.category import \ + CategoryPreprocessingConfig from ludwig.schema.features.preprocessing.date import DatePreprocessingConfig from ludwig.schema.features.preprocessing.h3 import H3PreprocessingConfig from ludwig.schema.features.preprocessing.image import ImagePreprocessingConfig -from ludwig.schema.features.preprocessing.number import NumberPreprocessingConfig -from ludwig.schema.features.preprocessing.sequence import SequencePreprocessingConfig +from ludwig.schema.features.preprocessing.number import \ + NumberPreprocessingConfig +from ludwig.schema.features.preprocessing.sequence import \ + SequencePreprocessingConfig from ludwig.schema.features.preprocessing.set import SetPreprocessingConfig from ludwig.schema.features.preprocessing.text import TextPreprocessingConfig -from ludwig.schema.features.preprocessing.timeseries import TimeseriesPreprocessingConfig -from ludwig.schema.features.preprocessing.vector import VectorPreprocessingConfig -from ludwig.schema.features.utils import get_input_feature_jsonschema, get_output_feature_jsonschema +from ludwig.schema.features.preprocessing.timeseries import \ + TimeseriesPreprocessingConfig +from ludwig.schema.features.preprocessing.vector import \ + VectorPreprocessingConfig +from ludwig.schema.features.utils import (get_input_feature_jsonschema, + get_output_feature_jsonschema) from ludwig.schema.llms.peft import LoraConfig from ludwig.schema.model_types.base import ModelConfig -from ludwig.schema.utils import ludwig_dataclass, unload_jsonschema_from_marshmallow_class -from tests.integration_tests.utils import ( - audio_feature, - bag_feature, - binary_feature, - category_feature, - date_feature, - ENCODERS, - h3_feature, - image_feature, - number_feature, - sequence_feature, - set_feature, - text_feature, - timeseries_feature, - vector_feature, -) +from ludwig.schema.utils import (ludwig_dataclass, + unload_jsonschema_from_marshmallow_class) +from tests.integration_tests.utils import (ENCODERS, audio_feature, + bag_feature, binary_feature, + category_feature, date_feature, + h3_feature, image_feature, + number_feature, sequence_feature, + set_feature, text_feature, + timeseries_feature, vector_feature) def test_config_features(): diff --git a/tests/ludwig/config_validation/test_validate_config_preprocessing.py b/tests/ludwig/config_validation/test_validate_config_preprocessing.py index bc05a1dff8f..87d2daa4d3f 100644 --- a/tests/ludwig/config_validation/test_validate_config_preprocessing.py +++ b/tests/ludwig/config_validation/test_validate_config_preprocessing.py @@ -1,6 +1,7 @@ import pytest -from ludwig.config_validation.preprocessing import check_global_max_sequence_length_fits_prompt_template +from ludwig.config_validation.preprocessing import \ + check_global_max_sequence_length_fits_prompt_template from ludwig.config_validation.validation import check_schema from tests.integration_tests.utils import binary_feature, category_feature diff --git a/tests/ludwig/config_validation/test_validate_config_trainer.py b/tests/ludwig/config_validation/test_validate_config_trainer.py index ead0f8cfa57..bd7116ae2fa 100644 --- a/tests/ludwig/config_validation/test_validate_config_trainer.py +++ b/tests/ludwig/config_validation/test_validate_config_trainer.py @@ -5,7 +5,8 @@ from ludwig.error import ConfigValidationError from ludwig.schema.optimizers import optimizer_registry from ludwig.schema.trainer import ECDTrainerConfig -from tests.integration_tests.utils import binary_feature, category_feature, number_feature +from tests.integration_tests.utils import (binary_feature, category_feature, + number_feature) # Note: simple tests for now, but once we add dependent fields we can add tests for more complex relationships in this # file. Currently verifies that the nested fields work, as the others are covered by basic marshmallow validation: diff --git a/tests/ludwig/data/test_ray_data.py b/tests/ludwig/data/test_ray_data.py index a71c8ae910b..ab68a9c3984 100644 --- a/tests/ludwig/data/test_ray_data.py +++ b/tests/ludwig/data/test_ray_data.py @@ -9,7 +9,8 @@ ray = pytest.importorskip("ray") # noqa dask = pytest.importorskip("dask") # noqa -from ludwig.data.dataset.ray import RayDatasetBatcher, read_remote_parquet # noqa +from ludwig.data.dataset.ray import (RayDatasetBatcher, # noqa + read_remote_parquet) # Mark the entire module as distributed pytestmark = pytest.mark.distributed diff --git a/tests/ludwig/decoders/test_image_decoder.py b/tests/ludwig/decoders/test_image_decoder.py index 2f48591da7b..a89028c7bb9 100644 --- a/tests/ludwig/decoders/test_image_decoder.py +++ b/tests/ludwig/decoders/test_image_decoder.py @@ -1,11 +1,13 @@ import pytest import torch -from ludwig.constants import ENCODER_OUTPUT, ENCODER_OUTPUT_STATE, HIDDEN, LOGITS +from ludwig.constants import (ENCODER_OUTPUT, ENCODER_OUTPUT_STATE, HIDDEN, + LOGITS) from ludwig.decoders.image_decoders import UNetDecoder from ludwig.encoders.image.base import UNetEncoder from ludwig.utils.misc_utils import set_random_seed -from tests.integration_tests.parameter_update_utils import check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import \ + check_module_parameters_updated RANDOM_SEED = 1919 diff --git a/tests/ludwig/decoders/test_llm_decoders.py b/tests/ludwig/decoders/test_llm_decoders.py index 6408efdbb9b..c12fc5cf49f 100644 --- a/tests/ludwig/decoders/test_llm_decoders.py +++ b/tests/ludwig/decoders/test_llm_decoders.py @@ -1,7 +1,8 @@ import pytest import torch -from ludwig.constants import BACKEND, BASE_MODEL, GENERATION, INPUT_FEATURES, MODEL_TYPE, OUTPUT_FEATURES +from ludwig.constants import (BACKEND, BASE_MODEL, GENERATION, INPUT_FEATURES, + MODEL_TYPE, OUTPUT_FEATURES) from ludwig.decoders.llm_decoders import TextExtractorDecoder from ludwig.schema.model_config import ModelConfig from tests.integration_tests.utils import text_feature diff --git a/tests/ludwig/decoders/test_sequence_decoder.py b/tests/ludwig/decoders/test_sequence_decoder.py index 6abfe66a2f2..f5554d7faa8 100644 --- a/tests/ludwig/decoders/test_sequence_decoder.py +++ b/tests/ludwig/decoders/test_sequence_decoder.py @@ -2,15 +2,13 @@ import torch from ludwig.constants import HIDDEN, LOGITS -from ludwig.decoders.sequence_decoders import ( - LSTMDecoder, - RNNDecoder, - SequenceGeneratorDecoder, - SequenceLSTMDecoder, - SequenceRNNDecoder, -) +from ludwig.decoders.sequence_decoders import (LSTMDecoder, RNNDecoder, + SequenceGeneratorDecoder, + SequenceLSTMDecoder, + SequenceRNNDecoder) from ludwig.utils.misc_utils import set_random_seed -from tests.integration_tests.parameter_update_utils import check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import \ + check_module_parameters_updated RANDOM_SEED = 1919 diff --git a/tests/ludwig/decoders/test_sequence_tagger.py b/tests/ludwig/decoders/test_sequence_tagger.py index 0a239f0633b..dba28b8792e 100644 --- a/tests/ludwig/decoders/test_sequence_tagger.py +++ b/tests/ludwig/decoders/test_sequence_tagger.py @@ -4,7 +4,8 @@ from ludwig.constants import HIDDEN, LOGITS from ludwig.decoders.sequence_tagger import SequenceTaggerDecoder from ludwig.utils.misc_utils import set_random_seed -from tests.integration_tests.parameter_update_utils import check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import \ + check_module_parameters_updated RANDOM_SEED = 1919 diff --git a/tests/ludwig/encoders/test_bag_encoders.py b/tests/ludwig/encoders/test_bag_encoders.py index 89912eda131..51d2ffacfb6 100644 --- a/tests/ludwig/encoders/test_bag_encoders.py +++ b/tests/ludwig/encoders/test_bag_encoders.py @@ -6,7 +6,8 @@ from ludwig.constants import ENCODER_OUTPUT from ludwig.encoders.bag_encoders import BagEmbedWeightedEncoder from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import \ + check_module_parameters_updated RANDOM_SEED = 1919 DEVICE = get_torch_device() diff --git a/tests/ludwig/encoders/test_category_encoders.py b/tests/ludwig/encoders/test_category_encoders.py index 6080f3f70df..c7dacf166a5 100644 --- a/tests/ludwig/encoders/test_category_encoders.py +++ b/tests/ludwig/encoders/test_category_encoders.py @@ -4,9 +4,11 @@ import torch from ludwig.constants import ENCODER_OUTPUT -from ludwig.encoders.category_encoders import CategoricalEmbedEncoder, CategoricalSparseEncoder +from ludwig.encoders.category_encoders import (CategoricalEmbedEncoder, + CategoricalSparseEncoder) from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import \ + check_module_parameters_updated RANDOM_SEED = 1919 DEVICE = get_torch_device() diff --git a/tests/ludwig/encoders/test_date_encoders.py b/tests/ludwig/encoders/test_date_encoders.py index a2c74be1af9..4897c7d1216 100644 --- a/tests/ludwig/encoders/test_date_encoders.py +++ b/tests/ludwig/encoders/test_date_encoders.py @@ -6,7 +6,8 @@ from ludwig.encoders.date_encoders import DateEmbed, DateWave from ludwig.utils.misc_utils import set_random_seed from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import \ + check_module_parameters_updated RANDOM_SEED = 1919 DEVICE = get_torch_device() diff --git a/tests/ludwig/encoders/test_h3_encoders.py b/tests/ludwig/encoders/test_h3_encoders.py index 911ece92604..4442c06a167 100644 --- a/tests/ludwig/encoders/test_h3_encoders.py +++ b/tests/ludwig/encoders/test_h3_encoders.py @@ -6,7 +6,8 @@ from ludwig.encoders import h3_encoders from ludwig.utils.misc_utils import set_random_seed from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import \ + check_module_parameters_updated RANDOM_SEED = 1919 DEVICE = get_torch_device() diff --git a/tests/ludwig/encoders/test_image_encoders.py b/tests/ludwig/encoders/test_image_encoders.py index c1f74208e3a..e425fad2c57 100644 --- a/tests/ludwig/encoders/test_image_encoders.py +++ b/tests/ludwig/encoders/test_image_encoders.py @@ -4,31 +4,30 @@ import torch from ludwig.constants import ENCODER_OUTPUT -from ludwig.encoders.image.base import MLPMixerEncoder, ResNetEncoder, Stacked2DCNN, UNetEncoder, ViTEncoder -from ludwig.encoders.image.torchvision import ( - TVAlexNetEncoder, - TVConvNeXtEncoder, - TVDenseNetEncoder, - TVEfficientNetEncoder, - TVGoogLeNetEncoder, - TVInceptionV3Encoder, - TVMaxVitEncoder, - TVMNASNetEncoder, - TVMobileNetV2Encoder, - TVMobileNetV3Encoder, - TVRegNetEncoder, - TVResNetEncoder, - TVResNeXtEncoder, - TVShuffleNetV2Encoder, - TVSqueezeNetEncoder, - TVSwinTransformerEncoder, - TVVGGEncoder, - TVViTEncoder, - TVWideResNetEncoder, -) +from ludwig.encoders.image.base import (MLPMixerEncoder, ResNetEncoder, + Stacked2DCNN, UNetEncoder, ViTEncoder) +from ludwig.encoders.image.torchvision import (TVAlexNetEncoder, + TVConvNeXtEncoder, + TVDenseNetEncoder, + TVEfficientNetEncoder, + TVGoogLeNetEncoder, + TVInceptionV3Encoder, + TVMaxVitEncoder, + TVMNASNetEncoder, + TVMobileNetV2Encoder, + TVMobileNetV3Encoder, + TVRegNetEncoder, + TVResNetEncoder, + TVResNeXtEncoder, + TVShuffleNetV2Encoder, + TVSqueezeNetEncoder, + TVSwinTransformerEncoder, + TVVGGEncoder, TVViTEncoder, + TVWideResNetEncoder) from ludwig.utils.image_utils import torchvision_model_registry from ludwig.utils.misc_utils import set_random_seed -from tests.integration_tests.parameter_update_utils import check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import \ + check_module_parameters_updated RANDOM_SEED = 1919 diff --git a/tests/ludwig/encoders/test_llm_encoders.py b/tests/ludwig/encoders/test_llm_encoders.py index 56cc6ad4b0e..5ba40e6ef97 100644 --- a/tests/ludwig/encoders/test_llm_encoders.py +++ b/tests/ludwig/encoders/test_llm_encoders.py @@ -7,7 +7,8 @@ from ludwig.encoders.text_encoders import LLMEncoder from ludwig.schema.encoders.text_encoders import LLMEncoderConfig -from ludwig.schema.llms.peft import AdaloraConfig, BaseAdapterConfig, IA3Config, LoraConfig +from ludwig.schema.llms.peft import (AdaloraConfig, BaseAdapterConfig, + IA3Config, LoraConfig) from ludwig.utils.llm_utils import get_context_len # Mapping of adapter types to test against and their respective config objects. diff --git a/tests/ludwig/encoders/test_sequence_encoders.py b/tests/ludwig/encoders/test_sequence_encoders.py index a0349ab6f5f..9eb138ed79c 100644 --- a/tests/ludwig/encoders/test_sequence_encoders.py +++ b/tests/ludwig/encoders/test_sequence_encoders.py @@ -4,18 +4,15 @@ import torch from ludwig.constants import ENCODER_OUTPUT -from ludwig.encoders.sequence_encoders import ( - ParallelCNN, - SequenceEmbedEncoder, - SequencePassthroughEncoder, - StackedCNN, - StackedCNNRNN, - StackedParallelCNN, - StackedRNN, - StackedTransformer, -) +from ludwig.encoders.sequence_encoders import (ParallelCNN, + SequenceEmbedEncoder, + SequencePassthroughEncoder, + StackedCNN, StackedCNNRNN, + StackedParallelCNN, StackedRNN, + StackedTransformer) from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import \ + check_module_parameters_updated DEVICE = get_torch_device() RANDOM_SEED = 1919 diff --git a/tests/ludwig/encoders/test_set_encoders.py b/tests/ludwig/encoders/test_set_encoders.py index 742b4b4ed8f..2f175ef6591 100644 --- a/tests/ludwig/encoders/test_set_encoders.py +++ b/tests/ludwig/encoders/test_set_encoders.py @@ -7,7 +7,8 @@ from ludwig.encoders.set_encoders import SetSparseEncoder from ludwig.utils.misc_utils import set_random_seed from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import \ + check_module_parameters_updated RANDOM_SEED = 1919 DEVICE = get_torch_device() diff --git a/tests/ludwig/encoders/test_text_encoders.py b/tests/ludwig/encoders/test_text_encoders.py index 2bd4c2ab840..dfc9756fc5a 100644 --- a/tests/ludwig/encoders/test_text_encoders.py +++ b/tests/ludwig/encoders/test_text_encoders.py @@ -8,22 +8,20 @@ import ludwig.schema.encoders.utils as schema_encoders_utils from ludwig.api import LudwigModel -from ludwig.constants import ENCODER, ENCODER_OUTPUT, MODEL_ECD, NAME, TEXT, TRAINER +from ludwig.constants import (ENCODER, ENCODER_OUTPUT, MODEL_ECD, NAME, TEXT, + TRAINER) from ludwig.encoders import text_encoders from ludwig.error import ConfigValidationError from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME from ludwig.schema.model_config import ModelConfig from ludwig.utils.data_utils import load_json from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import check_module_parameters_updated -from tests.integration_tests.utils import ( - category_feature, - clear_huggingface_cache, - generate_data, - HF_ENCODERS, - LocalTestBackend, - text_feature, -) +from tests.integration_tests.parameter_update_utils import \ + check_module_parameters_updated +from tests.integration_tests.utils import (HF_ENCODERS, LocalTestBackend, + category_feature, + clear_huggingface_cache, + generate_data, text_feature) DEVICE = get_torch_device() RANDOM_SEED = 1919 diff --git a/tests/ludwig/explain/test_util.py b/tests/ludwig/explain/test_util.py index 08e87b21fa4..97996c7f306 100644 --- a/tests/ludwig/explain/test_util.py +++ b/tests/ludwig/explain/test_util.py @@ -6,8 +6,10 @@ from ludwig.api import LudwigModel from ludwig.constants import NAME -from ludwig.explain.util import get_absolute_module_key_from_submodule, replace_layer_with_copy -from tests.integration_tests.utils import binary_feature, generate_data, LocalTestBackend, text_feature +from ludwig.explain.util import (get_absolute_module_key_from_submodule, + replace_layer_with_copy) +from tests.integration_tests.utils import (LocalTestBackend, binary_feature, + generate_data, text_feature) def test_get_absolute_module_key_from_submodule(): diff --git a/tests/ludwig/features/test_audio_feature.py b/tests/ludwig/features/test_audio_feature.py index 2323d1274e6..3a0c84858f2 100644 --- a/tests/ludwig/features/test_audio_feature.py +++ b/tests/ludwig/features/test_audio_feature.py @@ -12,7 +12,8 @@ from ludwig.schema.features.audio_feature import AudioInputFeatureConfig from ludwig.schema.utils import load_config_with_kwargs from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.utils import audio_feature, category_feature, generate_data +from tests.integration_tests.utils import (audio_feature, category_feature, + generate_data) BATCH_SIZE = 2 SEQ_SIZE = 20 diff --git a/tests/ludwig/features/test_binary_feature.py b/tests/ludwig/features/test_binary_feature.py index 851d580fad6..6f86c8a9243 100644 --- a/tests/ludwig/features/test_binary_feature.py +++ b/tests/ludwig/features/test_binary_feature.py @@ -4,8 +4,10 @@ import torch from ludwig.constants import ENCODER, ENCODER_OUTPUT -from ludwig.features.binary_feature import BinaryInputFeature, BinaryOutputFeature -from ludwig.schema.features.binary_feature import BinaryInputFeatureConfig, BinaryOutputFeatureConfig +from ludwig.features.binary_feature import (BinaryInputFeature, + BinaryOutputFeature) +from ludwig.schema.features.binary_feature import (BinaryInputFeatureConfig, + BinaryOutputFeatureConfig) from ludwig.schema.utils import load_config_with_kwargs from ludwig.utils.torch_utils import get_torch_device diff --git a/tests/ludwig/features/test_category_feature.py b/tests/ludwig/features/test_category_feature.py index 7f982e24b67..fec6e26445c 100644 --- a/tests/ludwig/features/test_category_feature.py +++ b/tests/ludwig/features/test_category_feature.py @@ -6,7 +6,8 @@ from ludwig.constants import ENCODER, ENCODER_OUTPUT, TYPE from ludwig.features.category_feature import CategoryInputFeature -from ludwig.schema.features.category_feature import ECDCategoryInputFeatureConfig +from ludwig.schema.features.category_feature import \ + ECDCategoryInputFeatureConfig from ludwig.schema.utils import load_config_with_kwargs from ludwig.utils.misc_utils import merge_dict from ludwig.utils.torch_utils import get_torch_device diff --git a/tests/ludwig/features/test_date_feature.py b/tests/ludwig/features/test_date_feature.py index f527379b92b..1e95958ad16 100644 --- a/tests/ludwig/features/test_date_feature.py +++ b/tests/ludwig/features/test_date_feature.py @@ -6,7 +6,8 @@ import torch from dateutil.parser import parse -from ludwig.constants import ENCODER_OUTPUT, FILL_WITH_CONST, MISSING_VALUE_STRATEGY +from ludwig.constants import (ENCODER_OUTPUT, FILL_WITH_CONST, + MISSING_VALUE_STRATEGY) from ludwig.features import date_feature from ludwig.features.date_feature import DateInputFeature from ludwig.schema.features.date_feature import DateInputFeatureConfig diff --git a/tests/ludwig/features/test_image_feature.py b/tests/ludwig/features/test_image_feature.py index ac01adc0216..b67650324de 100644 --- a/tests/ludwig/features/test_image_feature.py +++ b/tests/ludwig/features/test_image_feature.py @@ -4,18 +4,13 @@ import pytest import torch -from ludwig.constants import ( - BFILL, - CROP_OR_PAD, - ENCODER, - ENCODER_OUTPUT, - ENCODER_OUTPUT_STATE, - INTERPOLATE, - LOGITS, - TYPE, -) -from ludwig.features.image_feature import _ImagePreprocessing, ImageInputFeature, ImageOutputFeature -from ludwig.schema.features.image_feature import ImageInputFeatureConfig, ImageOutputFeatureConfig +from ludwig.constants import (BFILL, CROP_OR_PAD, ENCODER, ENCODER_OUTPUT, + ENCODER_OUTPUT_STATE, INTERPOLATE, LOGITS, TYPE) +from ludwig.features.image_feature import (ImageInputFeature, + ImageOutputFeature, + _ImagePreprocessing) +from ludwig.schema.features.image_feature import (ImageInputFeatureConfig, + ImageOutputFeatureConfig) from ludwig.schema.utils import load_config_with_kwargs from ludwig.utils.misc_utils import merge_dict from ludwig.utils.torch_utils import get_torch_device diff --git a/tests/ludwig/features/test_number_feature.py b/tests/ludwig/features/test_number_feature.py index 8d710c418fa..f9f89f9c2e8 100644 --- a/tests/ludwig/features/test_number_feature.py +++ b/tests/ludwig/features/test_number_feature.py @@ -6,7 +6,7 @@ import torch from ludwig.constants import ENCODER_OUTPUT -from ludwig.features.number_feature import _OutlierReplacer, NumberInputFeature +from ludwig.features.number_feature import NumberInputFeature, _OutlierReplacer from ludwig.schema.features.number_feature import ECDNumberInputFeatureConfig from ludwig.schema.utils import load_config_with_kwargs from ludwig.utils.misc_utils import merge_dict diff --git a/tests/ludwig/features/test_sequence_features.py b/tests/ludwig/features/test_sequence_features.py index e4e84d7ef39..e64021bb9ea 100644 --- a/tests/ludwig/features/test_sequence_features.py +++ b/tests/ludwig/features/test_sequence_features.py @@ -5,11 +5,16 @@ import pytest import torch -from ludwig.constants import ENCODER_OUTPUT, LAST_HIDDEN, LOGITS, SEQUENCE, TEXT, TYPE -from ludwig.features.sequence_feature import _SequencePreprocessing, SequenceInputFeature, SequenceOutputFeature +from ludwig.constants import (ENCODER_OUTPUT, LAST_HIDDEN, LOGITS, SEQUENCE, + TEXT, TYPE) +from ludwig.features.sequence_feature import (SequenceInputFeature, + SequenceOutputFeature, + _SequencePreprocessing) from ludwig.features.text_feature import TextInputFeature, TextOutputFeature -from ludwig.schema.features.sequence_feature import SequenceInputFeatureConfig, SequenceOutputFeatureConfig -from ludwig.schema.features.text_feature import ECDTextInputFeatureConfig, ECDTextOutputFeatureConfig +from ludwig.schema.features.sequence_feature import ( + SequenceInputFeatureConfig, SequenceOutputFeatureConfig) +from ludwig.schema.features.text_feature import (ECDTextInputFeatureConfig, + ECDTextOutputFeatureConfig) from ludwig.utils.torch_utils import get_torch_device from tests.integration_tests.utils import ENCODERS, sequence_feature diff --git a/tests/ludwig/features/test_text_feature.py b/tests/ludwig/features/test_text_feature.py index c3574baccd0..163255dc368 100644 --- a/tests/ludwig/features/test_text_feature.py +++ b/tests/ludwig/features/test_text_feature.py @@ -4,7 +4,8 @@ from transformers import AutoTokenizer from ludwig.backend import LocalBackend -from ludwig.constants import IGNORE_INDEX_TOKEN_ID, LOGITS, PREDICTIONS, PROBABILITIES +from ludwig.constants import (IGNORE_INDEX_TOKEN_ID, LOGITS, PREDICTIONS, + PROBABILITIES) from ludwig.features import text_feature TEST_MODEL_NAME = "hf-internal-testing/tiny-random-OPTForCausalLM" diff --git a/tests/ludwig/features/test_timeseries_feature.py b/tests/ludwig/features/test_timeseries_feature.py index 2f768d16d3f..4200ee6ba07 100644 --- a/tests/ludwig/features/test_timeseries_feature.py +++ b/tests/ludwig/features/test_timeseries_feature.py @@ -5,7 +5,8 @@ from ludwig.constants import ENCODER, ENCODER_OUTPUT, TYPE from ludwig.features.timeseries_feature import TimeseriesInputFeature -from ludwig.schema.features.timeseries_feature import TimeseriesInputFeatureConfig +from ludwig.schema.features.timeseries_feature import \ + TimeseriesInputFeatureConfig from ludwig.schema.utils import load_config_with_kwargs from ludwig.utils.torch_utils import get_torch_device diff --git a/tests/ludwig/hyperopt/test_hyperopt.py b/tests/ludwig/hyperopt/test_hyperopt.py index 367e2b8027d..78cb6302906 100644 --- a/tests/ludwig/hyperopt/test_hyperopt.py +++ b/tests/ludwig/hyperopt/test_hyperopt.py @@ -1,7 +1,8 @@ import pytest from ludwig.constants import INPUT_FEATURES, NAME, OUTPUT_FEATURES, TYPE -from ludwig.hyperopt.utils import log_warning_if_all_grid_type_parameters, substitute_parameters +from ludwig.hyperopt.utils import (log_warning_if_all_grid_type_parameters, + substitute_parameters) from ludwig.schema.model_config import ModelConfig BASE_CONFIG = { diff --git a/tests/ludwig/marshmallow/test_fields_misc.py b/tests/ludwig/marshmallow/test_fields_misc.py index 824144638ea..aab14e584a8 100644 --- a/tests/ludwig/marshmallow/test_fields_misc.py +++ b/tests/ludwig/marshmallow/test_fields_misc.py @@ -1,7 +1,8 @@ from typing import Dict, Tuple, Union import pytest -from marshmallow.exceptions import ValidationError as MarshmallowValidationError +from marshmallow.exceptions import \ + ValidationError as MarshmallowValidationError from marshmallow_dataclass import dataclass from ludwig.config_validation.validation import get_validator, validate diff --git a/tests/ludwig/marshmallow/test_fields_optimization.py b/tests/ludwig/marshmallow/test_fields_optimization.py index 59686e5eaa4..87ffcecafa9 100644 --- a/tests/ludwig/marshmallow/test_fields_optimization.py +++ b/tests/ludwig/marshmallow/test_fields_optimization.py @@ -2,7 +2,8 @@ from typing import Optional import pytest -from marshmallow.exceptions import ValidationError as MarshmallowValidationError +from marshmallow.exceptions import \ + ValidationError as MarshmallowValidationError from marshmallow_dataclass import dataclass import ludwig.schema.optimizers as lso diff --git a/tests/ludwig/marshmallow/test_fields_preprocessing.py b/tests/ludwig/marshmallow/test_fields_preprocessing.py index a340e3ad561..6c55427a647 100644 --- a/tests/ludwig/marshmallow/test_fields_preprocessing.py +++ b/tests/ludwig/marshmallow/test_fields_preprocessing.py @@ -1,9 +1,12 @@ #! /usr/bin/env python -from ludwig.schema.features.preprocessing.binary import BinaryPreprocessingConfig -from ludwig.schema.features.preprocessing.category import CategoryPreprocessingConfig -from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField +from ludwig.schema.features.preprocessing.binary import \ + BinaryPreprocessingConfig +from ludwig.schema.features.preprocessing.category import \ + CategoryPreprocessingConfig +from ludwig.schema.features.preprocessing.utils import \ + PreprocessingDataclassField def get_marshmallow_from_dataclass_field(dfield): diff --git a/tests/ludwig/marshmallow/test_marshmallow_misc.py b/tests/ludwig/marshmallow/test_marshmallow_misc.py index 78ba8326459..18564e7cf23 100644 --- a/tests/ludwig/marshmallow/test_marshmallow_misc.py +++ b/tests/ludwig/marshmallow/test_marshmallow_misc.py @@ -3,7 +3,9 @@ import ludwig.combiners.combiners as lcc from ludwig.schema.trainer import ECDTrainerConfig -from ludwig.schema.utils import assert_is_a_marshmallow_class, BaseMarshmallowConfig, load_config_with_kwargs +from ludwig.schema.utils import (BaseMarshmallowConfig, + assert_is_a_marshmallow_class, + load_config_with_kwargs) @dataclass diff --git a/tests/ludwig/models/test_trainable_image_layers.py b/tests/ludwig/models/test_trainable_image_layers.py index e90b5320095..5f304514f60 100644 --- a/tests/ludwig/models/test_trainable_image_layers.py +++ b/tests/ludwig/models/test_trainable_image_layers.py @@ -3,7 +3,7 @@ import pytest import torch -from torchvision.models import resnet18, ResNet18_Weights +from torchvision.models import ResNet18_Weights, resnet18 from ludwig.api import LudwigModel from ludwig.data.dataset_synthesizer import cli_synthesize_dataset diff --git a/tests/ludwig/models/test_training_determinism.py b/tests/ludwig/models/test_training_determinism.py index ff304252dac..3477f1d5466 100644 --- a/tests/ludwig/models/test_training_determinism.py +++ b/tests/ludwig/models/test_training_determinism.py @@ -7,22 +7,13 @@ from ludwig.api import LudwigModel from ludwig.constants import BATCH_SIZE, EVAL_BATCH_SIZE, TRAINER from ludwig.utils.numerical_test_utils import assert_all_finite -from tests.integration_tests.utils import ( - audio_feature, - bag_feature, - binary_feature, - category_feature, - date_feature, - generate_data, - h3_feature, - image_feature, - number_feature, - sequence_feature, - set_feature, - text_feature, - timeseries_feature, - vector_feature, -) +from tests.integration_tests.utils import (audio_feature, bag_feature, + binary_feature, category_feature, + date_feature, generate_data, + h3_feature, image_feature, + number_feature, sequence_feature, + set_feature, text_feature, + timeseries_feature, vector_feature) @pytest.mark.distributed diff --git a/tests/ludwig/models/test_training_success.py b/tests/ludwig/models/test_training_success.py index ec7e5e12b96..a77cc59a3a3 100644 --- a/tests/ludwig/models/test_training_success.py +++ b/tests/ludwig/models/test_training_success.py @@ -2,7 +2,8 @@ from ludwig.api import LudwigModel from ludwig.constants import BINARY, TRAINER -from tests.integration_tests.utils import binary_feature, category_feature, generate_data +from tests.integration_tests.utils import (binary_feature, category_feature, + generate_data) def generate_data_and_train(config, csv_filename): diff --git a/tests/ludwig/modules/test_attention.py b/tests/ludwig/modules/test_attention.py index 59206cdff04..01538800590 100644 --- a/tests/ludwig/modules/test_attention.py +++ b/tests/ludwig/modules/test_attention.py @@ -1,14 +1,13 @@ import pytest import torch -from ludwig.modules.attention_modules import ( - FeedForwardAttentionReducer, - MultiHeadSelfAttention, - TransformerBlock, - TransformerStack, -) +from ludwig.modules.attention_modules import (FeedForwardAttentionReducer, + MultiHeadSelfAttention, + TransformerBlock, + TransformerStack) from ludwig.utils.misc_utils import set_random_seed -from tests.integration_tests.parameter_update_utils import check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import \ + check_module_parameters_updated RANDOM_SEED = 1919 diff --git a/tests/ludwig/modules/test_convolutional_modules.py b/tests/ludwig/modules/test_convolutional_modules.py index f1296919b46..8303c8e7643 100644 --- a/tests/ludwig/modules/test_convolutional_modules.py +++ b/tests/ludwig/modules/test_convolutional_modules.py @@ -3,21 +3,17 @@ import pytest import torch -from ludwig.modules.convolutional_modules import ( - Conv1DLayer, - Conv1DStack, - Conv2DLayer, - Conv2DLayerFixedPadding, - Conv2DStack, - ParallelConv1D, - ParallelConv1DStack, - ResNet, - ResNetBlock, - ResNetBlockLayer, - ResNetBottleneckBlock, -) +from ludwig.modules.convolutional_modules import (Conv1DLayer, Conv1DStack, + Conv2DLayer, + Conv2DLayerFixedPadding, + Conv2DStack, ParallelConv1D, + ParallelConv1DStack, ResNet, + ResNetBlock, + ResNetBlockLayer, + ResNetBottleneckBlock) from ludwig.utils.image_utils import get_img_output_shape -from tests.integration_tests.parameter_update_utils import check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import \ + check_module_parameters_updated BATCH_SIZE = 2 SEQ_SIZE = 17 diff --git a/tests/ludwig/modules/test_embedding_modules.py b/tests/ludwig/modules/test_embedding_modules.py index a3d2ed65866..5b69c8ba935 100644 --- a/tests/ludwig/modules/test_embedding_modules.py +++ b/tests/ludwig/modules/test_embedding_modules.py @@ -3,7 +3,9 @@ import pytest import torch -from ludwig.modules.embedding_modules import Embed, EmbedSequence, EmbedSet, EmbedWeighted, TokenAndPositionEmbedding +from ludwig.modules.embedding_modules import (Embed, EmbedSequence, EmbedSet, + EmbedWeighted, + TokenAndPositionEmbedding) from ludwig.utils.torch_utils import get_torch_device DEVICE = get_torch_device() diff --git a/tests/ludwig/modules/test_encoder.py b/tests/ludwig/modules/test_encoder.py index 00cc5b9e739..96f44de4c32 100644 --- a/tests/ludwig/modules/test_encoder.py +++ b/tests/ludwig/modules/test_encoder.py @@ -22,16 +22,13 @@ from ludwig.data.dataset_synthesizer import build_vocab from ludwig.encoders.base import Encoder from ludwig.encoders.image.base import MLPMixerEncoder, Stacked2DCNN -from ludwig.encoders.sequence_encoders import ( - ParallelCNN, - SequenceEmbedEncoder, - StackedCNN, - StackedCNNRNN, - StackedParallelCNN, - StackedRNN, -) +from ludwig.encoders.sequence_encoders import (ParallelCNN, + SequenceEmbedEncoder, + StackedCNN, StackedCNNRNN, + StackedParallelCNN, StackedRNN) from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import \ + check_module_parameters_updated DROPOUT = 0.5 DEVICE = get_torch_device() diff --git a/tests/ludwig/modules/test_loss_modules.py b/tests/ludwig/modules/test_loss_modules.py index 2e11909dfb0..cad098a8782 100644 --- a/tests/ludwig/modules/test_loss_modules.py +++ b/tests/ludwig/modules/test_loss_modules.py @@ -9,20 +9,15 @@ from ludwig.features.set_feature import SetOutputFeature from ludwig.features.text_feature import TextOutputFeature from ludwig.modules import loss_modules -from ludwig.schema.features.loss.loss import ( - BWCEWLossConfig, - CORNLossConfig, - HuberLossConfig, - MAELossConfig, - MAPELossConfig, - MSELossConfig, - RMSELossConfig, - RMSPELossConfig, - SigmoidCrossEntropyLossConfig, - SoftmaxCrossEntropyLossConfig, -) +from ludwig.schema.features.loss.loss import (BWCEWLossConfig, CORNLossConfig, + HuberLossConfig, MAELossConfig, + MAPELossConfig, MSELossConfig, + RMSELossConfig, RMSPELossConfig, + SigmoidCrossEntropyLossConfig, + SoftmaxCrossEntropyLossConfig) from ludwig.schema.model_config import ModelConfig -from tests.integration_tests.utils import category_feature, set_feature, text_feature +from tests.integration_tests.utils import (category_feature, set_feature, + text_feature) def from_float(v: float) -> torch.Tensor: diff --git a/tests/ludwig/modules/test_lr_scheduler.py b/tests/ludwig/modules/test_lr_scheduler.py index 94876250173..77117938a3b 100644 --- a/tests/ludwig/modules/test_lr_scheduler.py +++ b/tests/ludwig/modules/test_lr_scheduler.py @@ -3,10 +3,12 @@ import numpy as np from torch.optim import SGD -from ludwig.features.number_feature import NumberInputFeature, NumberOutputFeature +from ludwig.features.number_feature import (NumberInputFeature, + NumberOutputFeature) from ludwig.modules.lr_scheduler import LRScheduler from ludwig.schema.encoders.base import DenseEncoderConfig -from ludwig.schema.features.number_feature import ECDNumberOutputFeatureConfig, NumberInputFeatureConfig +from ludwig.schema.features.number_feature import ( + ECDNumberOutputFeatureConfig, NumberInputFeatureConfig) from ludwig.schema.lr_scheduler import LRSchedulerConfig from ludwig.utils.metric_utils import TrainerMetric from ludwig.utils.trainer_utils import get_new_progress_tracker diff --git a/tests/ludwig/modules/test_metric_modules.py b/tests/ludwig/modules/test_metric_modules.py index 8c305c1bb16..c382ef86480 100644 --- a/tests/ludwig/modules/test_metric_modules.py +++ b/tests/ludwig/modules/test_metric_modules.py @@ -3,11 +3,9 @@ from ludwig.distributed import init_dist_strategy from ludwig.modules import metric_modules -from ludwig.schema.features.loss.loss import ( - BWCEWLossConfig, - SigmoidCrossEntropyLossConfig, - SoftmaxCrossEntropyLossConfig, -) +from ludwig.schema.features.loss.loss import (BWCEWLossConfig, + SigmoidCrossEntropyLossConfig, + SoftmaxCrossEntropyLossConfig) # Required for local testing. init_dist_strategy("local") diff --git a/tests/ludwig/modules/test_mlp_mixer_modules.py b/tests/ludwig/modules/test_mlp_mixer_modules.py index ac91e4644fb..bbd6c986250 100644 --- a/tests/ludwig/modules/test_mlp_mixer_modules.py +++ b/tests/ludwig/modules/test_mlp_mixer_modules.py @@ -1,6 +1,6 @@ import pytest -from ludwig.modules.mlp_mixer_modules import MixerBlock, MLP, MLPMixer +from ludwig.modules.mlp_mixer_modules import MLP, MixerBlock, MLPMixer from .test_utils import assert_output_shapes diff --git a/tests/ludwig/modules/test_regex_freezing.py b/tests/ludwig/modules/test_regex_freezing.py index 7ec39544710..adb99666e2e 100644 --- a/tests/ludwig/modules/test_regex_freezing.py +++ b/tests/ludwig/modules/test_regex_freezing.py @@ -5,23 +5,15 @@ import pytest from ludwig.api import LudwigModel -from ludwig.constants import ( - BASE_MODEL, - BATCH_SIZE, - EPOCHS, - GENERATION, - INPUT_FEATURES, - MODEL_LLM, - MODEL_TYPE, - OUTPUT_FEATURES, - TRAINER, - TYPE, -) +from ludwig.constants import (BASE_MODEL, BATCH_SIZE, EPOCHS, GENERATION, + INPUT_FEATURES, MODEL_LLM, MODEL_TYPE, + OUTPUT_FEATURES, TRAINER, TYPE) from ludwig.encoders.image.torchvision import TVEfficientNetEncoder from ludwig.schema.trainer import ECDTrainerConfig from ludwig.utils.misc_utils import set_random_seed from ludwig.utils.trainer_utils import freeze_layers_regex -from tests.integration_tests.utils import category_feature, generate_data, image_feature, text_feature +from tests.integration_tests.utils import (category_feature, generate_data, + image_feature, text_feature) RANDOM_SEED = 130 diff --git a/tests/ludwig/modules/test_tabnet_modules.py b/tests/ludwig/modules/test_tabnet_modules.py index ea4bf537fb2..4abc4eec4a3 100644 --- a/tests/ludwig/modules/test_tabnet_modules.py +++ b/tests/ludwig/modules/test_tabnet_modules.py @@ -3,9 +3,11 @@ import pytest import torch -from ludwig.modules.tabnet_modules import AttentiveTransformer, FeatureBlock, FeatureTransformer, TabNet +from ludwig.modules.tabnet_modules import (AttentiveTransformer, FeatureBlock, + FeatureTransformer, TabNet) from ludwig.utils.entmax import sparsemax -from tests.integration_tests.parameter_update_utils import check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import \ + check_module_parameters_updated RANDOM_SEED = 67 diff --git a/tests/ludwig/schema/hyperopt/test_scheduler.py b/tests/ludwig/schema/hyperopt/test_scheduler.py index 363a3268c6f..7ef447a208c 100644 --- a/tests/ludwig/schema/hyperopt/test_scheduler.py +++ b/tests/ludwig/schema/hyperopt/test_scheduler.py @@ -1,8 +1,9 @@ import pytest from ludwig.schema.hyperopt.scheduler import BaseSchedulerConfig -from ludwig.schema.hyperopt.utils import register_scheduler_config, scheduler_config_registry -from ludwig.schema.utils import ludwig_dataclass, ProtectedString +from ludwig.schema.hyperopt.utils import (register_scheduler_config, + scheduler_config_registry) +from ludwig.schema.utils import ProtectedString, ludwig_dataclass @pytest.fixture( diff --git a/tests/ludwig/schema/hyperopt/test_search_algorithm.py b/tests/ludwig/schema/hyperopt/test_search_algorithm.py index 023a14a4533..c9db41b1f36 100644 --- a/tests/ludwig/schema/hyperopt/test_search_algorithm.py +++ b/tests/ludwig/schema/hyperopt/test_search_algorithm.py @@ -1,8 +1,9 @@ import pytest from ludwig.schema.hyperopt.search_algorithm import BaseSearchAlgorithmConfig -from ludwig.schema.hyperopt.utils import register_search_algorithm_config, search_algorithm_config_registry -from ludwig.schema.utils import ludwig_dataclass, ProtectedString +from ludwig.schema.hyperopt.utils import (register_search_algorithm_config, + search_algorithm_config_registry) +from ludwig.schema.utils import ProtectedString, ludwig_dataclass @pytest.fixture( diff --git a/tests/ludwig/schema/test_model_config.py b/tests/ludwig/schema/test_model_config.py index 21e2883b989..a9fdc6aa3f7 100644 --- a/tests/ludwig/schema/test_model_config.py +++ b/tests/ludwig/schema/test_model_config.py @@ -5,42 +5,21 @@ import pytest import yaml -from ludwig.constants import ( - ACTIVE, - BASE_MODEL, - CLIP, - COLUMN, - COMBINER, - DECODER, - DEFAULT_VALIDATION_METRIC, - DEFAULTS, - DEPENDENCIES, - ENCODER, - HYPEROPT, - INPUT_FEATURES, - INPUT_SIZE, - LOSS, - MODEL_ECD, - MODEL_GBM, - MODEL_LLM, - MODEL_TYPE, - NAME, - NUM_CLASSES, - OPTIMIZER, - OUTPUT_FEATURES, - PREPROCESSING, - PROC_COLUMN, - REDUCE_DEPENDENCIES, - REDUCE_INPUT, - TIED, - TRAINER, - TYPE, -) +from ludwig.constants import (ACTIVE, BASE_MODEL, CLIP, COLUMN, COMBINER, + DECODER, DEFAULT_VALIDATION_METRIC, DEFAULTS, + DEPENDENCIES, ENCODER, HYPEROPT, INPUT_FEATURES, + INPUT_SIZE, LOSS, MODEL_ECD, MODEL_GBM, + MODEL_LLM, MODEL_TYPE, NAME, NUM_CLASSES, + OPTIMIZER, OUTPUT_FEATURES, PREPROCESSING, + PROC_COLUMN, REDUCE_DEPENDENCIES, REDUCE_INPUT, + TIED, TRAINER, TYPE) from ludwig.error import ConfigValidationError from ludwig.schema.decoders.base import ClassifierConfig from ludwig.schema.encoders.text_encoders import BERTConfig -from ludwig.schema.features.augmentation.image import RandomBlurConfig, RandomRotateConfig -from ludwig.schema.features.image_feature import AUGMENTATION_DEFAULT_OPERATIONS +from ludwig.schema.features.augmentation.image import (RandomBlurConfig, + RandomRotateConfig) +from ludwig.schema.features.image_feature import \ + AUGMENTATION_DEFAULT_OPERATIONS from ludwig.schema.features.number_feature import NumberOutputFeatureConfig from ludwig.schema.features.text_feature import TextOutputFeatureConfig from ludwig.schema.llms.quantization import QuantizationConfig diff --git a/tests/ludwig/utils/entmax/test_losses.py b/tests/ludwig/utils/entmax/test_losses.py index 6c56a7e7c9c..04fba713100 100644 --- a/tests/ludwig/utils/entmax/test_losses.py +++ b/tests/ludwig/utils/entmax/test_losses.py @@ -5,7 +5,8 @@ from torch.autograd import gradcheck from ludwig.constants import IGNORE_INDEX_TOKEN_ID -from ludwig.utils.entmax.losses import Entmax15Loss, EntmaxBisectLoss, SparsemaxBisectLoss, SparsemaxLoss +from ludwig.utils.entmax.losses import (Entmax15Loss, EntmaxBisectLoss, + SparsemaxBisectLoss, SparsemaxLoss) # make data Xs = [torch.randn(4, 10, dtype=torch.float64, requires_grad=True) for _ in range(5)] diff --git a/tests/ludwig/utils/entmax/test_topk.py b/tests/ludwig/utils/entmax/test_topk.py index d4f7063e1ce..1a6da119b4d 100644 --- a/tests/ludwig/utils/entmax/test_topk.py +++ b/tests/ludwig/utils/entmax/test_topk.py @@ -2,12 +2,9 @@ import torch from torch.autograd import gradcheck -from ludwig.utils.entmax.activations import ( - _entmax_threshold_and_support, - _sparsemax_threshold_and_support, - Entmax15, - Sparsemax, -) +from ludwig.utils.entmax.activations import (Entmax15, Sparsemax, + _entmax_threshold_and_support, + _sparsemax_threshold_and_support) @pytest.mark.parametrize("dim", (0, 1, 2)) diff --git a/tests/ludwig/utils/test_backward_compatibility.py b/tests/ludwig/utils/test_backward_compatibility.py index 1548b268cec..2250b93f333 100644 --- a/tests/ludwig/utils/test_backward_compatibility.py +++ b/tests/ludwig/utils/test_backward_compatibility.py @@ -4,36 +4,18 @@ import pytest -from ludwig.constants import ( - BATCH_SIZE, - BFILL, - CLASS_WEIGHTS, - DEFAULTS, - EVAL_BATCH_SIZE, - EXECUTOR, - HYPEROPT, - INPUT_FEATURES, - LEARNING_RATE_SCHEDULER, - LOSS, - NUMBER, - OUTPUT_FEATURES, - PREPROCESSING, - SCHEDULER, - SPLIT, - TRAINER, - TYPE, -) +from ludwig.constants import (BATCH_SIZE, BFILL, CLASS_WEIGHTS, DEFAULTS, + EVAL_BATCH_SIZE, EXECUTOR, HYPEROPT, + INPUT_FEATURES, LEARNING_RATE_SCHEDULER, LOSS, + NUMBER, OUTPUT_FEATURES, PREPROCESSING, + SCHEDULER, SPLIT, TRAINER, TYPE) from ludwig.schema.model_config import ModelConfig from ludwig.schema.trainer import ECDTrainerConfig from ludwig.utils.backward_compatibility import ( - _update_backend_cache_credentials, - _upgrade_encoder_decoder_params, - _upgrade_feature, - _upgrade_preprocessing_split, - upgrade_config_dict_to_latest_version, - upgrade_missing_value_strategy, - upgrade_model_progress, -) + _update_backend_cache_credentials, _upgrade_encoder_decoder_params, + _upgrade_feature, _upgrade_preprocessing_split, + upgrade_config_dict_to_latest_version, upgrade_missing_value_strategy, + upgrade_model_progress) from ludwig.utils.trainer_utils import TrainerMetric diff --git a/tests/ludwig/utils/test_config_utils.py b/tests/ludwig/utils/test_config_utils.py index c5274f198ff..9978d75f57d 100644 --- a/tests/ludwig/utils/test_config_utils.py +++ b/tests/ludwig/utils/test_config_utils.py @@ -3,20 +3,9 @@ import pytest -from ludwig.constants import ( - BASE_MODEL, - BINARY, - ENCODER, - INPUT_FEATURES, - MODEL_ECD, - MODEL_GBM, - MODEL_LLM, - MODEL_TYPE, - NAME, - OUTPUT_FEATURES, - TEXT, - TYPE, -) +from ludwig.constants import (BASE_MODEL, BINARY, ENCODER, INPUT_FEATURES, + MODEL_ECD, MODEL_GBM, MODEL_LLM, MODEL_TYPE, + NAME, OUTPUT_FEATURES, TEXT, TYPE) from ludwig.schema.encoders.text_encoders import BERTConfig from ludwig.schema.encoders.utils import get_encoder_cls from ludwig.schema.features.preprocessing.text import TextPreprocessingConfig diff --git a/tests/ludwig/utils/test_data_utils.py b/tests/ludwig/utils/test_data_utils.py index d7299925f43..9da81d3c48a 100644 --- a/tests/ludwig/utils/test_data_utils.py +++ b/tests/ludwig/utils/test_data_utils.py @@ -24,19 +24,12 @@ from ludwig.api import LudwigModel from ludwig.data.cache.types import CacheableDataframe from ludwig.data.dataset_synthesizer import build_synthetic_dataset_df -from ludwig.utils.data_utils import ( - add_sequence_feature_column, - figure_data_format_dataset, - get_abs_path, - hash_dict, - NumpyEncoder, - PANDAS_DF, - read_csv, - read_html, - read_parquet, - sanitize_column_names, - use_credentials, -) +from ludwig.utils.data_utils import (PANDAS_DF, NumpyEncoder, + add_sequence_feature_column, + figure_data_format_dataset, get_abs_path, + hash_dict, read_csv, read_html, + read_parquet, sanitize_column_names, + use_credentials) from tests.integration_tests.utils import private_param try: diff --git a/tests/ludwig/utils/test_dataframe_utils.py b/tests/ludwig/utils/test_dataframe_utils.py index 3594c41b96b..7e96245378a 100644 --- a/tests/ludwig/utils/test_dataframe_utils.py +++ b/tests/ludwig/utils/test_dataframe_utils.py @@ -2,7 +2,7 @@ import pandas as pd import pytest -from ludwig.backend import create_backend, LOCAL_BACKEND +from ludwig.backend import LOCAL_BACKEND, create_backend from ludwig.utils.dataframe_utils import to_numpy_dataset, to_scalar_df try: diff --git a/tests/ludwig/utils/test_defaults.py b/tests/ludwig/utils/test_defaults.py index 2872bfc8ffe..3e5dab1faa8 100644 --- a/tests/ludwig/utils/test_defaults.py +++ b/tests/ludwig/utils/test_defaults.py @@ -2,46 +2,21 @@ import pytest -from ludwig.constants import ( - CATEGORY, - COMBINER, - DECODER, - DEFAULTS, - DEPENDENCIES, - DROP_ROW, - EARLY_STOP, - ENCODER, - EXECUTOR, - FILL_WITH_MODE, - HYPEROPT, - INPUT_FEATURES, - LOSS, - MISSING_VALUE_STRATEGY, - MODEL_ECD, - MODEL_TYPE, - OUTPUT_FEATURES, - PREPROCESSING, - REDUCE_DEPENDENCIES, - REDUCE_INPUT, - SCHEDULER, - SUM, - TIED, - TOP_K, - TRAINER, - TYPE, -) +from ludwig.constants import (CATEGORY, COMBINER, DECODER, DEFAULTS, + DEPENDENCIES, DROP_ROW, EARLY_STOP, ENCODER, + EXECUTOR, FILL_WITH_MODE, HYPEROPT, + INPUT_FEATURES, LOSS, MISSING_VALUE_STRATEGY, + MODEL_ECD, MODEL_TYPE, OUTPUT_FEATURES, + PREPROCESSING, REDUCE_DEPENDENCIES, REDUCE_INPUT, + SCHEDULER, SUM, TIED, TOP_K, TRAINER, TYPE) from ludwig.schema.model_config import ModelConfig from ludwig.schema.trainer import ECDTrainerConfig -from ludwig.utils.backward_compatibility import upgrade_config_dict_to_latest_version +from ludwig.utils.backward_compatibility import \ + upgrade_config_dict_to_latest_version from ludwig.utils.misc_utils import merge_dict, set_default_values -from tests.integration_tests.utils import ( - binary_feature, - category_feature, - number_feature, - sequence_feature, - text_feature, - vector_feature, -) +from tests.integration_tests.utils import (binary_feature, category_feature, + number_feature, sequence_feature, + text_feature, vector_feature) HYPEROPT_CONFIG = { "parameters": { diff --git a/tests/ludwig/utils/test_fs_utils.py b/tests/ludwig/utils/test_fs_utils.py index fef3e2b3bc3..307bff1ee22 100644 --- a/tests/ludwig/utils/test_fs_utils.py +++ b/tests/ludwig/utils/test_fs_utils.py @@ -6,7 +6,9 @@ import pytest -from ludwig.utils.fs_utils import get_fs_and_path, list_file_names_in_directory, safe_move_directory +from ludwig.utils.fs_utils import (get_fs_and_path, + list_file_names_in_directory, + safe_move_directory) logger = logging.getLogger(__name__) diff --git a/tests/ludwig/utils/test_hf_utils.py b/tests/ludwig/utils/test_hf_utils.py index f4a04388b6e..cf4b2215fdc 100644 --- a/tests/ludwig/utils/test_hf_utils.py +++ b/tests/ludwig/utils/test_hf_utils.py @@ -6,11 +6,9 @@ from transformers import AlbertModel, BertModel, BertTokenizer from ludwig.encoders.text_encoders import ALBERTEncoder, BERTEncoder -from ludwig.utils.hf_utils import ( - load_pretrained_hf_model_from_hub, - load_pretrained_hf_model_with_hub_fallback, - upload_folder_to_hfhub, -) +from ludwig.utils.hf_utils import (load_pretrained_hf_model_from_hub, + load_pretrained_hf_model_with_hub_fallback, + upload_folder_to_hfhub) @pytest.mark.parametrize( diff --git a/tests/ludwig/utils/test_image_utils.py b/tests/ludwig/utils/test_image_utils.py index d063614435e..f70860421a6 100644 --- a/tests/ludwig/utils/test_image_utils.py +++ b/tests/ludwig/utils/test_image_utils.py @@ -18,20 +18,12 @@ import torch import torchvision.transforms.functional as F -from ludwig.utils.image_utils import ( - crop, - crop_or_pad, - get_class_mask_from_image, - get_image_from_class_mask, - get_unique_channels, - grayscale, - is_image_score, - num_channels_in_image, - pad, - read_image_as_tif, - resize_image, - ResizeChannels, -) +from ludwig.utils.image_utils import (ResizeChannels, crop, crop_or_pad, + get_class_mask_from_image, + get_image_from_class_mask, + get_unique_channels, grayscale, + is_image_score, num_channels_in_image, + pad, read_image_as_tif, resize_image) @pytest.mark.parametrize("pad_fn", [pad, torch.jit.script(pad)]) diff --git a/tests/ludwig/utils/test_llm_utils.py b/tests/ludwig/utils/test_llm_utils.py index a79085ef893..b062fdf9a07 100644 --- a/tests/ludwig/utils/test_llm_utils.py +++ b/tests/ludwig/utils/test_llm_utils.py @@ -5,17 +5,10 @@ from ludwig.constants import LOGITS, PREDICTIONS, PROBABILITIES from ludwig.modules.training_hooks import NEFTuneHook from ludwig.utils.llm_utils import ( - add_left_padding, - create_attention_mask, - FALLBACK_CONTEXT_LEN, - find_last_matching_index, - generate_merged_ids, - get_context_len, + FALLBACK_CONTEXT_LEN, add_left_padding, create_attention_mask, + find_last_matching_index, generate_merged_ids, get_context_len, get_realigned_target_and_prediction_tensors_for_inference, - has_padding_token, - pad_target_tensor_for_fine_tuning, - remove_left_padding, -) + has_padding_token, pad_target_tensor_for_fine_tuning, remove_left_padding) from ludwig.utils.tokenizers import HFTokenizer pytestmark = [pytest.mark.llm] diff --git a/tests/ludwig/utils/test_model_utils.py b/tests/ludwig/utils/test_model_utils.py index c5acbfca927..4b25e8893dc 100644 --- a/tests/ludwig/utils/test_model_utils.py +++ b/tests/ludwig/utils/test_model_utils.py @@ -2,12 +2,10 @@ import torch from transformers import AutoModelForCausalLM -from ludwig.utils.model_utils import ( - contains_nan_or_inf_tensors, - extract_tensors, - find_embedding_layer_with_path, - replace_tensors, -) +from ludwig.utils.model_utils import (contains_nan_or_inf_tensors, + extract_tensors, + find_embedding_layer_with_path, + replace_tensors) class SampleModel(torch.nn.Module): diff --git a/tests/ludwig/utils/test_normalization.py b/tests/ludwig/utils/test_normalization.py index f5ccf691366..c4803a0a520 100644 --- a/tests/ludwig/utils/test_normalization.py +++ b/tests/ludwig/utils/test_normalization.py @@ -21,7 +21,8 @@ from ludwig.backend import initialize_backend from ludwig.constants import COLUMN, NAME, PROC_COLUMN from ludwig.features.feature_utils import compute_feature_hash -from ludwig.features.number_feature import NumberFeatureMixin, numeric_transformation_registry +from ludwig.features.number_feature import (NumberFeatureMixin, + numeric_transformation_registry) from ludwig.utils.types import DataFrame diff --git a/tests/ludwig/utils/test_tokenizers.py b/tests/ludwig/utils/test_tokenizers.py index 0fa8104ed10..b43850963aa 100644 --- a/tests/ludwig/utils/test_tokenizers.py +++ b/tests/ludwig/utils/test_tokenizers.py @@ -1,9 +1,6 @@ -from ludwig.utils.tokenizers import ( - EnglishLemmatizeFilterTokenizer, - get_tokenizer_from_registry, - NgramTokenizer, - StringSplitTokenizer, -) +from ludwig.utils.tokenizers import (EnglishLemmatizeFilterTokenizer, + NgramTokenizer, StringSplitTokenizer, + get_tokenizer_from_registry) def test_ngram_tokenizer(): diff --git a/tests/ludwig/utils/test_torch_utils.py b/tests/ludwig/utils/test_torch_utils.py index f77a6b171c7..1f3520ba030 100644 --- a/tests/ludwig/utils/test_torch_utils.py +++ b/tests/ludwig/utils/test_torch_utils.py @@ -6,13 +6,10 @@ import pytest import torch -from ludwig.utils.torch_utils import ( - _get_torch_init_params, - _set_torch_init_params, - initialize_pytorch, - sequence_length_2D, - sequence_length_3D, -) +from ludwig.utils.torch_utils import (_get_torch_init_params, + _set_torch_init_params, + initialize_pytorch, sequence_length_2D, + sequence_length_3D) @pytest.mark.parametrize("input_sequence", [[[0, 1, 1], [2, 0, 0], [3, 3, 3]]]) diff --git a/tests/ludwig/utils/test_trainer_utils.py b/tests/ludwig/utils/test_trainer_utils.py index 763f311bcfc..521c4b9ea2f 100644 --- a/tests/ludwig/utils/test_trainer_utils.py +++ b/tests/ludwig/utils/test_trainer_utils.py @@ -6,7 +6,8 @@ from ludwig.constants import AUTO, BATCH_SIZE, COMBINED, LOSS from ludwig.features.category_feature import CategoryOutputFeature from ludwig.features.feature_utils import LudwigFeatureDict -from ludwig.schema.features.category_feature import ECDCategoryOutputFeatureConfig +from ludwig.schema.features.category_feature import \ + ECDCategoryOutputFeatureConfig from ludwig.schema.trainer import ECDTrainerConfig from ludwig.schema.utils import load_config_with_kwargs from ludwig.utils import trainer_utils diff --git a/tests/ludwig/utils/test_upload_utils.py b/tests/ludwig/utils/test_upload_utils.py index 548713233d4..d7c2d1c2d42 100644 --- a/tests/ludwig/utils/test_upload_utils.py +++ b/tests/ludwig/utils/test_upload_utils.py @@ -6,7 +6,8 @@ import pytest -from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, MODEL_WEIGHTS_FILE_NAME +from ludwig.globals import (MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, + MODEL_WEIGHTS_FILE_NAME) from ludwig.utils.upload_utils import HuggingFaceHub logger = logging.getLogger(__name__) diff --git a/tests/ludwig/utils/test_version_transformation.py b/tests/ludwig/utils/test_version_transformation.py index 3cd35409b7b..580617a91cb 100644 --- a/tests/ludwig/utils/test_version_transformation.py +++ b/tests/ludwig/utils/test_version_transformation.py @@ -1,4 +1,5 @@ -from ludwig.utils.version_transformation import VersionTransformation, VersionTransformationRegistry +from ludwig.utils.version_transformation import (VersionTransformation, + VersionTransformationRegistry) def test_version_transformation_registry(): diff --git a/tests/regression_tests/automl/scripts/update_golden_types.py b/tests/regression_tests/automl/scripts/update_golden_types.py index 12490ab22c3..bb1edc02c55 100644 --- a/tests/regression_tests/automl/scripts/update_golden_types.py +++ b/tests/regression_tests/automl/scripts/update_golden_types.py @@ -3,7 +3,9 @@ import json from ludwig.automl import create_auto_config -from tests.regression_tests.automl.utils import get_dataset_golden_types_path, get_dataset_object, TEST_DATASET_REGISTRY +from tests.regression_tests.automl.utils import (TEST_DATASET_REGISTRY, + get_dataset_golden_types_path, + get_dataset_object) def write_json_files(): diff --git a/tests/regression_tests/automl/test_auto_type_inference.py b/tests/regression_tests/automl/test_auto_type_inference.py index 72c2281c7fe..0e3820252f1 100644 --- a/tests/regression_tests/automl/test_auto_type_inference.py +++ b/tests/regression_tests/automl/test_auto_type_inference.py @@ -2,7 +2,9 @@ import pytest -from tests.regression_tests.automl.utils import get_dataset_golden_types_path, get_dataset_object, TEST_DATASET_REGISTRY +from tests.regression_tests.automl.utils import (TEST_DATASET_REGISTRY, + get_dataset_golden_types_path, + get_dataset_object) try: from ludwig.automl import create_auto_config diff --git a/tests/training_success/configs.py b/tests/training_success/configs.py index a96893e82fd..d5a7128b930 100644 --- a/tests/training_success/configs.py +++ b/tests/training_success/configs.py @@ -1,8 +1,6 @@ from ludwig.config_sampling.explore_schema import ( - combine_configs, - combine_configs_for_comparator_combiner, - combine_configs_for_sequence_combiner, -) + combine_configs, combine_configs_for_comparator_combiner, + combine_configs_for_sequence_combiner) # A generic tabular to text config used to generate synthetic data and train a model on it. TABULAR_TO_TEXT = """ diff --git a/tests/training_success/test_training_success.py b/tests/training_success/test_training_success.py index f54eb22beaf..f851e3b2dc1 100644 --- a/tests/training_success/test_training_success.py +++ b/tests/training_success/test_training_success.py @@ -8,16 +8,16 @@ import yaml from ludwig.api import LudwigModel -from ludwig.config_sampling.explore_schema import combine_configs, ConfigOption, explore_properties +from ludwig.config_sampling.explore_schema import (ConfigOption, + combine_configs, + explore_properties) from ludwig.config_validation.validation import get_schema from ludwig.types import ModelConfigDict -from .configs import ( - COMBINER_TYPE_TO_COMBINE_FN_MAP, - ECD_CONFIG_SECTION_TO_CONFIG, - FEATURE_TYPE_TO_CONFIG_FOR_DECODER_LOSS, - FEATURE_TYPE_TO_CONFIG_FOR_ENCODER_PREPROCESSING, -) +from .configs import (COMBINER_TYPE_TO_COMBINE_FN_MAP, + ECD_CONFIG_SECTION_TO_CONFIG, + FEATURE_TYPE_TO_CONFIG_FOR_DECODER_LOSS, + FEATURE_TYPE_TO_CONFIG_FOR_ENCODER_PREPROCESSING) def defaults_config_generator( From a3704a1e851dc01b09f8f9990acc375edbbfb114 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Sun, 1 Dec 2024 21:20:03 -0800 Subject: [PATCH 37/67] fixed invalid error in toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 34fe0c1afda..720dfd5e2db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -154,7 +154,7 @@ serve = [ tree = ["hummingbird-ml>=0.4.8", "lightgbm", "lightgbm-ray"] viz = [ "hiplot", - matplotlib>3.4; python_version > '3.6'", + "matplotlib>3.4; python_version > '3.6'", #"matplotlib>3.4,<3.9.0; python_version > '3.6'", "matplotlib>=3.0,<3.4; python_version <= '3.6'", "ptitprince", From 294a760ea4eca7fa11b45fbd5b2ee69e9cb7749d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 2 Dec 2024 05:22:19 +0000 Subject: [PATCH 38/67] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ludwig/api.py | 88 +++++++------ ludwig/automl/auto_tune_config.py | 23 ++-- ludwig/automl/automl.py | 38 ++++-- ludwig/automl/base_config.py | 22 +++- ludwig/backend/_ray210_compat.py | 2 +- ludwig/backend/base.py | 5 +- ludwig/backend/datasource.py | 14 ++- ludwig/backend/ray.py | 35 +++--- ludwig/benchmarking/benchmark.py | 17 ++- ludwig/benchmarking/profiler.py | 6 +- ludwig/benchmarking/reporting.py | 4 +- ludwig/benchmarking/summarize.py | 10 +- ludwig/benchmarking/summary_dataclasses.py | 4 +- ludwig/benchmarking/utils.py | 3 +- ludwig/callbacks.py | 3 +- ludwig/collect.py | 3 +- ludwig/combiners/combiners.py | 9 +- ludwig/config_sampling/explore_schema.py | 3 +- ludwig/config_validation/checks.py | 33 +++-- ludwig/config_validation/validation.py | 9 +- ludwig/contrib.py | 2 +- ludwig/contribs/mlflow/__init__.py | 6 +- ludwig/contribs/mlflow/model.py | 2 +- ludwig/data/cache/manager.py | 2 +- ludwig/data/cache/util.py | 3 +- ludwig/data/dataframe/dask.py | 3 +- ludwig/data/dataframe/modin.py | 3 +- ludwig/data/dataset/pandas.py | 5 +- ludwig/data/dataset/ray.py | 6 +- ludwig/data/dataset_synthesizer.py | 28 ++++- ludwig/data/preprocessing.py | 118 ++++++++++++------ ludwig/data/prompt.py | 8 +- ludwig/data/split.py | 15 ++- ludwig/datasets/__init__.py | 4 +- ludwig/decoders/generic_decoders.py | 7 +- ludwig/decoders/image_decoders.py | 6 +- ludwig/decoders/llm_decoders.py | 3 +- ludwig/decoders/sequence_decoders.py | 6 +- ludwig/decoders/sequence_tagger.py | 6 +- ludwig/distributed/base.py | 2 +- ludwig/distributed/ddp.py | 3 +- ludwig/distributed/deepspeed.py | 6 +- ludwig/distributed/fsdp.py | 2 +- ludwig/distributed/horovod.py | 6 +- ludwig/encoders/category_encoders.py | 7 +- ludwig/encoders/date_encoders.py | 3 +- ludwig/encoders/generic_encoders.py | 6 +- ludwig/encoders/h3_encoders.py | 3 +- ludwig/encoders/image/base.py | 15 ++- ludwig/encoders/image/torchvision.py | 32 +++-- ludwig/encoders/sequence_encoders.py | 25 ++-- ludwig/encoders/text_encoders.py | 43 ++++--- ludwig/experiment.py | 8 +- ludwig/explain/captum.py | 18 ++- ludwig/explain/captum_ray.py | 12 +- ludwig/export.py | 3 +- ludwig/features/audio_feature.py | 30 +++-- ludwig/features/bag_feature.py | 3 +- ludwig/features/base_feature.py | 29 +++-- ludwig/features/binary_feature.py | 31 +++-- ludwig/features/category_feature.py | 40 ++++-- ludwig/features/date_feature.py | 13 +- ludwig/features/feature_registries.py | 59 ++++----- ludwig/features/feature_utils.py | 6 +- ludwig/features/h3_feature.py | 3 +- ludwig/features/image_feature.py | 84 ++++++++----- ludwig/features/number_feature.py | 19 +-- ludwig/features/sequence_feature.py | 45 ++++--- ludwig/features/set_feature.py | 24 ++-- ludwig/features/text_feature.py | 48 ++++--- ludwig/features/timeseries_feature.py | 20 ++- ludwig/features/vector_feature.py | 19 +-- ludwig/hyperopt/execution.py | 16 +-- ludwig/hyperopt/run.py | 44 ++++--- ludwig/hyperopt/utils.py | 39 ++++-- ludwig/model_export/onnx_exporter.py | 3 +- ludwig/models/base.py | 11 +- ludwig/models/embedder.py | 5 +- ludwig/models/gbm.py | 3 +- ludwig/models/inference.py | 14 +-- ludwig/models/llm.py | 21 ++-- ludwig/models/predictor.py | 8 +- ludwig/models/retrieval.py | 3 +- ludwig/modules/attention_modules.py | 2 +- ludwig/modules/convolutional_modules.py | 2 +- ludwig/modules/embedding_modules.py | 2 +- ludwig/modules/fully_connected_modules.py | 3 +- ludwig/modules/loss_modules.py | 21 +++- ludwig/modules/lr_scheduler.py | 3 +- ludwig/modules/metric_modules.py | 97 +++++++++----- ludwig/modules/metric_registry.py | 5 +- ludwig/modules/optimization_modules.py | 5 +- ludwig/schema/__init__.py | 8 +- ludwig/schema/combiners/sequence.py | 3 +- ludwig/schema/combiners/tab_transformer.py | 3 +- ludwig/schema/combiners/transformer.py | 3 +- ludwig/schema/combiners/utils.py | 3 +- ludwig/schema/decoders/base.py | 3 +- ludwig/schema/decoders/image_decoders.py | 5 +- ludwig/schema/decoders/utils.py | 2 +- ludwig/schema/defaults/ecd.py | 17 ++- ludwig/schema/defaults/utils.py | 2 +- ludwig/schema/encoders/base.py | 8 +- ludwig/schema/encoders/category_encoders.py | 5 +- ludwig/schema/encoders/image/base.py | 5 +- ludwig/schema/encoders/sequence_encoders.py | 5 +- ludwig/schema/encoders/text_encoders.py | 14 +-- ludwig/schema/encoders/utils.py | 2 +- ludwig/schema/features/audio_feature.py | 7 +- ludwig/schema/features/augmentation/image.py | 3 +- ludwig/schema/features/augmentation/utils.py | 2 +- ludwig/schema/features/bag_feature.py | 7 +- ludwig/schema/features/base.py | 42 +++++-- ludwig/schema/features/binary_feature.py | 27 ++-- ludwig/schema/features/category_feature.py | 38 +++--- ludwig/schema/features/date_feature.py | 7 +- ludwig/schema/features/h3_feature.py | 7 +- ludwig/schema/features/image_feature.py | 24 ++-- ludwig/schema/features/loss/__init__.py | 5 +- ludwig/schema/features/loss/loss.py | 33 +++-- ludwig/schema/features/number_feature.py | 24 ++-- .../schema/features/preprocessing/__init__.py | 19 ++- ludwig/schema/features/preprocessing/audio.py | 3 +- ludwig/schema/features/preprocessing/bag.py | 3 +- .../schema/features/preprocessing/binary.py | 12 +- .../schema/features/preprocessing/category.py | 3 +- ludwig/schema/features/preprocessing/date.py | 3 +- ludwig/schema/features/preprocessing/h3.py | 3 +- ludwig/schema/features/preprocessing/image.py | 3 +- .../schema/features/preprocessing/number.py | 11 +- .../schema/features/preprocessing/sequence.py | 4 +- ludwig/schema/features/preprocessing/set.py | 4 +- ludwig/schema/features/preprocessing/text.py | 4 +- .../features/preprocessing/timeseries.py | 4 +- ludwig/schema/features/preprocessing/utils.py | 2 +- .../schema/features/preprocessing/vector.py | 4 +- ludwig/schema/features/sequence_feature.py | 21 ++-- ludwig/schema/features/set_feature.py | 18 +-- ludwig/schema/features/text_feature.py | 40 +++--- ludwig/schema/features/timeseries_feature.py | 21 ++-- ludwig/schema/features/vector_feature.py | 18 +-- ludwig/schema/hyperopt/__init__.py | 9 +- ludwig/schema/hyperopt/executor.py | 5 +- ludwig/schema/hyperopt/scheduler.py | 2 +- ludwig/schema/hyperopt/search_algorithm.py | 2 +- ludwig/schema/llms/base_model.py | 2 +- ludwig/schema/llms/peft.py | 2 +- ludwig/schema/lr_scheduler.py | 2 +- ludwig/schema/model_types/base.py | 35 ++++-- ludwig/schema/model_types/ecd.py | 12 +- ludwig/schema/model_types/gbm.py | 12 +- ludwig/schema/model_types/llm.py | 21 ++-- ludwig/schema/model_types/utils.py | 28 +++-- ludwig/schema/optimizers.py | 5 +- ludwig/schema/profiler.py | 2 +- ludwig/schema/trainer.py | 30 +++-- ludwig/schema/utils.py | 12 +- ludwig/train.py | 3 +- ludwig/trainers/trainer.py | 48 ++++--- ludwig/trainers/trainer_lightgbm.py | 41 +++--- ludwig/trainers/trainer_llm.py | 15 ++- ludwig/upload.py | 3 +- ludwig/utils/automl/field_info.py | 2 +- ludwig/utils/automl/utils.py | 17 ++- ludwig/utils/backward_compatibility.py | 80 ++++++++---- ludwig/utils/batch_size_tuner.py | 3 +- ludwig/utils/checkpoint_utils.py | 2 +- ludwig/utils/config_utils.py | 20 ++- ludwig/utils/data_utils.py | 10 +- ludwig/utils/date_utils.py | 2 +- ludwig/utils/defaults.py | 3 +- ludwig/utils/entmax/__init__.py | 20 +-- ludwig/utils/heuristics.py | 4 +- ludwig/utils/image_utils.py | 2 +- ludwig/utils/inference_utils.py | 23 +++- ludwig/utils/llm_utils.py | 14 +-- ludwig/utils/misc_utils.py | 2 +- ludwig/utils/neuropod_utils.py | 3 +- ludwig/utils/strings_utils.py | 3 +- ludwig/utils/tokenizers.py | 16 ++- ludwig/utils/trainer_utils.py | 2 +- ludwig/utils/triton_utils.py | 34 +++-- ludwig/utils/upload_utils.py | 3 +- ludwig/visualize.py | 16 ++- tests/conftest.py | 17 ++- .../scripts/run_train_aim.py | 3 +- .../scripts/run_train_comet.py | 4 +- .../scripts/run_train_wandb.py | 5 +- tests/integration_tests/test_api.py | 14 ++- tests/integration_tests/test_automl.py | 22 ++-- tests/integration_tests/test_cache_manager.py | 5 +- .../test_cached_preprocessing.py | 7 +- tests/integration_tests/test_carton.py | 10 +- .../test_class_imbalance_feature.py | 3 +- tests/integration_tests/test_cli.py | 16 ++- tests/integration_tests/test_collect.py | 6 +- .../test_config_global_defaults.py | 24 +++- .../test_custom_components.py | 18 +-- tests/integration_tests/test_date_feature.py | 19 ++- tests/integration_tests/test_dependencies.py | 3 +- tests/integration_tests/test_experiment.py | 31 +++-- tests/integration_tests/test_explain.py | 23 ++-- tests/integration_tests/test_gbm.py | 6 +- .../integration_tests/test_graph_execution.py | 12 +- tests/integration_tests/test_horovod.py | 3 +- tests/integration_tests/test_hyperopt.py | 43 +++++-- tests/integration_tests/test_hyperopt_ray.py | 9 +- .../test_hyperopt_ray_horovod.py | 12 +- .../test_input_feature_tied.py | 11 +- tests/integration_tests/test_kfold_cv.py | 13 +- tests/integration_tests/test_llm.py | 35 ++++-- .../test_missing_value_strategy.py | 20 +-- tests/integration_tests/test_mlflow.py | 6 +- .../test_model_save_and_load.py | 28 +++-- .../test_model_training_options.py | 21 ++-- tests/integration_tests/test_neuropod.py | 10 +- tests/integration_tests/test_peft.py | 6 +- .../integration_tests/test_postprocessing.py | 11 +- tests/integration_tests/test_preprocessing.py | 43 +++++-- tests/integration_tests/test_ray.py | 68 ++++++---- tests/integration_tests/test_reducers.py | 3 +- tests/integration_tests/test_regularizers.py | 16 ++- tests/integration_tests/test_remote.py | 14 ++- .../test_sequence_decoders.py | 27 ++-- .../test_sequence_features.py | 3 +- tests/integration_tests/test_server.py | 13 +- .../integration_tests/test_simple_features.py | 17 ++- .../test_timeseries_feature.py | 6 +- tests/integration_tests/test_torchscript.py | 28 +++-- tests/integration_tests/test_trainer.py | 30 +++-- tests/integration_tests/test_triton.py | 22 ++-- tests/integration_tests/test_visualization.py | 18 +-- .../test_visualization_api.py | 19 +-- tests/integration_tests/utils.py | 39 ++++-- tests/ludwig/accounting/test_used_tokens.py | 4 +- tests/ludwig/automl/test_base_config.py | 15 ++- tests/ludwig/combiners/test_combiners.py | 31 +++-- .../config_sampling/test_config_sampling.py | 6 +- .../test_validate_config_combiner.py | 3 +- .../test_validate_config_encoder.py | 14 ++- .../test_validate_config_features.py | 3 +- .../test_validate_config_hyperopt.py | 13 +- .../test_validate_config_misc.py | 71 +++++++---- .../test_validate_config_preprocessing.py | 3 +- .../test_validate_config_trainer.py | 3 +- tests/ludwig/data/test_ray_data.py | 4 +- tests/ludwig/decoders/test_image_decoder.py | 6 +- tests/ludwig/decoders/test_llm_decoders.py | 3 +- .../ludwig/decoders/test_sequence_decoder.py | 14 ++- tests/ludwig/decoders/test_sequence_tagger.py | 3 +- tests/ludwig/encoders/test_bag_encoders.py | 3 +- .../ludwig/encoders/test_category_encoders.py | 6 +- tests/ludwig/encoders/test_date_encoders.py | 3 +- tests/ludwig/encoders/test_h3_encoders.py | 3 +- tests/ludwig/encoders/test_image_encoders.py | 45 +++---- tests/ludwig/encoders/test_llm_encoders.py | 3 +- .../ludwig/encoders/test_sequence_encoders.py | 19 +-- tests/ludwig/encoders/test_set_encoders.py | 3 +- tests/ludwig/encoders/test_text_encoders.py | 18 +-- tests/ludwig/explain/test_util.py | 6 +- tests/ludwig/features/test_audio_feature.py | 3 +- tests/ludwig/features/test_binary_feature.py | 6 +- .../ludwig/features/test_category_feature.py | 3 +- tests/ludwig/features/test_date_feature.py | 3 +- tests/ludwig/features/test_image_feature.py | 19 +-- tests/ludwig/features/test_number_feature.py | 2 +- .../ludwig/features/test_sequence_features.py | 13 +- tests/ludwig/features/test_text_feature.py | 3 +- .../features/test_timeseries_feature.py | 3 +- tests/ludwig/hyperopt/test_hyperopt.py | 3 +- tests/ludwig/marshmallow/test_fields_misc.py | 3 +- .../marshmallow/test_fields_optimization.py | 3 +- .../marshmallow/test_fields_preprocessing.py | 9 +- .../marshmallow/test_marshmallow_misc.py | 4 +- .../models/test_trainable_image_layers.py | 2 +- .../models/test_training_determinism.py | 23 ++-- tests/ludwig/models/test_training_success.py | 3 +- tests/ludwig/modules/test_attention.py | 13 +- .../modules/test_convolutional_modules.py | 24 ++-- .../ludwig/modules/test_embedding_modules.py | 4 +- tests/ludwig/modules/test_encoder.py | 15 ++- tests/ludwig/modules/test_loss_modules.py | 21 ++-- tests/ludwig/modules/test_lr_scheduler.py | 6 +- tests/ludwig/modules/test_metric_modules.py | 8 +- .../ludwig/modules/test_mlp_mixer_modules.py | 2 +- tests/ludwig/modules/test_regex_freezing.py | 18 ++- tests/ludwig/modules/test_tabnet_modules.py | 6 +- .../ludwig/schema/hyperopt/test_scheduler.py | 5 +- .../schema/hyperopt/test_search_algorithm.py | 5 +- tests/ludwig/schema/test_model_config.py | 45 +++++-- tests/ludwig/utils/entmax/test_losses.py | 3 +- tests/ludwig/utils/entmax/test_topk.py | 9 +- .../utils/test_backward_compatibility.py | 36 ++++-- tests/ludwig/utils/test_config_utils.py | 17 ++- tests/ludwig/utils/test_data_utils.py | 19 ++- tests/ludwig/utils/test_dataframe_utils.py | 2 +- tests/ludwig/utils/test_defaults.py | 49 ++++++-- tests/ludwig/utils/test_fs_utils.py | 4 +- tests/ludwig/utils/test_hf_utils.py | 8 +- tests/ludwig/utils/test_image_utils.py | 20 ++- tests/ludwig/utils/test_llm_utils.py | 13 +- tests/ludwig/utils/test_model_utils.py | 10 +- tests/ludwig/utils/test_normalization.py | 3 +- tests/ludwig/utils/test_tokenizers.py | 9 +- tests/ludwig/utils/test_torch_utils.py | 11 +- tests/ludwig/utils/test_trainer_utils.py | 3 +- tests/ludwig/utils/test_upload_utils.py | 3 +- .../utils/test_version_transformation.py | 3 +- .../automl/scripts/update_golden_types.py | 4 +- .../automl/test_auto_type_inference.py | 4 +- tests/training_success/configs.py | 6 +- .../training_success/test_training_success.py | 14 +-- 312 files changed, 2572 insertions(+), 1726 deletions(-) diff --git a/ludwig/api.py b/ludwig/api.py index 05b201025c2..063853a7104 100644 --- a/ludwig/api.py +++ b/ludwig/api.py @@ -38,57 +38,75 @@ from tabulate import tabulate from ludwig.api_annotations import PublicAPI -from ludwig.backend import (Backend, initialize_backend, - provision_preprocessing_workers) +from ludwig.backend import Backend, initialize_backend, provision_preprocessing_workers from ludwig.callbacks import Callback -from ludwig.constants import (AUTO, BATCH_SIZE, EVAL_BATCH_SIZE, - FALLBACK_BATCH_SIZE, FULL, HYPEROPT, - HYPEROPT_WARNING, MIN_DATASET_SPLIT_ROWS, - MODEL_ECD, MODEL_LLM, TEST, TIMESERIES, TRAINING, - VALIDATION) +from ludwig.constants import ( + AUTO, + BATCH_SIZE, + EVAL_BATCH_SIZE, + FALLBACK_BATCH_SIZE, + FULL, + HYPEROPT, + HYPEROPT_WARNING, + MIN_DATASET_SPLIT_ROWS, + MODEL_ECD, + MODEL_LLM, + TEST, + TIMESERIES, + TRAINING, + VALIDATION, +) from ludwig.data.cache.types import CacheableDataset from ludwig.data.dataset.base import Dataset from ludwig.data.postprocessing import convert_predictions, postprocess -from ludwig.data.preprocessing import (load_metadata, - preprocess_for_prediction, - preprocess_for_training) +from ludwig.data.preprocessing import load_metadata, preprocess_for_prediction, preprocess_for_training from ludwig.datasets import load_dataset_uris -from ludwig.features.feature_registries import (update_config_with_metadata, - update_config_with_model) -from ludwig.globals import (LUDWIG_VERSION, MODEL_FILE_NAME, - MODEL_HYPERPARAMETERS_FILE_NAME, - MODEL_WEIGHTS_FILE_NAME, - TRAIN_SET_METADATA_FILE_NAME, - TRAINING_CHECKPOINTS_DIR_PATH, - set_disable_progressbar) +from ludwig.features.feature_registries import update_config_with_metadata, update_config_with_model +from ludwig.globals import ( + LUDWIG_VERSION, + MODEL_FILE_NAME, + MODEL_HYPERPARAMETERS_FILE_NAME, + MODEL_WEIGHTS_FILE_NAME, + set_disable_progressbar, + TRAIN_SET_METADATA_FILE_NAME, + TRAINING_CHECKPOINTS_DIR_PATH, +) from ludwig.models.base import BaseModel from ludwig.models.calibrator import Calibrator -from ludwig.models.inference import (InferenceModule, - save_ludwig_model_for_inference) -from ludwig.models.predictor import (calculate_overall_stats, - print_evaluation_stats, - save_evaluation_stats, - save_prediction_outputs) +from ludwig.models.inference import InferenceModule, save_ludwig_model_for_inference +from ludwig.models.predictor import ( + calculate_overall_stats, + print_evaluation_stats, + save_evaluation_stats, + save_prediction_outputs, +) from ludwig.models.registry import model_type_registry from ludwig.schema.model_config import ModelConfig from ludwig.types import ModelConfigDict, TrainingSetMetadataDict from ludwig.upload import get_upload_registry from ludwig.utils import metric_utils -from ludwig.utils.backward_compatibility import \ - upgrade_config_dict_to_latest_version +from ludwig.utils.backward_compatibility import upgrade_config_dict_to_latest_version from ludwig.utils.config_utils import get_preprocessing_params -from ludwig.utils.data_utils import (figure_data_format, generate_kfold_splits, - load_dataset, load_json, load_yaml, - save_json) +from ludwig.utils.data_utils import ( + figure_data_format, + generate_kfold_splits, + load_dataset, + load_json, + load_yaml, + save_json, +) from ludwig.utils.dataset_utils import generate_dataset_statistics from ludwig.utils.defaults import default_random_seed -from ludwig.utils.fs_utils import (makedirs, path_exists, - upload_output_directory) +from ludwig.utils.fs_utils import makedirs, path_exists, upload_output_directory from ludwig.utils.heuristics import get_auto_learning_rate -from ludwig.utils.llm_utils import TextStreamer, create_text_streamer -from ludwig.utils.misc_utils import (get_commit_hash, get_file_names, - get_from_registry, get_output_directory, - set_saved_weights_in_checkpoint_flag) +from ludwig.utils.llm_utils import create_text_streamer, TextStreamer +from ludwig.utils.misc_utils import ( + get_commit_hash, + get_file_names, + get_from_registry, + get_output_directory, + set_saved_weights_in_checkpoint_flag, +) from ludwig.utils.print_utils import print_boxed from ludwig.utils.tokenizers import HFTokenizer from ludwig.utils.torch_utils import DEVICE diff --git a/ludwig/automl/auto_tune_config.py b/ludwig/automl/auto_tune_config.py index 5b1e4746164..fc4056e7698 100644 --- a/ludwig/automl/auto_tune_config.py +++ b/ludwig/automl/auto_tune_config.py @@ -13,14 +13,21 @@ from ludwig.api import LudwigModel from ludwig.backend import initialize_backend -from ludwig.constants import (AUTO, AUTOML_DEFAULT_TEXT_ENCODER, - AUTOML_LARGE_TEXT_DATASET, - AUTOML_MAX_ROWS_PER_CHECKPOINT, - AUTOML_SMALLER_TEXT_ENCODER, - AUTOML_SMALLER_TEXT_LENGTH, - AUTOML_TEXT_ENCODER_MAX_TOKEN_LEN, HYPEROPT, - MINIMUM_BATCH_SIZE, PREPROCESSING, SPACE, TEXT, - TRAINER) +from ludwig.constants import ( + AUTO, + AUTOML_DEFAULT_TEXT_ENCODER, + AUTOML_LARGE_TEXT_DATASET, + AUTOML_MAX_ROWS_PER_CHECKPOINT, + AUTOML_SMALLER_TEXT_ENCODER, + AUTOML_SMALLER_TEXT_LENGTH, + AUTOML_TEXT_ENCODER_MAX_TOKEN_LEN, + HYPEROPT, + MINIMUM_BATCH_SIZE, + PREPROCESSING, + SPACE, + TEXT, + TRAINER, +) from ludwig.data.preprocessing import preprocess_for_training from ludwig.features.feature_registries import update_config_with_metadata from ludwig.schema.model_config import ModelConfig diff --git a/ludwig/automl/automl.py b/ludwig/automl/automl.py index 03222395e83..3b0c878c073 100644 --- a/ludwig/automl/automl.py +++ b/ludwig/automl/automl.py @@ -20,16 +20,33 @@ from ludwig.api import LudwigModel from ludwig.api_annotations import PublicAPI -from ludwig.automl.base_config import (DatasetInfo, create_default_config, - get_dataset_info, get_features_config, - get_reference_configs) +from ludwig.automl.base_config import ( + create_default_config, + DatasetInfo, + get_dataset_info, + get_features_config, + get_reference_configs, +) from ludwig.backend import Backend, initialize_backend -from ludwig.constants import (AUTO, AUTOML_DEFAULT_IMAGE_ENCODER, - AUTOML_DEFAULT_TABULAR_MODEL, - AUTOML_DEFAULT_TEXT_ENCODER, BINARY, CATEGORY, - ENCODER, HYPEROPT, IMAGE, INPUT_FEATURES, NAME, - NUMBER, OUTPUT_FEATURES, TABULAR, TEXT, TRAINER, - TYPE) +from ludwig.constants import ( + AUTO, + AUTOML_DEFAULT_IMAGE_ENCODER, + AUTOML_DEFAULT_TABULAR_MODEL, + AUTOML_DEFAULT_TEXT_ENCODER, + BINARY, + CATEGORY, + ENCODER, + HYPEROPT, + IMAGE, + INPUT_FEATURES, + NAME, + NUMBER, + OUTPUT_FEATURES, + TABULAR, + TEXT, + TRAINER, + TYPE, +) from ludwig.contrib import add_contrib_callback_args from ludwig.data.cache.types import CacheableDataset from ludwig.datasets import load_dataset_uris @@ -38,8 +55,7 @@ from ludwig.schema.model_config import ModelConfig from ludwig.types import ModelConfigDict from ludwig.utils.automl.ray_utils import _ray_init -from ludwig.utils.automl.utils import (_add_transfer_config, get_model_type, - set_output_feature_metric) +from ludwig.utils.automl.utils import _add_transfer_config, get_model_type, set_output_feature_metric from ludwig.utils.data_utils import load_dataset, use_credentials from ludwig.utils.defaults import default_random_seed from ludwig.utils.fs_utils import open_file diff --git a/ludwig/automl/base_config.py b/ludwig/automl/base_config.py index 55633024ccd..2cf4265492e 100644 --- a/ludwig/automl/base_config.py +++ b/ludwig/automl/base_config.py @@ -20,18 +20,28 @@ import numpy as np import pandas as pd import yaml -from dataclasses_json import LetterCase, dataclass_json +from dataclasses_json import dataclass_json, LetterCase from tqdm import tqdm from ludwig.api_annotations import DeveloperAPI from ludwig.backend import Backend -from ludwig.constants import (COLUMN, COMBINER, ENCODER, EXECUTOR, HYPEROPT, - INPUT_FEATURES, PREPROCESSING, SCHEDULER, - SEARCH_ALG, SPLIT, TEXT, TYPE) +from ludwig.constants import ( + COLUMN, + COMBINER, + ENCODER, + EXECUTOR, + HYPEROPT, + INPUT_FEATURES, + PREPROCESSING, + SCHEDULER, + SEARCH_ALG, + SPLIT, + TEXT, + TYPE, +) from ludwig.types import ModelConfigDict from ludwig.utils.automl.data_source import DataSource, wrap_data_source -from ludwig.utils.automl.field_info import (FieldConfig, FieldInfo, - FieldMetadata) +from ludwig.utils.automl.field_info import FieldConfig, FieldInfo, FieldMetadata from ludwig.utils.automl.type_inference import infer_type, should_exclude from ludwig.utils.data_utils import load_yaml from ludwig.utils.misc_utils import merge_dict diff --git a/ludwig/backend/_ray210_compat.py b/ludwig/backend/_ray210_compat.py index b4222000dc6..afe1b705940 100644 --- a/ludwig/backend/_ray210_compat.py +++ b/ludwig/backend/_ray210_compat.py @@ -1,7 +1,7 @@ # Implements https://github.com/ray-project/ray/pull/30598 ahead of Ray 2.2 release. import math -from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Type, Union +from typing import Any, Callable, Dict, Optional, Type, TYPE_CHECKING, Union import ray from ray.air.config import RunConfig diff --git a/ludwig/backend/base.py b/ludwig/backend/base.py index 3ef5b7fdd93..f586074af64 100644 --- a/ludwig/backend/base.py +++ b/ludwig/backend/base.py @@ -20,7 +20,7 @@ from abc import ABC, abstractmethod from concurrent.futures import ThreadPoolExecutor from contextlib import contextmanager -from typing import TYPE_CHECKING, Any, Callable, Generator +from typing import Any, Callable, Generator, TYPE_CHECKING import numpy as np import pandas as pd @@ -282,8 +282,7 @@ def create_trainer( model: BaseModel, **kwargs, ) -> BaseTrainer: # type: ignore[override] - from ludwig.trainers.registry import (get_llm_trainers_registry, - get_trainers_registry) + from ludwig.trainers.registry import get_llm_trainers_registry, get_trainers_registry trainer_cls: type if model.type() == MODEL_LLM: diff --git a/ludwig/backend/datasource.py b/ludwig/backend/datasource.py index f184f68ec2e..aa965da8463 100644 --- a/ludwig/backend/datasource.py +++ b/ludwig/backend/datasource.py @@ -1,7 +1,6 @@ import contextlib import logging -from typing import (TYPE_CHECKING, Any, Callable, Dict, Iterable, List, - Optional, Tuple, Union) +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, TYPE_CHECKING, Union import ray import urllib3 @@ -11,9 +10,14 @@ from ray.data.datasource.binary_datasource import BinaryDatasource from ray.data.datasource.datasource import Datasource, ReadTask from ray.data.datasource.file_based_datasource import ( - BaseFileMetadataProvider, BlockOutputBuffer, DefaultFileMetadataProvider, - _check_pyarrow_version, _resolve_paths_and_filesystem, - _S3FileSystemWrapper, _wrap_s3_serialization_workaround) + _check_pyarrow_version, + _resolve_paths_and_filesystem, + _S3FileSystemWrapper, + _wrap_s3_serialization_workaround, + BaseFileMetadataProvider, + BlockOutputBuffer, + DefaultFileMetadataProvider, +) from ludwig.utils.fs_utils import get_bytes_obj_from_http_path, is_http diff --git a/ludwig/backend/ray.py b/ludwig/backend/ray.py index dcf3bef937f..01915e42fc2 100644 --- a/ludwig/backend/ray.py +++ b/ludwig/backend/ray.py @@ -41,28 +41,29 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.backend.base import Backend, RemoteTrainingMixin -from ludwig.constants import (CPU_RESOURCES_PER_TRIAL, EXECUTOR, MODEL_ECD, - MODEL_LLM, NAME, PROC_COLUMN) +from ludwig.constants import CPU_RESOURCES_PER_TRIAL, EXECUTOR, MODEL_ECD, MODEL_LLM, NAME, PROC_COLUMN from ludwig.data.dataframe.base import DataFrameEngine from ludwig.data.dataframe.dask import tensor_extension_casting -from ludwig.data.dataset.ray import (RayDataset, RayDatasetManager, - RayDatasetShard) -from ludwig.distributed import (DistributedStrategy, LocalStrategy, - get_default_strategy_name, get_dist_strategy, - init_dist_strategy) +from ludwig.data.dataset.ray import RayDataset, RayDatasetManager, RayDatasetShard +from ludwig.distributed import ( + DistributedStrategy, + get_default_strategy_name, + get_dist_strategy, + init_dist_strategy, + LocalStrategy, +) from ludwig.models.base import BaseModel -from ludwig.models.predictor import (BasePredictor, get_output_columns, - get_predictor_cls) +from ludwig.models.predictor import BasePredictor, get_output_columns, get_predictor_cls from ludwig.schema.trainer import ECDTrainerConfig, FineTuneTrainerConfig -from ludwig.trainers.registry import (get_llm_ray_trainers_registry, - get_ray_trainers_registry, - register_llm_ray_trainer, - register_ray_trainer) +from ludwig.trainers.registry import ( + get_llm_ray_trainers_registry, + get_ray_trainers_registry, + register_llm_ray_trainer, + register_ray_trainer, +) from ludwig.trainers.trainer import BaseTrainer, RemoteTrainer, Trainer -from ludwig.trainers.trainer_llm import (RemoteLLMFineTuneTrainer, - RemoteLLMTrainer) -from ludwig.types import (HyperoptConfigDict, ModelConfigDict, - TrainerConfigDict, TrainingSetMetadataDict) +from ludwig.trainers.trainer_llm import RemoteLLMFineTuneTrainer, RemoteLLMTrainer +from ludwig.types import HyperoptConfigDict, ModelConfigDict, TrainerConfigDict, TrainingSetMetadataDict from ludwig.utils.batch_size_tuner import BatchSizeEvaluator from ludwig.utils.dataframe_utils import is_dask_series_or_df, set_index_name from ludwig.utils.fs_utils import get_fs_and_path diff --git a/ludwig/benchmarking/benchmark.py b/ludwig/benchmarking/benchmark.py index c1d04a4bf42..71644362e89 100644 --- a/ludwig/benchmarking/benchmark.py +++ b/ludwig/benchmarking/benchmark.py @@ -7,14 +7,19 @@ import ludwig.datasets from ludwig.api import LudwigModel -from ludwig.benchmarking.artifacts import (BenchmarkingResult, - build_benchmarking_result) +from ludwig.benchmarking.artifacts import BenchmarkingResult, build_benchmarking_result from ludwig.benchmarking.profiler_callbacks import LudwigProfilerCallback from ludwig.benchmarking.utils import ( - create_default_config, delete_hyperopt_outputs, delete_model_checkpoints, - export_artifacts, load_from_module, - populate_benchmarking_config_with_defaults, propagate_global_parameters, - save_yaml, validate_benchmarking_config) + create_default_config, + delete_hyperopt_outputs, + delete_model_checkpoints, + export_artifacts, + load_from_module, + populate_benchmarking_config_with_defaults, + propagate_global_parameters, + save_yaml, + validate_benchmarking_config, +) from ludwig.contrib import add_contrib_callback_args from ludwig.hyperopt.run import hyperopt from ludwig.utils.data_utils import load_yaml diff --git a/ludwig/benchmarking/profiler.py b/ludwig/benchmarking/profiler.py index 039be94d4bb..f0712314a67 100644 --- a/ludwig/benchmarking/profiler.py +++ b/ludwig/benchmarking/profiler.py @@ -16,10 +16,8 @@ from cpuinfo import get_cpu_info from gpustat.core import GPUStatCollection -from ludwig.benchmarking.profiler_dataclasses import ( - TorchProfilerMetrics, profiler_dataclass_to_flat_dict) -from ludwig.benchmarking.reporting import ( - get_metrics_from_system_usage_profiler, get_metrics_from_torch_profiler) +from ludwig.benchmarking.profiler_dataclasses import profiler_dataclass_to_flat_dict, TorchProfilerMetrics +from ludwig.benchmarking.reporting import get_metrics_from_system_usage_profiler, get_metrics_from_torch_profiler from ludwig.constants import LUDWIG_TAG from ludwig.globals import LUDWIG_VERSION from ludwig.utils.data_utils import save_json diff --git a/ludwig/benchmarking/reporting.py b/ludwig/benchmarking/reporting.py index 226dcc188d4..589461eafd2 100644 --- a/ludwig/benchmarking/reporting.py +++ b/ludwig/benchmarking/reporting.py @@ -6,9 +6,7 @@ from torch._C._autograd import _KinetoEvent from torch.autograd import DeviceType, profiler_util -from ludwig.benchmarking.profiler_dataclasses import (DeviceUsageMetrics, - SystemResourceMetrics, - TorchProfilerMetrics) +from ludwig.benchmarking.profiler_dataclasses import DeviceUsageMetrics, SystemResourceMetrics, TorchProfilerMetrics from ludwig.constants import LUDWIG_TAG diff --git a/ludwig/benchmarking/summarize.py b/ludwig/benchmarking/summarize.py index b944579e1e7..25d49b54af4 100644 --- a/ludwig/benchmarking/summarize.py +++ b/ludwig/benchmarking/summarize.py @@ -5,9 +5,13 @@ from typing import List, Tuple from ludwig.benchmarking.summary_dataclasses import ( - MetricsDiff, ResourceUsageDiff, build_metrics_diff, - build_resource_usage_diff, export_metrics_diff_to_csv, - export_resource_usage_diff_to_csv) + build_metrics_diff, + build_resource_usage_diff, + export_metrics_diff_to_csv, + export_resource_usage_diff_to_csv, + MetricsDiff, + ResourceUsageDiff, +) from ludwig.benchmarking.utils import download_artifacts logger = logging.getLogger() diff --git a/ludwig/benchmarking/summary_dataclasses.py b/ludwig/benchmarking/summary_dataclasses.py index f173c5a77c0..ba391b2c87b 100644 --- a/ludwig/benchmarking/summary_dataclasses.py +++ b/ludwig/benchmarking/summary_dataclasses.py @@ -8,8 +8,8 @@ import ludwig.modules.metric_modules # noqa: F401 from ludwig.benchmarking.utils import format_memory, format_time from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME -from ludwig.modules.metric_registry import (get_metric_classes, # noqa: F401 - metric_feature_type_registry) +from ludwig.modules.metric_registry import get_metric_classes # noqa: F401 +from ludwig.modules.metric_registry import metric_feature_type_registry from ludwig.types import ModelConfigDict from ludwig.utils.data_utils import load_json diff --git a/ludwig/benchmarking/utils.py b/ludwig/benchmarking/utils.py index 3707dee9960..87fbe0d2cb4 100644 --- a/ludwig/benchmarking/utils.py +++ b/ludwig/benchmarking/utils.py @@ -16,8 +16,7 @@ from ludwig.constants import BINARY, CATEGORY from ludwig.datasets import model_configs_for_dataset from ludwig.datasets.loaders.dataset_loader import DatasetLoader -from ludwig.globals import (CONFIG_YAML, MODEL_FILE_NAME, - MODEL_WEIGHTS_FILE_NAME) +from ludwig.globals import CONFIG_YAML, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME from ludwig.utils.data_utils import load_yaml from ludwig.utils.dataset_utils import get_repeatable_train_val_test_split from ludwig.utils.defaults import default_random_seed diff --git a/ludwig/callbacks.py b/ludwig/callbacks.py index 9e1e0ba1db7..3e08962e855 100644 --- a/ludwig/callbacks.py +++ b/ludwig/callbacks.py @@ -18,8 +18,7 @@ from typing import Any, Callable, Dict, List, Union from ludwig.api_annotations import PublicAPI -from ludwig.types import (HyperoptConfigDict, ModelConfigDict, - TrainingSetMetadataDict) +from ludwig.types import HyperoptConfigDict, ModelConfigDict, TrainingSetMetadataDict @PublicAPI diff --git a/ludwig/collect.py b/ludwig/collect.py index 834c18d9d70..066edcd191c 100644 --- a/ludwig/collect.py +++ b/ludwig/collect.py @@ -30,8 +30,7 @@ from ludwig.constants import FULL, TEST, TRAINING, VALIDATION from ludwig.contrib import add_contrib_callback_args from ludwig.globals import LUDWIG_VERSION -from ludwig.utils.print_utils import (get_logging_level_registry, print_boxed, - print_ludwig) +from ludwig.utils.print_utils import get_logging_level_registry, print_boxed, print_ludwig from ludwig.utils.strings_utils import make_safe_filename logger = logging.getLogger(__name__) diff --git a/ludwig/combiners/combiners.py b/ludwig/combiners/combiners.py index 0c73cfd1b6c..06cb61a873d 100644 --- a/ludwig/combiners/combiners.py +++ b/ludwig/combiners/combiners.py @@ -34,13 +34,10 @@ from ludwig.schema.combiners.base import BaseCombinerConfig from ludwig.schema.combiners.comparator import ComparatorCombinerConfig from ludwig.schema.combiners.concat import ConcatCombinerConfig -from ludwig.schema.combiners.project_aggregate import \ - ProjectAggregateCombinerConfig +from ludwig.schema.combiners.project_aggregate import ProjectAggregateCombinerConfig from ludwig.schema.combiners.sequence import SequenceCombinerConfig -from ludwig.schema.combiners.sequence_concat import \ - SequenceConcatCombinerConfig -from ludwig.schema.combiners.tab_transformer import \ - TabTransformerCombinerConfig +from ludwig.schema.combiners.sequence_concat import SequenceConcatCombinerConfig +from ludwig.schema.combiners.tab_transformer import TabTransformerCombinerConfig from ludwig.schema.combiners.tabnet import TabNetCombinerConfig from ludwig.schema.combiners.transformer import TransformerCombinerConfig from ludwig.utils.misc_utils import get_from_registry diff --git a/ludwig/config_sampling/explore_schema.py b/ludwig/config_sampling/explore_schema.py index 61b857a3782..2f6e96649b3 100644 --- a/ludwig/config_sampling/explore_schema.py +++ b/ludwig/config_sampling/explore_schema.py @@ -5,8 +5,7 @@ import pandas as pd -from ludwig.config_sampling.parameter_sampling import (ParameterBaseTypes, - handle_property_type) +from ludwig.config_sampling.parameter_sampling import handle_property_type, ParameterBaseTypes from ludwig.constants import SEQUENCE, TEXT, TIMESERIES from ludwig.data.dataset_synthesizer import build_synthetic_dataset_df from ludwig.schema.model_types.base import ModelConfig diff --git a/ludwig/config_validation/checks.py b/ludwig/config_validation/checks.py index 31cbb267c1c..9f77ad61ef9 100644 --- a/ludwig/config_validation/checks.py +++ b/ludwig/config_validation/checks.py @@ -2,18 +2,30 @@ from abc import ABC, abstractmethod from re import findall -from typing import TYPE_CHECKING, Callable +from typing import Callable, TYPE_CHECKING from transformers import AutoConfig from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (AUDIO, BINARY, CATEGORY, IMAGE, IN_MEMORY, - MIN_QUANTIZATION_BITS_FOR_MERGE_AND_UNLOAD, - MODEL_ECD, MODEL_GBM, MODEL_LLM, NUMBER, - SEQUENCE, SET, TEXT, TIMESERIES, VECTOR) +from ludwig.constants import ( + AUDIO, + BINARY, + CATEGORY, + IMAGE, + IN_MEMORY, + MIN_QUANTIZATION_BITS_FOR_MERGE_AND_UNLOAD, + MODEL_ECD, + MODEL_GBM, + MODEL_LLM, + NUMBER, + SEQUENCE, + SET, + TEXT, + TIMESERIES, + VECTOR, +) from ludwig.error import ConfigValidationError -from ludwig.utils.metric_utils import \ - get_feature_to_metric_names_map_from_feature_collection +from ludwig.utils.metric_utils import get_feature_to_metric_names_map_from_feature_collection from ludwig.utils.misc_utils import merge_dict if TYPE_CHECKING: @@ -346,8 +358,8 @@ def check_hyperopt_parameter_dicts(config: "ModelConfig") -> None: # noqa: F821 if config.hyperopt is None: return - from ludwig.schema.hyperopt.utils import (get_parameter_cls, # noqa: F401 - parameter_config_registry) + from ludwig.schema.hyperopt.utils import get_parameter_cls # noqa: F401 + from ludwig.schema.hyperopt.utils import parameter_config_registry for parameter, space in config.hyperopt.parameters.items(): # skip nested hyperopt parameters @@ -543,8 +555,7 @@ def check_llm_finetuning_adalora_config(config: "ModelConfig"): if config.adapter.type != "adalora": return - from peft.utils import \ - TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING + from peft.utils import TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING model_config = _get_llm_model_config(config.base_model) if model_config.model_type not in TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING: diff --git a/ludwig/config_validation/validation.py b/ludwig/config_validation/validation.py index 141cb01b975..cd4e0dfadea 100644 --- a/ludwig/config_validation/validation.py +++ b/ludwig/config_validation/validation.py @@ -8,13 +8,14 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.constants import BASE_MODEL, MODEL_ECD, MODEL_LLM, MODEL_TYPE from ludwig.error import ConfigValidationError + # TODO(travis): figure out why we need these imports to avoid circular import error from ludwig.schema.combiners.utils import get_combiner_jsonschema # noqa -from ludwig.schema.features.utils import (get_input_feature_jsonschema, # noqa - get_output_feature_jsonschema) +from ludwig.schema.features.utils import get_input_feature_jsonschema # noqa +from ludwig.schema.features.utils import get_output_feature_jsonschema from ludwig.schema.hyperopt import get_hyperopt_jsonschema # noqa -from ludwig.schema.trainer import (get_model_type_jsonschema, # noqa - get_trainer_jsonschema) +from ludwig.schema.trainer import get_model_type_jsonschema # noqa +from ludwig.schema.trainer import get_trainer_jsonschema from ludwig.schema.utils import unload_jsonschema_from_marshmallow_class VALIDATION_LOCK = Lock() diff --git a/ludwig/contrib.py b/ludwig/contrib.py index 50dca8ccad8..3c30bf6116f 100644 --- a/ludwig/contrib.py +++ b/ludwig/contrib.py @@ -16,7 +16,7 @@ import argparse -from ludwig.contribs import ContribLoader, contrib_registry +from ludwig.contribs import contrib_registry, ContribLoader def create_load_action(contrib_loader: ContribLoader) -> argparse.Action: diff --git a/ludwig/contribs/mlflow/__init__.py b/ludwig/contribs/mlflow/__init__.py index d4fe3e1db3c..55c51a9ac88 100644 --- a/ludwig/contribs/mlflow/__init__.py +++ b/ludwig/contribs/mlflow/__init__.py @@ -6,11 +6,9 @@ from ludwig.api_annotations import DeveloperAPI, PublicAPI from ludwig.callbacks import Callback from ludwig.constants import TRAINER -from ludwig.globals import (MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, - TRAIN_SET_METADATA_FILE_NAME) +from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, TRAIN_SET_METADATA_FILE_NAME from ludwig.types import TrainingSetMetadataDict -from ludwig.utils.data_utils import (chunk_dict, flatten_dict, save_json, - to_json_dict) +from ludwig.utils.data_utils import chunk_dict, flatten_dict, save_json, to_json_dict from ludwig.utils.package_utils import LazyLoader mlflow = LazyLoader("mlflow", globals(), "mlflow") diff --git a/ludwig/contribs/mlflow/model.py b/ludwig/contribs/mlflow/model.py index 2243014881a..16403c7afdd 100644 --- a/ludwig/contribs/mlflow/model.py +++ b/ludwig/contribs/mlflow/model.py @@ -9,7 +9,7 @@ from mlflow.models import Model from mlflow.models.model import MLMODEL_FILE_NAME from mlflow.models.signature import ModelSignature -from mlflow.models.utils import ModelInputExample, _save_example +from mlflow.models.utils import _save_example, ModelInputExample from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS from mlflow.tracking.artifact_utils import _download_artifact_from_uri from mlflow.utils.environment import _mlflow_conda_env diff --git a/ludwig/data/cache/manager.py b/ludwig/data/cache/manager.py index f345f0e3b61..bc87065a7be 100644 --- a/ludwig/data/cache/manager.py +++ b/ludwig/data/cache/manager.py @@ -3,7 +3,7 @@ from typing import Optional from ludwig.constants import CHECKSUM, META, TEST, TRAINING, VALIDATION -from ludwig.data.cache.types import CacheableDataset, alphanum +from ludwig.data.cache.types import alphanum, CacheableDataset from ludwig.data.cache.util import calculate_checksum from ludwig.data.dataset.base import DatasetManager from ludwig.utils import data_utils diff --git a/ludwig/data/cache/util.py b/ludwig/data/cache/util.py index 121ffbac44c..14b53c78396 100644 --- a/ludwig/data/cache/util.py +++ b/ludwig/data/cache/util.py @@ -1,6 +1,5 @@ import ludwig -from ludwig.constants import (DEFAULTS, INPUT_FEATURES, OUTPUT_FEATURES, - PREPROCESSING, PROC_COLUMN, TYPE) +from ludwig.constants import DEFAULTS, INPUT_FEATURES, OUTPUT_FEATURES, PREPROCESSING, PROC_COLUMN, TYPE from ludwig.data.cache.types import CacheableDataset from ludwig.types import ModelConfigDict from ludwig.utils.data_utils import hash_dict diff --git a/ludwig/data/dataframe/dask.py b/ludwig/data/dataframe/dask.py index a45aacfa84d..5f292eeabb6 100644 --- a/ludwig/data/dataframe/dask.py +++ b/ludwig/data/dataframe/dask.py @@ -30,8 +30,7 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.data.dataframe.base import DataFrameEngine -from ludwig.utils.data_utils import (get_pa_schema, get_parquet_filename, - split_by_slices) +from ludwig.utils.data_utils import get_pa_schema, get_parquet_filename, split_by_slices from ludwig.utils.dataframe_utils import set_index_name from ludwig.utils.fs_utils import get_fs_and_path diff --git a/ludwig/data/dataframe/modin.py b/ludwig/data/dataframe/modin.py index a7ccce3497d..a0057979f1c 100644 --- a/ludwig/data/dataframe/modin.py +++ b/ludwig/data/dataframe/modin.py @@ -21,8 +21,7 @@ from ludwig.data.dataframe.base import DataFrameEngine from ludwig.globals import PREDICTIONS_SHAPES_FILE_NAME -from ludwig.utils.data_utils import (get_pa_schema, load_json, save_json, - split_by_slices) +from ludwig.utils.data_utils import get_pa_schema, load_json, save_json, split_by_slices from ludwig.utils.dataframe_utils import flatten_df, unflatten_df diff --git a/ludwig/data/dataset/pandas.py b/ludwig/data/dataset/pandas.py index 52db45eda8c..70b2b4ae8a9 100644 --- a/ludwig/data/dataset/pandas.py +++ b/ludwig/data/dataset/pandas.py @@ -17,7 +17,7 @@ from __future__ import annotations import contextlib -from typing import TYPE_CHECKING, Iterable +from typing import Iterable, TYPE_CHECKING import numpy as np from pandas import DataFrame @@ -30,8 +30,7 @@ from ludwig.distributed import DistributedStrategy from ludwig.features.base_feature import BaseFeature from ludwig.utils.data_utils import DATA_TRAIN_HDF5_FP, load_hdf5, save_hdf5 -from ludwig.utils.dataframe_utils import (from_numpy_dataset, to_numpy_dataset, - to_scalar_df) +from ludwig.utils.dataframe_utils import from_numpy_dataset, to_numpy_dataset, to_scalar_df from ludwig.utils.defaults import default_random_seed from ludwig.utils.fs_utils import download_h5 from ludwig.utils.misc_utils import get_proc_features diff --git a/ludwig/data/dataset/ray.py b/ludwig/data/dataset/ray.py index 3d0323fd100..5ad083fa715 100644 --- a/ludwig/data/dataset/ray.py +++ b/ludwig/data/dataset/ray.py @@ -38,10 +38,8 @@ from ludwig.data.dataset.base import Dataset, DatasetManager from ludwig.distributed import DistributedStrategy from ludwig.features.base_feature import BaseFeature -from ludwig.types import (FeatureConfigDict, ModelConfigDict, - TrainingSetMetadataDict) -from ludwig.utils.data_utils import (DATA_TRAIN_HDF5_FP, DATA_TRAIN_PARQUET_FP, - from_numpy_dataset, to_numpy_dataset) +from ludwig.types import FeatureConfigDict, ModelConfigDict, TrainingSetMetadataDict +from ludwig.utils.data_utils import DATA_TRAIN_HDF5_FP, DATA_TRAIN_PARQUET_FP, from_numpy_dataset, to_numpy_dataset from ludwig.utils.dataframe_utils import to_scalar_df from ludwig.utils.defaults import default_random_seed from ludwig.utils.error_handling_utils import default_retry diff --git a/ludwig/data/dataset_synthesizer.py b/ludwig/data/dataset_synthesizer.py index d298da4e57b..8d32e87c190 100644 --- a/ludwig/data/dataset_synthesizer.py +++ b/ludwig/data/dataset_synthesizer.py @@ -30,11 +30,29 @@ from packaging import version from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (AUDIO, BAG, BINARY, CATEGORY, - CATEGORY_DISTRIBUTION, DATE, DECODER, ENCODER, - H3, IMAGE, INPUT_FEATURES, NAME, NUMBER, - OUTPUT_FEATURES, PREPROCESSING, SEQUENCE, SET, - TEXT, TIMESERIES, TYPE, VECTOR) +from ludwig.constants import ( + AUDIO, + BAG, + BINARY, + CATEGORY, + CATEGORY_DISTRIBUTION, + DATE, + DECODER, + ENCODER, + H3, + IMAGE, + INPUT_FEATURES, + NAME, + NUMBER, + OUTPUT_FEATURES, + PREPROCESSING, + SEQUENCE, + SET, + TEXT, + TIMESERIES, + TYPE, + VECTOR, +) from ludwig.contrib import add_contrib_callback_args from ludwig.globals import LUDWIG_VERSION from ludwig.types import ModelConfigDict diff --git a/ludwig/data/preprocessing.py b/ludwig/data/preprocessing.py index 8947ce6bc4a..3754f959bc5 100644 --- a/ludwig/data/preprocessing.py +++ b/ludwig/data/preprocessing.py @@ -24,55 +24,99 @@ import torch from ludwig.api_annotations import DeveloperAPI -from ludwig.backend import LOCAL_BACKEND, Backend -from ludwig.config_validation.preprocessing import \ - check_global_max_sequence_length_fits_prompt_template -from ludwig.constants import (BFILL, CHECKSUM, COLUMN, DEFAULTS, DROP_ROW, - ENCODER, FFILL, FILL_WITH_CONST, FILL_WITH_FALSE, - FILL_WITH_MEAN, FILL_WITH_MODE, FILL_WITH_TRUE, - FULL, META, MIN_DATASET_SPLIT_ROWS, MODEL_ECD, - NAME, NUMBER, PREPROCESSING, PROC_COLUMN, SPLIT, - SRC, TEST, TEXT, TRAINING, TYPE, VALIDATION) +from ludwig.backend import Backend, LOCAL_BACKEND +from ludwig.config_validation.preprocessing import check_global_max_sequence_length_fits_prompt_template +from ludwig.constants import ( + BFILL, + CHECKSUM, + COLUMN, + DEFAULTS, + DROP_ROW, + ENCODER, + FFILL, + FILL_WITH_CONST, + FILL_WITH_FALSE, + FILL_WITH_MEAN, + FILL_WITH_MODE, + FILL_WITH_TRUE, + FULL, + META, + MIN_DATASET_SPLIT_ROWS, + MODEL_ECD, + NAME, + NUMBER, + PREPROCESSING, + PROC_COLUMN, + SPLIT, + SRC, + TEST, + TEXT, + TRAINING, + TYPE, + VALIDATION, +) from ludwig.data.cache.manager import DatasetCache from ludwig.data.cache.types import wrap -from ludwig.data.concatenate_datasets import (concatenate_df, - concatenate_files, - concatenate_splits) +from ludwig.data.concatenate_datasets import concatenate_df, concatenate_files, concatenate_splits from ludwig.data.dataset.base import Dataset from ludwig.data.prompt import format_input_with_prompt, index_column from ludwig.data.split import get_splitter, split_dataset from ludwig.data.utils import get_input_and_output_features, set_fixed_split from ludwig.datasets import load_dataset_uris from ludwig.features.feature_registries import get_base_type_registry -from ludwig.models.embedder import (create_embed_batch_size_evaluator, - create_embed_transform_fn) +from ludwig.models.embedder import create_embed_batch_size_evaluator, create_embed_transform_fn from ludwig.schema.encoders.utils import get_encoder_cls -from ludwig.types import (FeatureConfigDict, ModelConfigDict, - PreprocessingConfigDict, TrainingSetMetadataDict) +from ludwig.types import FeatureConfigDict, ModelConfigDict, PreprocessingConfigDict, TrainingSetMetadataDict from ludwig.utils import data_utils, strings_utils from ludwig.utils.backward_compatibility import upgrade_metadata -from ludwig.utils.data_utils import (CACHEABLE_FORMATS, CSV_FORMATS, - DATA_TEST_PARQUET_FP, DATA_TRAIN_HDF5_FP, - DATA_TRAIN_PARQUET_FP, - DATA_VALIDATION_PARQUET_FP, - DATAFRAME_FORMATS, DICT_FORMATS, - EXCEL_FORMATS, FEATHER_FORMATS, - FWF_FORMATS, HDF5_FORMATS, HTML_FORMATS, - JSON_FORMATS, JSONL_FORMATS, ORC_FORMATS, - PARQUET_FORMATS, PICKLE_FORMATS, - SAS_FORMATS, SPSS_FORMATS, STATA_FORMATS, - TSV_FORMATS, figure_data_format, - get_split_path, override_in_memory_flag, - read_csv, read_excel, read_feather, - read_fwf, read_html, read_json, - read_jsonl, read_orc, read_parquet, - read_pickle, read_sas, read_spss, - read_stata, read_tsv, - sanitize_column_names) +from ludwig.utils.data_utils import ( + CACHEABLE_FORMATS, + CSV_FORMATS, + DATA_TEST_PARQUET_FP, + DATA_TRAIN_HDF5_FP, + DATA_TRAIN_PARQUET_FP, + DATA_VALIDATION_PARQUET_FP, + DATAFRAME_FORMATS, + DICT_FORMATS, + EXCEL_FORMATS, + FEATHER_FORMATS, + figure_data_format, + FWF_FORMATS, + get_split_path, + HDF5_FORMATS, + HTML_FORMATS, + JSON_FORMATS, + JSONL_FORMATS, + ORC_FORMATS, + override_in_memory_flag, + PARQUET_FORMATS, + PICKLE_FORMATS, + read_csv, + read_excel, + read_feather, + read_fwf, + read_html, + read_json, + read_jsonl, + read_orc, + read_parquet, + read_pickle, + read_sas, + read_spss, + read_stata, + read_tsv, + sanitize_column_names, + SAS_FORMATS, + SPSS_FORMATS, + STATA_FORMATS, + TSV_FORMATS, +) from ludwig.utils.dataframe_utils import is_dask_series_or_df -from ludwig.utils.defaults import (default_prediction_preprocessing_parameters, - default_random_seed, - default_training_preprocessing_parameters) +from ludwig.utils.defaults import ( + default_prediction_preprocessing_parameters, + default_random_seed, + default_training_preprocessing_parameters, +) from ludwig.utils.fs_utils import file_lock, path_exists from ludwig.utils.misc_utils import get_from_registry, merge_dict from ludwig.utils.types import DataFrame, Series diff --git a/ludwig/data/prompt.py b/ludwig/data/prompt.py index a22b462a428..205d6706fb1 100644 --- a/ludwig/data/prompt.py +++ b/ludwig/data/prompt.py @@ -2,17 +2,15 @@ import logging import os import string -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Type +from typing import Any, Dict, List, Optional, Set, Tuple, Type, TYPE_CHECKING import pandas as pd if TYPE_CHECKING: from ludwig.backend.base import Backend -from ludwig.models.retrieval import (RetrievalModel, df_checksum, - get_retrieval_model) -from ludwig.utils.fs_utils import (get_default_cache_location, makedirs, - path_exists) +from ludwig.models.retrieval import df_checksum, get_retrieval_model, RetrievalModel +from ludwig.utils.fs_utils import get_default_cache_location, makedirs, path_exists from ludwig.utils.types import DataFrame, Series logger = logging.getLogger(__name__) diff --git a/ludwig/data/split.py b/ludwig/data/split.py index 364bbe5b48a..ce8cea95c2b 100644 --- a/ludwig/data/split.py +++ b/ludwig/data/split.py @@ -15,7 +15,7 @@ import logging from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, List, Optional, Tuple +from typing import List, Optional, Tuple, TYPE_CHECKING from zlib import crc32 import numpy as np @@ -23,12 +23,15 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.backend.base import Backend -from ludwig.constants import (BINARY, CATEGORY, DATE, MIN_DATASET_SPLIT_ROWS, - SPLIT) +from ludwig.constants import BINARY, CATEGORY, DATE, MIN_DATASET_SPLIT_ROWS, SPLIT from ludwig.error import ConfigValidationError -from ludwig.schema.split import (DateTimeSplitConfig, FixedSplitConfig, - HashSplitConfig, RandomSplitConfig, - StratifySplitConfig) +from ludwig.schema.split import ( + DateTimeSplitConfig, + FixedSplitConfig, + HashSplitConfig, + RandomSplitConfig, + StratifySplitConfig, +) from ludwig.types import ModelConfigDict, PreprocessingConfigDict from ludwig.utils.data_utils import hash_dict, split_dataset_ttv from ludwig.utils.defaults import default_random_seed diff --git a/ludwig/datasets/__init__.py b/ludwig/datasets/__init__.py index 12867c1838c..dbe45169376 100644 --- a/ludwig/datasets/__init__.py +++ b/ludwig/datasets/__init__.py @@ -11,12 +11,12 @@ from ludwig.api_annotations import DeveloperAPI, PublicAPI from ludwig.backend.base import Backend -from ludwig.constants import (AUDIO, BINARY, CATEGORY, IMAGE, NUMBER, TEST, - TEXT, TRAIN, TYPE, VALIDATION) +from ludwig.constants import AUDIO, BINARY, CATEGORY, IMAGE, NUMBER, TEST, TEXT, TRAIN, TYPE, VALIDATION from ludwig.data.cache.types import CacheableDataframe from ludwig.datasets import configs from ludwig.datasets.dataset_config import DatasetConfig from ludwig.datasets.loaders.dataset_loader import DatasetLoader + # PublicAPI from ludwig.datasets.utils import model_configs_for_dataset # noqa from ludwig.globals import LUDWIG_VERSION diff --git a/ludwig/decoders/generic_decoders.py b/ludwig/decoders/generic_decoders.py index 7dfc3b7ea94..ac5e971a8fe 100644 --- a/ludwig/decoders/generic_decoders.py +++ b/ludwig/decoders/generic_decoders.py @@ -19,13 +19,10 @@ import torch from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (BINARY, CATEGORY, CATEGORY_DISTRIBUTION, LOSS, - NUMBER, SET, TIMESERIES, TYPE, VECTOR) +from ludwig.constants import BINARY, CATEGORY, CATEGORY_DISTRIBUTION, LOSS, NUMBER, SET, TIMESERIES, TYPE, VECTOR from ludwig.decoders.base import Decoder from ludwig.decoders.registry import register_decoder -from ludwig.schema.decoders.base import (ClassifierConfig, - PassthroughDecoderConfig, - ProjectorConfig, RegressorConfig) +from ludwig.schema.decoders.base import ClassifierConfig, PassthroughDecoderConfig, ProjectorConfig, RegressorConfig from ludwig.utils.torch_utils import Dense, get_activation logger = logging.getLogger(__name__) diff --git a/ludwig/decoders/image_decoders.py b/ludwig/decoders/image_decoders.py index 73e59594fa6..aad1f2dd613 100644 --- a/ludwig/decoders/image_decoders.py +++ b/ludwig/decoders/image_decoders.py @@ -19,13 +19,11 @@ import torch from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (ENCODER_OUTPUT_STATE, HIDDEN, IMAGE, LOGITS, - PREDICTIONS) +from ludwig.constants import ENCODER_OUTPUT_STATE, HIDDEN, IMAGE, LOGITS, PREDICTIONS from ludwig.decoders.base import Decoder from ludwig.decoders.registry import register_decoder from ludwig.modules.convolutional_modules import UNetUpStack -from ludwig.schema.decoders.image_decoders import (ImageDecoderConfig, - UNetDecoderConfig) +from ludwig.schema.decoders.image_decoders import ImageDecoderConfig, UNetDecoderConfig logger = logging.getLogger(__name__) diff --git a/ludwig/decoders/llm_decoders.py b/ludwig/decoders/llm_decoders.py index cac584a04b2..5763f5a5868 100644 --- a/ludwig/decoders/llm_decoders.py +++ b/ludwig/decoders/llm_decoders.py @@ -10,8 +10,7 @@ from ludwig.decoders.base import Decoder from ludwig.decoders.registry import register_decoder from ludwig.decoders.utils import extract_generated_tokens -from ludwig.schema.decoders.llm_decoders import ( - CategoryExtractorDecoderConfig, TextExtractorDecoderConfig) +from ludwig.schema.decoders.llm_decoders import CategoryExtractorDecoderConfig, TextExtractorDecoderConfig from ludwig.utils.strings_utils import get_tokenizer logger = logging.getLogger(__name__) diff --git a/ludwig/decoders/sequence_decoders.py b/ludwig/decoders/sequence_decoders.py index 01be9b7bd29..33fc8b40141 100644 --- a/ludwig/decoders/sequence_decoders.py +++ b/ludwig/decoders/sequence_decoders.py @@ -22,11 +22,9 @@ from ludwig.constants import LOGITS, PREDICTIONS, PROBABILITIES, SEQUENCE, TEXT from ludwig.decoders.base import Decoder from ludwig.decoders.registry import register_decoder -from ludwig.decoders.sequence_decoder_utils import (get_lstm_init_state, - get_rnn_init_state) +from ludwig.decoders.sequence_decoder_utils import get_lstm_init_state, get_rnn_init_state from ludwig.modules.reduction_modules import SequenceReducer -from ludwig.schema.decoders.sequence_decoders import \ - SequenceGeneratorDecoderConfig +from ludwig.schema.decoders.sequence_decoders import SequenceGeneratorDecoderConfig from ludwig.utils import strings_utils logger = logging.getLogger(__name__) diff --git a/ludwig/decoders/sequence_tagger.py b/ludwig/decoders/sequence_tagger.py index 96bbe53bb84..78fabbe9e4c 100644 --- a/ludwig/decoders/sequence_tagger.py +++ b/ludwig/decoders/sequence_tagger.py @@ -4,13 +4,11 @@ import torch from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (HIDDEN, LOGITS, PREDICTIONS, PROBABILITIES, - SEQUENCE, TEXT) +from ludwig.constants import HIDDEN, LOGITS, PREDICTIONS, PROBABILITIES, SEQUENCE, TEXT from ludwig.decoders.base import Decoder from ludwig.decoders.registry import register_decoder from ludwig.modules.attention_modules import MultiHeadSelfAttention -from ludwig.schema.decoders.sequence_decoders import \ - SequenceTaggerDecoderConfig +from ludwig.schema.decoders.sequence_decoders import SequenceTaggerDecoderConfig from ludwig.utils.torch_utils import Dense logger = logging.getLogger(__name__) diff --git a/ludwig/distributed/base.py b/ludwig/distributed/base.py index 915b41b34fb..3649c9e30c0 100644 --- a/ludwig/distributed/base.py +++ b/ludwig/distributed/base.py @@ -2,7 +2,7 @@ import contextlib from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any, Callable +from typing import Any, Callable, TYPE_CHECKING import torch from torch import nn diff --git a/ludwig/distributed/ddp.py b/ludwig/distributed/ddp.py index 6f83d7abf8b..a70d308fea3 100644 --- a/ludwig/distributed/ddp.py +++ b/ludwig/distributed/ddp.py @@ -2,8 +2,7 @@ import logging import os import socket -from typing import (TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Type, - Union) +from typing import Any, Callable, Dict, Optional, Tuple, Type, TYPE_CHECKING, Union import torch import torch.distributed as dist diff --git a/ludwig/distributed/deepspeed.py b/ludwig/distributed/deepspeed.py index 7717d6f19b1..a5677f66538 100644 --- a/ludwig/distributed/deepspeed.py +++ b/ludwig/distributed/deepspeed.py @@ -1,14 +1,12 @@ import logging import os import warnings -from typing import (TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple, - Union) +from typing import Any, Dict, List, Mapping, Optional, Tuple, TYPE_CHECKING, Union import deepspeed import deepspeed.comm import torch -from deepspeed.utils.zero_to_fp32 import \ - get_fp32_state_dict_from_zero_checkpoint +from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint from packaging import version from torch import nn from torch.optim.optimizer import Optimizer diff --git a/ludwig/distributed/fsdp.py b/ludwig/distributed/fsdp.py index d078a34f343..368e22df7fc 100644 --- a/ludwig/distributed/fsdp.py +++ b/ludwig/distributed/fsdp.py @@ -1,5 +1,5 @@ import logging -from typing import TYPE_CHECKING, Optional, Tuple +from typing import Optional, Tuple, TYPE_CHECKING import torch from torch import nn diff --git a/ludwig/distributed/horovod.py b/ludwig/distributed/horovod.py index 847d08dc083..80ea4f784cc 100644 --- a/ludwig/distributed/horovod.py +++ b/ludwig/distributed/horovod.py @@ -1,7 +1,6 @@ import contextlib import logging -from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, - Type) +from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TYPE_CHECKING import horovod.torch as hvd import ray @@ -17,8 +16,7 @@ from ludwig.constants import AUTO from ludwig.distributed.base import DistributedStrategy from ludwig.modules.optimization_modules import create_optimizer -from ludwig.utils.horovod_utils import (gather_all_tensors, - is_distributed_available) +from ludwig.utils.horovod_utils import gather_all_tensors, is_distributed_available if TYPE_CHECKING: from ludwig.schema.trainer import ECDTrainerConfig diff --git a/ludwig/encoders/category_encoders.py b/ludwig/encoders/category_encoders.py index 43a6d8ecae8..2a41ccfce5d 100644 --- a/ludwig/encoders/category_encoders.py +++ b/ludwig/encoders/category_encoders.py @@ -27,8 +27,11 @@ from ludwig.modules.embedding_modules import Embed from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.category_encoders import ( - CategoricalEmbedConfig, CategoricalOneHotEncoderConfig, - CategoricalPassthroughEncoderConfig, CategoricalSparseConfig) + CategoricalEmbedConfig, + CategoricalOneHotEncoderConfig, + CategoricalPassthroughEncoderConfig, + CategoricalSparseConfig, +) logger = logging.getLogger(__name__) diff --git a/ludwig/encoders/date_encoders.py b/ludwig/encoders/date_encoders.py index de985638416..fa5b6329454 100644 --- a/ludwig/encoders/date_encoders.py +++ b/ludwig/encoders/date_encoders.py @@ -26,8 +26,7 @@ from ludwig.modules.embedding_modules import Embed from ludwig.modules.fully_connected_modules import FCStack from ludwig.schema.encoders.base import BaseEncoderConfig -from ludwig.schema.encoders.date_encoders import (DateEmbedConfig, - DateWaveConfig) +from ludwig.schema.encoders.date_encoders import DateEmbedConfig, DateWaveConfig from ludwig.utils import torch_utils logger = logging.getLogger(__name__) diff --git a/ludwig/encoders/generic_encoders.py b/ludwig/encoders/generic_encoders.py index 41b300d506b..a92e2580463 100644 --- a/ludwig/encoders/generic_encoders.py +++ b/ludwig/encoders/generic_encoders.py @@ -19,14 +19,12 @@ import torch from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (BINARY, ENCODER_OUTPUT, NUMBER, TEXT, TIMESERIES, - VECTOR) +from ludwig.constants import BINARY, ENCODER_OUTPUT, NUMBER, TEXT, TIMESERIES, VECTOR from ludwig.encoders.base import Encoder from ludwig.encoders.registry import register_encoder from ludwig.encoders.types import EncoderOutputDict from ludwig.modules.fully_connected_modules import FCStack -from ludwig.schema.encoders.base import (BaseEncoderConfig, DenseEncoderConfig, - PassthroughEncoderConfig) +from ludwig.schema.encoders.base import BaseEncoderConfig, DenseEncoderConfig, PassthroughEncoderConfig logger = logging.getLogger(__name__) diff --git a/ludwig/encoders/h3_encoders.py b/ludwig/encoders/h3_encoders.py index 6c5965f4149..4eb766c512d 100644 --- a/ludwig/encoders/h3_encoders.py +++ b/ludwig/encoders/h3_encoders.py @@ -29,8 +29,7 @@ from ludwig.modules.recurrent_modules import RecurrentStack from ludwig.modules.reduction_modules import SequenceReducer from ludwig.schema.encoders.base import BaseEncoderConfig -from ludwig.schema.encoders.h3_encoders import (H3EmbedConfig, H3RNNConfig, - H3WeightedSumConfig) +from ludwig.schema.encoders.h3_encoders import H3EmbedConfig, H3RNNConfig, H3WeightedSumConfig from ludwig.utils import torch_utils logger = logging.getLogger(__name__) diff --git a/ludwig/encoders/image/base.py b/ludwig/encoders/image/base.py index 27a9a7e37ed..1e3bc5ca580 100644 --- a/ludwig/encoders/image/base.py +++ b/ludwig/encoders/image/base.py @@ -23,14 +23,17 @@ from ludwig.encoders.base import Encoder from ludwig.encoders.registry import register_encoder from ludwig.encoders.types import EncoderOutputDict -from ludwig.modules.convolutional_modules import (Conv2DStack, ResNet, - UNetDownStack) +from ludwig.modules.convolutional_modules import Conv2DStack, ResNet, UNetDownStack from ludwig.modules.fully_connected_modules import FCStack from ludwig.modules.mlp_mixer_modules import MLPMixer -from ludwig.schema.encoders.image.base import (ImageEncoderConfig, - MLPMixerConfig, ResNetConfig, - Stacked2DCNNConfig, - UNetEncoderConfig, ViTConfig) +from ludwig.schema.encoders.image.base import ( + ImageEncoderConfig, + MLPMixerConfig, + ResNetConfig, + Stacked2DCNNConfig, + UNetEncoderConfig, + ViTConfig, +) from ludwig.utils.torch_utils import FreezeModule logger = logging.getLogger(__name__) diff --git a/ludwig/encoders/image/torchvision.py b/ludwig/encoders/image/torchvision.py index b8cfbf7ffbf..dbebff2f6ec 100644 --- a/ludwig/encoders/image/torchvision.py +++ b/ludwig/encoders/image/torchvision.py @@ -13,17 +13,27 @@ from ludwig.encoders.types import EncoderOutputDict from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.image.torchvision import ( - TVAlexNetEncoderConfig, TVConvNeXtEncoderConfig, TVDenseNetEncoderConfig, - TVEfficientNetEncoderConfig, TVGoogLeNetEncoderConfig, - TVInceptionV3EncoderConfig, TVMaxVitEncoderConfig, TVMNASNetEncoderConfig, - TVMobileNetV2EncoderConfig, TVMobileNetV3EncoderConfig, - TVRegNetEncoderConfig, TVResNetEncoderConfig, TVResNeXtEncoderConfig, - TVShuffleNetV2EncoderConfig, TVSqueezeNetEncoderConfig, - TVSwinTransformerEncoderConfig, TVVGGEncoderConfig, TVViTEncoderConfig, - TVWideResNetEncoderConfig) -from ludwig.utils.image_utils import (TVModelVariant, - register_torchvision_model_variants, - torchvision_model_registry) + TVAlexNetEncoderConfig, + TVConvNeXtEncoderConfig, + TVDenseNetEncoderConfig, + TVEfficientNetEncoderConfig, + TVGoogLeNetEncoderConfig, + TVInceptionV3EncoderConfig, + TVMaxVitEncoderConfig, + TVMNASNetEncoderConfig, + TVMobileNetV2EncoderConfig, + TVMobileNetV3EncoderConfig, + TVRegNetEncoderConfig, + TVResNetEncoderConfig, + TVResNeXtEncoderConfig, + TVShuffleNetV2EncoderConfig, + TVSqueezeNetEncoderConfig, + TVSwinTransformerEncoderConfig, + TVVGGEncoderConfig, + TVViTEncoderConfig, + TVWideResNetEncoderConfig, +) +from ludwig.utils.image_utils import register_torchvision_model_variants, torchvision_model_registry, TVModelVariant logger = logging.getLogger(__name__) diff --git a/ludwig/encoders/sequence_encoders.py b/ludwig/encoders/sequence_encoders.py index 225cbb3f79c..b5de6177412 100644 --- a/ludwig/encoders/sequence_encoders.py +++ b/ludwig/encoders/sequence_encoders.py @@ -20,24 +20,27 @@ from torch import nn from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (AUDIO, ENCODER_OUTPUT, ENCODER_OUTPUT_STATE, - SEQUENCE, TEXT, TIMESERIES) +from ludwig.constants import AUDIO, ENCODER_OUTPUT, ENCODER_OUTPUT_STATE, SEQUENCE, TEXT, TIMESERIES from ludwig.encoders.base import Encoder -from ludwig.encoders.registry import (register_encoder, - register_sequence_encoder) +from ludwig.encoders.registry import register_encoder, register_sequence_encoder from ludwig.encoders.types import EncoderOutputDict from ludwig.modules.attention_modules import TransformerStack -from ludwig.modules.convolutional_modules import (Conv1DStack, ParallelConv1D, - ParallelConv1DStack) -from ludwig.modules.embedding_modules import (EmbedSequence, - TokenAndPositionEmbedding) +from ludwig.modules.convolutional_modules import Conv1DStack, ParallelConv1D, ParallelConv1DStack +from ludwig.modules.embedding_modules import EmbedSequence, TokenAndPositionEmbedding from ludwig.modules.fully_connected_modules import FCStack from ludwig.modules.recurrent_modules import RecurrentStack from ludwig.modules.reduction_modules import SequenceReducer from ludwig.schema.encoders.sequence_encoders import ( - ParallelCNNConfig, SequenceEmbedConfig, SequenceEncoderConfig, - SequencePassthroughConfig, StackedCNNConfig, StackedCNNRNNConfig, - StackedParallelCNNConfig, StackedRNNConfig, StackedTransformerConfig) + ParallelCNNConfig, + SequenceEmbedConfig, + SequenceEncoderConfig, + SequencePassthroughConfig, + StackedCNNConfig, + StackedCNNRNNConfig, + StackedParallelCNNConfig, + StackedRNNConfig, + StackedTransformerConfig, +) logger = logging.getLogger(__name__) diff --git a/ludwig/encoders/text_encoders.py b/ludwig/encoders/text_encoders.py index ab8189d1922..f34e500b237 100644 --- a/ludwig/encoders/text_encoders.py +++ b/ludwig/encoders/text_encoders.py @@ -16,8 +16,7 @@ import contextlib import inspect import logging -from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Optional, Type, - TypeVar, Union) +from typing import Any, Callable, Dict, List, Optional, Type, TYPE_CHECKING, TypeVar, Union import numpy as np import torch @@ -32,25 +31,33 @@ from ludwig.modules.reduction_modules import SequenceReducer from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.sequence_encoders import SequenceEncoderConfig -from ludwig.schema.encoders.text_encoders import (ALBERTConfig, - AutoTransformerConfig, - BERTConfig, CamemBERTConfig, - CTRLConfig, DebertaV2Config, - DistilBERTConfig, - ELECTRAConfig, - FlauBERTConfig, GPT2Config, - GPTConfig, LLMEncoderConfig, - LongformerConfig, MT5Config, - RoBERTaConfig, T5Config, - TfIdfEncoderConfig, - TransformerXLConfig, - XLMConfig, XLMRoBERTaConfig, - XLNetConfig) +from ludwig.schema.encoders.text_encoders import ( + ALBERTConfig, + AutoTransformerConfig, + BERTConfig, + CamemBERTConfig, + CTRLConfig, + DebertaV2Config, + DistilBERTConfig, + ELECTRAConfig, + FlauBERTConfig, + GPT2Config, + GPTConfig, + LLMEncoderConfig, + LongformerConfig, + MT5Config, + RoBERTaConfig, + T5Config, + TfIdfEncoderConfig, + TransformerXLConfig, + XLMConfig, + XLMRoBERTaConfig, + XLNetConfig, +) from ludwig.schema.llms.peft import BaseAdapterConfig from ludwig.utils.data_utils import clear_data_cache from ludwig.utils.hf_utils import load_pretrained_hf_model_with_hub_fallback -from ludwig.utils.llm_utils import (get_context_len, initialize_adapter, - load_pretrained_from_config) +from ludwig.utils.llm_utils import get_context_len, initialize_adapter, load_pretrained_from_config from ludwig.utils.tokenizers import HFTokenizer from ludwig.utils.torch_utils import FreezeModule diff --git a/ludwig/experiment.py b/ludwig/experiment.py index 0c0be84e156..7615a4f9783 100644 --- a/ludwig/experiment.py +++ b/ludwig/experiment.py @@ -21,17 +21,15 @@ import pandas as pd -from ludwig.api import LudwigModel, kfold_cross_validate +from ludwig.api import kfold_cross_validate, LudwigModel from ludwig.backend import ALL_BACKENDS, Backend, initialize_backend from ludwig.callbacks import Callback -from ludwig.constants import (CONTINUE_PROMPT, FULL, HYPEROPT, - HYPEROPT_WARNING, TEST, TRAINING, VALIDATION) +from ludwig.constants import CONTINUE_PROMPT, FULL, HYPEROPT, HYPEROPT_WARNING, TEST, TRAINING, VALIDATION from ludwig.contrib import add_contrib_callback_args from ludwig.globals import LUDWIG_VERSION from ludwig.utils.data_utils import load_config_from_str, load_yaml, save_json from ludwig.utils.defaults import default_random_seed -from ludwig.utils.print_utils import (get_logging_level_registry, print_ludwig, - query_yes_no) +from ludwig.utils.print_utils import get_logging_level_registry, print_ludwig, query_yes_no logger = logging.getLogger(__name__) diff --git a/ludwig/explain/captum.py b/ludwig/explain/captum.py index 3aa5b29d81e..643d5d2ddc3 100644 --- a/ludwig/explain/captum.py +++ b/ludwig/explain/captum.py @@ -16,9 +16,21 @@ from ludwig.api import LudwigModel from ludwig.api_annotations import PublicAPI -from ludwig.constants import (BINARY, CATEGORY, DATE, IMAGE, INPUT_FEATURES, - MINIMUM_BATCH_SIZE, NAME, NUMBER, PREPROCESSING, - SEQUENCE, SET, TEXT, UNKNOWN_SYMBOL) +from ludwig.constants import ( + BINARY, + CATEGORY, + DATE, + IMAGE, + INPUT_FEATURES, + MINIMUM_BATCH_SIZE, + NAME, + NUMBER, + PREPROCESSING, + SEQUENCE, + SET, + TEXT, + UNKNOWN_SYMBOL, +) from ludwig.data.preprocessing import preprocess_for_prediction from ludwig.explain.explainer import Explainer from ludwig.explain.explanation import ExplanationsResult diff --git a/ludwig/explain/captum_ray.py b/ludwig/explain/captum_ray.py index 639ee74305f..21d15db3815 100644 --- a/ludwig/explain/captum_ray.py +++ b/ludwig/explain/captum_ray.py @@ -9,10 +9,14 @@ from ludwig.api import LudwigModel from ludwig.api_annotations import PublicAPI -from ludwig.explain.captum import (ExplanationRunConfig, - IntegratedGradientsExplainer, get_baseline, - get_input_tensors, get_total_attribution, - retry_with_halved_batch_size) +from ludwig.explain.captum import ( + ExplanationRunConfig, + get_baseline, + get_input_tensors, + get_total_attribution, + IntegratedGradientsExplainer, + retry_with_halved_batch_size, +) from ludwig.explain.explanation import ExplanationsResult from ludwig.features.feature_utils import LudwigFeatureDict from ludwig.utils.torch_utils import get_torch_device diff --git a/ludwig/export.py b/ludwig/export.py index a8633094633..60622650a1f 100644 --- a/ludwig/export.py +++ b/ludwig/export.py @@ -23,8 +23,7 @@ from ludwig.contrib import add_contrib_callback_args from ludwig.globals import LUDWIG_VERSION from ludwig.utils.carton_utils import export_carton as utils_export_carton -from ludwig.utils.neuropod_utils import \ - export_neuropod as utils_export_neuropod +from ludwig.utils.neuropod_utils import export_neuropod as utils_export_neuropod from ludwig.utils.print_utils import get_logging_level_registry, print_ludwig from ludwig.utils.triton_utils import export_triton as utils_export_triton diff --git a/ludwig/features/audio_feature.py b/ludwig/features/audio_feature.py index fdce28a7635..a9ce7a2fd0a 100644 --- a/ludwig/features/audio_feature.py +++ b/ludwig/features/audio_feature.py @@ -22,22 +22,26 @@ import torchaudio from packaging import version -from ludwig.constants import (AUDIO, AUDIO_FEATURE_KEYS, COLUMN, NAME, - PREPROCESSING, PROC_COLUMN, SRC, TYPE) +from ludwig.constants import AUDIO, AUDIO_FEATURE_KEYS, COLUMN, NAME, PREPROCESSING, PROC_COLUMN, SRC, TYPE from ludwig.features.base_feature import BaseFeatureMixin from ludwig.features.sequence_feature import SequenceInputFeature from ludwig.schema.features.audio_feature import AudioInputFeatureConfig -from ludwig.types import (FeatureMetadataDict, ModelConfigDict, - PreprocessingConfigDict, TrainingSetMetadataDict) -from ludwig.utils.audio_utils import (calculate_mean, calculate_var, - get_default_audio, get_fbank, - get_group_delay, get_length_in_samp, - get_max_length_stft_based, - get_non_symmetric_length, - get_phase_stft_magnitude, - get_stft_magnitude, is_torch_audio_tuple, - read_audio_from_bytes_obj, - read_audio_from_path) +from ludwig.types import FeatureMetadataDict, ModelConfigDict, PreprocessingConfigDict, TrainingSetMetadataDict +from ludwig.utils.audio_utils import ( + calculate_mean, + calculate_var, + get_default_audio, + get_fbank, + get_group_delay, + get_length_in_samp, + get_max_length_stft_based, + get_non_symmetric_length, + get_phase_stft_magnitude, + get_stft_magnitude, + is_torch_audio_tuple, + read_audio_from_bytes_obj, + read_audio_from_path, +) from ludwig.utils.data_utils import get_abs_path from ludwig.utils.fs_utils import has_remote_protocol from ludwig.utils.misc_utils import set_default_value diff --git a/ludwig/features/bag_feature.py b/ludwig/features/bag_feature.py index 91c8d70e111..2f963a46fb2 100644 --- a/ludwig/features/bag_feature.py +++ b/ludwig/features/bag_feature.py @@ -24,8 +24,7 @@ from ludwig.features.feature_utils import set_str_to_idx from ludwig.features.set_feature import _SetPreprocessing from ludwig.schema.features.bag_feature import BagInputFeatureConfig -from ludwig.types import (FeatureMetadataDict, ModelConfigDict, - PreprocessingConfigDict, TrainingSetMetadataDict) +from ludwig.types import FeatureMetadataDict, ModelConfigDict, PreprocessingConfigDict, TrainingSetMetadataDict from ludwig.utils.strings_utils import create_vocabulary logger = logging.getLogger(__name__) diff --git a/ludwig/features/base_feature.py b/ludwig/features/base_feature.py index 1d210582442..94b29b558a1 100644 --- a/ludwig/features/base_feature.py +++ b/ludwig/features/base_feature.py @@ -20,23 +20,32 @@ import torch from torch import Tensor -from ludwig.constants import (ENCODER_OUTPUT, ENCODER_OUTPUT_STATE, HIDDEN, - LENGTHS, LOGITS, LOSS, PREDICTIONS, - PROBABILITIES) +from ludwig.constants import ( + ENCODER_OUTPUT, + ENCODER_OUTPUT_STATE, + HIDDEN, + LENGTHS, + LOGITS, + LOSS, + PREDICTIONS, + PROBABILITIES, +) from ludwig.decoders.registry import get_decoder_cls from ludwig.encoders.registry import get_encoder_cls from ludwig.features.feature_utils import get_input_size_with_dependencies from ludwig.modules.fully_connected_modules import FCStack from ludwig.modules.loss_modules import create_loss from ludwig.modules.metric_modules import LossMetric, LudwigMetric, MeanMetric -from ludwig.modules.metric_registry import (get_metric_classes, get_metric_cls, - get_metric_tensor_input) +from ludwig.modules.metric_registry import get_metric_classes, get_metric_cls, get_metric_tensor_input from ludwig.modules.reduction_modules import SequenceReducer -from ludwig.schema.features.base import (BaseFeatureConfig, - BaseOutputFeatureConfig) -from ludwig.types import (FeatureConfigDict, FeatureMetadataDict, - ModelConfigDict, PreprocessingConfigDict, - TrainingSetMetadataDict) +from ludwig.schema.features.base import BaseFeatureConfig, BaseOutputFeatureConfig +from ludwig.types import ( + FeatureConfigDict, + FeatureMetadataDict, + ModelConfigDict, + PreprocessingConfigDict, + TrainingSetMetadataDict, +) from ludwig.utils import output_feature_utils from ludwig.utils.calibration import CalibrationModule from ludwig.utils.torch_utils import LudwigModule diff --git a/ludwig/features/binary_feature.py b/ludwig/features/binary_feature.py index f445ef84291..ed6c8264ef1 100644 --- a/ludwig/features/binary_feature.py +++ b/ludwig/features/binary_feature.py @@ -19,21 +19,26 @@ import numpy as np import torch -from ludwig.constants import (BINARY, COLUMN, HIDDEN, LOGITS, NAME, - PREDICTIONS, PROBABILITIES, PROBABILITY, - PROC_COLUMN) +from ludwig.constants import BINARY, COLUMN, HIDDEN, LOGITS, NAME, PREDICTIONS, PROBABILITIES, PROBABILITY, PROC_COLUMN from ludwig.error import InputDataError -from ludwig.features.base_feature import (BaseFeatureMixin, InputFeature, - OutputFeature, PredictModule) -from ludwig.schema.features.binary_feature import (BinaryInputFeatureConfig, - BinaryOutputFeatureConfig) -from ludwig.types import (FeatureConfigDict, FeatureMetadataDict, - FeaturePostProcessingOutputDict, ModelConfigDict, - PreprocessingConfigDict, TrainingSetMetadataDict) +from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule +from ludwig.schema.features.binary_feature import BinaryInputFeatureConfig, BinaryOutputFeatureConfig +from ludwig.types import ( + FeatureConfigDict, + FeatureMetadataDict, + FeaturePostProcessingOutputDict, + ModelConfigDict, + PreprocessingConfigDict, + TrainingSetMetadataDict, +) from ludwig.utils import calibration, output_feature_utils, strings_utils -from ludwig.utils.eval_utils import (ConfusionMatrix, average_precision_score, - precision_recall_curve, roc_auc_score, - roc_curve) +from ludwig.utils.eval_utils import ( + average_precision_score, + ConfusionMatrix, + precision_recall_curve, + roc_auc_score, + roc_curve, +) from ludwig.utils.types import DataFrame, TorchscriptPreprocessingInput logger = logging.getLogger(__name__) diff --git a/ludwig/features/category_feature.py b/ludwig/features/category_feature.py index 9e2da1e1885..b1b912c0e2a 100644 --- a/ludwig/features/category_feature.py +++ b/ludwig/features/category_feature.py @@ -19,26 +19,40 @@ import numpy as np import torch -from ludwig.constants import (CATEGORY, CATEGORY_DISTRIBUTION, COLUMN, HIDDEN, - LOGITS, NAME, PREDICTIONS, PREPROCESSING, - PROBABILITIES, PROBABILITY, PROC_COLUMN, - PROJECTION_INPUT) +from ludwig.constants import ( + CATEGORY, + CATEGORY_DISTRIBUTION, + COLUMN, + HIDDEN, + LOGITS, + NAME, + PREDICTIONS, + PREPROCESSING, + PROBABILITIES, + PROBABILITY, + PROC_COLUMN, + PROJECTION_INPUT, +) from ludwig.error import InputDataError -from ludwig.features.base_feature import (BaseFeatureMixin, InputFeature, - OutputFeature, PredictModule) +from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule from ludwig.features.vector_feature import VectorFeatureMixin from ludwig.schema.features.category_feature import ( - CategoryDistributionOutputFeatureConfig, CategoryInputFeatureConfig, - CategoryOutputFeatureConfig) + CategoryDistributionOutputFeatureConfig, + CategoryInputFeatureConfig, + CategoryOutputFeatureConfig, +) from ludwig.schema.features.loss.loss import CORNLossConfig -from ludwig.types import (FeatureMetadataDict, FeaturePostProcessingOutputDict, - ModelConfigDict, PreprocessingConfigDict, - TrainingSetMetadataDict) +from ludwig.types import ( + FeatureMetadataDict, + FeaturePostProcessingOutputDict, + ModelConfigDict, + PreprocessingConfigDict, + TrainingSetMetadataDict, +) from ludwig.utils import calibration, output_feature_utils from ludwig.utils.eval_utils import ConfusionMatrix from ludwig.utils.math_utils import int_type, softmax -from ludwig.utils.strings_utils import (UNKNOWN_SYMBOL, - create_vocabulary_single_token) +from ludwig.utils.strings_utils import create_vocabulary_single_token, UNKNOWN_SYMBOL from ludwig.utils.types import TorchscriptPreprocessingInput logger = logging.getLogger(__name__) diff --git a/ludwig/features/date_feature.py b/ludwig/features/date_feature.py index a3f837cc062..aa6712992f5 100644 --- a/ludwig/features/date_feature.py +++ b/ludwig/features/date_feature.py @@ -23,11 +23,14 @@ from ludwig.constants import COLUMN, DATE, PROC_COLUMN from ludwig.features.base_feature import BaseFeatureMixin, InputFeature from ludwig.schema.features.date_feature import DateInputFeatureConfig -from ludwig.types import (FeatureConfigDict, FeatureMetadataDict, - ModelConfigDict, PreprocessingConfigDict, - TrainingSetMetadataDict) -from ludwig.utils.date_utils import (create_vector_from_datetime_obj, - parse_datetime) +from ludwig.types import ( + FeatureConfigDict, + FeatureMetadataDict, + ModelConfigDict, + PreprocessingConfigDict, + TrainingSetMetadataDict, +) +from ludwig.utils.date_utils import create_vector_from_datetime_obj, parse_datetime from ludwig.utils.types import DataFrame, TorchscriptPreprocessingInput logger = logging.getLogger(__name__) diff --git a/ludwig/features/feature_registries.py b/ludwig/features/feature_registries.py index e7842720bf7..2a738a2979f 100644 --- a/ludwig/features/feature_registries.py +++ b/ludwig/features/feature_registries.py @@ -12,41 +12,44 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -from typing import TYPE_CHECKING, Any, Dict +from typing import Any, Dict, TYPE_CHECKING from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (AUDIO, BAG, BINARY, CATEGORY, - CATEGORY_DISTRIBUTION, DATE, H3, IMAGE, NUMBER, - SEQUENCE, SET, TEXT, TIMESERIES, VECTOR) +from ludwig.constants import ( + AUDIO, + BAG, + BINARY, + CATEGORY, + CATEGORY_DISTRIBUTION, + DATE, + H3, + IMAGE, + NUMBER, + SEQUENCE, + SET, + TEXT, + TIMESERIES, + VECTOR, +) from ludwig.features.audio_feature import AudioFeatureMixin, AudioInputFeature from ludwig.features.bag_feature import BagFeatureMixin, BagInputFeature -from ludwig.features.binary_feature import (BinaryFeatureMixin, - BinaryInputFeature, - BinaryOutputFeature) +from ludwig.features.binary_feature import BinaryFeatureMixin, BinaryInputFeature, BinaryOutputFeature from ludwig.features.category_feature import ( - CategoryDistributionFeatureMixin, CategoryDistributionOutputFeature, - CategoryFeatureMixin, CategoryInputFeature, CategoryOutputFeature) + CategoryDistributionFeatureMixin, + CategoryDistributionOutputFeature, + CategoryFeatureMixin, + CategoryInputFeature, + CategoryOutputFeature, +) from ludwig.features.date_feature import DateFeatureMixin, DateInputFeature from ludwig.features.h3_feature import H3FeatureMixin, H3InputFeature -from ludwig.features.image_feature import (ImageFeatureMixin, - ImageInputFeature, - ImageOutputFeature) -from ludwig.features.number_feature import (NumberFeatureMixin, - NumberInputFeature, - NumberOutputFeature) -from ludwig.features.sequence_feature import (SequenceFeatureMixin, - SequenceInputFeature, - SequenceOutputFeature) -from ludwig.features.set_feature import (SetFeatureMixin, SetInputFeature, - SetOutputFeature) -from ludwig.features.text_feature import (TextFeatureMixin, TextInputFeature, - TextOutputFeature) -from ludwig.features.timeseries_feature import (TimeseriesFeatureMixin, - TimeseriesInputFeature, - TimeseriesOutputFeature) -from ludwig.features.vector_feature import (VectorFeatureMixin, - VectorInputFeature, - VectorOutputFeature) +from ludwig.features.image_feature import ImageFeatureMixin, ImageInputFeature, ImageOutputFeature +from ludwig.features.number_feature import NumberFeatureMixin, NumberInputFeature, NumberOutputFeature +from ludwig.features.sequence_feature import SequenceFeatureMixin, SequenceInputFeature, SequenceOutputFeature +from ludwig.features.set_feature import SetFeatureMixin, SetInputFeature, SetOutputFeature +from ludwig.features.text_feature import TextFeatureMixin, TextInputFeature, TextOutputFeature +from ludwig.features.timeseries_feature import TimeseriesFeatureMixin, TimeseriesInputFeature, TimeseriesOutputFeature +from ludwig.features.vector_feature import VectorFeatureMixin, VectorInputFeature, VectorOutputFeature from ludwig.utils.misc_utils import get_from_registry if TYPE_CHECKING: diff --git a/ludwig/features/feature_utils.py b/ludwig/features/feature_utils.py index 0d730b57df8..50834a89cfc 100644 --- a/ludwig/features/feature_utils.py +++ b/ludwig/features/feature_utils.py @@ -19,11 +19,9 @@ import numpy as np import torch -from ludwig.constants import (NAME, PREPROCESSING, SEQUENCE, TEXT, TIMESERIES, - TYPE) +from ludwig.constants import NAME, PREPROCESSING, SEQUENCE, TEXT, TIMESERIES, TYPE from ludwig.utils.data_utils import hash_dict -from ludwig.utils.strings_utils import (UNKNOWN_SYMBOL, - get_tokenizer_from_registry) +from ludwig.utils.strings_utils import get_tokenizer_from_registry, UNKNOWN_SYMBOL SEQUENCE_TYPES = {SEQUENCE, TEXT, TIMESERIES} FEATURE_NAME_SUFFIX = "__ludwig" diff --git a/ludwig/features/h3_feature.py b/ludwig/features/h3_feature.py index 9cf90a12b90..ab58de22042 100644 --- a/ludwig/features/h3_feature.py +++ b/ludwig/features/h3_feature.py @@ -21,8 +21,7 @@ from ludwig.constants import COLUMN, H3, PROC_COLUMN from ludwig.features.base_feature import BaseFeatureMixin, InputFeature from ludwig.schema.features.h3_feature import H3InputFeatureConfig -from ludwig.types import (FeatureMetadataDict, ModelConfigDict, - PreprocessingConfigDict, TrainingSetMetadataDict) +from ludwig.types import FeatureMetadataDict, ModelConfigDict, PreprocessingConfigDict, TrainingSetMetadataDict from ludwig.utils.h3_util import h3_to_components from ludwig.utils.types import TorchscriptPreprocessingInput diff --git a/ludwig/features/image_feature.py b/ludwig/features/image_feature.py index 756dea36517..a1488440f41 100644 --- a/ludwig/features/image_feature.py +++ b/ludwig/features/image_feature.py @@ -27,44 +27,70 @@ from torchvision.transforms import functional as F from torchvision.transforms.functional import normalize -from ludwig.constants import (CHECKSUM, COLUMN, ENCODER, HEIGHT, IMAGE, - IMAGENET1K, INFER_IMAGE_DIMENSIONS, - INFER_IMAGE_MAX_HEIGHT, INFER_IMAGE_MAX_WIDTH, - INFER_IMAGE_NUM_CLASSES, INFER_IMAGE_SAMPLE_SIZE, - LOGITS, NAME, NUM_CHANNELS, PREDICTIONS, - PREPROCESSING, PROC_COLUMN, - REQUIRES_EQUAL_DIMENSIONS, SRC, TRAINING, TYPE, - WIDTH) +from ludwig.constants import ( + CHECKSUM, + COLUMN, + ENCODER, + HEIGHT, + IMAGE, + IMAGENET1K, + INFER_IMAGE_DIMENSIONS, + INFER_IMAGE_MAX_HEIGHT, + INFER_IMAGE_MAX_WIDTH, + INFER_IMAGE_NUM_CLASSES, + INFER_IMAGE_SAMPLE_SIZE, + LOGITS, + NAME, + NUM_CHANNELS, + PREDICTIONS, + PREPROCESSING, + PROC_COLUMN, + REQUIRES_EQUAL_DIMENSIONS, + SRC, + TRAINING, + TYPE, + WIDTH, +) from ludwig.data.cache.types import wrap from ludwig.encoders.base import Encoder from ludwig.encoders.image.torchvision import TVModelVariant -from ludwig.features.base_feature import (BaseFeatureMixin, InputFeature, - OutputFeature, PredictModule) +from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule from ludwig.schema.features.augmentation.base import BaseAugmentationConfig from ludwig.schema.features.augmentation.image import ( - AutoAugmentationConfig, RandomBlurConfig, RandomBrightnessConfig, - RandomContrastConfig, RandomHorizontalFlipConfig, RandomRotateConfig, - RandomVerticalFlipConfig) -from ludwig.schema.features.image_feature import (ImageInputFeatureConfig, - ImageOutputFeatureConfig) -from ludwig.types import (FeatureMetadataDict, FeaturePostProcessingOutputDict, - ModelConfigDict, PreprocessingConfigDict, - TrainingSetMetadataDict) + AutoAugmentationConfig, + RandomBlurConfig, + RandomBrightnessConfig, + RandomContrastConfig, + RandomHorizontalFlipConfig, + RandomRotateConfig, + RandomVerticalFlipConfig, +) +from ludwig.schema.features.image_feature import ImageInputFeatureConfig, ImageOutputFeatureConfig +from ludwig.types import ( + FeatureMetadataDict, + FeaturePostProcessingOutputDict, + ModelConfigDict, + PreprocessingConfigDict, + TrainingSetMetadataDict, +) from ludwig.utils import output_feature_utils -from ludwig.utils.augmentation_utils import (get_augmentation_op, - register_augmentation_op) +from ludwig.utils.augmentation_utils import get_augmentation_op, register_augmentation_op from ludwig.utils.data_utils import get_abs_path from ludwig.utils.dataframe_utils import is_dask_series_or_df from ludwig.utils.fs_utils import has_remote_protocol, upload_h5 -from ludwig.utils.image_utils import (ResizeChannels, - get_class_mask_from_image, - get_gray_default_image, - get_image_from_class_mask, - get_unique_channels, grayscale, - num_channels_in_image, - read_image_from_bytes_obj, - read_image_from_path, resize_image, - torchvision_model_registry) +from ludwig.utils.image_utils import ( + get_class_mask_from_image, + get_gray_default_image, + get_image_from_class_mask, + get_unique_channels, + grayscale, + num_channels_in_image, + read_image_from_bytes_obj, + read_image_from_path, + resize_image, + ResizeChannels, + torchvision_model_registry, +) from ludwig.utils.misc_utils import set_default_value from ludwig.utils.types import Series, TorchscriptPreprocessingInput diff --git a/ludwig/features/number_feature.py b/ludwig/features/number_feature.py index d2d883f34c1..1b76d700570 100644 --- a/ludwig/features/number_feature.py +++ b/ludwig/features/number_feature.py @@ -23,15 +23,16 @@ import torch from torch import nn -from ludwig.constants import (COLUMN, HIDDEN, LOGITS, NAME, NUMBER, - PREDICTIONS, PROC_COLUMN) -from ludwig.features.base_feature import (BaseFeatureMixin, InputFeature, - OutputFeature, PredictModule) -from ludwig.schema.features.number_feature import (NumberInputFeatureConfig, - NumberOutputFeatureConfig) -from ludwig.types import (FeatureMetadataDict, FeaturePostProcessingOutputDict, - ModelConfigDict, PreprocessingConfigDict, - TrainingSetMetadataDict) +from ludwig.constants import COLUMN, HIDDEN, LOGITS, NAME, NUMBER, PREDICTIONS, PROC_COLUMN +from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule +from ludwig.schema.features.number_feature import NumberInputFeatureConfig, NumberOutputFeatureConfig +from ludwig.types import ( + FeatureMetadataDict, + FeaturePostProcessingOutputDict, + ModelConfigDict, + PreprocessingConfigDict, + TrainingSetMetadataDict, +) from ludwig.utils import output_feature_utils from ludwig.utils.misc_utils import get_from_registry from ludwig.utils.types import TorchscriptPreprocessingInput diff --git a/ludwig/features/sequence_feature.py b/ludwig/features/sequence_feature.py index 56c72b78153..6b62b37eb14 100644 --- a/ludwig/features/sequence_feature.py +++ b/ludwig/features/sequence_feature.py @@ -21,24 +21,37 @@ import numpy as np import torch -from ludwig.constants import (COLUMN, LAST_PREDICTIONS, LENGTHS, NAME, - PREDICTIONS, PROBABILITIES, PROBABILITY, - PROC_COLUMN, SEQUENCE) -from ludwig.features.base_feature import (BaseFeatureMixin, InputFeature, - OutputFeature, PredictModule) -from ludwig.features.feature_utils import (compute_sequence_probability, - compute_token_probabilities) -from ludwig.schema.features.sequence_feature import ( - SequenceInputFeatureConfig, SequenceOutputFeatureConfig) -from ludwig.types import (FeatureMetadataDict, FeaturePostProcessingOutputDict, - ModelConfigDict, PreprocessingConfigDict, - TrainingSetMetadataDict) +from ludwig.constants import ( + COLUMN, + LAST_PREDICTIONS, + LENGTHS, + NAME, + PREDICTIONS, + PROBABILITIES, + PROBABILITY, + PROC_COLUMN, + SEQUENCE, +) +from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule +from ludwig.features.feature_utils import compute_sequence_probability, compute_token_probabilities +from ludwig.schema.features.sequence_feature import SequenceInputFeatureConfig, SequenceOutputFeatureConfig +from ludwig.types import ( + FeatureMetadataDict, + FeaturePostProcessingOutputDict, + ModelConfigDict, + PreprocessingConfigDict, + TrainingSetMetadataDict, +) from ludwig.utils import output_feature_utils from ludwig.utils.math_utils import softmax -from ludwig.utils.strings_utils import (START_SYMBOL, STOP_SYMBOL, - UNKNOWN_SYMBOL, SpecialSymbol, - build_sequence_matrix, - create_vocabulary) +from ludwig.utils.strings_utils import ( + build_sequence_matrix, + create_vocabulary, + SpecialSymbol, + START_SYMBOL, + STOP_SYMBOL, + UNKNOWN_SYMBOL, +) from ludwig.utils.tokenizers import get_tokenizer_from_registry from ludwig.utils.types import TorchscriptPreprocessingInput diff --git a/ludwig/features/set_feature.py b/ludwig/features/set_feature.py index 3ea9806a697..80bdf910313 100644 --- a/ludwig/features/set_feature.py +++ b/ludwig/features/set_feature.py @@ -19,20 +19,20 @@ import numpy as np import torch -from ludwig.constants import (COLUMN, HIDDEN, LOGITS, NAME, PREDICTIONS, - PROBABILITIES, PROC_COLUMN, SET) -from ludwig.features.base_feature import (BaseFeatureMixin, InputFeature, - OutputFeature, PredictModule) +from ludwig.constants import COLUMN, HIDDEN, LOGITS, NAME, PREDICTIONS, PROBABILITIES, PROC_COLUMN, SET +from ludwig.features.base_feature import BaseFeatureMixin, InputFeature, OutputFeature, PredictModule from ludwig.features.feature_utils import set_str_to_idx -from ludwig.schema.features.set_feature import (SetInputFeatureConfig, - SetOutputFeatureConfig) -from ludwig.types import (FeatureMetadataDict, FeaturePostProcessingOutputDict, - ModelConfigDict, PreprocessingConfigDict, - TrainingSetMetadataDict) +from ludwig.schema.features.set_feature import SetInputFeatureConfig, SetOutputFeatureConfig +from ludwig.types import ( + FeatureMetadataDict, + FeaturePostProcessingOutputDict, + ModelConfigDict, + PreprocessingConfigDict, + TrainingSetMetadataDict, +) from ludwig.utils import output_feature_utils -from ludwig.utils.strings_utils import UNKNOWN_SYMBOL, create_vocabulary -from ludwig.utils.tokenizers import (TORCHSCRIPT_COMPATIBLE_TOKENIZERS, - get_tokenizer_from_registry) +from ludwig.utils.strings_utils import create_vocabulary, UNKNOWN_SYMBOL +from ludwig.utils.tokenizers import get_tokenizer_from_registry, TORCHSCRIPT_COMPATIBLE_TOKENIZERS from ludwig.utils.types import TorchscriptPreprocessingInput logger = logging.getLogger(__name__) diff --git a/ludwig/features/text_feature.py b/ludwig/features/text_feature.py index 5b3e3238e5c..1056ae820c1 100644 --- a/ludwig/features/text_feature.py +++ b/ludwig/features/text_feature.py @@ -22,26 +22,40 @@ from torch import Tensor from transformers import PreTrainedTokenizer -from ludwig.constants import (COLUMN, IGNORE_INDEX_TOKEN_ID, LAST_PREDICTIONS, - LENGTHS, NAME, PREDICTIONS, PREPROCESSING, - PROBABILITIES, PROBABILITY, PROC_COLUMN, - RESPONSE, TEXT) +from ludwig.constants import ( + COLUMN, + IGNORE_INDEX_TOKEN_ID, + LAST_PREDICTIONS, + LENGTHS, + NAME, + PREDICTIONS, + PREPROCESSING, + PROBABILITIES, + PROBABILITY, + PROC_COLUMN, + RESPONSE, + TEXT, +) from ludwig.features.base_feature import BaseFeatureMixin, OutputFeature -from ludwig.features.feature_utils import (compute_sequence_probability, - compute_token_probabilities) -from ludwig.features.sequence_feature import (SequenceInputFeature, - SequenceOutputFeature, - _SequencePostprocessing, - _SequencePreprocessing) +from ludwig.features.feature_utils import compute_sequence_probability, compute_token_probabilities +from ludwig.features.sequence_feature import ( + _SequencePostprocessing, + _SequencePreprocessing, + SequenceInputFeature, + SequenceOutputFeature, +) from ludwig.modules.metric_registry import get_metric_tensor_input -from ludwig.schema.features.text_feature import (TextInputFeatureConfig, - TextOutputFeatureConfig) -from ludwig.types import (FeatureMetadataDict, ModelConfigDict, - PreprocessingConfigDict, TrainingSetMetadataDict) +from ludwig.schema.features.text_feature import TextInputFeatureConfig, TextOutputFeatureConfig +from ludwig.types import FeatureMetadataDict, ModelConfigDict, PreprocessingConfigDict, TrainingSetMetadataDict from ludwig.utils.math_utils import softmax -from ludwig.utils.strings_utils import (UNKNOWN_SYMBOL, SpecialSymbol, - Vocabulary, build_sequence_matrix, - create_vocabulary, get_tokenizer) +from ludwig.utils.strings_utils import ( + build_sequence_matrix, + create_vocabulary, + get_tokenizer, + SpecialSymbol, + UNKNOWN_SYMBOL, + Vocabulary, +) logger = logging.getLogger(__name__) diff --git a/ludwig/features/timeseries_feature.py b/ludwig/features/timeseries_feature.py index 64b0aaf4342..45708c95dea 100644 --- a/ludwig/features/timeseries_feature.py +++ b/ludwig/features/timeseries_feature.py @@ -14,24 +14,18 @@ # limitations under the License. # ============================================================================== import logging -from typing import TYPE_CHECKING, Dict, List, Union +from typing import Dict, List, TYPE_CHECKING, Union import numpy as np import torch -from ludwig.constants import (COLUMN, HIDDEN, LOGITS, NAME, PREDICTIONS, - PROC_COLUMN, TIMESERIES) -from ludwig.features.base_feature import (BaseFeatureMixin, OutputFeature, - PredictModule) +from ludwig.constants import COLUMN, HIDDEN, LOGITS, NAME, PREDICTIONS, PROC_COLUMN, TIMESERIES +from ludwig.features.base_feature import BaseFeatureMixin, OutputFeature, PredictModule from ludwig.features.sequence_feature import SequenceInputFeature -from ludwig.features.vector_feature import (_VectorPostprocessing, - _VectorPredict) -from ludwig.schema.features.timeseries_feature import ( - TimeseriesInputFeatureConfig, TimeseriesOutputFeatureConfig) -from ludwig.types import (FeatureMetadataDict, ModelConfigDict, - PreprocessingConfigDict, TrainingSetMetadataDict) -from ludwig.utils.tokenizers import (TORCHSCRIPT_COMPATIBLE_TOKENIZERS, - get_tokenizer_from_registry) +from ludwig.features.vector_feature import _VectorPostprocessing, _VectorPredict +from ludwig.schema.features.timeseries_feature import TimeseriesInputFeatureConfig, TimeseriesOutputFeatureConfig +from ludwig.types import FeatureMetadataDict, ModelConfigDict, PreprocessingConfigDict, TrainingSetMetadataDict +from ludwig.utils.tokenizers import get_tokenizer_from_registry, TORCHSCRIPT_COMPATIBLE_TOKENIZERS from ludwig.utils.types import Series, TorchscriptPreprocessingInput if TYPE_CHECKING: diff --git a/ludwig/features/vector_feature.py b/ludwig/features/vector_feature.py index 34fa2d1d6a2..06ad7ef0fc8 100644 --- a/ludwig/features/vector_feature.py +++ b/ludwig/features/vector_feature.py @@ -19,15 +19,16 @@ import numpy as np import torch -from ludwig.constants import (COLUMN, HIDDEN, LOGITS, NAME, PREDICTIONS, - PROC_COLUMN, VECTOR) -from ludwig.features.base_feature import (InputFeature, OutputFeature, - PredictModule) -from ludwig.schema.features.vector_feature import (VectorInputFeatureConfig, - VectorOutputFeatureConfig) -from ludwig.types import (FeatureMetadataDict, FeaturePostProcessingOutputDict, - ModelConfigDict, PreprocessingConfigDict, - TrainingSetMetadataDict) +from ludwig.constants import COLUMN, HIDDEN, LOGITS, NAME, PREDICTIONS, PROC_COLUMN, VECTOR +from ludwig.features.base_feature import InputFeature, OutputFeature, PredictModule +from ludwig.schema.features.vector_feature import VectorInputFeatureConfig, VectorOutputFeatureConfig +from ludwig.types import ( + FeatureMetadataDict, + FeaturePostProcessingOutputDict, + ModelConfigDict, + PreprocessingConfigDict, + TrainingSetMetadataDict, +) from ludwig.utils import output_feature_utils from ludwig.utils.types import TorchscriptPreprocessingInput diff --git a/ludwig/hyperopt/execution.py b/ludwig/hyperopt/execution.py index a427350fd45..c1ad226a116 100644 --- a/ludwig/hyperopt/execution.py +++ b/ludwig/hyperopt/execution.py @@ -21,11 +21,9 @@ from ray import tune from ray.air import Checkpoint from ray.air.config import CheckpointConfig, FailureConfig, RunConfig -from ray.tune import (ExperimentAnalysis, Stopper, TuneConfig, - register_trainable) +from ray.tune import ExperimentAnalysis, register_trainable, Stopper, TuneConfig from ray.tune.execution.placement_groups import PlacementGroupFactory -from ray.tune.schedulers.resource_changing_scheduler import ( - DistributeResources, ResourceChangingScheduler) +from ray.tune.schedulers.resource_changing_scheduler import DistributeResources, ResourceChangingScheduler from ray.tune.search import BasicVariantGenerator, ConcurrencyLimiter from ray.tune.tuner import Tuner from ray.tune.utils import wait_for_gpu @@ -33,11 +31,10 @@ from ludwig.api import LudwigModel from ludwig.api_annotations import PublicAPI -from ludwig.backend import RAY, initialize_backend +from ludwig.backend import initialize_backend, RAY from ludwig.backend.ray import initialize_ray from ludwig.callbacks import Callback -from ludwig.constants import (MAXIMIZE, TEST, TRAINER, TRAINING, TYPE, - VALIDATION) +from ludwig.constants import MAXIMIZE, TEST, TRAINER, TRAINING, TYPE, VALIDATION from ludwig.globals import MODEL_FILE_NAME from ludwig.hyperopt.results import HyperoptResults, TrialResults from ludwig.hyperopt.syncer import RemoteSyncer @@ -47,7 +44,7 @@ from ludwig.schema.model_config import ModelConfig from ludwig.types import ModelConfigDict from ludwig.utils import fs_utils, metric_utils -from ludwig.utils.data_utils import NumpyEncoder, hash_dict, use_credentials +from ludwig.utils.data_utils import hash_dict, NumpyEncoder, use_credentials from ludwig.utils.defaults import default_random_seed from ludwig.utils.error_handling_utils import default_retry from ludwig.utils.fs_utils import has_remote_protocol, safe_move_file @@ -806,8 +803,7 @@ def run_experiment_trial(config, local_hyperopt_dict, checkpoint_dir=None): self.sync_config = tune.SyncConfig(upload_dir=output_directory, syncer=self.sync_client) output_directory = None elif self.kubernetes_namespace: - from ray.tune.integration.kubernetes import ( - KubernetesSyncClient, NamespacedKubernetesSyncer) + from ray.tune.integration.kubernetes import KubernetesSyncClient, NamespacedKubernetesSyncer self.sync_config = tune.SyncConfig(sync_to_driver=NamespacedKubernetesSyncer(self.kubernetes_namespace)) self.sync_client = KubernetesSyncClient(self.kubernetes_namespace) diff --git a/ludwig/hyperopt/run.py b/ludwig/hyperopt/run.py index a5e76813527..3c53e089b8b 100644 --- a/ludwig/hyperopt/run.py +++ b/ludwig/hyperopt/run.py @@ -9,22 +9,39 @@ from tabulate import tabulate from ludwig.api import LudwigModel -from ludwig.backend import Backend, LocalBackend, initialize_backend +from ludwig.backend import Backend, initialize_backend, LocalBackend from ludwig.callbacks import Callback -from ludwig.constants import (AUTO, COMBINED, EXECUTOR, GOAL, HYPEROPT, LOSS, - MAX_CONCURRENT_TRIALS, METRIC, NAME, - OUTPUT_FEATURES, PARAMETERS, PREPROCESSING, - SEARCH_ALG, SPLIT, TEST, TRAINING, TYPE, - VALIDATION) +from ludwig.constants import ( + AUTO, + COMBINED, + EXECUTOR, + GOAL, + HYPEROPT, + LOSS, + MAX_CONCURRENT_TRIALS, + METRIC, + NAME, + OUTPUT_FEATURES, + PARAMETERS, + PREPROCESSING, + SEARCH_ALG, + SPLIT, + TEST, + TRAINING, + TYPE, + VALIDATION, +) from ludwig.data.split import get_splitter from ludwig.hyperopt.results import HyperoptResults -from ludwig.hyperopt.utils import (log_warning_if_all_grid_type_parameters, - print_hyperopt_results, save_hyperopt_stats, - should_tune_preprocessing, - update_hyperopt_params_with_defaults) +from ludwig.hyperopt.utils import ( + log_warning_if_all_grid_type_parameters, + print_hyperopt_results, + save_hyperopt_stats, + should_tune_preprocessing, + update_hyperopt_params_with_defaults, +) from ludwig.schema.model_config import ModelConfig -from ludwig.utils.backward_compatibility import \ - upgrade_config_dict_to_latest_version +from ludwig.utils.backward_compatibility import upgrade_config_dict_to_latest_version from ludwig.utils.dataset_utils import generate_dataset_statistics from ludwig.utils.defaults import default_random_seed from ludwig.utils.fs_utils import makedirs, open_file @@ -179,8 +196,7 @@ def hyperopt( :return: (List[dict]) List of results for each trial, ordered by descending performance on the target metric. """ - from ludwig.hyperopt.execution import (RayTuneExecutor, - get_build_hyperopt_executor) + from ludwig.hyperopt.execution import get_build_hyperopt_executor, RayTuneExecutor # check if config is a path or a dict if isinstance(config, str): # assume path diff --git a/ludwig/hyperopt/utils.py b/ludwig/hyperopt/utils.py index c894271a108..08c3e8f54b4 100644 --- a/ludwig/hyperopt/utils.py +++ b/ludwig/hyperopt/utils.py @@ -7,19 +7,40 @@ from typing import Any, Dict from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (AUTO, COMBINED, EXECUTOR, GOAL, GRID_SEARCH, - HYPEROPT, INPUT_FEATURES, LOSS, - MAX_CONCURRENT_TRIALS, METRIC, MINIMIZE, NAME, - NUM_SAMPLES, OUTPUT_FEATURES, PARAMETERS, - PREPROCESSING, RAY, SPACE, SPLIT, TYPE, - VALIDATION) +from ludwig.constants import ( + AUTO, + COMBINED, + EXECUTOR, + GOAL, + GRID_SEARCH, + HYPEROPT, + INPUT_FEATURES, + LOSS, + MAX_CONCURRENT_TRIALS, + METRIC, + MINIMIZE, + NAME, + NUM_SAMPLES, + OUTPUT_FEATURES, + PARAMETERS, + PREPROCESSING, + RAY, + SPACE, + SPLIT, + TYPE, + VALIDATION, +) from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME from ludwig.hyperopt.results import HyperoptResults, TrialResults from ludwig.types import HyperoptConfigDict, ModelConfigDict from ludwig.utils.data_utils import save_json -from ludwig.utils.misc_utils import (get_class_attributes, get_from_registry, - merge_dict, set_default_value, - set_default_values) +from ludwig.utils.misc_utils import ( + get_class_attributes, + get_from_registry, + merge_dict, + set_default_value, + set_default_values, +) from ludwig.utils.print_utils import print_boxed logger = logging.getLogger(__name__) diff --git a/ludwig/model_export/onnx_exporter.py b/ludwig/model_export/onnx_exporter.py index 767f6b5d99e..1659a2c197a 100644 --- a/ludwig/model_export/onnx_exporter.py +++ b/ludwig/model_export/onnx_exporter.py @@ -4,8 +4,7 @@ import torch from ludwig.api import LudwigModel -from ludwig.model_export.base_model_exporter import (BaseModelExporter, - LudwigTorchWrapper) +from ludwig.model_export.base_model_exporter import BaseModelExporter, LudwigTorchWrapper # Copyright (c) 2023 Predibase, Inc., 2019 Uber Technologies, Inc. diff --git a/ludwig/models/base.py b/ludwig/models/base.py index 989121b904c..c173af824b6 100644 --- a/ludwig/models/base.py +++ b/ludwig/models/base.py @@ -11,17 +11,12 @@ from ludwig.combiners.combiners import Combiner from ludwig.constants import COMBINED, LOSS, NAME from ludwig.encoders.base import Encoder -from ludwig.features.base_feature import (InputFeature, ModuleWrapper, - OutputFeature, - create_passthrough_input_feature) -from ludwig.features.feature_registries import (get_input_type_registry, - get_output_type_registry) +from ludwig.features.base_feature import create_passthrough_input_feature, InputFeature, ModuleWrapper, OutputFeature +from ludwig.features.feature_registries import get_input_type_registry, get_output_type_registry from ludwig.features.feature_utils import LudwigFeatureDict from ludwig.modules.metric_modules import LudwigMetric from ludwig.modules.training_hooks import TrainingHook -from ludwig.schema.features.base import (BaseInputFeatureConfig, - BaseOutputFeatureConfig, - FeatureCollection) +from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig, FeatureCollection from ludwig.utils.algorithms_utils import topological_sort_feature_dependencies from ludwig.utils.metric_utils import get_scalar_from_ludwig_metric from ludwig.utils.misc_utils import get_from_registry diff --git a/ludwig/models/embedder.py b/ludwig/models/embedder.py index aa4c48c57a4..a5a152e7c93 100644 --- a/ludwig/models/embedder.py +++ b/ludwig/models/embedder.py @@ -9,14 +9,13 @@ from ludwig.features.feature_registries import get_input_type_registry from ludwig.features.feature_utils import LudwigFeatureDict from ludwig.models.base import BaseModel -from ludwig.schema.features.base import (BaseInputFeatureConfig, - FeatureCollection) +from ludwig.schema.features.base import BaseInputFeatureConfig, FeatureCollection from ludwig.schema.features.utils import get_input_feature_cls from ludwig.types import FeatureConfigDict, TrainingSetMetadataDict from ludwig.utils.batch_size_tuner import BatchSizeEvaluator from ludwig.utils.dataframe_utils import from_numpy_dataset from ludwig.utils.misc_utils import get_from_registry -from ludwig.utils.torch_utils import LudwigModule, get_torch_device +from ludwig.utils.torch_utils import get_torch_device, LudwigModule @DeveloperAPI diff --git a/ludwig/models/gbm.py b/ludwig/models/gbm.py index adf5ffd149b..b249fce2cb6 100644 --- a/ludwig/models/gbm.py +++ b/ludwig/models/gbm.py @@ -12,8 +12,7 @@ from ludwig.features.base_feature import OutputFeature from ludwig.globals import MODEL_WEIGHTS_FILE_NAME from ludwig.models.base import BaseModel -from ludwig.schema.features.base import (BaseOutputFeatureConfig, - FeatureCollection) +from ludwig.schema.features.base import BaseOutputFeatureConfig, FeatureCollection from ludwig.schema.model_config import ModelConfig from ludwig.utils import output_feature_utils from ludwig.utils.fs_utils import path_exists diff --git a/ludwig/models/inference.py b/ludwig/models/inference.py index 5e279753292..82890572979 100644 --- a/ludwig/models/inference.py +++ b/ludwig/models/inference.py @@ -1,6 +1,6 @@ import logging import os -from typing import TYPE_CHECKING, Any, Dict, Optional, Union +from typing import Any, Dict, Optional, TYPE_CHECKING, Union import pandas as pd import torch @@ -10,18 +10,14 @@ from ludwig.data.postprocessing import convert_dict_to_df from ludwig.data.preprocessing import load_metadata from ludwig.features.feature_registries import get_input_type_registry -from ludwig.features.feature_utils import (get_module_dict_key_from_name, - get_name_from_module_dict_key) -from ludwig.globals import (MODEL_HYPERPARAMETERS_FILE_NAME, - TRAIN_SET_METADATA_FILE_NAME) +from ludwig.features.feature_utils import get_module_dict_key_from_name, get_name_from_module_dict_key +from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME, TRAIN_SET_METADATA_FILE_NAME from ludwig.types import ModelConfigDict, TrainingSetMetadataDict from ludwig.utils import output_feature_utils from ludwig.utils.data_utils import load_json, save_json -from ludwig.utils.inference_utils import ( - get_filename_from_stage, to_inference_module_input_from_dataframe) +from ludwig.utils.inference_utils import get_filename_from_stage, to_inference_module_input_from_dataframe from ludwig.utils.misc_utils import get_from_registry -from ludwig.utils.output_feature_utils import ( - get_feature_name_from_concat_name, get_tensor_name_from_concat_name) +from ludwig.utils.output_feature_utils import get_feature_name_from_concat_name, get_tensor_name_from_concat_name from ludwig.utils.torch_utils import DEVICE from ludwig.utils.types import TorchDevice, TorchscriptPreprocessingInput diff --git a/ludwig/models/llm.py b/ludwig/models/llm.py index d7105955278..3a970cd2013 100644 --- a/ludwig/models/llm.py +++ b/ludwig/models/llm.py @@ -8,26 +8,29 @@ from transformers import AutoConfig, GenerationConfig from ludwig.accounting.used_tokens import get_used_tokens_for_llm -from ludwig.constants import (IGNORE_INDEX_TOKEN_ID, LOGITS, MODEL_LLM, - PREDICTIONS, TEXT, USED_TOKENS) +from ludwig.constants import IGNORE_INDEX_TOKEN_ID, LOGITS, MODEL_LLM, PREDICTIONS, TEXT, USED_TOKENS from ludwig.features.base_feature import ModuleWrapper, OutputFeature from ludwig.features.feature_utils import LudwigFeatureDict from ludwig.features.text_feature import TextOutputFeature from ludwig.globals import MODEL_WEIGHTS_FILE_NAME from ludwig.models.base import BaseModel from ludwig.modules.training_hooks import NEFTuneHook -from ludwig.schema.features.base import (BaseOutputFeatureConfig, - FeatureCollection) +from ludwig.schema.features.base import BaseOutputFeatureConfig, FeatureCollection from ludwig.schema.model_types.llm import LLMModelConfig from ludwig.utils.augmentation_utils import AugmentationPipelines from ludwig.utils.data_utils import clear_data_cache -from ludwig.utils.llm_quantization_utils import \ - convert_quantized_linear_to_linear +from ludwig.utils.llm_quantization_utils import convert_quantized_linear_to_linear from ludwig.utils.llm_utils import ( - add_left_padding, generate_merged_ids, get_context_len, + add_left_padding, + generate_merged_ids, + get_context_len, get_realigned_target_and_prediction_tensors_for_inference, - initialize_adapter, load_pretrained_from_config, - pad_target_tensor_for_fine_tuning, remove_left_padding, to_device) + initialize_adapter, + load_pretrained_from_config, + pad_target_tensor_for_fine_tuning, + remove_left_padding, + to_device, +) from ludwig.utils.logging_utils import log_once from ludwig.utils.output_feature_utils import set_output_feature_tensor from ludwig.utils.tokenizers import HFTokenizer diff --git a/ludwig/models/predictor.py b/ludwig/models/predictor.py index 01b10ae7106..977fcb0a263 100644 --- a/ludwig/models/predictor.py +++ b/ludwig/models/predictor.py @@ -2,7 +2,7 @@ import os import sys from abc import ABC, abstractmethod -from collections import OrderedDict, defaultdict +from collections import defaultdict, OrderedDict from pprint import pformat from typing import Dict, List, Optional, Type @@ -12,13 +12,11 @@ import torch from torch import nn -from ludwig.constants import (COMBINED, LAST_HIDDEN, LOGITS, MODEL_ECD, - MODEL_GBM, MODEL_LLM) +from ludwig.constants import COMBINED, LAST_HIDDEN, LOGITS, MODEL_ECD, MODEL_GBM, MODEL_LLM from ludwig.data.dataset.base import Dataset from ludwig.data.utils import convert_to_dict from ludwig.distributed.base import DistributedStrategy, LocalStrategy -from ludwig.globals import (PREDICTIONS_PARQUET_FILE_NAME, - TEST_STATISTICS_FILE_NAME, is_progressbar_disabled) +from ludwig.globals import is_progressbar_disabled, PREDICTIONS_PARQUET_FILE_NAME, TEST_STATISTICS_FILE_NAME from ludwig.models.base import BaseModel from ludwig.progress_bar import LudwigProgressBar from ludwig.utils.data_utils import save_csv, save_json diff --git a/ludwig/models/retrieval.py b/ludwig/models/retrieval.py index fc499baea62..553fe85f3a6 100644 --- a/ludwig/models/retrieval.py +++ b/ludwig/models/retrieval.py @@ -2,8 +2,7 @@ import json import os from abc import ABC, abstractmethod -from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Optional, Type, - Union) +from typing import Any, Callable, Dict, List, Optional, Type, TYPE_CHECKING, Union import numpy as np import pandas as pd diff --git a/ludwig/modules/attention_modules.py b/ludwig/modules/attention_modules.py index c599aff871b..064997d093d 100644 --- a/ludwig/modules/attention_modules.py +++ b/ludwig/modules/attention_modules.py @@ -18,7 +18,7 @@ from torch import nn from torch.nn import functional as F -from ludwig.utils.torch_utils import LudwigModule, get_activation +from ludwig.utils.torch_utils import get_activation, LudwigModule logger = logging.getLogger(__name__) diff --git a/ludwig/modules/convolutional_modules.py b/ludwig/modules/convolutional_modules.py index 7d3cbc0c378..198d9d1eae9 100644 --- a/ludwig/modules/convolutional_modules.py +++ b/ludwig/modules/convolutional_modules.py @@ -20,7 +20,7 @@ import torch.nn as nn from ludwig.utils.image_utils import get_img_output_shape -from ludwig.utils.torch_utils import LudwigModule, get_activation +from ludwig.utils.torch_utils import get_activation, LudwigModule logger = logging.getLogger(__name__) diff --git a/ludwig/modules/embedding_modules.py b/ludwig/modules/embedding_modules.py index 169e074f55b..d53769f242a 100644 --- a/ludwig/modules/embedding_modules.py +++ b/ludwig/modules/embedding_modules.py @@ -21,7 +21,7 @@ from ludwig.constants import TYPE from ludwig.modules.initializer_modules import get_initializer from ludwig.utils.data_utils import load_pretrained_embeddings -from ludwig.utils.torch_utils import LudwigModule, get_torch_device +from ludwig.utils.torch_utils import get_torch_device, LudwigModule logger = logging.getLogger(__name__) diff --git a/ludwig/modules/fully_connected_modules.py b/ludwig/modules/fully_connected_modules.py index 92c8888f29f..61b0f23ade6 100644 --- a/ludwig/modules/fully_connected_modules.py +++ b/ludwig/modules/fully_connected_modules.py @@ -20,8 +20,7 @@ from torch.nn import Dropout, Linear, ModuleList from ludwig.modules.normalization_modules import create_norm_layer -from ludwig.utils.torch_utils import (LudwigModule, activations, - initializer_registry) +from ludwig.utils.torch_utils import activations, initializer_registry, LudwigModule logger = logging.getLogger(__name__) diff --git a/ludwig/modules/loss_modules.py b/ludwig/modules/loss_modules.py index 26c704dee0e..a239def352d 100644 --- a/ludwig/modules/loss_modules.py +++ b/ludwig/modules/loss_modules.py @@ -17,7 +17,7 @@ from typing import Type import torch -from torch import Tensor, nn +from torch import nn, Tensor from torch.nn import HuberLoss as _HuberLoss from torch.nn import L1Loss from torch.nn import MSELoss as _MSELoss @@ -27,11 +27,20 @@ from ludwig.constants import LOGITS from ludwig.modules.loss_implementations.corn import corn_loss from ludwig.schema.features.loss.loss import ( - BaseLossConfig, BWCEWLossConfig, CORNLossConfig, HuberLossConfig, - MAELossConfig, MAPELossConfig, MSELossConfig, - NextTokenSoftmaxCrossEntropyLossConfig, RMSELossConfig, RMSPELossConfig, - SequenceSoftmaxCrossEntropyLossConfig, SigmoidCrossEntropyLossConfig, - SoftmaxCrossEntropyLossConfig) + BaseLossConfig, + BWCEWLossConfig, + CORNLossConfig, + HuberLossConfig, + MAELossConfig, + MAPELossConfig, + MSELossConfig, + NextTokenSoftmaxCrossEntropyLossConfig, + RMSELossConfig, + RMSPELossConfig, + SequenceSoftmaxCrossEntropyLossConfig, + SigmoidCrossEntropyLossConfig, + SoftmaxCrossEntropyLossConfig, +) from ludwig.utils import strings_utils from ludwig.utils.registry import Registry diff --git a/ludwig/modules/lr_scheduler.py b/ludwig/modules/lr_scheduler.py index 64bb3848eb1..a796a73d410 100644 --- a/ludwig/modules/lr_scheduler.py +++ b/ludwig/modules/lr_scheduler.py @@ -3,8 +3,7 @@ from typing import Any, Callable, Dict from torch.optim import Optimizer -from torch.optim.lr_scheduler import (CosineAnnealingWarmRestarts, LambdaLR, - ReduceLROnPlateau, SequentialLR) +from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, LambdaLR, ReduceLROnPlateau, SequentialLR from ludwig.constants import MINIMIZE, TRAINING, VALIDATION from ludwig.modules.metric_registry import get_metric_objective diff --git a/ludwig/modules/metric_modules.py b/ludwig/modules/metric_modules.py index c44d831dd32..a126b81ad7b 100644 --- a/ludwig/modules/metric_modules.py +++ b/ludwig/modules/metric_modules.py @@ -22,43 +22,80 @@ from torchmetrics import MeanAbsoluteError, MeanAbsolutePercentageError from torchmetrics import MeanMetric as _MeanMetric from torchmetrics import MeanSquaredError, Metric -from torchmetrics.classification import (BinaryAccuracy, BinaryAUROC, - BinaryPrecision, BinaryRecall, - BinarySpecificity, MulticlassAccuracy, - MulticlassAUROC) -from torchmetrics.functional.regression.r2 import (_r2_score_compute, - _r2_score_update) +from torchmetrics.classification import ( + BinaryAccuracy, + BinaryAUROC, + BinaryPrecision, + BinaryRecall, + BinarySpecificity, + MulticlassAccuracy, + MulticlassAUROC, +) +from torchmetrics.functional.regression.r2 import _r2_score_compute, _r2_score_update from torchmetrics.metric import jit_distributed_available from torchmetrics.text import BLEUScore, CharErrorRate, WordErrorRate from torchmetrics.text.perplexity import Perplexity from torchmetrics.text.rouge import ROUGEScore -from ludwig.constants import (ACCURACY, ACCURACY_MICRO, BINARY, # RESPONSE, - BINARY_WEIGHTED_CROSS_ENTROPY, CATEGORY, - CATEGORY_DISTRIBUTION, CORN, HITS_AT_K, HUBER, - IGNORE_INDEX_TOKEN_ID, IMAGE, JACCARD, LOGITS, - LOSS, MAXIMIZE, MEAN_ABSOLUTE_ERROR, - MEAN_ABSOLUTE_PERCENTAGE_ERROR, - MEAN_SQUARED_ERROR, MINIMIZE, - NEXT_TOKEN_PERPLEXITY, NUMBER, PERPLEXITY, - PRECISION, PREDICTIONS, PROBABILITIES, R2, - RECALL, ROC_AUC, ROOT_MEAN_SQUARED_ERROR, - ROOT_MEAN_SQUARED_PERCENTAGE_ERROR, SEQUENCE, - SEQUENCE_ACCURACY, SET, SPECIFICITY, TEXT, - TIMESERIES, TOKEN_ACCURACY, VECTOR) +from ludwig.constants import ( # RESPONSE, + ACCURACY, + ACCURACY_MICRO, + BINARY, + BINARY_WEIGHTED_CROSS_ENTROPY, + CATEGORY, + CATEGORY_DISTRIBUTION, + CORN, + HITS_AT_K, + HUBER, + IGNORE_INDEX_TOKEN_ID, + IMAGE, + JACCARD, + LOGITS, + LOSS, + MAXIMIZE, + MEAN_ABSOLUTE_ERROR, + MEAN_ABSOLUTE_PERCENTAGE_ERROR, + MEAN_SQUARED_ERROR, + MINIMIZE, + NEXT_TOKEN_PERPLEXITY, + NUMBER, + PERPLEXITY, + PRECISION, + PREDICTIONS, + PROBABILITIES, + R2, + RECALL, + ROC_AUC, + ROOT_MEAN_SQUARED_ERROR, + ROOT_MEAN_SQUARED_PERCENTAGE_ERROR, + SEQUENCE, + SEQUENCE_ACCURACY, + SET, + SPECIFICITY, + TEXT, + TIMESERIES, + TOKEN_ACCURACY, + VECTOR, +) from ludwig.distributed import get_current_dist_strategy -from ludwig.modules.loss_modules import (BWCEWLoss, CORNLoss, HuberLoss, - NextTokenSoftmaxCrossEntropyLoss, - SequenceSoftmaxCrossEntropyLoss, - SigmoidCrossEntropyLoss, - SoftmaxCrossEntropyLoss) -from ludwig.modules.metric_registry import (get_metric_objective, - get_metric_registry, - register_metric) +from ludwig.modules.loss_modules import ( + BWCEWLoss, + CORNLoss, + HuberLoss, + NextTokenSoftmaxCrossEntropyLoss, + SequenceSoftmaxCrossEntropyLoss, + SigmoidCrossEntropyLoss, + SoftmaxCrossEntropyLoss, +) +from ludwig.modules.metric_registry import get_metric_objective, get_metric_registry, register_metric from ludwig.schema.features.loss.loss import ( - BWCEWLossConfig, CORNLossConfig, HuberLossConfig, - SequenceSoftmaxCrossEntropyLossConfig, SigmoidCrossEntropyLossConfig, - SoftmaxCrossEntropyLossConfig) + BWCEWLossConfig, + CORNLossConfig, + HuberLossConfig, + SequenceSoftmaxCrossEntropyLossConfig, + SigmoidCrossEntropyLossConfig, + SoftmaxCrossEntropyLossConfig, +) from ludwig.utils.loss_utils import rmspe_loss from ludwig.utils.metric_utils import masked_correct_predictions from ludwig.utils.torch_utils import sequence_length_2D diff --git a/ludwig/modules/metric_registry.py b/ludwig/modules/metric_registry.py index 7837564f9d5..d9a06134a54 100644 --- a/ludwig/modules/metric_registry.py +++ b/ludwig/modules/metric_registry.py @@ -1,8 +1,7 @@ -from typing import TYPE_CHECKING, Dict, List, Literal, Union +from typing import Dict, List, Literal, TYPE_CHECKING, Union from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (LOGITS, MAXIMIZE, MINIMIZE, PREDICTIONS, - PROBABILITIES, RESPONSE) +from ludwig.constants import LOGITS, MAXIMIZE, MINIMIZE, PREDICTIONS, PROBABILITIES, RESPONSE from ludwig.utils.registry import Registry if TYPE_CHECKING: diff --git a/ludwig/modules/optimization_modules.py b/ludwig/modules/optimization_modules.py index b2d6efd4081..b762f23e53d 100644 --- a/ludwig/modules/optimization_modules.py +++ b/ludwig/modules/optimization_modules.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== from dataclasses import asdict -from typing import TYPE_CHECKING, Dict, Optional, Tuple, Type +from typing import Dict, Optional, Tuple, Type, TYPE_CHECKING import torch @@ -21,8 +21,7 @@ from ludwig.utils.torch_utils import LudwigModule if TYPE_CHECKING: - from ludwig.schema.optimizers import (BaseOptimizerConfig, - GradientClippingConfig) + from ludwig.schema.optimizers import BaseOptimizerConfig, GradientClippingConfig def create_clipper(gradient_clipping_config: Optional["GradientClippingConfig"]): diff --git a/ludwig/schema/__init__.py b/ludwig/schema/__init__.py index 8c190cc6f8d..b217aab6a4b 100644 --- a/ludwig/schema/__init__.py +++ b/ludwig/schema/__init__.py @@ -1,7 +1,7 @@ # TODO(travis): figure out why we need these imports to avoid circular import error from ludwig.schema.combiners.utils import get_combiner_jsonschema # noqa -from ludwig.schema.features.utils import (get_input_feature_jsonschema, # noqa - get_output_feature_jsonschema) +from ludwig.schema.features.utils import get_input_feature_jsonschema # noqa +from ludwig.schema.features.utils import get_output_feature_jsonschema from ludwig.schema.hyperopt import get_hyperopt_jsonschema # noqa -from ludwig.schema.trainer import (get_model_type_jsonschema, # noqa - get_trainer_jsonschema) +from ludwig.schema.trainer import get_model_type_jsonschema # noqa +from ludwig.schema.trainer import get_trainer_jsonschema diff --git a/ludwig/schema/combiners/sequence.py b/ludwig/schema/combiners/sequence.py index c28881fc172..33907ab37d2 100644 --- a/ludwig/schema/combiners/sequence.py +++ b/ludwig/schema/combiners/sequence.py @@ -4,8 +4,7 @@ from ludwig.constants import MODEL_ECD, SEQUENCE from ludwig.schema import utils as schema_utils from ludwig.schema.combiners.base import BaseCombinerConfig -from ludwig.schema.combiners.sequence_concat import \ - MAIN_SEQUENCE_FEATURE_DESCRIPTION +from ludwig.schema.combiners.sequence_concat import MAIN_SEQUENCE_FEATURE_DESCRIPTION from ludwig.schema.combiners.utils import register_combiner_config from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField diff --git a/ludwig/schema/combiners/tab_transformer.py b/ludwig/schema/combiners/tab_transformer.py index ee50ffed2ff..7d0cfe90123 100644 --- a/ludwig/schema/combiners/tab_transformer.py +++ b/ludwig/schema/combiners/tab_transformer.py @@ -3,8 +3,7 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.schema import utils as schema_utils from ludwig.schema.combiners.base import BaseCombinerConfig -from ludwig.schema.combiners.common_transformer_options import \ - CommonTransformerConfig +from ludwig.schema.combiners.common_transformer_options import CommonTransformerConfig from ludwig.schema.combiners.utils import register_combiner_config from ludwig.schema.metadata import COMBINER_METADATA from ludwig.schema.utils import ludwig_dataclass diff --git a/ludwig/schema/combiners/transformer.py b/ludwig/schema/combiners/transformer.py index 6a026b1bf4d..780bd4342f5 100644 --- a/ludwig/schema/combiners/transformer.py +++ b/ludwig/schema/combiners/transformer.py @@ -3,8 +3,7 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.schema import utils as schema_utils from ludwig.schema.combiners.base import BaseCombinerConfig -from ludwig.schema.combiners.common_transformer_options import \ - CommonTransformerConfig +from ludwig.schema.combiners.common_transformer_options import CommonTransformerConfig from ludwig.schema.combiners.utils import register_combiner_config from ludwig.schema.metadata import COMBINER_METADATA from ludwig.schema.utils import ludwig_dataclass diff --git a/ludwig/schema/combiners/utils.py b/ludwig/schema/combiners/utils.py index 3e28fa0d529..dfad5c83f46 100644 --- a/ludwig/schema/combiners/utils.py +++ b/ludwig/schema/combiners/utils.py @@ -5,8 +5,7 @@ from ludwig.schema import utils as schema_utils from ludwig.schema.combiners.base import BaseCombinerConfig from ludwig.schema.metadata import COMBINER_METADATA -from ludwig.schema.metadata.parameter_metadata import ( - ParameterMetadata, convert_metadata_to_json) +from ludwig.schema.metadata.parameter_metadata import convert_metadata_to_json, ParameterMetadata from ludwig.utils.registry import Registry DEFAULT_VALUE = "concat" diff --git a/ludwig/schema/decoders/base.py b/ludwig/schema/decoders/base.py index 98bdadf3179..f1e27833fd8 100644 --- a/ludwig/schema/decoders/base.py +++ b/ludwig/schema/decoders/base.py @@ -2,8 +2,7 @@ from typing import Dict, List, Tuple, Union from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (BINARY, CATEGORY, MODEL_ECD, MODEL_GBM, - MODEL_LLM, NUMBER, SET, TIMESERIES, VECTOR) +from ludwig.constants import BINARY, CATEGORY, MODEL_ECD, MODEL_GBM, MODEL_LLM, NUMBER, SET, TIMESERIES, VECTOR from ludwig.schema import common_fields from ludwig.schema import utils as schema_utils from ludwig.schema.decoders.utils import register_decoder_config diff --git a/ludwig/schema/decoders/image_decoders.py b/ludwig/schema/decoders/image_decoders.py index bd591181abb..1adfb4b343f 100644 --- a/ludwig/schema/decoders/image_decoders.py +++ b/ludwig/schema/decoders/image_decoders.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Optional +from typing import Optional, TYPE_CHECKING from ludwig.api_annotations import DeveloperAPI from ludwig.constants import IMAGE, MODEL_ECD @@ -9,8 +9,7 @@ from ludwig.schema.utils import ludwig_dataclass if TYPE_CHECKING: - from ludwig.schema.features.preprocessing.image import \ - ImagePreprocessingConfig + from ludwig.schema.features.preprocessing.image import ImagePreprocessingConfig class ImageDecoderConfig(BaseDecoderConfig): diff --git a/ludwig/schema/decoders/utils.py b/ludwig/schema/decoders/utils.py index fe6105fe916..1a1fec77552 100644 --- a/ludwig/schema/decoders/utils.py +++ b/ludwig/schema/decoders/utils.py @@ -1,5 +1,5 @@ from dataclasses import Field -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type, Union +from typing import Any, Dict, List, Optional, Type, TYPE_CHECKING, Union from ludwig.api_annotations import DeveloperAPI from ludwig.constants import MODEL_ECD, TYPE diff --git a/ludwig/schema/defaults/ecd.py b/ludwig/schema/defaults/ecd.py index 622e9cdd310..ccf41e8c4ed 100644 --- a/ludwig/schema/defaults/ecd.py +++ b/ludwig/schema/defaults/ecd.py @@ -1,6 +1,19 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (AUDIO, BAG, BINARY, CATEGORY, DATE, H3, IMAGE, - NUMBER, SEQUENCE, SET, TEXT, TIMESERIES, VECTOR) +from ludwig.constants import ( + AUDIO, + BAG, + BINARY, + CATEGORY, + DATE, + H3, + IMAGE, + NUMBER, + SEQUENCE, + SET, + TEXT, + TIMESERIES, + VECTOR, +) from ludwig.schema import utils as schema_utils from ludwig.schema.defaults.base import BaseDefaultsConfig from ludwig.schema.defaults.utils import DefaultsDataclassField diff --git a/ludwig/schema/defaults/utils.py b/ludwig/schema/defaults/utils.py index 0ee0b2e8f69..a7c4560002b 100644 --- a/ludwig/schema/defaults/utils.py +++ b/ludwig/schema/defaults/utils.py @@ -1,6 +1,6 @@ from dataclasses import field -from marshmallow import ValidationError, fields +from marshmallow import fields, ValidationError import ludwig.schema.utils as schema_utils from ludwig.api_annotations import DeveloperAPI diff --git a/ludwig/schema/encoders/base.py b/ludwig/schema/encoders/base.py index fff788c2d6b..5b2010dce6c 100644 --- a/ludwig/schema/encoders/base.py +++ b/ludwig/schema/encoders/base.py @@ -1,9 +1,8 @@ from abc import ABC -from typing import TYPE_CHECKING, List, Union +from typing import List, TYPE_CHECKING, Union from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (BINARY, MODEL_ECD, MODEL_GBM, MODEL_LLM, NUMBER, - TEXT, TIMESERIES, VECTOR) +from ludwig.constants import BINARY, MODEL_ECD, MODEL_GBM, MODEL_LLM, NUMBER, TEXT, TIMESERIES, VECTOR from ludwig.schema import common_fields from ludwig.schema import utils as schema_utils from ludwig.schema.encoders.utils import register_encoder_config @@ -11,8 +10,7 @@ from ludwig.schema.utils import ludwig_dataclass if TYPE_CHECKING: - from ludwig.schema.features.preprocessing.base import \ - BasePreprocessingConfig + from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig @DeveloperAPI diff --git a/ludwig/schema/encoders/category_encoders.py b/ludwig/schema/encoders/category_encoders.py index 52397cfa292..c54cc8be24b 100644 --- a/ludwig/schema/encoders/category_encoders.py +++ b/ludwig/schema/encoders/category_encoders.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, List +from typing import List, TYPE_CHECKING from ludwig.api_annotations import DeveloperAPI from ludwig.constants import CATEGORY, MODEL_ECD, MODEL_GBM @@ -10,8 +10,7 @@ from ludwig.schema.utils import ludwig_dataclass if TYPE_CHECKING: - from ludwig.schema.features.preprocessing.category import \ - CategoryPreprocessingConfig + from ludwig.schema.features.preprocessing.category import CategoryPreprocessingConfig @DeveloperAPI diff --git a/ludwig/schema/encoders/image/base.py b/ludwig/schema/encoders/image/base.py index 53d5c7e94ed..c0feeecb3b8 100644 --- a/ludwig/schema/encoders/image/base.py +++ b/ludwig/schema/encoders/image/base.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING, Union from ludwig.api_annotations import DeveloperAPI from ludwig.constants import IMAGE @@ -10,8 +10,7 @@ from ludwig.utils.torch_utils import initializer_registry if TYPE_CHECKING: - from ludwig.schema.features.preprocessing.image import \ - ImagePreprocessingConfig + from ludwig.schema.features.preprocessing.image import ImagePreprocessingConfig class ImageEncoderConfig(BaseEncoderConfig): diff --git a/ludwig/schema/encoders/sequence_encoders.py b/ludwig/schema/encoders/sequence_encoders.py index b3d9d7251a7..c6ef4c746e6 100644 --- a/ludwig/schema/encoders/sequence_encoders.py +++ b/ludwig/schema/encoders/sequence_encoders.py @@ -1,5 +1,5 @@ from dataclasses import Field -from typing import TYPE_CHECKING, List, Optional +from typing import List, Optional, TYPE_CHECKING from ludwig.api_annotations import DeveloperAPI from ludwig.constants import AUDIO, SEQUENCE, TEXT, TIMESERIES @@ -11,8 +11,7 @@ from ludwig.schema.utils import ludwig_dataclass if TYPE_CHECKING: - from ludwig.schema.features.preprocessing.sequence import \ - SequencePreprocessingConfig + from ludwig.schema.features.preprocessing.sequence import SequencePreprocessingConfig CONV_LAYERS_DESCRIPTION = """ A list of dictionaries containing the parameters of all the convolutional layers. diff --git a/ludwig/schema/encoders/text_encoders.py b/ludwig/schema/encoders/text_encoders.py index 9097da33aff..72420c55ae3 100644 --- a/ludwig/schema/encoders/text_encoders.py +++ b/ludwig/schema/encoders/text_encoders.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Union +from typing import Callable, Dict, List, Optional, TYPE_CHECKING, Union from ludwig.api_annotations import DeveloperAPI from ludwig.constants import MODEL_ECD, MODEL_GBM, TEXT @@ -8,19 +8,15 @@ from ludwig.schema.encoders.text.hf_model_params import DebertaModelParams from ludwig.schema.encoders.utils import register_encoder_config from ludwig.schema.llms.base_model import BaseModelDataclassField -from ludwig.schema.llms.model_parameters import (ModelParametersConfig, - ModelParametersConfigField) +from ludwig.schema.llms.model_parameters import ModelParametersConfig, ModelParametersConfigField from ludwig.schema.llms.peft import AdapterDataclassField, BaseAdapterConfig -from ludwig.schema.llms.quantization import (QuantizationConfig, - QuantizationConfigField) +from ludwig.schema.llms.quantization import QuantizationConfig, QuantizationConfigField from ludwig.schema.metadata import ENCODER_METADATA -from ludwig.schema.metadata.parameter_metadata import (INTERNAL_ONLY, - ParameterMetadata) +from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY, ParameterMetadata from ludwig.schema.utils import ludwig_dataclass if TYPE_CHECKING: - from ludwig.schema.features.preprocessing.text import \ - TextPreprocessingConfig + from ludwig.schema.features.preprocessing.text import TextPreprocessingConfig class HFEncoderConfig(SequenceEncoderConfig): diff --git a/ludwig/schema/encoders/utils.py b/ludwig/schema/encoders/utils.py index bbf665d482d..f2d7bfcea1a 100644 --- a/ludwig/schema/encoders/utils.py +++ b/ludwig/schema/encoders/utils.py @@ -1,5 +1,5 @@ from dataclasses import Field -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type, Union +from typing import Any, Dict, List, Optional, Type, TYPE_CHECKING, Union from ludwig.api_annotations import DeveloperAPI from ludwig.constants import MODEL_ECD, TYPE diff --git a/ludwig/schema/features/audio_feature.py b/ludwig/schema/features/audio_feature.py index 2153049119c..dbac117706c 100644 --- a/ludwig/schema/features/audio_feature.py +++ b/ludwig/schema/features/audio_feature.py @@ -5,11 +5,8 @@ from ludwig.schema.encoders.utils import EncoderDataclassField from ludwig.schema.features.base import BaseInputFeatureConfig from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import \ - PreprocessingDataclassField -from ludwig.schema.features.utils import (ecd_defaults_config_registry, - ecd_input_config_registry, - input_mixin_registry) +from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField +from ludwig.schema.features.utils import ecd_defaults_config_registry, ecd_input_config_registry, input_mixin_registry from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/augmentation/image.py b/ludwig/schema/features/augmentation/image.py index db5671c7258..74dc1d2673e 100644 --- a/ludwig/schema/features/augmentation/image.py +++ b/ludwig/schema/features/augmentation/image.py @@ -2,8 +2,7 @@ from ludwig.constants import AUGMENTATION, IMAGE, TYPE from ludwig.schema import utils as schema_utils from ludwig.schema.features.augmentation.base import BaseAugmentationConfig -from ludwig.schema.features.augmentation.utils import \ - register_augmentation_config +from ludwig.schema.features.augmentation.utils import register_augmentation_config from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.utils import ludwig_dataclass diff --git a/ludwig/schema/features/augmentation/utils.py b/ludwig/schema/features/augmentation/utils.py index d1807af0555..e24afecee7e 100644 --- a/ludwig/schema/features/augmentation/utils.py +++ b/ludwig/schema/features/augmentation/utils.py @@ -2,7 +2,7 @@ from dataclasses import field from typing import Any, Dict, List, Optional, Union -from marshmallow import ValidationError, fields +from marshmallow import fields, ValidationError from ludwig.api_annotations import DeveloperAPI from ludwig.constants import TYPE diff --git a/ludwig/schema/features/bag_feature.py b/ludwig/schema/features/bag_feature.py index 6e27641eff7..3d627a8b4f8 100644 --- a/ludwig/schema/features/bag_feature.py +++ b/ludwig/schema/features/bag_feature.py @@ -5,11 +5,8 @@ from ludwig.schema.encoders.utils import EncoderDataclassField from ludwig.schema.features.base import BaseInputFeatureConfig from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import \ - PreprocessingDataclassField -from ludwig.schema.features.utils import (ecd_defaults_config_registry, - ecd_input_config_registry, - input_mixin_registry) +from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField +from ludwig.schema.features.utils import ecd_defaults_config_registry, ecd_input_config_registry, input_mixin_registry from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/base.py b/ludwig/schema/features/base.py index 1cceed290fb..2d2a857a03e 100644 --- a/ludwig/schema/features/base.py +++ b/ludwig/schema/features/base.py @@ -6,21 +6,37 @@ from rich.console import Console from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (AUDIO, BAG, BINARY, CATEGORY, DATE, H3, IMAGE, - MODEL_ECD, MODEL_GBM, MODEL_LLM, NUMBER, - SEQUENCE, SET, TEXT, TIMESERIES, VECTOR) +from ludwig.constants import ( + AUDIO, + BAG, + BINARY, + CATEGORY, + DATE, + H3, + IMAGE, + MODEL_ECD, + MODEL_GBM, + MODEL_LLM, + NUMBER, + SEQUENCE, + SET, + TEXT, + TIMESERIES, + VECTOR, +) from ludwig.error import ConfigValidationError from ludwig.schema import utils as schema_utils -from ludwig.schema.features.utils import (ecd_input_config_registry, - ecd_output_config_registry, - gbm_input_config_registry, - gbm_output_config_registry, - get_input_feature_jsonschema, - get_output_feature_jsonschema, - llm_input_config_registry, - llm_output_config_registry) -from ludwig.schema.metadata.parameter_metadata import (INTERNAL_ONLY, - ParameterMetadata) +from ludwig.schema.features.utils import ( + ecd_input_config_registry, + ecd_output_config_registry, + gbm_input_config_registry, + gbm_output_config_registry, + get_input_feature_jsonschema, + get_output_feature_jsonschema, + llm_input_config_registry, + llm_output_config_registry, +) +from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY, ParameterMetadata from ludwig.schema.utils import ludwig_dataclass logger = logging.getLogger(__name__) diff --git a/ludwig/schema/features/binary_feature.py b/ludwig/schema/features/binary_feature.py index 02b2dc0c490..81929866722 100644 --- a/ludwig/schema/features/binary_feature.py +++ b/ludwig/schema/features/binary_feature.py @@ -1,26 +1,25 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (BINARY, BINARY_WEIGHTED_CROSS_ENTROPY, MODEL_ECD, - MODEL_GBM, ROC_AUC) +from ludwig.constants import BINARY, BINARY_WEIGHTED_CROSS_ENTROPY, MODEL_ECD, MODEL_GBM, ROC_AUC from ludwig.schema import utils as schema_utils from ludwig.schema.decoders.base import BaseDecoderConfig from ludwig.schema.decoders.utils import DecoderDataclassField from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField -from ludwig.schema.features.base import (BaseInputFeatureConfig, - BaseOutputFeatureConfig) +from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import \ - PreprocessingDataclassField -from ludwig.schema.features.utils import (ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - gbm_defaults_config_registry, - gbm_input_config_registry, - gbm_output_config_registry, - input_mixin_registry, - output_mixin_registry) +from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField +from ludwig.schema.features.utils import ( + ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + gbm_defaults_config_registry, + gbm_input_config_registry, + gbm_output_config_registry, + input_mixin_registry, + output_mixin_registry, +) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/category_feature.py b/ludwig/schema/features/category_feature.py index 75ecdbbe2bc..c39ee4eabf6 100644 --- a/ludwig/schema/features/category_feature.py +++ b/ludwig/schema/features/category_feature.py @@ -1,28 +1,34 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (ACCURACY, CATEGORY, CATEGORY_DISTRIBUTION, - MODEL_ECD, MODEL_GBM, MODEL_LLM, - SOFTMAX_CROSS_ENTROPY) +from ludwig.constants import ( + ACCURACY, + CATEGORY, + CATEGORY_DISTRIBUTION, + MODEL_ECD, + MODEL_GBM, + MODEL_LLM, + SOFTMAX_CROSS_ENTROPY, +) from ludwig.schema import utils as schema_utils from ludwig.schema.decoders.base import BaseDecoderConfig from ludwig.schema.decoders.utils import DecoderDataclassField from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField -from ludwig.schema.features.base import (BaseInputFeatureConfig, - BaseOutputFeatureConfig) +from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import \ - PreprocessingDataclassField -from ludwig.schema.features.utils import (ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - gbm_defaults_config_registry, - gbm_input_config_registry, - gbm_output_config_registry, - input_mixin_registry, - llm_output_config_registry, - output_mixin_registry) +from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField +from ludwig.schema.features.utils import ( + ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + gbm_defaults_config_registry, + gbm_input_config_registry, + gbm_output_config_registry, + input_mixin_registry, + llm_output_config_registry, + output_mixin_registry, +) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/date_feature.py b/ludwig/schema/features/date_feature.py index 4032ee6e575..55b2408df9c 100644 --- a/ludwig/schema/features/date_feature.py +++ b/ludwig/schema/features/date_feature.py @@ -5,11 +5,8 @@ from ludwig.schema.encoders.utils import EncoderDataclassField from ludwig.schema.features.base import BaseInputFeatureConfig from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import \ - PreprocessingDataclassField -from ludwig.schema.features.utils import (ecd_defaults_config_registry, - ecd_input_config_registry, - input_mixin_registry) +from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField +from ludwig.schema.features.utils import ecd_defaults_config_registry, ecd_input_config_registry, input_mixin_registry from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/h3_feature.py b/ludwig/schema/features/h3_feature.py index d0f79f701fb..1c712b0ae39 100644 --- a/ludwig/schema/features/h3_feature.py +++ b/ludwig/schema/features/h3_feature.py @@ -5,11 +5,8 @@ from ludwig.schema.encoders.utils import EncoderDataclassField from ludwig.schema.features.base import BaseInputFeatureConfig from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import \ - PreprocessingDataclassField -from ludwig.schema.features.utils import (ecd_defaults_config_registry, - ecd_input_config_registry, - input_mixin_registry) +from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField +from ludwig.schema.features.utils import ecd_defaults_config_registry, ecd_input_config_registry, input_mixin_registry from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/image_feature.py b/ludwig/schema/features/image_feature.py index 85564eb7435..9322ee253cb 100644 --- a/ludwig/schema/features/image_feature.py +++ b/ludwig/schema/features/image_feature.py @@ -8,22 +8,20 @@ from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField from ludwig.schema.features.augmentation.base import BaseAugmentationConfig -from ludwig.schema.features.augmentation.image import ( - RandomHorizontalFlipConfig, RandomRotateConfig) -from ludwig.schema.features.augmentation.utils import \ - AugmentationDataclassField -from ludwig.schema.features.base import (BaseInputFeatureConfig, - BaseOutputFeatureConfig) +from ludwig.schema.features.augmentation.image import RandomHorizontalFlipConfig, RandomRotateConfig +from ludwig.schema.features.augmentation.utils import AugmentationDataclassField +from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import \ - PreprocessingDataclassField -from ludwig.schema.features.utils import (ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - input_mixin_registry, - output_mixin_registry) +from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField +from ludwig.schema.features.utils import ( + ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + input_mixin_registry, + output_mixin_registry, +) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/loss/__init__.py b/ludwig/schema/features/loss/__init__.py index b47ad206099..249d91e87bc 100644 --- a/ludwig/schema/features/loss/__init__.py +++ b/ludwig/schema/features/loss/__init__.py @@ -1,3 +1,2 @@ -from ludwig.schema.features.loss.loss import (get_loss_classes, # noqa - get_loss_cls, - get_loss_schema_registry) +from ludwig.schema.features.loss.loss import get_loss_classes # noqa +from ludwig.schema.features.loss.loss import get_loss_cls, get_loss_schema_registry diff --git a/ludwig/schema/features/loss/loss.py b/ludwig/schema/features/loss/loss.py index 2b8c5e83f71..2dffbe34293 100644 --- a/ludwig/schema/features/loss/loss.py +++ b/ludwig/schema/features/loss/loss.py @@ -1,16 +1,29 @@ from typing import Dict, List, Type, Union from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (BINARY, BINARY_WEIGHTED_CROSS_ENTROPY, CATEGORY, - CORN, HUBER, IMAGE, MEAN_ABSOLUTE_ERROR, - MEAN_ABSOLUTE_PERCENTAGE_ERROR, - MEAN_SQUARED_ERROR, - NEXT_TOKEN_SOFTMAX_CROSS_ENTROPY, NUMBER, - ROOT_MEAN_SQUARED_ERROR, - ROOT_MEAN_SQUARED_PERCENTAGE_ERROR, SEQUENCE, - SEQUENCE_SOFTMAX_CROSS_ENTROPY, SET, - SIGMOID_CROSS_ENTROPY, SOFTMAX_CROSS_ENTROPY, - TEXT, TIMESERIES, VECTOR) +from ludwig.constants import ( + BINARY, + BINARY_WEIGHTED_CROSS_ENTROPY, + CATEGORY, + CORN, + HUBER, + IMAGE, + MEAN_ABSOLUTE_ERROR, + MEAN_ABSOLUTE_PERCENTAGE_ERROR, + MEAN_SQUARED_ERROR, + NEXT_TOKEN_SOFTMAX_CROSS_ENTROPY, + NUMBER, + ROOT_MEAN_SQUARED_ERROR, + ROOT_MEAN_SQUARED_PERCENTAGE_ERROR, + SEQUENCE, + SEQUENCE_SOFTMAX_CROSS_ENTROPY, + SET, + SIGMOID_CROSS_ENTROPY, + SOFTMAX_CROSS_ENTROPY, + TEXT, + TIMESERIES, + VECTOR, +) from ludwig.schema import utils as schema_utils from ludwig.schema.metadata import LOSS_METADATA from ludwig.schema.utils import ludwig_dataclass diff --git a/ludwig/schema/features/number_feature.py b/ludwig/schema/features/number_feature.py index 0e8562cb456..97ea49123c6 100644 --- a/ludwig/schema/features/number_feature.py +++ b/ludwig/schema/features/number_feature.py @@ -7,21 +7,21 @@ from ludwig.schema.decoders.utils import DecoderDataclassField from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField -from ludwig.schema.features.base import (BaseInputFeatureConfig, - BaseOutputFeatureConfig) +from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import \ - PreprocessingDataclassField -from ludwig.schema.features.utils import (ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - gbm_defaults_config_registry, - gbm_input_config_registry, - gbm_output_config_registry, - input_mixin_registry, - output_mixin_registry) +from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField +from ludwig.schema.features.utils import ( + ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + gbm_defaults_config_registry, + gbm_input_config_registry, + gbm_output_config_registry, + input_mixin_registry, + output_mixin_registry, +) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/preprocessing/__init__.py b/ludwig/schema/features/preprocessing/__init__.py index 3cbd2a1ffa2..90440ec1c4e 100644 --- a/ludwig/schema/features/preprocessing/__init__.py +++ b/ludwig/schema/features/preprocessing/__init__.py @@ -1,5 +1,16 @@ # Register all preprocessors -from ludwig.schema.features.preprocessing import (audio, bag, binary, # noqa - category, date, h3, image, - number, sequence, set, text, - timeseries, vector) +from ludwig.schema.features.preprocessing import ( # noqa + audio, + bag, + binary, + category, + date, + h3, + image, + number, + sequence, + set, + text, + timeseries, + vector, +) diff --git a/ludwig/schema/features/preprocessing/audio.py b/ludwig/schema/features/preprocessing/audio.py index c88ae0c002b..3eba94e9d76 100644 --- a/ludwig/schema/features/preprocessing/audio.py +++ b/ludwig/schema/features/preprocessing/audio.py @@ -1,6 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (AUDIO, BFILL, MISSING_VALUE_STRATEGY_OPTIONS, - PREPROCESSING) +from ludwig.constants import AUDIO, BFILL, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/bag.py b/ludwig/schema/features/preprocessing/bag.py index 2ef9bbc8d63..6f6402b95c7 100644 --- a/ludwig/schema/features/preprocessing/bag.py +++ b/ludwig/schema/features/preprocessing/bag.py @@ -1,6 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (BAG, FILL_WITH_CONST, - MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING) +from ludwig.constants import BAG, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/binary.py b/ludwig/schema/features/preprocessing/binary.py index f4f709525d2..6641d8b0c45 100644 --- a/ludwig/schema/features/preprocessing/binary.py +++ b/ludwig/schema/features/preprocessing/binary.py @@ -1,8 +1,16 @@ from typing import Union from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (BFILL, BINARY, DROP_ROW, FFILL, FILL_WITH_FALSE, - FILL_WITH_MODE, FILL_WITH_TRUE, PREPROCESSING) +from ludwig.constants import ( + BFILL, + BINARY, + DROP_ROW, + FFILL, + FILL_WITH_FALSE, + FILL_WITH_MODE, + FILL_WITH_TRUE, + PREPROCESSING, +) from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/category.py b/ludwig/schema/features/preprocessing/category.py index 3ceb596889e..540cd654185 100644 --- a/ludwig/schema/features/preprocessing/category.py +++ b/ludwig/schema/features/preprocessing/category.py @@ -1,8 +1,7 @@ from typing import List from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (CATEGORY, DROP_ROW, FILL_WITH_CONST, - MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING) +from ludwig.constants import CATEGORY, DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING from ludwig.error import ConfigValidationError from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig diff --git a/ludwig/schema/features/preprocessing/date.py b/ludwig/schema/features/preprocessing/date.py index 99ff06698bb..597ea8d53be 100644 --- a/ludwig/schema/features/preprocessing/date.py +++ b/ludwig/schema/features/preprocessing/date.py @@ -1,6 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (BFILL, DATE, DROP_ROW, FFILL, FILL_WITH_CONST, - PREPROCESSING) +from ludwig.constants import BFILL, DATE, DROP_ROW, FFILL, FILL_WITH_CONST, PREPROCESSING from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/h3.py b/ludwig/schema/features/preprocessing/h3.py index 4abb0a3e413..51b57ee3984 100644 --- a/ludwig/schema/features/preprocessing/h3.py +++ b/ludwig/schema/features/preprocessing/h3.py @@ -1,6 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (FILL_WITH_CONST, H3, - MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING) +from ludwig.constants import FILL_WITH_CONST, H3, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/image.py b/ludwig/schema/features/preprocessing/image.py index a78663e0dff..dd8caa51b53 100644 --- a/ludwig/schema/features/preprocessing/image.py +++ b/ludwig/schema/features/preprocessing/image.py @@ -1,8 +1,7 @@ from typing import Optional, Union from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (BFILL, DROP_ROW, IMAGE, IMAGENET1K, - MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING) +from ludwig.constants import BFILL, DROP_ROW, IMAGE, IMAGENET1K, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/number.py b/ludwig/schema/features/preprocessing/number.py index c1b34329b4f..17a4ef408d2 100644 --- a/ludwig/schema/features/preprocessing/number.py +++ b/ludwig/schema/features/preprocessing/number.py @@ -1,9 +1,14 @@ from typing import Optional from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (DROP_ROW, FILL_WITH_CONST, FILL_WITH_MEAN, - MISSING_VALUE_STRATEGY_OPTIONS, NUMBER, - PREPROCESSING) +from ludwig.constants import ( + DROP_ROW, + FILL_WITH_CONST, + FILL_WITH_MEAN, + MISSING_VALUE_STRATEGY_OPTIONS, + NUMBER, + PREPROCESSING, +) from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/sequence.py b/ludwig/schema/features/preprocessing/sequence.py index fb737590736..6d504d0fecc 100644 --- a/ludwig/schema/features/preprocessing/sequence.py +++ b/ludwig/schema/features/preprocessing/sequence.py @@ -1,7 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (DROP_ROW, FILL_WITH_CONST, - MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, - SEQUENCE) +from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, SEQUENCE from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/set.py b/ludwig/schema/features/preprocessing/set.py index 5c6bfcf4db2..9d9ef513dcd 100644 --- a/ludwig/schema/features/preprocessing/set.py +++ b/ludwig/schema/features/preprocessing/set.py @@ -1,7 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (DROP_ROW, FILL_WITH_CONST, - MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, - SET) +from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, SET from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/text.py b/ludwig/schema/features/preprocessing/text.py index e887b053089..ec4230fd098 100644 --- a/ludwig/schema/features/preprocessing/text.py +++ b/ludwig/schema/features/preprocessing/text.py @@ -1,7 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (DROP_ROW, FILL_WITH_CONST, - MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, - TEXT) +from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, TEXT from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/timeseries.py b/ludwig/schema/features/preprocessing/timeseries.py index 48e49d22870..647dd9a63c5 100644 --- a/ludwig/schema/features/preprocessing/timeseries.py +++ b/ludwig/schema/features/preprocessing/timeseries.py @@ -1,7 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (DROP_ROW, FILL_WITH_CONST, - MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, - TIMESERIES) +from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, TIMESERIES from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/preprocessing/utils.py b/ludwig/schema/features/preprocessing/utils.py index 5e69d638b37..48bac3160fc 100644 --- a/ludwig/schema/features/preprocessing/utils.py +++ b/ludwig/schema/features/preprocessing/utils.py @@ -1,6 +1,6 @@ from dataclasses import field -from marshmallow import ValidationError, fields +from marshmallow import fields, ValidationError from ludwig.api_annotations import DeveloperAPI from ludwig.schema import utils as schema_utils diff --git a/ludwig/schema/features/preprocessing/vector.py b/ludwig/schema/features/preprocessing/vector.py index d93ab7f4d14..ca83daa14c3 100644 --- a/ludwig/schema/features/preprocessing/vector.py +++ b/ludwig/schema/features/preprocessing/vector.py @@ -1,7 +1,5 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (DROP_ROW, FILL_WITH_CONST, - MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, - VECTOR) +from ludwig.constants import DROP_ROW, FILL_WITH_CONST, MISSING_VALUE_STRATEGY_OPTIONS, PREPROCESSING, VECTOR from ludwig.schema import utils as schema_utils from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig from ludwig.schema.features.preprocessing.utils import register_preprocessor diff --git a/ludwig/schema/features/sequence_feature.py b/ludwig/schema/features/sequence_feature.py index 7bd7933a5f4..df2b2fa0f61 100644 --- a/ludwig/schema/features/sequence_feature.py +++ b/ludwig/schema/features/sequence_feature.py @@ -1,23 +1,22 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (LOSS, MODEL_ECD, SEQUENCE, - SEQUENCE_SOFTMAX_CROSS_ENTROPY) +from ludwig.constants import LOSS, MODEL_ECD, SEQUENCE, SEQUENCE_SOFTMAX_CROSS_ENTROPY from ludwig.schema import utils as schema_utils from ludwig.schema.decoders.base import BaseDecoderConfig from ludwig.schema.decoders.utils import DecoderDataclassField from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField -from ludwig.schema.features.base import (BaseInputFeatureConfig, - BaseOutputFeatureConfig) +from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import \ - PreprocessingDataclassField -from ludwig.schema.features.utils import (ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - input_mixin_registry, - output_mixin_registry) +from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField +from ludwig.schema.features.utils import ( + ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + input_mixin_registry, + output_mixin_registry, +) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/set_feature.py b/ludwig/schema/features/set_feature.py index 547964dcf18..96cc3f68b7e 100644 --- a/ludwig/schema/features/set_feature.py +++ b/ludwig/schema/features/set_feature.py @@ -5,18 +5,18 @@ from ludwig.schema.decoders.utils import DecoderDataclassField from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField -from ludwig.schema.features.base import (BaseInputFeatureConfig, - BaseOutputFeatureConfig) +from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import \ - PreprocessingDataclassField -from ludwig.schema.features.utils import (ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - input_mixin_registry, - output_mixin_registry) +from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField +from ludwig.schema.features.utils import ( + ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + input_mixin_registry, + output_mixin_registry, +) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/text_feature.py b/ludwig/schema/features/text_feature.py index 883ad44a2ff..8c9984a6016 100644 --- a/ludwig/schema/features/text_feature.py +++ b/ludwig/schema/features/text_feature.py @@ -1,29 +1,35 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (LOSS, MODEL_ECD, MODEL_GBM, MODEL_LLM, - NEXT_TOKEN_SOFTMAX_CROSS_ENTROPY, - SEQUENCE_SOFTMAX_CROSS_ENTROPY, TEXT) +from ludwig.constants import ( + LOSS, + MODEL_ECD, + MODEL_GBM, + MODEL_LLM, + NEXT_TOKEN_SOFTMAX_CROSS_ENTROPY, + SEQUENCE_SOFTMAX_CROSS_ENTROPY, + TEXT, +) from ludwig.schema import utils as schema_utils from ludwig.schema.decoders.base import BaseDecoderConfig from ludwig.schema.decoders.utils import DecoderDataclassField from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField -from ludwig.schema.features.base import (BaseInputFeatureConfig, - BaseOutputFeatureConfig) +from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import \ - PreprocessingDataclassField -from ludwig.schema.features.utils import (ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - gbm_defaults_config_registry, - gbm_input_config_registry, - input_mixin_registry, - llm_defaults_config_registry, - llm_input_config_registry, - llm_output_config_registry, - output_mixin_registry) +from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField +from ludwig.schema.features.utils import ( + ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + gbm_defaults_config_registry, + gbm_input_config_registry, + input_mixin_registry, + llm_defaults_config_registry, + llm_input_config_registry, + llm_output_config_registry, + output_mixin_registry, +) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/timeseries_feature.py b/ludwig/schema/features/timeseries_feature.py index a9f6e23c61a..a5eaeb05a7f 100644 --- a/ludwig/schema/features/timeseries_feature.py +++ b/ludwig/schema/features/timeseries_feature.py @@ -1,23 +1,22 @@ from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (HUBER, MEAN_SQUARED_ERROR, MODEL_ECD, TIMESERIES, - VECTOR) +from ludwig.constants import HUBER, MEAN_SQUARED_ERROR, MODEL_ECD, TIMESERIES, VECTOR from ludwig.schema import utils as schema_utils from ludwig.schema.decoders.base import BaseDecoderConfig from ludwig.schema.decoders.utils import DecoderDataclassField from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField -from ludwig.schema.features.base import (BaseInputFeatureConfig, - BaseOutputFeatureConfig) +from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import \ - PreprocessingDataclassField -from ludwig.schema.features.utils import (ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - input_mixin_registry, - output_mixin_registry) +from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField +from ludwig.schema.features.utils import ( + ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + input_mixin_registry, + output_mixin_registry, +) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/features/vector_feature.py b/ludwig/schema/features/vector_feature.py index 524e8e10f2c..e7cc7c99e4a 100644 --- a/ludwig/schema/features/vector_feature.py +++ b/ludwig/schema/features/vector_feature.py @@ -5,18 +5,18 @@ from ludwig.schema.decoders.utils import DecoderDataclassField from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import EncoderDataclassField -from ludwig.schema.features.base import (BaseInputFeatureConfig, - BaseOutputFeatureConfig) +from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig from ludwig.schema.features.loss.loss import BaseLossConfig from ludwig.schema.features.loss.utils import LossDataclassField from ludwig.schema.features.preprocessing.base import BasePreprocessingConfig -from ludwig.schema.features.preprocessing.utils import \ - PreprocessingDataclassField -from ludwig.schema.features.utils import (ecd_defaults_config_registry, - ecd_input_config_registry, - ecd_output_config_registry, - input_mixin_registry, - output_mixin_registry) +from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField +from ludwig.schema.features.utils import ( + ecd_defaults_config_registry, + ecd_input_config_registry, + ecd_output_config_registry, + input_mixin_registry, + output_mixin_registry, +) from ludwig.schema.metadata import FEATURE_METADATA from ludwig.schema.metadata.parameter_metadata import INTERNAL_ONLY from ludwig.schema.utils import BaseMarshmallowConfig, ludwig_dataclass diff --git a/ludwig/schema/hyperopt/__init__.py b/ludwig/schema/hyperopt/__init__.py index 74c92cb9f1d..6a81d8eaebe 100644 --- a/ludwig/schema/hyperopt/__init__.py +++ b/ludwig/schema/hyperopt/__init__.py @@ -6,14 +6,11 @@ import ludwig.schema.hyperopt.parameter # noqa: F401 from ludwig.api_annotations import DeveloperAPI from ludwig.constants import LOSS, TEST, TRAIN, VALIDATION -from ludwig.modules import \ - metric_modules # noqa: Needed to ensure that the metric registry is populated. +from ludwig.modules import metric_modules # noqa: Needed to ensure that the metric registry is populated. from ludwig.modules.metric_registry import get_metric_registry from ludwig.schema import utils as schema_utils -from ludwig.schema.hyperopt.executor import (ExecutorConfig, - ExecutorDataclassField) -from ludwig.schema.hyperopt.search_algorithm import ( - BaseSearchAlgorithmConfig, SearchAlgorithmDataclassField) +from ludwig.schema.hyperopt.executor import ExecutorConfig, ExecutorDataclassField +from ludwig.schema.hyperopt.search_algorithm import BaseSearchAlgorithmConfig, SearchAlgorithmDataclassField @DeveloperAPI diff --git a/ludwig/schema/hyperopt/executor.py b/ludwig/schema/hyperopt/executor.py index cc69cf870c2..3adfe36c1b6 100644 --- a/ludwig/schema/hyperopt/executor.py +++ b/ludwig/schema/hyperopt/executor.py @@ -1,13 +1,12 @@ from dataclasses import field from typing import Dict, Optional, Union -from marshmallow import ValidationError, fields +from marshmallow import fields, ValidationError from ludwig.api_annotations import DeveloperAPI from ludwig.constants import RAY from ludwig.schema import utils as schema_utils -from ludwig.schema.hyperopt.scheduler import (BaseSchedulerConfig, - SchedulerDataclassField) +from ludwig.schema.hyperopt.scheduler import BaseSchedulerConfig, SchedulerDataclassField from ludwig.schema.utils import ludwig_dataclass diff --git a/ludwig/schema/hyperopt/scheduler.py b/ludwig/schema/hyperopt/scheduler.py index 547be79d508..2641c04f6bc 100644 --- a/ludwig/schema/hyperopt/scheduler.py +++ b/ludwig/schema/hyperopt/scheduler.py @@ -3,7 +3,7 @@ from importlib import import_module from typing import Callable, Dict, Optional, Tuple, Union -from marshmallow import ValidationError, fields +from marshmallow import fields, ValidationError from ludwig.api_annotations import DeveloperAPI from ludwig.schema import utils as schema_utils diff --git a/ludwig/schema/hyperopt/search_algorithm.py b/ludwig/schema/hyperopt/search_algorithm.py index 45a3dc635bd..17dc942c557 100644 --- a/ludwig/schema/hyperopt/search_algorithm.py +++ b/ludwig/schema/hyperopt/search_algorithm.py @@ -2,7 +2,7 @@ from importlib import import_module from typing import Dict, List, Optional -from marshmallow import ValidationError, fields +from marshmallow import fields, ValidationError from ludwig.api_annotations import DeveloperAPI from ludwig.schema import utils as schema_utils diff --git a/ludwig/schema/llms/base_model.py b/ludwig/schema/llms/base_model.py index d72172ccf64..fe77291bfc5 100644 --- a/ludwig/schema/llms/base_model.py +++ b/ludwig/schema/llms/base_model.py @@ -2,7 +2,7 @@ import os from dataclasses import field -from marshmallow import ValidationError, fields +from marshmallow import fields, ValidationError from transformers import AutoConfig from ludwig.api_annotations import DeveloperAPI diff --git a/ludwig/schema/llms/peft.py b/ludwig/schema/llms/peft.py index 98c8c5f968d..104a316179c 100644 --- a/ludwig/schema/llms/peft.py +++ b/ludwig/schema/llms/peft.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, List, Optional, Type +from typing import List, Optional, Type, TYPE_CHECKING from ludwig.api_annotations import DeveloperAPI from ludwig.error import ConfigValidationError diff --git a/ludwig/schema/lr_scheduler.py b/ludwig/schema/lr_scheduler.py index bb782cad4be..3bfedab82bf 100644 --- a/ludwig/schema/lr_scheduler.py +++ b/ludwig/schema/lr_scheduler.py @@ -2,7 +2,7 @@ from dataclasses import field from typing import Dict -from marshmallow import ValidationError, fields +from marshmallow import fields, ValidationError import ludwig.schema.utils as schema_utils from ludwig.api_annotations import DeveloperAPI diff --git a/ludwig/schema/model_types/base.py b/ludwig/schema/model_types/base.py index e681c98bbe8..410aa5c454e 100644 --- a/ludwig/schema/model_types/base.py +++ b/ludwig/schema/model_types/base.py @@ -7,28 +7,39 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.config_validation.checks import get_config_check_registry from ludwig.config_validation.validation import check_schema -from ludwig.constants import (BACKEND, COLUMN, DEPENDENCIES, ENCODER, - INPUT_FEATURES, MODEL_ECD, NAME, OUTPUT_FEATURES, - TIED) +from ludwig.constants import ( + BACKEND, + COLUMN, + DEPENDENCIES, + ENCODER, + INPUT_FEATURES, + MODEL_ECD, + NAME, + OUTPUT_FEATURES, + TIED, +) from ludwig.error import ConfigValidationError from ludwig.globals import LUDWIG_VERSION from ludwig.schema import utils as schema_utils from ludwig.schema.defaults.base import BaseDefaultsConfig -from ludwig.schema.features.base import (BaseInputFeatureConfig, - BaseOutputFeatureConfig, - FeatureCollection) +from ludwig.schema.features.base import BaseInputFeatureConfig, BaseOutputFeatureConfig, FeatureCollection from ludwig.schema.hyperopt import HyperoptConfig from ludwig.schema.model_types.utils import ( - merge_fixed_preprocessing_params, merge_with_defaults, - sanitize_and_filter_combiner_entities_, set_derived_feature_columns_, - set_hyperopt_defaults_, set_llm_parameters, set_preprocessing_parameters, - set_tagger_decoder_parameters, set_validation_parameters) + merge_fixed_preprocessing_params, + merge_with_defaults, + sanitize_and_filter_combiner_entities_, + set_derived_feature_columns_, + set_hyperopt_defaults_, + set_llm_parameters, + set_preprocessing_parameters, + set_tagger_decoder_parameters, + set_validation_parameters, +) from ludwig.schema.preprocessing import PreprocessingConfig from ludwig.schema.trainer import BaseTrainerConfig from ludwig.schema.utils import ludwig_dataclass from ludwig.types import ModelConfigDict -from ludwig.utils.backward_compatibility import \ - upgrade_config_dict_to_latest_version +from ludwig.utils.backward_compatibility import upgrade_config_dict_to_latest_version from ludwig.utils.data_utils import get_sanitized_feature_name, load_yaml from ludwig.utils.registry import Registry diff --git a/ludwig/schema/model_types/ecd.py b/ludwig/schema/model_types/ecd.py index c1d07d1ccb1..967d12ae143 100644 --- a/ludwig/schema/model_types/ecd.py +++ b/ludwig/schema/model_types/ecd.py @@ -5,11 +5,13 @@ from ludwig.schema.combiners.base import BaseCombinerConfig from ludwig.schema.combiners.utils import CombinerSelection from ludwig.schema.defaults.ecd import ECDDefaultsConfig, ECDDefaultsField -from ludwig.schema.features.base import (BaseInputFeatureConfig, - BaseOutputFeatureConfig, - ECDInputFeatureSelection, - ECDOutputFeatureSelection, - FeatureCollection) +from ludwig.schema.features.base import ( + BaseInputFeatureConfig, + BaseOutputFeatureConfig, + ECDInputFeatureSelection, + ECDOutputFeatureSelection, + FeatureCollection, +) from ludwig.schema.hyperopt import HyperoptConfig, HyperoptField from ludwig.schema.model_types.base import ModelConfig, register_model_type from ludwig.schema.preprocessing import PreprocessingConfig, PreprocessingField diff --git a/ludwig/schema/model_types/gbm.py b/ludwig/schema/model_types/gbm.py index a19a8c0db4c..9fda9294d92 100644 --- a/ludwig/schema/model_types/gbm.py +++ b/ludwig/schema/model_types/gbm.py @@ -3,11 +3,13 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.schema import utils as schema_utils from ludwig.schema.defaults.gbm import GBMDefaultsConfig, GBMDefaultsField -from ludwig.schema.features.base import (BaseInputFeatureConfig, - BaseOutputFeatureConfig, - FeatureCollection, - GBMInputFeatureSelection, - GBMOutputFeatureSelection) +from ludwig.schema.features.base import ( + BaseInputFeatureConfig, + BaseOutputFeatureConfig, + FeatureCollection, + GBMInputFeatureSelection, + GBMOutputFeatureSelection, +) from ludwig.schema.hyperopt import HyperoptConfig, HyperoptField from ludwig.schema.model_types.base import ModelConfig, register_model_type from ludwig.schema.preprocessing import PreprocessingConfig, PreprocessingField diff --git a/ludwig/schema/model_types/llm.py b/ludwig/schema/model_types/llm.py index a56a35968a4..95ddb29bc69 100644 --- a/ludwig/schema/model_types/llm.py +++ b/ludwig/schema/model_types/llm.py @@ -3,21 +3,20 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.schema import utils as schema_utils from ludwig.schema.defaults.llm import LLMDefaultsConfig, LLMDefaultsField -from ludwig.schema.features.base import (BaseInputFeatureConfig, - BaseOutputFeatureConfig, - FeatureCollection, - LLMInputFeatureSelection, - LLMOutputFeatureSelection) +from ludwig.schema.features.base import ( + BaseInputFeatureConfig, + BaseOutputFeatureConfig, + FeatureCollection, + LLMInputFeatureSelection, + LLMOutputFeatureSelection, +) from ludwig.schema.hyperopt import HyperoptConfig, HyperoptField from ludwig.schema.llms.base_model import BaseModelDataclassField -from ludwig.schema.llms.generation import (LLMGenerationConfig, - LLMGenerationConfigField) -from ludwig.schema.llms.model_parameters import (ModelParametersConfig, - ModelParametersConfigField) +from ludwig.schema.llms.generation import LLMGenerationConfig, LLMGenerationConfigField +from ludwig.schema.llms.model_parameters import ModelParametersConfig, ModelParametersConfigField from ludwig.schema.llms.peft import AdapterDataclassField, BaseAdapterConfig from ludwig.schema.llms.prompt import PromptConfig, PromptConfigField -from ludwig.schema.llms.quantization import (QuantizationConfig, - QuantizationConfigField) +from ludwig.schema.llms.quantization import QuantizationConfig, QuantizationConfigField from ludwig.schema.model_types.base import ModelConfig, register_model_type from ludwig.schema.preprocessing import PreprocessingConfig, PreprocessingField from ludwig.schema.trainer import LLMTrainerConfig, LLMTrainerDataclassField diff --git a/ludwig/schema/model_types/utils.py b/ludwig/schema/model_types/utils.py index 067849955c2..d229214118f 100644 --- a/ludwig/schema/model_types/utils.py +++ b/ludwig/schema/model_types/utils.py @@ -2,16 +2,31 @@ import logging import sys import warnings -from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Set +from typing import Any, Dict, List, Mapping, Set, TYPE_CHECKING from marshmallow import ValidationError from transformers import AutoConfig from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (CATEGORY, COMBINED, DECODER, DEFAULTS, ENCODER, - GRID_SEARCH, INPUT_FEATURES, LOSS, MODEL_ECD, - MODEL_LLM, OUTPUT_FEATURES, PARAMETERS, - PREPROCESSING, SEQUENCE, SPACE, TEXT, TYPE) +from ludwig.constants import ( + CATEGORY, + COMBINED, + DECODER, + DEFAULTS, + ENCODER, + GRID_SEARCH, + INPUT_FEATURES, + LOSS, + MODEL_ECD, + MODEL_LLM, + OUTPUT_FEATURES, + PARAMETERS, + PREPROCESSING, + SEQUENCE, + SPACE, + TEXT, + TYPE, +) from ludwig.features.feature_utils import compute_feature_hash from ludwig.schema.features.utils import output_config_registry from ludwig.schema.hyperopt.scheduler import BaseHyperbandSchedulerConfig @@ -96,8 +111,7 @@ def set_validation_parameters(config: "ModelConfig"): else: # Determine the proper validation field for the user, like if the user specifies "accuracy" but forgets to # change the validation field from "combined" to the name of the feature that produces accuracy metrics. - from ludwig.utils.metric_utils import \ - get_feature_to_metric_names_map + from ludwig.utils.metric_utils import get_feature_to_metric_names_map feature_to_metric_names_map = get_feature_to_metric_names_map(config.output_features.to_list()) validation_field = None diff --git a/ludwig/schema/optimizers.py b/ludwig/schema/optimizers.py index 3b9a8e8d5be..b7d6d0a8268 100644 --- a/ludwig/schema/optimizers.py +++ b/ludwig/schema/optimizers.py @@ -4,13 +4,12 @@ import bitsandbytes as bnb import torch -from marshmallow import ValidationError, fields +from marshmallow import fields, ValidationError import ludwig.schema.utils as schema_utils from ludwig.api_annotations import DeveloperAPI from ludwig.schema.metadata import OPTIMIZER_METADATA -from ludwig.schema.metadata.parameter_metadata import ( - ParameterMetadata, convert_metadata_to_json) +from ludwig.schema.metadata.parameter_metadata import convert_metadata_to_json, ParameterMetadata from ludwig.schema.utils import ludwig_dataclass from ludwig.utils.registry import Registry diff --git a/ludwig/schema/profiler.py b/ludwig/schema/profiler.py index c41ffa7910f..b857df62439 100644 --- a/ludwig/schema/profiler.py +++ b/ludwig/schema/profiler.py @@ -1,7 +1,7 @@ from dataclasses import field from typing import Dict -from marshmallow import ValidationError, fields +from marshmallow import fields, ValidationError import ludwig.schema.utils as schema_utils from ludwig.api_annotations import DeveloperAPI diff --git a/ludwig/schema/trainer.py b/ludwig/schema/trainer.py index ff54a3e03d9..a164e86fed5 100644 --- a/ludwig/schema/trainer.py +++ b/ludwig/schema/trainer.py @@ -8,18 +8,27 @@ from packaging.version import parse as parse_version from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (AUTO, EFFECTIVE_BATCH_SIZE, LOSS, MAX_BATCH_SIZE, - MAX_POSSIBLE_BATCH_SIZE, MODEL_ECD, MODEL_GBM, - MODEL_LLM, TRAINING) +from ludwig.constants import ( + AUTO, + EFFECTIVE_BATCH_SIZE, + LOSS, + MAX_BATCH_SIZE, + MAX_POSSIBLE_BATCH_SIZE, + MODEL_ECD, + MODEL_GBM, + MODEL_LLM, + TRAINING, +) from ludwig.error import ConfigValidationError from ludwig.schema import utils as schema_utils -from ludwig.schema.lr_scheduler import (LRSchedulerConfig, - LRSchedulerDataclassField) +from ludwig.schema.lr_scheduler import LRSchedulerConfig, LRSchedulerDataclassField from ludwig.schema.metadata import TRAINER_METADATA -from ludwig.schema.optimizers import (BaseOptimizerConfig, - GradientClippingConfig, - GradientClippingDataclassField, - OptimizerDataclassField) +from ludwig.schema.optimizers import ( + BaseOptimizerConfig, + GradientClippingConfig, + GradientClippingDataclassField, + OptimizerDataclassField, +) from ludwig.schema.profiler import ProfilerConfig, ProfilerDataclassField from ludwig.schema.utils import ludwig_dataclass from ludwig.utils.registry import Registry @@ -461,8 +470,7 @@ def __post_init__(self): ) def update_batch_size_grad_accum(self, num_workers: int): - from ludwig.utils.trainer_utils import \ - get_rendered_batch_size_grad_accum + from ludwig.utils.trainer_utils import get_rendered_batch_size_grad_accum self.batch_size, self.gradient_accumulation_steps = get_rendered_batch_size_grad_accum(self, num_workers) diff --git a/ludwig/schema/utils.py b/ludwig/schema/utils.py index 729ce555dad..cd049ea44ba 100644 --- a/ludwig/schema/utils.py +++ b/ludwig/schema/utils.py @@ -11,19 +11,16 @@ import marshmallow_dataclass import yaml -from marshmallow import (EXCLUDE, ValidationError, fields, pre_load, schema, - validate) +from marshmallow import EXCLUDE, fields, pre_load, schema, validate, ValidationError from marshmallow.utils import missing from marshmallow_dataclass import dataclass as m_dataclass from marshmallow_jsonschema import JSONSchema as js from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (ACTIVE, COLUMN, LUDWIG_SCHEMA_VALIDATION_POLICY, - NAME, PROC_COLUMN, TYPE) +from ludwig.constants import ACTIVE, COLUMN, LUDWIG_SCHEMA_VALIDATION_POLICY, NAME, PROC_COLUMN, TYPE from ludwig.modules.reduction_modules import reduce_mode_registry from ludwig.schema.metadata import COMMON_METADATA -from ludwig.schema.metadata.parameter_metadata import ( - ParameterMetadata, convert_metadata_to_json) +from ludwig.schema.metadata.parameter_metadata import convert_metadata_to_json, ParameterMetadata from ludwig.utils.misc_utils import scrub_creds from ludwig.utils.registry import Registry from ludwig.utils.torch_utils import activations, initializer_registry @@ -56,8 +53,7 @@ def load_trainer_with_kwargs( otherwise passes all other parameters through without change. """ from ludwig.constants import MODEL_ECD, MODEL_GBM, MODEL_LLM - from ludwig.schema.trainer import (ECDTrainerConfig, GBMTrainerConfig, - LLMTrainerConfig) + from ludwig.schema.trainer import ECDTrainerConfig, GBMTrainerConfig, LLMTrainerConfig # TODO: use registry pattern for trainers if model_type == MODEL_ECD: diff --git a/ludwig/train.py b/ludwig/train.py index 424d4e23cab..37e6bb6f618 100644 --- a/ludwig/train.py +++ b/ludwig/train.py @@ -28,8 +28,7 @@ from ludwig.globals import LUDWIG_VERSION from ludwig.utils.data_utils import load_config_from_str, load_yaml from ludwig.utils.defaults import default_random_seed -from ludwig.utils.print_utils import (get_logging_level_registry, print_ludwig, - query_yes_no) +from ludwig.utils.print_utils import get_logging_level_registry, print_ludwig, query_yes_no logger = logging.getLogger(__name__) diff --git a/ludwig/trainers/trainer.py b/ludwig/trainers/trainer.py index 0ced212ee53..74bbdd5885b 100644 --- a/ludwig/trainers/trainer.py +++ b/ludwig/trainers/trainer.py @@ -34,21 +34,32 @@ import torch from torch.utils.tensorboard import SummaryWriter -from ludwig.constants import (AUTO, LOSS, MAX_CPU_BATCH_SIZE, MINIMIZE, - MODEL_ECD, MODEL_LLM, TEST, TRAINING, - USED_TOKENS, VALIDATION) +from ludwig.constants import ( + AUTO, + LOSS, + MAX_CPU_BATCH_SIZE, + MINIMIZE, + MODEL_ECD, + MODEL_LLM, + TEST, + TRAINING, + USED_TOKENS, + VALIDATION, +) from ludwig.data.dataset.base import Dataset from ludwig.distributed.base import DistributedStrategy, LocalStrategy -from ludwig.globals import (MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, - TRAINING_CHECKPOINTS_DIR_PATH, - TRAINING_PROGRESS_TRACKER_FILE_NAME, - is_progressbar_disabled) +from ludwig.globals import ( + is_progressbar_disabled, + MODEL_FILE_NAME, + MODEL_HYPERPARAMETERS_FILE_NAME, + TRAINING_CHECKPOINTS_DIR_PATH, + TRAINING_PROGRESS_TRACKER_FILE_NAME, +) from ludwig.models.ecd import ECD from ludwig.models.llm import LLM from ludwig.models.predictor import Predictor from ludwig.modules.lr_scheduler import LRScheduler -from ludwig.modules.metric_modules import (get_improved_fn, - get_initial_validation_value) +from ludwig.modules.metric_modules import get_improved_fn, get_initial_validation_value from ludwig.modules.metric_registry import get_metric_objective from ludwig.modules.optimization_modules import create_clipper from ludwig.progress_bar import LudwigProgressBar @@ -64,18 +75,21 @@ from ludwig.utils.defaults import default_random_seed from ludwig.utils.fs_utils import path_exists from ludwig.utils.llm_utils import update_embedding_layer -from ludwig.utils.metric_utils import TrainerMetric, get_metric_names +from ludwig.utils.metric_utils import get_metric_names, TrainerMetric from ludwig.utils.metrics_printed_table import print_metrics_table from ludwig.utils.misc_utils import set_random_seed from ludwig.utils.model_utils import contains_nan_or_inf_tensors from ludwig.utils.torch_utils import get_torch_device -from ludwig.utils.trainer_utils import (ProgressTracker, append_metrics, - freeze_layers_regex, - get_final_steps_per_checkpoint, - get_latest_metrics_dict, - get_new_progress_tracker, - get_total_expected_checkpoints, - get_total_steps) +from ludwig.utils.trainer_utils import ( + append_metrics, + freeze_layers_regex, + get_final_steps_per_checkpoint, + get_latest_metrics_dict, + get_new_progress_tracker, + get_total_expected_checkpoints, + get_total_steps, + ProgressTracker, +) logger = logging.getLogger(__name__) diff --git a/ludwig/trainers/trainer_lightgbm.py b/ludwig/trainers/trainer_lightgbm.py index 9a1642a13c5..d15982d71a9 100644 --- a/ludwig/trainers/trainer_lightgbm.py +++ b/ludwig/trainers/trainer_lightgbm.py @@ -11,18 +11,19 @@ import torch from torch.utils.tensorboard import SummaryWriter -from ludwig.constants import (BINARY, CATEGORY, MINIMIZE, MODEL_GBM, NUMBER, - TEST, TRAINING, VALIDATION) +from ludwig.constants import BINARY, CATEGORY, MINIMIZE, MODEL_GBM, NUMBER, TEST, TRAINING, VALIDATION from ludwig.distributed import init_dist_strategy from ludwig.distributed.base import DistributedStrategy, LocalStrategy from ludwig.features.feature_utils import LudwigFeatureDict -from ludwig.globals import (MODEL_FILE_NAME, TRAINING_CHECKPOINTS_DIR_PATH, - TRAINING_PROGRESS_TRACKER_FILE_NAME, - is_progressbar_disabled) +from ludwig.globals import ( + is_progressbar_disabled, + MODEL_FILE_NAME, + TRAINING_CHECKPOINTS_DIR_PATH, + TRAINING_PROGRESS_TRACKER_FILE_NAME, +) from ludwig.models.gbm import GBM from ludwig.models.predictor import Predictor -from ludwig.modules.metric_modules import (get_improved_fn, - get_initial_validation_value) +from ludwig.modules.metric_modules import get_improved_fn, get_initial_validation_value from ludwig.modules.metric_registry import get_metric_objective from ludwig.progress_bar import LudwigProgressBar from ludwig.schema.trainer import BaseTrainerConfig, GBMTrainerConfig @@ -32,17 +33,25 @@ from ludwig.utils import time_utils from ludwig.utils.checkpoint_utils import CheckpointManager from ludwig.utils.defaults import default_random_seed -from ludwig.utils.gbm_utils import (TrainLogits, get_single_output_feature, - get_targets, log_loss_objective, - logits_to_predictions, - multiclass_objective, store_predictions, - store_predictions_ray) -from ludwig.utils.metric_utils import TrainerMetric, get_metric_names +from ludwig.utils.gbm_utils import ( + get_single_output_feature, + get_targets, + log_loss_objective, + logits_to_predictions, + multiclass_objective, + store_predictions, + store_predictions_ray, + TrainLogits, +) +from ludwig.utils.metric_utils import get_metric_names, TrainerMetric from ludwig.utils.metrics_printed_table import print_metrics_table from ludwig.utils.misc_utils import set_random_seed -from ludwig.utils.trainer_utils import (ProgressTracker, append_metrics, - get_latest_metrics_dict, - get_new_progress_tracker) +from ludwig.utils.trainer_utils import ( + append_metrics, + get_latest_metrics_dict, + get_new_progress_tracker, + ProgressTracker, +) try: import ray diff --git a/ludwig/trainers/trainer_llm.py b/ludwig/trainers/trainer_llm.py index 44ed92f59b8..727257c1375 100644 --- a/ludwig/trainers/trainer_llm.py +++ b/ludwig/trainers/trainer_llm.py @@ -13,24 +13,23 @@ from ludwig.models.llm import LLM from ludwig.models.predictor import LlmFineTunePredictor, LlmPredictor from ludwig.modules.metric_modules import get_initial_validation_value -from ludwig.schema.trainer import (BaseTrainerConfig, FineTuneTrainerConfig, - NoneTrainerConfig) +from ludwig.schema.trainer import BaseTrainerConfig, FineTuneTrainerConfig, NoneTrainerConfig from ludwig.trainers.base import BaseTrainer -from ludwig.trainers.registry import (register_llm_ray_trainer, - register_llm_trainer) +from ludwig.trainers.registry import register_llm_ray_trainer, register_llm_trainer from ludwig.trainers.trainer import Trainer from ludwig.types import ModelConfigDict from ludwig.utils import time_utils from ludwig.utils.batch_size_tuner import ( - BatchSizeEvaluator, LLMFinetunePredictBatchSizeEvaluator, - LLMFinetuneTrainerBatchSizeEvaluator) + BatchSizeEvaluator, + LLMFinetunePredictBatchSizeEvaluator, + LLMFinetuneTrainerBatchSizeEvaluator, +) from ludwig.utils.defaults import default_random_seed from ludwig.utils.metric_utils import TrainerMetric from ludwig.utils.metrics_printed_table import print_metrics_table from ludwig.utils.misc_utils import set_random_seed from ludwig.utils.torch_utils import get_torch_device -from ludwig.utils.trainer_utils import (ProgressTracker, append_metrics, - get_new_progress_tracker) +from ludwig.utils.trainer_utils import append_metrics, get_new_progress_tracker, ProgressTracker logger = logging.getLogger(__name__) diff --git a/ludwig/upload.py b/ludwig/upload.py index c4e323ff0a3..acb325046f0 100644 --- a/ludwig/upload.py +++ b/ludwig/upload.py @@ -4,8 +4,7 @@ import sys from typing import Optional -from ludwig.globals import (MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, - MODEL_WEIGHTS_FILE_NAME) +from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, MODEL_WEIGHTS_FILE_NAME from ludwig.utils.print_utils import get_logging_level_registry from ludwig.utils.upload_utils import HuggingFaceHub, Predibase diff --git a/ludwig/utils/automl/field_info.py b/ludwig/utils/automl/field_info.py index 633076a4a15..bab4518be43 100644 --- a/ludwig/utils/automl/field_info.py +++ b/ludwig/utils/automl/field_info.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from typing import List -from dataclasses_json import LetterCase, dataclass_json +from dataclasses_json import dataclass_json, LetterCase from ludwig.api_annotations import DeveloperAPI diff --git a/ludwig/utils/automl/utils.py b/ludwig/utils/automl/utils.py index 0c2b6502daf..30c57511538 100644 --- a/ludwig/utils/automl/utils.py +++ b/ludwig/utils/automl/utils.py @@ -6,9 +6,20 @@ from pandas import Series from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (BINARY, CATEGORY, COMBINER, CONFIG, HYPEROPT, - IMBALANCE_DETECTION_RATIO, NAME, NUMBER, - PARAMETERS, SEARCH_ALG, TRAINER, TYPE) +from ludwig.constants import ( + BINARY, + CATEGORY, + COMBINER, + CONFIG, + HYPEROPT, + IMBALANCE_DETECTION_RATIO, + NAME, + NUMBER, + PARAMETERS, + SEARCH_ALG, + TRAINER, + TYPE, +) from ludwig.features.feature_registries import get_output_type_registry from ludwig.modules.metric_registry import get_metric_objective from ludwig.schema.combiners.utils import get_combiner_jsonschema diff --git a/ludwig/utils/backward_compatibility.py b/ludwig/utils/backward_compatibility.py index 52e0e942499..d52fc343500 100644 --- a/ludwig/utils/backward_compatibility.py +++ b/ludwig/utils/backward_compatibility.py @@ -19,32 +19,70 @@ from typing import Any, Callable, Dict, List, Union from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (AUDIO, BIAS, CLASS_WEIGHTS, COLUMN, CONV_BIAS, - CONV_USE_BIAS, DECODER, DEFAULT_BIAS, - DEFAULT_USE_BIAS, DEFAULTS, ENCODER, - EVAL_BATCH_SIZE, EXECUTOR, FORCE_SPLIT, HEIGHT, - HYPEROPT, IMAGE, INPUT_FEATURES, LOSS, - MISSING_VALUE_STRATEGY, MODEL_ECD, MODEL_GBM, - MODEL_TYPE, NAME, NUM_SAMPLES, NUMBER, - OUTPUT_FEATURES, PARAMETERS, PREPROCESSING, - PROBABILITIES, RANDOM, RAY, SAMPLER, SCHEDULER, - SEARCH_ALG, SEQUENCE, SPLIT, SPLIT_PROBABILITIES, - STRATIFY, TEXT, TIMESERIES, TRAINER, TRAINING, - TYPE, USE_BIAS, WIDTH) -from ludwig.features.feature_registries import (get_base_type_registry, - get_input_type_registry, - get_output_type_registry) +from ludwig.constants import ( + AUDIO, + BIAS, + CLASS_WEIGHTS, + COLUMN, + CONV_BIAS, + CONV_USE_BIAS, + DECODER, + DEFAULT_BIAS, + DEFAULT_USE_BIAS, + DEFAULTS, + ENCODER, + EVAL_BATCH_SIZE, + EXECUTOR, + FORCE_SPLIT, + HEIGHT, + HYPEROPT, + IMAGE, + INPUT_FEATURES, + LOSS, + MISSING_VALUE_STRATEGY, + MODEL_ECD, + MODEL_GBM, + MODEL_TYPE, + NAME, + NUM_SAMPLES, + NUMBER, + OUTPUT_FEATURES, + PARAMETERS, + PREPROCESSING, + PROBABILITIES, + RANDOM, + RAY, + SAMPLER, + SCHEDULER, + SEARCH_ALG, + SEQUENCE, + SPLIT, + SPLIT_PROBABILITIES, + STRATIFY, + TEXT, + TIMESERIES, + TRAINER, + TRAINING, + TYPE, + USE_BIAS, + WIDTH, +) +from ludwig.features.feature_registries import get_base_type_registry, get_input_type_registry, get_output_type_registry from ludwig.globals import LUDWIG_VERSION from ludwig.schema.defaults.gbm import GBMDefaultsConfig from ludwig.schema.encoders.utils import get_encoder_cls -from ludwig.types import (FeatureConfigDict, FeatureTypeDefaultsDict, - HyperoptConfigDict, ModelConfigDict, - PreprocessingConfigDict, TrainerConfigDict, - TrainingSetMetadataDict) +from ludwig.types import ( + FeatureConfigDict, + FeatureTypeDefaultsDict, + HyperoptConfigDict, + ModelConfigDict, + PreprocessingConfigDict, + TrainerConfigDict, + TrainingSetMetadataDict, +) from ludwig.utils.metric_utils import TrainerMetric from ludwig.utils.misc_utils import get_from_registry, merge_dict -from ludwig.utils.version_transformation import (VersionTransformation, - VersionTransformationRegistry) +from ludwig.utils.version_transformation import VersionTransformation, VersionTransformationRegistry config_transformation_registry = VersionTransformationRegistry() diff --git a/ludwig/utils/batch_size_tuner.py b/ludwig/utils/batch_size_tuner.py index 033310f9218..0e9568a850d 100644 --- a/ludwig/utils/batch_size_tuner.py +++ b/ludwig/utils/batch_size_tuner.py @@ -8,8 +8,7 @@ import torch from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (MAX_BATCH_SIZE_DATASET_FRACTION, - MIN_POSSIBLE_BATCH_SIZE) +from ludwig.constants import MAX_BATCH_SIZE_DATASET_FRACTION, MIN_POSSIBLE_BATCH_SIZE logger = logging.getLogger(__name__) diff --git a/ludwig/utils/checkpoint_utils.py b/ludwig/utils/checkpoint_utils.py index 0953538c03a..d117a053443 100644 --- a/ludwig/utils/checkpoint_utils.py +++ b/ludwig/utils/checkpoint_utils.py @@ -13,7 +13,7 @@ import uuid from abc import ABC, abstractmethod from glob import glob -from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional, Tuple +from typing import Any, Dict, Mapping, Optional, Tuple, TYPE_CHECKING import torch from torch.optim import Optimizer diff --git a/ludwig/utils/config_utils.py b/ludwig/utils/config_utils.py index bcd2d643b25..6c4970c255e 100644 --- a/ludwig/utils/config_utils.py +++ b/ludwig/utils/config_utils.py @@ -1,13 +1,23 @@ from typing import Any, Dict, List, Set, Union from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (DECODER, ENCODER, IMAGE, INPUT_FEATURES, - MODEL_ECD, MODEL_LLM, MODEL_TYPE, PREPROCESSING, - SEQUENCE, TEXT, TIMESERIES, TYPE) +from ludwig.constants import ( + DECODER, + ENCODER, + IMAGE, + INPUT_FEATURES, + MODEL_ECD, + MODEL_LLM, + MODEL_TYPE, + PREPROCESSING, + SEQUENCE, + TEXT, + TIMESERIES, + TYPE, +) from ludwig.features.feature_registries import get_input_type_registry from ludwig.schema.model_config import ModelConfig -from ludwig.types import (FeatureConfigDict, FeatureTypeDefaultsDict, - PreprocessingConfigDict) +from ludwig.types import FeatureConfigDict, FeatureTypeDefaultsDict, PreprocessingConfigDict @DeveloperAPI diff --git a/ludwig/utils/data_utils.py b/ludwig/utils/data_utils.py index efdd9d1720c..b06f753a174 100644 --- a/ludwig/utils/data_utils.py +++ b/ludwig/utils/data_utils.py @@ -43,13 +43,9 @@ from ludwig.api_annotations import DeveloperAPI from ludwig.constants import PREPROCESSING, SPLIT from ludwig.data.cache.types import CacheableDataset -from ludwig.globals import (MODEL_HYPERPARAMETERS_FILE_NAME, - MODEL_WEIGHTS_FILE_NAME, - TRAIN_SET_METADATA_FILE_NAME) -from ludwig.utils.dataframe_utils import (from_numpy_dataset, is_dask_lib, - to_numpy_dataset) -from ludwig.utils.fs_utils import (download_h5, has_remote_protocol, open_file, - upload_h5) +from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME, MODEL_WEIGHTS_FILE_NAME, TRAIN_SET_METADATA_FILE_NAME +from ludwig.utils.dataframe_utils import from_numpy_dataset, is_dask_lib, to_numpy_dataset +from ludwig.utils.fs_utils import download_h5, has_remote_protocol, open_file, upload_h5 from ludwig.utils.math_utils import cumsum from ludwig.utils.misc_utils import get_from_registry from ludwig.utils.types import DataFrame diff --git a/ludwig/utils/date_utils.py b/ludwig/utils/date_utils.py index 932ccaf85ce..3ab9babf9c2 100644 --- a/ludwig/utils/date_utils.py +++ b/ludwig/utils/date_utils.py @@ -18,7 +18,7 @@ from typing import Union import numpy as np -from dateutil.parser import ParserError, parse +from dateutil.parser import parse, ParserError from ludwig.api_annotations import DeveloperAPI diff --git a/ludwig/utils/defaults.py b/ludwig/utils/defaults.py index 53239a9bed0..a623011a991 100644 --- a/ludwig/utils/defaults.py +++ b/ludwig/utils/defaults.py @@ -25,8 +25,7 @@ from ludwig.globals import LUDWIG_VERSION from ludwig.schema.model_config import ModelConfig from ludwig.schema.preprocessing import PreprocessingConfig -from ludwig.utils.backward_compatibility import \ - upgrade_config_dict_to_latest_version +from ludwig.utils.backward_compatibility import upgrade_config_dict_to_latest_version from ludwig.utils.data_utils import load_config_from_str, load_yaml from ludwig.utils.fs_utils import open_file from ludwig.utils.print_utils import print_ludwig diff --git a/ludwig/utils/entmax/__init__.py b/ludwig/utils/entmax/__init__.py index 2730034a0c1..882b5938146 100644 --- a/ludwig/utils/entmax/__init__.py +++ b/ludwig/utils/entmax/__init__.py @@ -1,13 +1,17 @@ __version__ = "1.1.dev0" -from ludwig.utils.entmax.activations import (Entmax15, Sparsemax, entmax15, - sparsemax) -from ludwig.utils.entmax.losses import (Entmax15Loss, EntmaxBisectLoss, - SparsemaxBisectLoss, SparsemaxLoss, - entmax15_loss, entmax_bisect_loss, - sparsemax_bisect_loss, sparsemax_loss) -from ludwig.utils.entmax.root_finding import (EntmaxBisect, SparsemaxBisect, - entmax_bisect, sparsemax_bisect) +from ludwig.utils.entmax.activations import Entmax15, entmax15, Sparsemax, sparsemax +from ludwig.utils.entmax.losses import ( + entmax15_loss, + Entmax15Loss, + entmax_bisect_loss, + EntmaxBisectLoss, + sparsemax_bisect_loss, + sparsemax_loss, + SparsemaxBisectLoss, + SparsemaxLoss, +) +from ludwig.utils.entmax.root_finding import entmax_bisect, EntmaxBisect, sparsemax_bisect, SparsemaxBisect __all__ = [ "entmax15", diff --git a/ludwig/utils/heuristics.py b/ludwig/utils/heuristics.py index a7b525395a0..dd44a10e4b4 100644 --- a/ludwig/utils/heuristics.py +++ b/ludwig/utils/heuristics.py @@ -1,7 +1,5 @@ from ludwig.schema.model_config import ModelConfig -from ludwig.utils.config_utils import (has_pretrained_encoder, - has_trainable_encoder, - has_unstructured_input_feature) +from ludwig.utils.config_utils import has_pretrained_encoder, has_trainable_encoder, has_unstructured_input_feature def get_auto_learning_rate(config: ModelConfig) -> float: diff --git a/ludwig/utils/image_utils.py b/ludwig/utils/image_utils.py index fd5a3259759..a2fae951777 100644 --- a/ludwig/utils/image_utils.py +++ b/ludwig/utils/image_utils.py @@ -24,7 +24,7 @@ import tifffile import torch import torchvision.transforms.functional as F -from torchvision.io import ImageReadMode, decode_image +from torchvision.io import decode_image, ImageReadMode from torchvision.models._api import WeightsEnum from ludwig.api_annotations import DeveloperAPI diff --git a/ludwig/utils/inference_utils.py b/ludwig/utils/inference_utils.py index d471d78501b..ae5d966488b 100644 --- a/ludwig/utils/inference_utils.py +++ b/ludwig/utils/inference_utils.py @@ -4,10 +4,25 @@ import pandas as pd import torch -from ludwig.constants import (AUDIO, BAG, BINARY, CATEGORY, COLUMN, DATE, - IMAGE, NAME, POSTPROCESSOR, PREDICTOR, - PREPROCESSOR, SEQUENCE, SET, TEXT, TIMESERIES, - TYPE, VECTOR) +from ludwig.constants import ( + AUDIO, + BAG, + BINARY, + CATEGORY, + COLUMN, + DATE, + IMAGE, + NAME, + POSTPROCESSOR, + PREDICTOR, + PREPROCESSOR, + SEQUENCE, + SET, + TEXT, + TIMESERIES, + TYPE, + VECTOR, +) from ludwig.types import FeatureConfigDict, ModelConfigDict from ludwig.utils.audio_utils import read_audio_from_path from ludwig.utils.date_utils import create_vector_from_datetime_obj diff --git a/ludwig/utils/llm_utils.py b/ludwig/utils/llm_utils.py index 4875e82ded8..a29ef380868 100644 --- a/ludwig/utils/llm_utils.py +++ b/ludwig/utils/llm_utils.py @@ -1,18 +1,16 @@ import copy import logging import tempfile -from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union +from typing import Dict, Optional, Tuple, TYPE_CHECKING, Union import torch import torch.nn.functional as F import transformers from bitsandbytes.nn.modules import Embedding from packaging import version -from transformers import (AutoConfig, AutoModelForCausalLM, PreTrainedModel, - PreTrainedTokenizer, TextStreamer) +from transformers import AutoConfig, AutoModelForCausalLM, PreTrainedModel, PreTrainedTokenizer, TextStreamer -from ludwig.constants import (IGNORE_INDEX_TOKEN_ID, LOGITS, PREDICTIONS, - PROBABILITIES) +from ludwig.constants import IGNORE_INDEX_TOKEN_ID, LOGITS, PREDICTIONS, PROBABILITIES from ludwig.schema.trainer import LLMTrainerConfig from ludwig.utils.error_handling_utils import default_retry from ludwig.utils.logging_utils import log_once @@ -160,8 +158,8 @@ def initialize_adapter( logger.info(f"Using pretrained adapter weights: {config_obj.adapter.pretrained_adapter_weights}") # Leave this import inline to support a minimal install of Ludwig - from peft import (MODEL_TYPE_TO_PEFT_MODEL_MAPPING, # noqa - PeftConfig) + from peft import MODEL_TYPE_TO_PEFT_MODEL_MAPPING # noqa + from peft import PeftConfig peft_config = PeftConfig.from_pretrained(config_obj.adapter.pretrained_adapter_weights) @@ -170,7 +168,7 @@ def initialize_adapter( ) else: # Leave this import inline to support a minimal install of Ludwig - from peft import TaskType, get_peft_model # noqa + from peft import get_peft_model, TaskType # noqa # If no pretrained adapter is provided, we want to load untrained weights into the model peft_config = config_obj.adapter.to_config( diff --git a/ludwig/utils/misc_utils.py b/ludwig/utils/misc_utils.py index 907e9f3c0d5..6949d1bdf1a 100644 --- a/ludwig/utils/misc_utils.py +++ b/ludwig/utils/misc_utils.py @@ -21,7 +21,7 @@ import weakref from collections import OrderedDict from collections.abc import Mapping -from typing import TYPE_CHECKING, Any, Dict +from typing import Any, Dict, TYPE_CHECKING import numpy import torch diff --git a/ludwig/utils/neuropod_utils.py b/ludwig/utils/neuropod_utils.py index e0e58613f07..e3e3f504338 100644 --- a/ludwig/utils/neuropod_utils.py +++ b/ludwig/utils/neuropod_utils.py @@ -107,8 +107,7 @@ def _get_output_spec(model: LudwigModel) -> List[Dict[str, Any]]: @DeveloperAPI def export_neuropod(model: LudwigModel, neuropod_path: str, neuropod_model_name="ludwig_model"): try: - from neuropod.backends.torchscript.packager import \ - create_torchscript_neuropod + from neuropod.backends.torchscript.packager import create_torchscript_neuropod except ImportError: raise RuntimeError('The "neuropod" package is not installed in your environment.') diff --git a/ludwig/utils/strings_utils.py b/ludwig/utils/strings_utils.py index eb6ce7791a8..16aaa9a7252 100644 --- a/ludwig/utils/strings_utils.py +++ b/ludwig/utils/strings_utils.py @@ -24,8 +24,7 @@ import numpy as np from dateutil.parser import parse as parse_datetime -from ludwig.constants import (PADDING_SYMBOL, START_SYMBOL, STOP_SYMBOL, - UNKNOWN_SYMBOL) +from ludwig.constants import PADDING_SYMBOL, START_SYMBOL, STOP_SYMBOL, UNKNOWN_SYMBOL from ludwig.data.dataframe.base import DataFrameEngine from ludwig.data.dataframe.pandas import PANDAS from ludwig.utils.fs_utils import open_file diff --git a/ludwig/utils/tokenizers.py b/ludwig/utils/tokenizers.py index 7d53572316c..99cde68d51a 100644 --- a/ludwig/utils/tokenizers.py +++ b/ludwig/utils/tokenizers.py @@ -855,11 +855,17 @@ def _set_pad_token(self) -> None: # CodeGenTokenizer Used by Phi-2 # GPTNeoXTokenizerFast Used by Pythia - from transformers import (CodeGenTokenizer, CodeGenTokenizerFast, - CodeLlamaTokenizer, CodeLlamaTokenizerFast, - GPT2Tokenizer, GPT2TokenizerFast, - GPTNeoXTokenizerFast, LlamaTokenizer, - LlamaTokenizerFast) + from transformers import ( + CodeGenTokenizer, + CodeGenTokenizerFast, + CodeLlamaTokenizer, + CodeLlamaTokenizerFast, + GPT2Tokenizer, + GPT2TokenizerFast, + GPTNeoXTokenizerFast, + LlamaTokenizer, + LlamaTokenizerFast, + ) # Tokenizers might have the pad token id attribute since they tend to use the same base class, but # it can be set to None so we check for this explicitly. diff --git a/ludwig/utils/trainer_utils.py b/ludwig/utils/trainer_utils.py index 9a505023524..8a9fd779d4f 100644 --- a/ludwig/utils/trainer_utils.py +++ b/ludwig/utils/trainer_utils.py @@ -1,7 +1,7 @@ import logging import re from collections import defaultdict -from typing import TYPE_CHECKING, Dict, List, Tuple, Union +from typing import Dict, List, Tuple, TYPE_CHECKING, Union try: from typing import Literal diff --git a/ludwig/utils/triton_utils.py b/ludwig/utils/triton_utils.py index 07178542038..3d81cdb1069 100644 --- a/ludwig/utils/triton_utils.py +++ b/ludwig/utils/triton_utils.py @@ -11,17 +11,33 @@ from ludwig.api import LudwigModel from ludwig.api_annotations import DeveloperAPI -from ludwig.constants import (AUDIO, BAG, BINARY, CATEGORY, DATE, IMAGE, - INPUT_FEATURES, POSTPROCESSOR, PREDICTOR, - PREPROCESSOR, SEQUENCE, SET, TEXT, TIMESERIES, - TYPE, VECTOR) +from ludwig.constants import ( + AUDIO, + BAG, + BINARY, + CATEGORY, + DATE, + IMAGE, + INPUT_FEATURES, + POSTPROCESSOR, + PREDICTOR, + PREPROCESSOR, + SEQUENCE, + SET, + TEXT, + TIMESERIES, + TYPE, + VECTOR, +) from ludwig.data.dataset_synthesizer import build_synthetic_dataset -from ludwig.models.inference import (InferenceModule, _InferencePostprocessor, - _InferencePredictor, - _InferencePreprocessor) +from ludwig.models.inference import ( + _InferencePostprocessor, + _InferencePredictor, + _InferencePreprocessor, + InferenceModule, +) from ludwig.types import ModelConfigDict -from ludwig.utils.inference_utils import \ - to_inference_module_input_from_dataframe +from ludwig.utils.inference_utils import to_inference_module_input_from_dataframe from ludwig.utils.misc_utils import remove_empty_lines from ludwig.utils.torch_utils import model_size, place_on_device from ludwig.utils.types import TorchAudioTuple, TorchscriptPreprocessingInput diff --git a/ludwig/utils/upload_utils.py b/ludwig/utils/upload_utils.py index 7991ba8982a..51a0fb87efd 100644 --- a/ludwig/utils/upload_utils.py +++ b/ludwig/utils/upload_utils.py @@ -7,8 +7,7 @@ from huggingface_hub import HfApi, login from huggingface_hub.hf_api import CommitInfo -from ludwig.globals import (MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, - MODEL_WEIGHTS_FILE_NAME) +from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, MODEL_WEIGHTS_FILE_NAME logger = logging.getLogger(__name__) diff --git a/ludwig/visualize.py b/ludwig/visualize.py index 1bbc5aff346..feb97897072 100644 --- a/ludwig/visualize.py +++ b/ludwig/visualize.py @@ -33,14 +33,18 @@ from ludwig.api_annotations import DeveloperAPI, PublicAPI from ludwig.backend import LOCAL_BACKEND from ludwig.callbacks import Callback -from ludwig.constants import (ACCURACY, EDIT_DISTANCE, HITS_AT_K, LOSS, - PREDICTIONS, SPACE, SPLIT) +from ludwig.constants import ACCURACY, EDIT_DISTANCE, HITS_AT_K, LOSS, PREDICTIONS, SPACE, SPLIT from ludwig.contrib import add_contrib_callback_args from ludwig.utils import visualization_utils -from ludwig.utils.data_utils import (CACHEABLE_FORMATS, data_reader_registry, - figure_data_format_dataset, load_array, - load_from_file, load_json, - replace_file_extension) +from ludwig.utils.data_utils import ( + CACHEABLE_FORMATS, + data_reader_registry, + figure_data_format_dataset, + load_array, + load_from_file, + load_json, + replace_file_extension, +) from ludwig.utils.dataframe_utils import to_numpy_dataset, unflatten_df from ludwig.utils.fs_utils import path_exists from ludwig.utils.misc_utils import get_from_registry diff --git a/tests/conftest.py b/tests/conftest.py index f7642dd5d06..9dae92e2e65 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,12 +21,19 @@ import pytest -from ludwig.constants import (BATCH_SIZE, COMBINER, EPOCHS, HYPEROPT, - INPUT_FEATURES, NAME, OUTPUT_FEATURES, TRAINER, - TYPE) +from ludwig.constants import ( + BATCH_SIZE, + COMBINER, + EPOCHS, + HYPEROPT, + INPUT_FEATURES, + NAME, + OUTPUT_FEATURES, + TRAINER, + TYPE, +) from ludwig.hyperopt.run import hyperopt -from tests.integration_tests.utils import (category_feature, generate_data, - text_feature) +from tests.integration_tests.utils import category_feature, generate_data, text_feature TEST_SUITE_TIMEOUT_S = int(os.environ.get("LUDWIG_TEST_SUITE_TIMEOUT_S", 3600)) diff --git a/tests/integration_tests/scripts/run_train_aim.py b/tests/integration_tests/scripts/run_train_aim.py index a67abbe9dbe..659f5119347 100644 --- a/tests/integration_tests/scripts/run_train_aim.py +++ b/tests/integration_tests/scripts/run_train_aim.py @@ -8,8 +8,7 @@ import aim # noqa from ludwig.contribs.aim import AimCallback -from tests.integration_tests.utils import (category_feature, generate_data, - image_feature, run_experiment) +from tests.integration_tests.utils import category_feature, generate_data, image_feature, run_experiment PATH_HERE = os.path.abspath(os.path.dirname(__file__)) PATH_ROOT = os.path.join(PATH_HERE, "..", "..", "..") diff --git a/tests/integration_tests/scripts/run_train_comet.py b/tests/integration_tests/scripts/run_train_comet.py index 8beb71774c0..2f4b2b73463 100644 --- a/tests/integration_tests/scripts/run_train_comet.py +++ b/tests/integration_tests/scripts/run_train_comet.py @@ -28,8 +28,8 @@ PATH_ROOT = os.path.join(PATH_HERE, "..", "..", "..") sys.path.insert(0, os.path.abspath(PATH_ROOT)) -from tests.integration_tests.utils import (category_feature, # noqa - generate_data, image_feature) +from tests.integration_tests.utils import category_feature # noqa +from tests.integration_tests.utils import generate_data, image_feature parser = argparse.ArgumentParser() parser.add_argument("--csv-filename", required=True) diff --git a/tests/integration_tests/scripts/run_train_wandb.py b/tests/integration_tests/scripts/run_train_wandb.py index e386eb2feda..116624114db 100644 --- a/tests/integration_tests/scripts/run_train_wandb.py +++ b/tests/integration_tests/scripts/run_train_wandb.py @@ -18,9 +18,8 @@ PATH_ROOT = os.path.join(PATH_HERE, "..", "..", "..") sys.path.insert(0, os.path.abspath(PATH_ROOT)) -from tests.integration_tests.utils import (category_feature, # noqa - generate_data, image_feature, - run_experiment) +from tests.integration_tests.utils import category_feature # noqa +from tests.integration_tests.utils import generate_data, image_feature, run_experiment parser = argparse.ArgumentParser() parser.add_argument("--csv-filename", required=True) diff --git a/tests/integration_tests/test_api.py b/tests/integration_tests/test_api.py index 01994119e42..21e81f08202 100644 --- a/tests/integration_tests/test_api.py +++ b/tests/integration_tests/test_api.py @@ -28,10 +28,16 @@ from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME from ludwig.models.inference import InferenceModule from ludwig.utils.data_utils import read_csv -from tests.integration_tests.utils import (ENCODERS, category_feature, - generate_data, get_weights, - image_feature, run_api_experiment, - sequence_feature, text_feature) +from tests.integration_tests.utils import ( + category_feature, + ENCODERS, + generate_data, + get_weights, + image_feature, + run_api_experiment, + sequence_feature, + text_feature, +) def run_api_experiment_separated_datasets(input_features, output_features, data_csv): diff --git a/tests/integration_tests/test_automl.py b/tests/integration_tests/test_automl.py index e5e78b190a8..4ed11229aaf 100644 --- a/tests/integration_tests/test_automl.py +++ b/tests/integration_tests/test_automl.py @@ -8,24 +8,28 @@ import pytest from ludwig.api import LudwigModel -from ludwig.constants import (COLUMN, ENCODER, INPUT_FEATURES, NAME, - OUTPUT_FEATURES, PREPROCESSING, SPLIT, TYPE) +from ludwig.constants import COLUMN, ENCODER, INPUT_FEATURES, NAME, OUTPUT_FEATURES, PREPROCESSING, SPLIT, TYPE from ludwig.schema.model_types.base import ModelConfig from ludwig.types import FeatureConfigDict, ModelConfigDict from ludwig.utils.misc_utils import merge_dict -from tests.integration_tests.utils import (binary_feature, category_feature, - generate_data, image_feature, - minio_test_creds, number_feature, - private_param, remote_tmpdir, - text_feature) +from tests.integration_tests.utils import ( + binary_feature, + category_feature, + generate_data, + image_feature, + minio_test_creds, + number_feature, + private_param, + remote_tmpdir, + text_feature, +) ray = pytest.importorskip("ray") import dask.dataframe as dd # noqa E402 from ray.tune.experiment.trial import Trial # noqa E402 -from ludwig.automl import (auto_train, create_auto_config, # noqa E402 - train_with_config) +from ludwig.automl import auto_train, create_auto_config, train_with_config # noqa E402 from ludwig.automl.automl import OUTPUT_DIR # noqa E402 from ludwig.hyperopt.execution import RayTuneExecutor # noqa E402 diff --git a/tests/integration_tests/test_cache_manager.py b/tests/integration_tests/test_cache_manager.py index b4241cde51b..747fe400eed 100644 --- a/tests/integration_tests/test_cache_manager.py +++ b/tests/integration_tests/test_cache_manager.py @@ -5,12 +5,11 @@ import pytest from ludwig.constants import CHECKSUM, META, TEST, TRAINING, VALIDATION -from ludwig.data.cache.manager import CacheManager, alphanum +from ludwig.data.cache.manager import alphanum, CacheManager from ludwig.data.cache.types import CacheableDataframe, wrap from ludwig.data.dataset.pandas import PandasDatasetManager from ludwig.globals import TRAINING_PREPROC_FILE_NAME -from tests.integration_tests.utils import (LocalTestBackend, category_feature, - sequence_feature) +from tests.integration_tests.utils import category_feature, LocalTestBackend, sequence_feature @pytest.fixture diff --git a/tests/integration_tests/test_cached_preprocessing.py b/tests/integration_tests/test_cached_preprocessing.py index 61973993bf8..d035180dde6 100644 --- a/tests/integration_tests/test_cached_preprocessing.py +++ b/tests/integration_tests/test_cached_preprocessing.py @@ -4,12 +4,9 @@ import pytest from ludwig.api import LudwigModel -from ludwig.constants import (MODEL_ECD, MODEL_GBM, PREPROCESSING, PROC_COLUMN, - TRAINER) +from ludwig.constants import MODEL_ECD, MODEL_GBM, PREPROCESSING, PROC_COLUMN, TRAINER from tests.integration_tests.test_gbm import category_feature -from tests.integration_tests.utils import (binary_feature, generate_data, - number_feature, run_test_suite, - text_feature) +from tests.integration_tests.utils import binary_feature, generate_data, number_feature, run_test_suite, text_feature @pytest.mark.slow diff --git a/tests/integration_tests/test_carton.py b/tests/integration_tests/test_carton.py index e44376bf247..37fb0b4e389 100644 --- a/tests/integration_tests/test_carton.py +++ b/tests/integration_tests/test_carton.py @@ -25,9 +25,13 @@ from ludwig.api import LudwigModel from ludwig.constants import BATCH_SIZE, NAME, PREDICTIONS, TRAINER from ludwig.utils.carton_utils import export_carton -from tests.integration_tests.utils import (LocalTestBackend, binary_feature, - category_feature, generate_data, - number_feature) +from tests.integration_tests.utils import ( + binary_feature, + category_feature, + generate_data, + LocalTestBackend, + number_feature, +) @pytest.mark.skipif(platform.system() == "Windows", reason="Carton is not supported on Windows") diff --git a/tests/integration_tests/test_class_imbalance_feature.py b/tests/integration_tests/test_class_imbalance_feature.py index e52a9e30952..82b31514c49 100644 --- a/tests/integration_tests/test_class_imbalance_feature.py +++ b/tests/integration_tests/test_class_imbalance_feature.py @@ -8,8 +8,7 @@ from ludwig.api import LudwigModel from ludwig.backend import LocalBackend -from tests.integration_tests.utils import (RAY_BACKEND_CONFIG, - create_data_set_to_use, spawn) +from tests.integration_tests.utils import create_data_set_to_use, RAY_BACKEND_CONFIG, spawn try: import ray diff --git a/tests/integration_tests/test_cli.py b/tests/integration_tests/test_cli.py index dd865d20107..0c0302a2e5c 100644 --- a/tests/integration_tests/test_cli.py +++ b/tests/integration_tests/test_cli.py @@ -24,14 +24,20 @@ import pytest import yaml -from ludwig.constants import (BATCH_SIZE, COMBINER, EVAL_BATCH_SIZE, - INPUT_FEATURES, NAME, OUTPUT_FEATURES, - PREPROCESSING, TRAINER) +from ludwig.constants import ( + BATCH_SIZE, + COMBINER, + EVAL_BATCH_SIZE, + INPUT_FEATURES, + NAME, + OUTPUT_FEATURES, + PREPROCESSING, + TRAINER, +) from ludwig.globals import MODEL_FILE_NAME from ludwig.types import FeatureConfigDict from ludwig.utils.data_utils import load_yaml -from tests.integration_tests.utils import (category_feature, generate_data, - number_feature, sequence_feature) +from tests.integration_tests.utils import category_feature, generate_data, number_feature, sequence_feature pytestmark = pytest.mark.integration_tests_b diff --git a/tests/integration_tests/test_collect.py b/tests/integration_tests/test_collect.py index acef2001b78..a3d4574152f 100644 --- a/tests/integration_tests/test_collect.py +++ b/tests/integration_tests/test_collect.py @@ -19,13 +19,11 @@ import torch from ludwig.api import LudwigModel -from ludwig.collect import (collect_activations, collect_weights, - print_model_summary) +from ludwig.collect import collect_activations, collect_weights, print_model_summary from ludwig.constants import BATCH_SIZE, ENCODER, TRAINER, TYPE from ludwig.globals import MODEL_FILE_NAME from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.utils import (ENCODERS, category_feature, - generate_data, sequence_feature) +from tests.integration_tests.utils import category_feature, ENCODERS, generate_data, sequence_feature DEVICE = get_torch_device() diff --git a/tests/integration_tests/test_config_global_defaults.py b/tests/integration_tests/test_config_global_defaults.py index 32288f21918..57e90634589 100644 --- a/tests/integration_tests/test_config_global_defaults.py +++ b/tests/integration_tests/test_config_global_defaults.py @@ -1,13 +1,25 @@ import logging from typing import Dict, Tuple -from ludwig.constants import (BATCH_SIZE, CATEGORY, COMBINER, DECODER, - DEFAULTS, ENCODER, EPOCHS, FILL_WITH_CONST, - INPUT_FEATURES, LOSS, OUTPUT_FEATURES, - PREPROCESSING, TEXT, TRAINER, TYPE) +from ludwig.constants import ( + BATCH_SIZE, + CATEGORY, + COMBINER, + DECODER, + DEFAULTS, + ENCODER, + EPOCHS, + FILL_WITH_CONST, + INPUT_FEATURES, + LOSS, + OUTPUT_FEATURES, + PREPROCESSING, + TEXT, + TRAINER, + TYPE, +) from ludwig.schema.model_config import ModelConfig -from tests.integration_tests.utils import (category_feature, generate_data, - run_experiment, text_feature) +from tests.integration_tests.utils import category_feature, generate_data, run_experiment, text_feature logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) diff --git a/tests/integration_tests/test_custom_components.py b/tests/integration_tests/test_custom_components.py index 99c95e892ff..98f9b01da3d 100644 --- a/tests/integration_tests/test_custom_components.py +++ b/tests/integration_tests/test_custom_components.py @@ -4,12 +4,11 @@ import torch from marshmallow_dataclass import dataclass -from torch import Tensor, nn +from torch import nn, Tensor from ludwig.api import LudwigModel from ludwig.combiners.combiners import Combiner, register_combiner -from ludwig.constants import (BATCH_SIZE, ENCODER_OUTPUT, LOGITS, MINIMIZE, - NUMBER, TRAINER) +from ludwig.constants import BATCH_SIZE, ENCODER_OUTPUT, LOGITS, MINIMIZE, NUMBER, TRAINER from ludwig.decoders.base import Decoder from ludwig.decoders.registry import register_decoder from ludwig.encoders.base import Encoder @@ -24,11 +23,14 @@ from ludwig.schema.encoders.base import BaseEncoderConfig from ludwig.schema.encoders.utils import register_encoder_config from ludwig.schema.features.loss.loss import BaseLossConfig -from ludwig.schema.features.loss.loss import \ - register_loss as register_loss_schema -from tests.integration_tests.utils import (LocalTestBackend, category_feature, - generate_data, number_feature, - sequence_feature) +from ludwig.schema.features.loss.loss import register_loss as register_loss_schema +from tests.integration_tests.utils import ( + category_feature, + generate_data, + LocalTestBackend, + number_feature, + sequence_feature, +) @register_encoder_config("custom_number_encoder", NUMBER) diff --git a/tests/integration_tests/test_date_feature.py b/tests/integration_tests/test_date_feature.py index e4364e8bcdb..ea04edff36c 100644 --- a/tests/integration_tests/test_date_feature.py +++ b/tests/integration_tests/test_date_feature.py @@ -6,10 +6,21 @@ from dateutil.parser import parse from ludwig.api import LudwigModel -from ludwig.constants import (BACKEND, BINARY, DATE, EPOCHS, FILL_WITH_CONST, - INPUT_FEATURES, MISSING_VALUE_STRATEGY, NAME, - OUTPUT_FEATURES, PREPROCESSING, RAY, TRAINER, - TYPE) +from ludwig.constants import ( + BACKEND, + BINARY, + DATE, + EPOCHS, + FILL_WITH_CONST, + INPUT_FEATURES, + MISSING_VALUE_STRATEGY, + NAME, + OUTPUT_FEATURES, + PREPROCESSING, + RAY, + TRAINER, + TYPE, +) from ludwig.utils.date_utils import create_vector_from_datetime_obj ray = pytest.importorskip("ray") diff --git a/tests/integration_tests/test_dependencies.py b/tests/integration_tests/test_dependencies.py index 6a94f136c7c..5ccb96a1731 100644 --- a/tests/integration_tests/test_dependencies.py +++ b/tests/integration_tests/test_dependencies.py @@ -9,8 +9,7 @@ from ludwig.modules.reduction_modules import SequenceReducer from ludwig.schema.model_config import ModelConfig from ludwig.utils import output_feature_utils -from tests.integration_tests.utils import ( - generate_output_features_with_dependencies, number_feature) +from tests.integration_tests.utils import generate_output_features_with_dependencies, number_feature logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) diff --git a/tests/integration_tests/test_experiment.py b/tests/integration_tests/test_experiment.py index 9b9a8c8ca3a..cf46c1672cc 100644 --- a/tests/integration_tests/test_experiment.py +++ b/tests/integration_tests/test_experiment.py @@ -28,8 +28,7 @@ from ludwig.api import LudwigModel from ludwig.backend import LOCAL_BACKEND from ludwig.callbacks import Callback -from ludwig.constants import (BATCH_SIZE, COLUMN, ENCODER, H3, NAME, - PREPROCESSING, TRAINER, TYPE) +from ludwig.constants import BATCH_SIZE, COLUMN, ENCODER, H3, NAME, PREPROCESSING, TRAINER, TYPE from ludwig.data.concatenate_datasets import concatenate_df from ludwig.data.dataset_synthesizer import build_synthetic_dataset_df from ludwig.data.preprocessing import preprocess_for_training @@ -41,13 +40,29 @@ from ludwig.utils.data_utils import read_csv from ludwig.utils.defaults import default_random_seed from tests.integration_tests.utils import ( - ENCODERS, TEXT_ENCODERS, LocalTestBackend, audio_feature, bag_feature, - binary_feature, category_distribution_feature, category_feature, - create_data_set_to_use, date_feature, generate_data, + audio_feature, + bag_feature, + binary_feature, + category_distribution_feature, + category_feature, + create_data_set_to_use, + date_feature, + ENCODERS, + generate_data, generate_output_features_with_dependencies, - generate_output_features_with_dependencies_complex, h3_feature, - image_feature, number_feature, run_experiment, sequence_feature, - set_feature, text_feature, timeseries_feature, vector_feature) + generate_output_features_with_dependencies_complex, + h3_feature, + image_feature, + LocalTestBackend, + number_feature, + run_experiment, + sequence_feature, + set_feature, + TEXT_ENCODERS, + text_feature, + timeseries_feature, + vector_feature, +) pytestmark = pytest.mark.integration_tests_d diff --git a/tests/integration_tests/test_explain.py b/tests/integration_tests/test_explain.py index a436313509e..9e1541bb476 100644 --- a/tests/integration_tests/test_explain.py +++ b/tests/integration_tests/test_explain.py @@ -6,18 +6,25 @@ import pytest from ludwig.api import LudwigModel -from ludwig.constants import (BATCH_SIZE, BINARY, CATEGORY, MINIMUM_BATCH_SIZE, - MODEL_ECD, MODEL_GBM, TYPE) +from ludwig.constants import BATCH_SIZE, BINARY, CATEGORY, MINIMUM_BATCH_SIZE, MODEL_ECD, MODEL_GBM, TYPE from ludwig.explain.captum import IntegratedGradientsExplainer from ludwig.explain.explainer import Explainer from ludwig.explain.explanation import Explanation from ludwig.explain.gbm import GBMExplainer -from tests.integration_tests.utils import (LocalTestBackend, binary_feature, - category_feature, date_feature, - generate_data, image_feature, - number_feature, sequence_feature, - set_feature, text_feature, - timeseries_feature, vector_feature) +from tests.integration_tests.utils import ( + binary_feature, + category_feature, + date_feature, + generate_data, + image_feature, + LocalTestBackend, + number_feature, + sequence_feature, + set_feature, + text_feature, + timeseries_feature, + vector_feature, +) try: from ludwig.explain.captum_ray import RayIntegratedGradientsExplainer diff --git a/tests/integration_tests/test_gbm.py b/tests/integration_tests/test_gbm.py index 1aeab03b436..bad65f375a7 100644 --- a/tests/integration_tests/test_gbm.py +++ b/tests/integration_tests/test_gbm.py @@ -4,16 +4,14 @@ import pytest from ludwig.api import LudwigModel -from ludwig.constants import (INPUT_FEATURES, MODEL_TYPE, OUTPUT_FEATURES, - TRAINER) +from ludwig.constants import INPUT_FEATURES, MODEL_TYPE, OUTPUT_FEATURES, TRAINER from ludwig.error import ConfigValidationError from ludwig.globals import MODEL_FILE_NAME from ludwig.schema.model_types.base import ModelConfig from tests.integration_tests import synthetic_test_data from tests.integration_tests.utils import binary_feature from tests.integration_tests.utils import category_feature as _category_feature -from tests.integration_tests.utils import (generate_data, number_feature, - text_feature) +from tests.integration_tests.utils import generate_data, number_feature, text_feature pytestmark = pytest.mark.integration_tests_b diff --git a/tests/integration_tests/test_graph_execution.py b/tests/integration_tests/test_graph_execution.py index 7a07e004a97..495175ed3b5 100644 --- a/tests/integration_tests/test_graph_execution.py +++ b/tests/integration_tests/test_graph_execution.py @@ -15,9 +15,15 @@ import pytest from tests.integration_tests.utils import ( - category_feature, generate_data, - generate_output_features_with_dependencies, number_feature, run_experiment, - sequence_feature, set_feature, text_feature) + category_feature, + generate_data, + generate_output_features_with_dependencies, + number_feature, + run_experiment, + sequence_feature, + set_feature, + text_feature, +) @pytest.mark.parametrize( diff --git a/tests/integration_tests/test_horovod.py b/tests/integration_tests/test_horovod.py index 4b0fc2d8729..ef624e20269 100644 --- a/tests/integration_tests/test_horovod.py +++ b/tests/integration_tests/test_horovod.py @@ -30,8 +30,7 @@ HOROVOD_AVAILABLE = True from ludwig.constants import ENCODER, TYPE -from tests.integration_tests.utils import (ENCODERS, category_feature, - generate_data, sequence_feature) +from tests.integration_tests.utils import category_feature, ENCODERS, generate_data, sequence_feature # This script will run the actual test model training in parallel TEST_SCRIPT = os.path.join(os.path.dirname(__file__), "scripts", "run_train_horovod.py") diff --git a/tests/integration_tests/test_hyperopt.py b/tests/integration_tests/test_hyperopt.py index de6317af407..9db188ca279 100644 --- a/tests/integration_tests/test_hyperopt.py +++ b/tests/integration_tests/test_hyperopt.py @@ -21,11 +21,29 @@ import pytest from ludwig.backend import initialize_backend -from ludwig.constants import (ACCURACY, AUTO, BATCH_SIZE, CATEGORY, COMBINER, - EXECUTOR, HYPEROPT, INPUT_FEATURES, - MAX_CONCURRENT_TRIALS, MODEL_ECD, MODEL_GBM, - MODEL_TYPE, NAME, OUTPUT_FEATURES, RAY, TEST, - TEXT, TRAINER, TRAINING, TYPE, VALIDATION) +from ludwig.constants import ( + ACCURACY, + AUTO, + BATCH_SIZE, + CATEGORY, + COMBINER, + EXECUTOR, + HYPEROPT, + INPUT_FEATURES, + MAX_CONCURRENT_TRIALS, + MODEL_ECD, + MODEL_GBM, + MODEL_TYPE, + NAME, + OUTPUT_FEATURES, + RAY, + TEST, + TEXT, + TRAINER, + TRAINING, + TYPE, + VALIDATION, +) from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME, MODEL_FILE_NAME from ludwig.hyperopt.results import HyperoptResults from ludwig.hyperopt.run import hyperopt @@ -33,14 +51,19 @@ from ludwig.schema.model_config import ModelConfig from ludwig.utils import fs_utils from ludwig.utils.data_utils import load_json, use_credentials -from tests.integration_tests.utils import (category_feature, generate_data, - minio_test_creds, private_param, - remote_tmpdir, text_feature) +from tests.integration_tests.utils import ( + category_feature, + generate_data, + minio_test_creds, + private_param, + remote_tmpdir, + text_feature, +) ray = pytest.importorskip("ray") -from ludwig.hyperopt.execution import (RayTuneExecutor, # noqa - get_build_hyperopt_executor) +from ludwig.hyperopt.execution import RayTuneExecutor # noqa +from ludwig.hyperopt.execution import get_build_hyperopt_executor pytestmark = [pytest.mark.distributed, pytest.mark.integration_tests_a] diff --git a/tests/integration_tests/test_hyperopt_ray.py b/tests/integration_tests/test_hyperopt_ray.py index fcd99911438..07e3374d1c6 100644 --- a/tests/integration_tests/test_hyperopt_ray.py +++ b/tests/integration_tests/test_hyperopt_ray.py @@ -24,18 +24,15 @@ from ludwig.backend import initialize_backend from ludwig.callbacks import Callback -from ludwig.constants import (ACCURACY, AUTO, BATCH_SIZE, EXECUTOR, - MAX_CONCURRENT_TRIALS, TRAINER) +from ludwig.constants import ACCURACY, AUTO, BATCH_SIZE, EXECUTOR, MAX_CONCURRENT_TRIALS, TRAINER from ludwig.contribs.mlflow import MlflowCallback -from ludwig.globals import (HYPEROPT_STATISTICS_FILE_NAME, MODEL_FILE_NAME, - MODEL_HYPERPARAMETERS_FILE_NAME) +from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME, MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME from ludwig.hyperopt.results import HyperoptResults from ludwig.hyperopt.run import hyperopt from ludwig.hyperopt.utils import update_hyperopt_params_with_defaults from ludwig.schema.model_config import ModelConfig from ludwig.utils.automl.utils import get_model_type -from tests.integration_tests.utils import (category_feature, generate_data, - text_feature) +from tests.integration_tests.utils import category_feature, generate_data, text_feature try: import ray diff --git a/tests/integration_tests/test_hyperopt_ray_horovod.py b/tests/integration_tests/test_hyperopt_ray_horovod.py index 8e57667a03e..0b9bb513e0b 100644 --- a/tests/integration_tests/test_hyperopt_ray_horovod.py +++ b/tests/integration_tests/test_hyperopt_ray_horovod.py @@ -21,24 +21,20 @@ from ludwig.api import LudwigModel from ludwig.callbacks import Callback -from ludwig.constants import (ACCURACY, AUTO, BATCH_SIZE, EXECUTOR, - MAX_CONCURRENT_TRIALS, TRAINER) +from ludwig.constants import ACCURACY, AUTO, BATCH_SIZE, EXECUTOR, MAX_CONCURRENT_TRIALS, TRAINER from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME from ludwig.hyperopt.results import HyperoptResults from ludwig.hyperopt.run import hyperopt from ludwig.hyperopt.utils import update_hyperopt_params_with_defaults from ludwig.schema.model_config import ModelConfig -from tests.integration_tests.utils import (binary_feature, - create_data_set_to_use, - generate_data, number_feature) +from tests.integration_tests.utils import binary_feature, create_data_set_to_use, generate_data, number_feature try: import ray - from ray.tune.syncer import SyncConfig, get_node_to_storage_syncer + from ray.tune.syncer import get_node_to_storage_syncer, SyncConfig from ludwig.backend.ray import RayBackend - from ludwig.hyperopt.execution import (RayTuneExecutor, - _get_relative_checkpoints_dir_parts) + from ludwig.hyperopt.execution import _get_relative_checkpoints_dir_parts, RayTuneExecutor except ImportError: ray = None RayTuneExecutor = object diff --git a/tests/integration_tests/test_input_feature_tied.py b/tests/integration_tests/test_input_feature_tied.py index 542ea36e57c..906c0459388 100644 --- a/tests/integration_tests/test_input_feature_tied.py +++ b/tests/integration_tests/test_input_feature_tied.py @@ -4,9 +4,14 @@ from ludwig.models.base import BaseModel from ludwig.schema.model_config import ModelConfig -from tests.integration_tests.utils import (category_feature, generate_data, - number_feature, run_experiment, - sequence_feature, text_feature) +from tests.integration_tests.utils import ( + category_feature, + generate_data, + number_feature, + run_experiment, + sequence_feature, + text_feature, +) # InputFeatureOptions namedtuple structure: # feature_type: input feature type, e.g., number, category, etc. diff --git a/tests/integration_tests/test_kfold_cv.py b/tests/integration_tests/test_kfold_cv.py index 1f4b954b128..a987f07018a 100644 --- a/tests/integration_tests/test_kfold_cv.py +++ b/tests/integration_tests/test_kfold_cv.py @@ -10,10 +10,15 @@ from ludwig.constants import BATCH_SIZE, TRAINER from ludwig.experiment import kfold_cross_validate_cli from ludwig.utils.data_utils import load_json -from tests.integration_tests.utils import (binary_feature, category_feature, - create_data_set_to_use, - generate_data, number_feature, - sequence_feature, text_feature) +from tests.integration_tests.utils import ( + binary_feature, + category_feature, + create_data_set_to_use, + generate_data, + number_feature, + sequence_feature, + text_feature, +) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) diff --git a/tests/integration_tests/test_llm.py b/tests/integration_tests/test_llm.py index 573082c0ec0..ed377b48ca3 100644 --- a/tests/integration_tests/test_llm.py +++ b/tests/integration_tests/test_llm.py @@ -14,20 +14,37 @@ import ludwig.error as ludwig_error from ludwig.api import LudwigModel -from ludwig.constants import (ADAPTER, BACKEND, BASE_MODEL, BATCH_SIZE, - COMBINER, EPOCHS, EVAL_BATCH_SIZE, GENERATION, - INPUT_FEATURES, MERGE_ADAPTER_INTO_BASE_MODEL, - MODEL_ECD, MODEL_LLM, MODEL_TYPE, - OUTPUT_FEATURES, POSTPROCESSOR, PREPROCESSING, - PRETRAINED_ADAPTER_WEIGHTS, PROGRESSBAR, PROMPT, - QUANTIZATION, TARGET_MODULES, TRAINER, TYPE) +from ludwig.constants import ( + ADAPTER, + BACKEND, + BASE_MODEL, + BATCH_SIZE, + COMBINER, + EPOCHS, + EVAL_BATCH_SIZE, + GENERATION, + INPUT_FEATURES, + MERGE_ADAPTER_INTO_BASE_MODEL, + MODEL_ECD, + MODEL_LLM, + MODEL_TYPE, + OUTPUT_FEATURES, + POSTPROCESSOR, + PREPROCESSING, + PRETRAINED_ADAPTER_WEIGHTS, + PROGRESSBAR, + PROMPT, + QUANTIZATION, + TARGET_MODULES, + TRAINER, + TYPE, +) from ludwig.globals import MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME from ludwig.models.llm import LLM from ludwig.schema.model_types.base import ModelConfig from ludwig.utils.fs_utils import list_file_names_in_directory from ludwig.utils.types import DataFrame -from tests.integration_tests.utils import (category_feature, generate_data, - text_feature) +from tests.integration_tests.utils import category_feature, generate_data, text_feature pytestmark = pytest.mark.llm diff --git a/tests/integration_tests/test_missing_value_strategy.py b/tests/integration_tests/test_missing_value_strategy.py index 4d5757387ed..fd32b396603 100644 --- a/tests/integration_tests/test_missing_value_strategy.py +++ b/tests/integration_tests/test_missing_value_strategy.py @@ -20,14 +20,20 @@ import pytest from ludwig.api import LudwigModel -from ludwig.constants import (BATCH_SIZE, COLUMN, DROP_ROW, FILL_WITH_MEAN, - PREPROCESSING, PROC_COLUMN, TRAINER) +from ludwig.constants import BATCH_SIZE, COLUMN, DROP_ROW, FILL_WITH_MEAN, PREPROCESSING, PROC_COLUMN, TRAINER from ludwig.globals import MODEL_FILE_NAME -from tests.integration_tests.utils import (LocalTestBackend, binary_feature, - category_feature, generate_data, - number_feature, read_csv_with_nan, - sequence_feature, set_feature, - text_feature, vector_feature) +from tests.integration_tests.utils import ( + binary_feature, + category_feature, + generate_data, + LocalTestBackend, + number_feature, + read_csv_with_nan, + sequence_feature, + set_feature, + text_feature, + vector_feature, +) def test_missing_value_prediction(tmpdir, csv_filename): diff --git a/tests/integration_tests/test_mlflow.py b/tests/integration_tests/test_mlflow.py index 0ba5b28f6cd..bac996a781a 100644 --- a/tests/integration_tests/test_mlflow.py +++ b/tests/integration_tests/test_mlflow.py @@ -14,10 +14,8 @@ from ludwig.contribs.mlflow import MlflowCallback from ludwig.export import export_mlflow from ludwig.globals import MODEL_FILE_NAME -from ludwig.utils.backward_compatibility import \ - upgrade_config_dict_to_latest_version -from tests.integration_tests.utils import (FakeRemoteBackend, category_feature, - generate_data, sequence_feature) +from ludwig.utils.backward_compatibility import upgrade_config_dict_to_latest_version +from tests.integration_tests.utils import category_feature, FakeRemoteBackend, generate_data, sequence_feature def run_mlflow_callback_test(mlflow_client, config, training_data, val_data, test_data, tmpdir, exp_name=None): diff --git a/tests/integration_tests/test_model_save_and_load.py b/tests/integration_tests/test_model_save_and_load.py index 9788e63acf0..ffba4ab72ff 100644 --- a/tests/integration_tests/test_model_save_and_load.py +++ b/tests/integration_tests/test_model_save_and_load.py @@ -8,21 +8,29 @@ import torch from ludwig.api import LudwigModel -from ludwig.constants import (BATCH_SIZE, ENCODER, LOSS, NAME, PREPROCESSING, - TRAINER, TRAINING, TYPE) +from ludwig.constants import BATCH_SIZE, ENCODER, LOSS, NAME, PREPROCESSING, TRAINER, TRAINING, TYPE from ludwig.data.split import get_splitter from ludwig.globals import MODEL_FILE_NAME from ludwig.modules.loss_modules import MSELoss from ludwig.schema.features.loss.loss import MSELossConfig from ludwig.utils.data_utils import read_csv -from tests.integration_tests.utils import (LocalTestBackend, audio_feature, - bag_feature, binary_feature, - category_feature, date_feature, - generate_data, h3_feature, - image_feature, number_feature, - sequence_feature, set_feature, - text_feature, timeseries_feature, - vector_feature) +from tests.integration_tests.utils import ( + audio_feature, + bag_feature, + binary_feature, + category_feature, + date_feature, + generate_data, + h3_feature, + image_feature, + LocalTestBackend, + number_feature, + sequence_feature, + set_feature, + text_feature, + timeseries_feature, + vector_feature, +) def test_model_load_from_checkpoint(tmpdir, csv_filename, tmp_path): diff --git a/tests/integration_tests/test_model_training_options.py b/tests/integration_tests/test_model_training_options.py index e59d1175ba8..9df49906c2a 100644 --- a/tests/integration_tests/test_model_training_options.py +++ b/tests/integration_tests/test_model_training_options.py @@ -11,22 +11,27 @@ from ludwig import globals as global_vars from ludwig.api import LudwigModel from ludwig.backend import LOCAL_BACKEND -from ludwig.constants import (BATCH_SIZE, CATEGORY, DEFAULTS, EPOCHS, - INPUT_FEATURES, OUTPUT_FEATURES, PREPROCESSING, - TRAINER, TRAINING) +from ludwig.constants import ( + BATCH_SIZE, + CATEGORY, + DEFAULTS, + EPOCHS, + INPUT_FEATURES, + OUTPUT_FEATURES, + PREPROCESSING, + TRAINER, + TRAINING, +) from ludwig.contribs.mlflow import MlflowCallback from ludwig.experiment import experiment_cli from ludwig.features.number_feature import numeric_transformation_registry -from ludwig.globals import (DESCRIPTION_FILE_NAME, MODEL_FILE_NAME, - MODEL_WEIGHTS_FILE_NAME, - TRAINING_PREPROC_FILE_NAME) +from ludwig.globals import DESCRIPTION_FILE_NAME, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME, TRAINING_PREPROC_FILE_NAME from ludwig.schema.optimizers import optimizer_registry from ludwig.utils.data_utils import load_json, replace_file_extension from ludwig.utils.misc_utils import get_from_registry from ludwig.utils.package_utils import LazyLoader from tests.integration_tests import synthetic_test_data -from tests.integration_tests.utils import (LocalTestBackend, category_feature, - generate_data) +from tests.integration_tests.utils import category_feature, generate_data, LocalTestBackend mlflow = LazyLoader("mlflow", globals(), "mlflow") diff --git a/tests/integration_tests/test_neuropod.py b/tests/integration_tests/test_neuropod.py index bb1cb1e657a..40b8629c1b3 100644 --- a/tests/integration_tests/test_neuropod.py +++ b/tests/integration_tests/test_neuropod.py @@ -26,9 +26,13 @@ from ludwig.api import LudwigModel from ludwig.constants import BATCH_SIZE, NAME, PREDICTIONS, TRAINER from ludwig.utils.neuropod_utils import export_neuropod -from tests.integration_tests.utils import (LocalTestBackend, binary_feature, - category_feature, generate_data, - number_feature) +from tests.integration_tests.utils import ( + binary_feature, + category_feature, + generate_data, + LocalTestBackend, + number_feature, +) @pytest.mark.skipif(platform.system() == "Windows", reason="Neuropod is not supported on Windows") diff --git a/tests/integration_tests/test_peft.py b/tests/integration_tests/test_peft.py index f3ad8e67a46..9e6fcb2a865 100644 --- a/tests/integration_tests/test_peft.py +++ b/tests/integration_tests/test_peft.py @@ -2,10 +2,8 @@ import pytest -from ludwig.constants import (COMBINER, EPOCHS, INPUT_FEATURES, - OUTPUT_FEATURES, TRAINER, TYPE) -from tests.integration_tests.utils import (binary_feature, generate_data, - run_test_suite, text_feature) +from ludwig.constants import COMBINER, EPOCHS, INPUT_FEATURES, OUTPUT_FEATURES, TRAINER, TYPE +from tests.integration_tests.utils import binary_feature, generate_data, run_test_suite, text_feature @pytest.mark.integration_tests_e diff --git a/tests/integration_tests/test_postprocessing.py b/tests/integration_tests/test_postprocessing.py index 3047d2a29b2..3990c44b831 100644 --- a/tests/integration_tests/test_postprocessing.py +++ b/tests/integration_tests/test_postprocessing.py @@ -25,9 +25,14 @@ from ludwig.api import LudwigModel from ludwig.constants import BATCH_SIZE, DECODER, NAME, TRAINER from ludwig.globals import MODEL_FILE_NAME -from tests.integration_tests.utils import (RAY_BACKEND_CONFIG, binary_feature, - category_feature, generate_data, - set_feature, text_feature) +from tests.integration_tests.utils import ( + binary_feature, + category_feature, + generate_data, + RAY_BACKEND_CONFIG, + set_feature, + text_feature, +) def random_binary_logits(*args, num_predict_samples, **kwargs): diff --git a/tests/integration_tests/test_preprocessing.py b/tests/integration_tests/test_preprocessing.py index bdd68d7bccd..b8a330a88ea 100644 --- a/tests/integration_tests/test_preprocessing.py +++ b/tests/integration_tests/test_preprocessing.py @@ -16,20 +16,43 @@ from ludwig.api import LudwigModel from ludwig.backend import initialize_backend from ludwig.callbacks import Callback -from ludwig.constants import (BASE_MODEL, BATCH_SIZE, COLUMN, DECODER, EPOCHS, - FULL, INPUT_FEATURES, MODEL_ECD, MODEL_LLM, - MODEL_TYPE, NAME, OUTPUT_FEATURES, PREPROCESSING, - PROC_COLUMN, PROMPT, SPLIT, TRAINER, TYPE) +from ludwig.constants import ( + BASE_MODEL, + BATCH_SIZE, + COLUMN, + DECODER, + EPOCHS, + FULL, + INPUT_FEATURES, + MODEL_ECD, + MODEL_LLM, + MODEL_TYPE, + NAME, + OUTPUT_FEATURES, + PREPROCESSING, + PROC_COLUMN, + PROMPT, + SPLIT, + TRAINER, + TYPE, +) from ludwig.data.concatenate_datasets import concatenate_df -from ludwig.data.preprocessing import (handle_features_with_prompt_config, - preprocess_for_prediction) +from ludwig.data.preprocessing import handle_features_with_prompt_config, preprocess_for_prediction from ludwig.schema.llms.prompt import PromptConfig from ludwig.schema.model_types.base import ModelConfig from tests.integration_tests.utils import ( - LocalTestBackend, assert_preprocessed_dataset_shape_and_dtype_for_feature, - audio_feature, binary_feature, category_feature, generate_data, - generate_data_as_dataframe, image_feature, number_feature, - sequence_feature, text_feature) + assert_preprocessed_dataset_shape_and_dtype_for_feature, + audio_feature, + binary_feature, + category_feature, + generate_data, + generate_data_as_dataframe, + image_feature, + LocalTestBackend, + number_feature, + sequence_feature, + text_feature, +) NUM_EXAMPLES = 20 diff --git a/tests/integration_tests/test_ray.py b/tests/integration_tests/test_ray.py index 06a56584521..02407e318ff 100644 --- a/tests/integration_tests/test_ray.py +++ b/tests/integration_tests/test_ray.py @@ -23,29 +23,56 @@ import torch from ludwig.api import LudwigModel -from ludwig.backend import (LOCAL_BACKEND, create_ray_backend, - initialize_backend) -from ludwig.constants import (AUDIO, BAG, BALANCE_PERCENTAGE_TOLERANCE, - BATCH_SIZE, BFILL, BINARY, CATEGORY, COLUMN, - DATE, H3, IMAGE, MAX_BATCH_SIZE_DATASET_FRACTION, - NAME, NUMBER, PREPROCESSING, SEQUENCE, SET, - SPLIT, TEXT, TIMESERIES, TRAINER, VECTOR) +from ludwig.backend import create_ray_backend, initialize_backend, LOCAL_BACKEND +from ludwig.constants import ( + AUDIO, + BAG, + BALANCE_PERCENTAGE_TOLERANCE, + BATCH_SIZE, + BFILL, + BINARY, + CATEGORY, + COLUMN, + DATE, + H3, + IMAGE, + MAX_BATCH_SIZE_DATASET_FRACTION, + NAME, + NUMBER, + PREPROCESSING, + SEQUENCE, + SET, + SPLIT, + TEXT, + TIMESERIES, + TRAINER, + VECTOR, +) from ludwig.data.preprocessing import balance_data from ludwig.data.split import DEFAULT_PROBABILITIES from ludwig.globals import MODEL_FILE_NAME from ludwig.utils.data_utils import read_parquet from ludwig.utils.misc_utils import merge_dict -from tests.integration_tests.utils import (RAY_BACKEND_CONFIG, audio_feature, - augment_dataset_with_none, - bag_feature, binary_feature, - category_feature, - create_data_set_to_use, - date_feature, generate_data, - h3_feature, image_feature, - number_feature, sequence_feature, - set_feature, text_feature, - timeseries_feature, - train_with_backend, vector_feature) +from tests.integration_tests.utils import ( + audio_feature, + augment_dataset_with_none, + bag_feature, + binary_feature, + category_feature, + create_data_set_to_use, + date_feature, + generate_data, + h3_feature, + image_feature, + number_feature, + RAY_BACKEND_CONFIG, + sequence_feature, + set_feature, + text_feature, + timeseries_feature, + train_with_backend, + vector_feature, +) ray = pytest.importorskip("ray") # noqa @@ -57,10 +84,9 @@ import ray.exceptions # noqa: E402 from ray.air.config import DatasetConfig # noqa: E402 from ray.data import Dataset, DatasetPipeline # noqa: E402 -from ray.train._internal.dataset_spec import \ - DataParallelIngestSpec # noqa: E402 +from ray.train._internal.dataset_spec import DataParallelIngestSpec # noqa: E402 -from ludwig.backend.ray import RayBackend, get_trainer_kwargs # noqa: E402 +from ludwig.backend.ray import get_trainer_kwargs, RayBackend # noqa: E402 from ludwig.data.dataframe.dask import DaskEngine # noqa: E402 try: diff --git a/tests/integration_tests/test_reducers.py b/tests/integration_tests/test_reducers.py index cf56a66dd0a..c1ce56e1156 100644 --- a/tests/integration_tests/test_reducers.py +++ b/tests/integration_tests/test_reducers.py @@ -1,8 +1,7 @@ import pytest from ludwig.modules.reduction_modules import reduce_mode_registry -from tests.integration_tests.utils import (category_feature, generate_data, - run_experiment, sequence_feature) +from tests.integration_tests.utils import category_feature, generate_data, run_experiment, sequence_feature @pytest.mark.parametrize("reduce_output", reduce_mode_registry) diff --git a/tests/integration_tests/test_regularizers.py b/tests/integration_tests/test_regularizers.py index 6f24e3f29e1..2e5c3c2bca0 100644 --- a/tests/integration_tests/test_regularizers.py +++ b/tests/integration_tests/test_regularizers.py @@ -10,11 +10,17 @@ from ludwig.data.preprocessing import preprocess_for_training from ludwig.utils.data_utils import read_csv from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.utils import (LocalTestBackend, binary_feature, - category_feature, date_feature, - generate_data, image_feature, - number_feature, sequence_feature, - set_feature) +from tests.integration_tests.utils import ( + binary_feature, + category_feature, + date_feature, + generate_data, + image_feature, + LocalTestBackend, + number_feature, + sequence_feature, + set_feature, +) DEVICE = get_torch_device() BATCH_SIZE = 32 diff --git a/tests/integration_tests/test_remote.py b/tests/integration_tests/test_remote.py index aff93961422..e9f38e101a8 100644 --- a/tests/integration_tests/test_remote.py +++ b/tests/integration_tests/test_remote.py @@ -6,13 +6,17 @@ from ludwig.api import LudwigModel from ludwig.backend import initialize_backend from ludwig.constants import BATCH_SIZE, TRAINER -from ludwig.globals import (DESCRIPTION_FILE_NAME, MODEL_FILE_NAME, - MODEL_WEIGHTS_FILE_NAME) +from ludwig.globals import DESCRIPTION_FILE_NAME, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME from ludwig.utils import fs_utils from ludwig.utils.data_utils import use_credentials -from tests.integration_tests.utils import (category_feature, generate_data, - minio_test_creds, private_param, - remote_tmpdir, sequence_feature) +from tests.integration_tests.utils import ( + category_feature, + generate_data, + minio_test_creds, + private_param, + remote_tmpdir, + sequence_feature, +) pytestmark = pytest.mark.integration_tests_b diff --git a/tests/integration_tests/test_sequence_decoders.py b/tests/integration_tests/test_sequence_decoders.py index fcf0264a9fa..00645f0267a 100644 --- a/tests/integration_tests/test_sequence_decoders.py +++ b/tests/integration_tests/test_sequence_decoders.py @@ -2,13 +2,26 @@ import pytest -from ludwig.constants import (BATCH_SIZE, DECODER, ENCODER, EPOCHS, - INPUT_FEATURES, OUTPUT_FEATURES, SEQUENCE, TEXT, - TRAINER, TYPE) -from tests.integration_tests.utils import (RAY_BACKEND_CONFIG, - create_data_set_to_use, - generate_data, sequence_feature, - text_feature, train_with_backend) +from ludwig.constants import ( + BATCH_SIZE, + DECODER, + ENCODER, + EPOCHS, + INPUT_FEATURES, + OUTPUT_FEATURES, + SEQUENCE, + TEXT, + TRAINER, + TYPE, +) +from tests.integration_tests.utils import ( + create_data_set_to_use, + generate_data, + RAY_BACKEND_CONFIG, + sequence_feature, + text_feature, + train_with_backend, +) pytestmark = pytest.mark.integration_tests_c diff --git a/tests/integration_tests/test_sequence_features.py b/tests/integration_tests/test_sequence_features.py index 9252cdddbda..eb1a3f45c96 100644 --- a/tests/integration_tests/test_sequence_features.py +++ b/tests/integration_tests/test_sequence_features.py @@ -11,8 +11,7 @@ from ludwig.data.dataset_synthesizer import build_synthetic_dataset from ludwig.data.preprocessing import preprocess_for_training from ludwig.features.feature_registries import update_config_with_metadata -from tests.integration_tests.utils import (generate_data, run_experiment, - sequence_feature) +from tests.integration_tests.utils import generate_data, run_experiment, sequence_feature # # this test is focused on testing input sequence features with all encoders diff --git a/tests/integration_tests/test_server.py b/tests/integration_tests/test_server.py index 6757f759764..b4e1d374068 100644 --- a/tests/integration_tests/test_server.py +++ b/tests/integration_tests/test_server.py @@ -24,10 +24,15 @@ from ludwig.constants import BATCH_SIZE, DECODER, TRAINER from ludwig.serve import server from ludwig.utils.data_utils import read_csv -from tests.integration_tests.utils import (LocalTestBackend, audio_feature, - category_feature, generate_data, - image_feature, number_feature, - text_feature) +from tests.integration_tests.utils import ( + audio_feature, + category_feature, + generate_data, + image_feature, + LocalTestBackend, + number_feature, + text_feature, +) logger = logging.getLogger(__name__) diff --git a/tests/integration_tests/test_simple_features.py b/tests/integration_tests/test_simple_features.py index 20aad8d5109..6a18c03d7bd 100644 --- a/tests/integration_tests/test_simple_features.py +++ b/tests/integration_tests/test_simple_features.py @@ -19,11 +19,18 @@ import pytest from ludwig.constants import NAME -from tests.integration_tests.utils import (bag_feature, binary_feature, - category_feature, generate_data, - number_feature, run_experiment, - sequence_feature, set_feature, - text_feature, vector_feature) +from tests.integration_tests.utils import ( + bag_feature, + binary_feature, + category_feature, + generate_data, + number_feature, + run_experiment, + sequence_feature, + set_feature, + text_feature, + vector_feature, +) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) diff --git a/tests/integration_tests/test_timeseries_feature.py b/tests/integration_tests/test_timeseries_feature.py index fdea52331ee..c6d2d316cee 100644 --- a/tests/integration_tests/test_timeseries_feature.py +++ b/tests/integration_tests/test_timeseries_feature.py @@ -4,11 +4,9 @@ import torch from ludwig.api import LudwigModel -from ludwig.constants import (COLUMN, ENCODER_OUTPUT, INPUT_FEATURES, - OUTPUT_FEATURES) +from ludwig.constants import COLUMN, ENCODER_OUTPUT, INPUT_FEATURES, OUTPUT_FEATURES from ludwig.features.timeseries_feature import TimeseriesInputFeature -from ludwig.schema.features.timeseries_feature import \ - TimeseriesInputFeatureConfig +from ludwig.schema.features.timeseries_feature import TimeseriesInputFeatureConfig from ludwig.schema.utils import load_config_with_kwargs from tests.integration_tests.utils import number_feature, timeseries_feature diff --git a/tests/integration_tests/test_torchscript.py b/tests/integration_tests/test_torchscript.py index fb37d40c7fa..bb06371f733 100644 --- a/tests/integration_tests/test_torchscript.py +++ b/tests/integration_tests/test_torchscript.py @@ -25,8 +25,7 @@ from ludwig.api import LudwigModel from ludwig.backend import RAY -from ludwig.constants import (BATCH_SIZE, COMBINER, EVAL_BATCH_SIZE, LOGITS, - NAME, PREDICTIONS, PROBABILITIES, TRAINER) +from ludwig.constants import BATCH_SIZE, COMBINER, EVAL_BATCH_SIZE, LOGITS, NAME, PREDICTIONS, PROBABILITIES, TRAINER from ludwig.data.preprocessing import preprocess_for_prediction from ludwig.features.number_feature import numeric_transformation_registry from ludwig.globals import TRAIN_SET_METADATA_FILE_NAME @@ -34,14 +33,23 @@ from ludwig.utils import output_feature_utils from ludwig.utils.tokenizers import TORCHSCRIPT_COMPATIBLE_TOKENIZERS from tests.integration_tests import utils -from tests.integration_tests.utils import (LocalTestBackend, audio_feature, - bag_feature, binary_feature, - category_feature, date_feature, - generate_data, h3_feature, - image_feature, number_feature, - sequence_feature, set_feature, - text_feature, timeseries_feature, - vector_feature) +from tests.integration_tests.utils import ( + audio_feature, + bag_feature, + binary_feature, + category_feature, + date_feature, + generate_data, + h3_feature, + image_feature, + LocalTestBackend, + number_feature, + sequence_feature, + set_feature, + text_feature, + timeseries_feature, + vector_feature, +) @pytest.mark.integration_tests_e diff --git a/tests/integration_tests/test_trainer.py b/tests/integration_tests/test_trainer.py index bf36f9d397f..7de2dfd7c7a 100644 --- a/tests/integration_tests/test_trainer.py +++ b/tests/integration_tests/test_trainer.py @@ -11,17 +11,29 @@ from ludwig.api import LudwigModel from ludwig.callbacks import Callback -from ludwig.constants import (BATCH_SIZE, EFFECTIVE_BATCH_SIZE, EPOCHS, - EVAL_BATCH_SIZE, INPUT_FEATURES, - MAX_BATCH_SIZE_DATASET_FRACTION, OUTPUT_FEATURES, - TRAINER) +from ludwig.constants import ( + BATCH_SIZE, + EFFECTIVE_BATCH_SIZE, + EPOCHS, + EVAL_BATCH_SIZE, + INPUT_FEATURES, + MAX_BATCH_SIZE_DATASET_FRACTION, + OUTPUT_FEATURES, + TRAINER, +) from ludwig.distributed import init_dist_strategy from ludwig.globals import MODEL_FILE_NAME -from tests.integration_tests.utils import (RAY_BACKEND_CONFIG, - LocalTestBackend, binary_feature, - category_feature, generate_data, - number_feature, sequence_feature, - text_feature, vector_feature) +from tests.integration_tests.utils import ( + binary_feature, + category_feature, + generate_data, + LocalTestBackend, + number_feature, + RAY_BACKEND_CONFIG, + sequence_feature, + text_feature, + vector_feature, +) try: from ludwig.backend.horovod import HorovodBackend diff --git a/tests/integration_tests/test_triton.py b/tests/integration_tests/test_triton.py index 5a15051b59a..bbbc92234eb 100644 --- a/tests/integration_tests/test_triton.py +++ b/tests/integration_tests/test_triton.py @@ -23,15 +23,19 @@ from ludwig.constants import BATCH_SIZE, TRAINER from ludwig.data.dataset_synthesizer import build_synthetic_dataset_df from ludwig.utils.data_utils import load_yaml -from ludwig.utils.inference_utils import \ - to_inference_module_input_from_dataframe -from ludwig.utils.triton_utils import (POSTPROCESSOR, PREDICTOR, PREPROCESSOR, - export_triton, get_inference_modules) -from tests.integration_tests.utils import (LocalTestBackend, binary_feature, - category_feature, generate_data, - number_feature, sequence_feature, - set_feature, text_feature, - vector_feature) +from ludwig.utils.inference_utils import to_inference_module_input_from_dataframe +from ludwig.utils.triton_utils import export_triton, get_inference_modules, POSTPROCESSOR, PREDICTOR, PREPROCESSOR +from tests.integration_tests.utils import ( + binary_feature, + category_feature, + generate_data, + LocalTestBackend, + number_feature, + sequence_feature, + set_feature, + text_feature, + vector_feature, +) def test_triton_torchscript(csv_filename, tmpdir): diff --git a/tests/integration_tests/test_visualization.py b/tests/integration_tests/test_visualization.py index f2712325a28..0060e322b90 100644 --- a/tests/integration_tests/test_visualization.py +++ b/tests/integration_tests/test_visualization.py @@ -29,16 +29,20 @@ from ludwig.constants import BATCH_SIZE, ENCODER, TRAINER, TYPE from ludwig.experiment import experiment_cli -from ludwig.globals import (DESCRIPTION_FILE_NAME, - PREDICTIONS_PARQUET_FILE_NAME, - TEST_STATISTICS_FILE_NAME) +from ludwig.globals import DESCRIPTION_FILE_NAME, PREDICTIONS_PARQUET_FILE_NAME, TEST_STATISTICS_FILE_NAME from ludwig.utils.data_utils import get_split_path from ludwig.visualize import _extract_ground_truth_values from tests.integration_tests.test_visualization_api import obtain_df_splits -from tests.integration_tests.utils import (bag_feature, binary_feature, - category_feature, generate_data, - number_feature, sequence_feature, - set_feature, text_feature) +from tests.integration_tests.utils import ( + bag_feature, + binary_feature, + category_feature, + generate_data, + number_feature, + sequence_feature, + set_feature, + text_feature, +) pytestmark = pytest.mark.integration_tests_c diff --git a/tests/integration_tests/test_visualization_api.py b/tests/integration_tests/test_visualization_api.py index cbf6ca2e46e..0f10bc5c922 100644 --- a/tests/integration_tests/test_visualization_api.py +++ b/tests/integration_tests/test_visualization_api.py @@ -22,16 +22,21 @@ from ludwig import visualize from ludwig.api import LudwigModel, TrainingStats -from ludwig.constants import (BATCH_SIZE, ENCODER, NAME, PREDICTIONS, - PROBABILITIES, PROBABILITY, TRAINER, TYPE) +from ludwig.constants import BATCH_SIZE, ENCODER, NAME, PREDICTIONS, PROBABILITIES, PROBABILITY, TRAINER, TYPE from ludwig.data.split import get_splitter from ludwig.globals import HYPEROPT_STATISTICS_FILE_NAME from ludwig.utils.data_utils import read_csv -from tests.integration_tests.utils import (LocalTestBackend, bag_feature, - binary_feature, category_feature, - generate_data, number_feature, - sequence_feature, set_feature, - text_feature) +from tests.integration_tests.utils import ( + bag_feature, + binary_feature, + category_feature, + generate_data, + LocalTestBackend, + number_feature, + sequence_feature, + set_feature, + text_feature, +) pytestmark = pytest.mark.integration_tests_c diff --git a/tests/integration_tests/utils.py b/tests/integration_tests/utils.py index b347524a6e7..fd7886199c3 100644 --- a/tests/integration_tests/utils.py +++ b/tests/integration_tests/utils.py @@ -23,8 +23,9 @@ import tempfile import traceback import uuid + # from distutils.util import strtobool -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union +from typing import Any, Dict, List, Optional, Set, Tuple, TYPE_CHECKING, Union import cloudpickle import numpy as np @@ -36,13 +37,32 @@ from ludwig.api import LudwigModel from ludwig.backend import LocalBackend -from ludwig.constants import (AUDIO, BAG, BATCH_SIZE, BINARY, CATEGORY, - CATEGORY_DISTRIBUTION, COLUMN, DATE, DECODER, - ENCODER, H3, IMAGE, MODEL_ECD, NAME, NUMBER, - PROC_COLUMN, SEQUENCE, SET, SPLIT, TEXT, - TIMESERIES, TRAINER, VECTOR) -from ludwig.data.dataset_synthesizer import (DATETIME_FORMATS, - build_synthetic_dataset) +from ludwig.constants import ( + AUDIO, + BAG, + BATCH_SIZE, + BINARY, + CATEGORY, + CATEGORY_DISTRIBUTION, + COLUMN, + DATE, + DECODER, + ENCODER, + H3, + IMAGE, + MODEL_ECD, + NAME, + NUMBER, + PROC_COLUMN, + SEQUENCE, + SET, + SPLIT, + TEXT, + TIMESERIES, + TRAINER, + VECTOR, +) +from ludwig.data.dataset_synthesizer import build_synthetic_dataset, DATETIME_FORMATS from ludwig.experiment import experiment_cli from ludwig.features.feature_utils import compute_feature_hash from ludwig.globals import MODEL_FILE_NAME, PREDICTIONS_PARQUET_FILE_NAME @@ -50,8 +70,7 @@ from ludwig.schema.encoders.utils import get_encoder_classes from ludwig.trainers.trainer import Trainer from ludwig.utils import fs_utils -from ludwig.utils.data_utils import (read_csv, replace_file_extension, - use_credentials) +from ludwig.utils.data_utils import read_csv, replace_file_extension, use_credentials if TYPE_CHECKING: from ludwig.data.dataset.base import Dataset diff --git a/tests/ludwig/accounting/test_used_tokens.py b/tests/ludwig/accounting/test_used_tokens.py index d0d52f5427d..f760dbd6d44 100644 --- a/tests/ludwig/accounting/test_used_tokens.py +++ b/tests/ludwig/accounting/test_used_tokens.py @@ -1,8 +1,6 @@ import torch -from ludwig.accounting.used_tokens import (get_used_tokens_for_ecd, - get_used_tokens_for_gbm, - get_used_tokens_for_llm) +from ludwig.accounting.used_tokens import get_used_tokens_for_ecd, get_used_tokens_for_gbm, get_used_tokens_for_llm def test_get_used_tokens_for_gbm(): diff --git a/tests/ludwig/automl/test_base_config.py b/tests/ludwig/automl/test_base_config.py index 89f4ee8d3ad..f10a09fb015 100644 --- a/tests/ludwig/automl/test_base_config.py +++ b/tests/ludwig/automl/test_base_config.py @@ -8,15 +8,18 @@ ray = pytest.importorskip("ray") # noqa -from ludwig.automl.base_config import (get_dataset_info, # noqa - get_dataset_info_from_source, - get_field_metadata, - get_reference_configs, is_field_boolean) +from ludwig.automl.base_config import get_dataset_info # noqa +from ludwig.automl.base_config import ( + get_dataset_info_from_source, + get_field_metadata, + get_reference_configs, + is_field_boolean, +) from ludwig.data.dataframe.dask import DaskEngine # noqa from ludwig.data.dataframe.pandas import PandasEngine # noqa from ludwig.schema.model_types.base import ModelConfig # noqa -from ludwig.utils.automl.data_source import (DataframeSource, # noqa - wrap_data_source) +from ludwig.utils.automl.data_source import DataframeSource # noqa +from ludwig.utils.automl.data_source import wrap_data_source pytestmark = pytest.mark.distributed diff --git a/tests/ludwig/combiners/test_combiners.py b/tests/ludwig/combiners/test_combiners.py index 6c517f97489..645d5afec71 100644 --- a/tests/ludwig/combiners/test_combiners.py +++ b/tests/ludwig/combiners/test_combiners.py @@ -6,31 +6,30 @@ import pytest import torch -from ludwig.combiners.combiners import (ComparatorCombiner, ConcatCombiner, - ProjectAggregateCombiner, - SequenceCombiner, - SequenceConcatCombiner, TabNetCombiner, - TabTransformerCombiner, - TransformerCombiner) -from ludwig.constants import (CATEGORY, ENCODER_OUTPUT, ENCODER_OUTPUT_STATE, - TYPE) +from ludwig.combiners.combiners import ( + ComparatorCombiner, + ConcatCombiner, + ProjectAggregateCombiner, + SequenceCombiner, + SequenceConcatCombiner, + TabNetCombiner, + TabTransformerCombiner, + TransformerCombiner, +) +from ludwig.constants import CATEGORY, ENCODER_OUTPUT, ENCODER_OUTPUT_STATE, TYPE from ludwig.encoders.registry import get_sequence_encoder_registry from ludwig.schema.combiners.comparator import ComparatorCombinerConfig from ludwig.schema.combiners.concat import ConcatCombinerConfig -from ludwig.schema.combiners.project_aggregate import \ - ProjectAggregateCombinerConfig +from ludwig.schema.combiners.project_aggregate import ProjectAggregateCombinerConfig from ludwig.schema.combiners.sequence import SequenceCombinerConfig -from ludwig.schema.combiners.sequence_concat import \ - SequenceConcatCombinerConfig -from ludwig.schema.combiners.tab_transformer import \ - TabTransformerCombinerConfig +from ludwig.schema.combiners.sequence_concat import SequenceConcatCombinerConfig +from ludwig.schema.combiners.tab_transformer import TabTransformerCombinerConfig from ludwig.schema.combiners.tabnet import TabNetCombinerConfig from ludwig.schema.combiners.transformer import TransformerCombinerConfig from ludwig.schema.utils import load_config from ludwig.utils.misc_utils import set_random_seed from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import \ - check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import check_module_parameters_updated logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) diff --git a/tests/ludwig/config_sampling/test_config_sampling.py b/tests/ludwig/config_sampling/test_config_sampling.py index 2133bf2f809..e4f74f1db57 100644 --- a/tests/ludwig/config_sampling/test_config_sampling.py +++ b/tests/ludwig/config_sampling/test_config_sampling.py @@ -2,8 +2,10 @@ from ludwig.utils.data_utils import load_json from tests.training_success.test_training_success import ( - combiner_config_generator, defaults_config_generator, - ecd_trainer_config_generator) + combiner_config_generator, + defaults_config_generator, + ecd_trainer_config_generator, +) def full_config_generator(generator_fn, *args): diff --git a/tests/ludwig/config_validation/test_validate_config_combiner.py b/tests/ludwig/config_validation/test_validate_config_combiner.py index 46460ae3556..2ef8a97a22e 100644 --- a/tests/ludwig/config_validation/test_validate_config_combiner.py +++ b/tests/ludwig/config_validation/test_validate_config_combiner.py @@ -3,8 +3,7 @@ from ludwig.config_validation.validation import check_schema, get_schema from ludwig.constants import MODEL_ECD, TRAINER from ludwig.error import ConfigValidationError -from tests.integration_tests.utils import (binary_feature, category_feature, - number_feature) +from tests.integration_tests.utils import binary_feature, category_feature, number_feature def test_combiner_schema_is_not_empty_for_ECD(): diff --git a/tests/ludwig/config_validation/test_validate_config_encoder.py b/tests/ludwig/config_validation/test_validate_config_encoder.py index f0bc9cf9f3d..2fb94bf6737 100644 --- a/tests/ludwig/config_validation/test_validate_config_encoder.py +++ b/tests/ludwig/config_validation/test_validate_config_encoder.py @@ -1,13 +1,15 @@ import pytest -from ludwig.constants import (DEFAULTS, ENCODER, INPUT_FEATURES, NAME, - OUTPUT_FEATURES, SEQUENCE, TEXT, TIMESERIES, - TYPE) +from ludwig.constants import DEFAULTS, ENCODER, INPUT_FEATURES, NAME, OUTPUT_FEATURES, SEQUENCE, TEXT, TIMESERIES, TYPE from ludwig.error import ConfigValidationError from ludwig.schema.model_config import ModelConfig -from tests.integration_tests.utils import (binary_feature, number_feature, - sequence_feature, text_feature, - timeseries_feature) +from tests.integration_tests.utils import ( + binary_feature, + number_feature, + sequence_feature, + text_feature, + timeseries_feature, +) @pytest.mark.parametrize("feature_type", [SEQUENCE, TEXT, TIMESERIES]) diff --git a/tests/ludwig/config_validation/test_validate_config_features.py b/tests/ludwig/config_validation/test_validate_config_features.py index d699f42cf56..e034c655347 100644 --- a/tests/ludwig/config_validation/test_validate_config_features.py +++ b/tests/ludwig/config_validation/test_validate_config_features.py @@ -2,8 +2,7 @@ from ludwig.config_validation.validation import check_schema from ludwig.error import ConfigValidationError -from tests.integration_tests.utils import (binary_feature, category_feature, - number_feature, text_feature) +from tests.integration_tests.utils import binary_feature, category_feature, number_feature, text_feature def test_config_input_output_features(): diff --git a/tests/ludwig/config_validation/test_validate_config_hyperopt.py b/tests/ludwig/config_validation/test_validate_config_hyperopt.py index e83e632b9ea..87dfbbd917e 100644 --- a/tests/ludwig/config_validation/test_validate_config_hyperopt.py +++ b/tests/ludwig/config_validation/test_validate_config_hyperopt.py @@ -7,9 +7,16 @@ import ludwig.schema.hyperopt.parameter # noqa: F401 import ludwig.schema.hyperopt.scheduler # noqa: F401 import ludwig.schema.hyperopt.search_algorithm # noqa: F401 -from ludwig.constants import (EXECUTOR, HYPEROPT, INPUT_FEATURES, - OUTPUT_FEATURES, PARAMETERS, SCHEDULER, - SEARCH_ALG, TYPE) +from ludwig.constants import ( + EXECUTOR, + HYPEROPT, + INPUT_FEATURES, + OUTPUT_FEATURES, + PARAMETERS, + SCHEDULER, + SEARCH_ALG, + TYPE, +) from ludwig.error import ConfigValidationError from ludwig.schema.hyperopt import utils from ludwig.schema.model_types.base import ModelConfig diff --git a/tests/ludwig/config_validation/test_validate_config_misc.py b/tests/ludwig/config_validation/test_validate_config_misc.py index 28a7f7571c1..d8d0220f17c 100644 --- a/tests/ludwig/config_validation/test_validate_config_misc.py +++ b/tests/ludwig/config_validation/test_validate_config_misc.py @@ -1,10 +1,26 @@ import pytest from ludwig.config_validation.validation import check_schema, get_schema -from ludwig.constants import (ACTIVE, AUDIO, BACKEND, CATEGORY, COLUMN, - DECODER, DEFAULTS, ENCODER, LOSS, MODEL_ECD, - MODEL_GBM, MODEL_LLM, MODEL_TYPE, NAME, - PREPROCESSING, PROC_COLUMN, TRAINER, TYPE) +from ludwig.constants import ( + ACTIVE, + AUDIO, + BACKEND, + CATEGORY, + COLUMN, + DECODER, + DEFAULTS, + ENCODER, + LOSS, + MODEL_ECD, + MODEL_GBM, + MODEL_LLM, + MODEL_TYPE, + NAME, + PREPROCESSING, + PROC_COLUMN, + TRAINER, + TYPE, +) from ludwig.error import ConfigValidationError from ludwig.features.feature_registries import get_output_type_registry from ludwig.schema import utils as schema_utils @@ -13,36 +29,37 @@ from ludwig.schema.defaults.gbm import GBMDefaultsConfig from ludwig.schema.features.preprocessing.audio import AudioPreprocessingConfig from ludwig.schema.features.preprocessing.bag import BagPreprocessingConfig -from ludwig.schema.features.preprocessing.binary import \ - BinaryPreprocessingConfig -from ludwig.schema.features.preprocessing.category import \ - CategoryPreprocessingConfig +from ludwig.schema.features.preprocessing.binary import BinaryPreprocessingConfig +from ludwig.schema.features.preprocessing.category import CategoryPreprocessingConfig from ludwig.schema.features.preprocessing.date import DatePreprocessingConfig from ludwig.schema.features.preprocessing.h3 import H3PreprocessingConfig from ludwig.schema.features.preprocessing.image import ImagePreprocessingConfig -from ludwig.schema.features.preprocessing.number import \ - NumberPreprocessingConfig -from ludwig.schema.features.preprocessing.sequence import \ - SequencePreprocessingConfig +from ludwig.schema.features.preprocessing.number import NumberPreprocessingConfig +from ludwig.schema.features.preprocessing.sequence import SequencePreprocessingConfig from ludwig.schema.features.preprocessing.set import SetPreprocessingConfig from ludwig.schema.features.preprocessing.text import TextPreprocessingConfig -from ludwig.schema.features.preprocessing.timeseries import \ - TimeseriesPreprocessingConfig -from ludwig.schema.features.preprocessing.vector import \ - VectorPreprocessingConfig -from ludwig.schema.features.utils import (get_input_feature_jsonschema, - get_output_feature_jsonschema) +from ludwig.schema.features.preprocessing.timeseries import TimeseriesPreprocessingConfig +from ludwig.schema.features.preprocessing.vector import VectorPreprocessingConfig +from ludwig.schema.features.utils import get_input_feature_jsonschema, get_output_feature_jsonschema from ludwig.schema.llms.peft import LoraConfig from ludwig.schema.model_types.base import ModelConfig -from ludwig.schema.utils import (ludwig_dataclass, - unload_jsonschema_from_marshmallow_class) -from tests.integration_tests.utils import (ENCODERS, audio_feature, - bag_feature, binary_feature, - category_feature, date_feature, - h3_feature, image_feature, - number_feature, sequence_feature, - set_feature, text_feature, - timeseries_feature, vector_feature) +from ludwig.schema.utils import ludwig_dataclass, unload_jsonschema_from_marshmallow_class +from tests.integration_tests.utils import ( + audio_feature, + bag_feature, + binary_feature, + category_feature, + date_feature, + ENCODERS, + h3_feature, + image_feature, + number_feature, + sequence_feature, + set_feature, + text_feature, + timeseries_feature, + vector_feature, +) def test_config_features(): diff --git a/tests/ludwig/config_validation/test_validate_config_preprocessing.py b/tests/ludwig/config_validation/test_validate_config_preprocessing.py index 87d2daa4d3f..bc05a1dff8f 100644 --- a/tests/ludwig/config_validation/test_validate_config_preprocessing.py +++ b/tests/ludwig/config_validation/test_validate_config_preprocessing.py @@ -1,7 +1,6 @@ import pytest -from ludwig.config_validation.preprocessing import \ - check_global_max_sequence_length_fits_prompt_template +from ludwig.config_validation.preprocessing import check_global_max_sequence_length_fits_prompt_template from ludwig.config_validation.validation import check_schema from tests.integration_tests.utils import binary_feature, category_feature diff --git a/tests/ludwig/config_validation/test_validate_config_trainer.py b/tests/ludwig/config_validation/test_validate_config_trainer.py index bd7116ae2fa..ead0f8cfa57 100644 --- a/tests/ludwig/config_validation/test_validate_config_trainer.py +++ b/tests/ludwig/config_validation/test_validate_config_trainer.py @@ -5,8 +5,7 @@ from ludwig.error import ConfigValidationError from ludwig.schema.optimizers import optimizer_registry from ludwig.schema.trainer import ECDTrainerConfig -from tests.integration_tests.utils import (binary_feature, category_feature, - number_feature) +from tests.integration_tests.utils import binary_feature, category_feature, number_feature # Note: simple tests for now, but once we add dependent fields we can add tests for more complex relationships in this # file. Currently verifies that the nested fields work, as the others are covered by basic marshmallow validation: diff --git a/tests/ludwig/data/test_ray_data.py b/tests/ludwig/data/test_ray_data.py index ab68a9c3984..bd7cb20b871 100644 --- a/tests/ludwig/data/test_ray_data.py +++ b/tests/ludwig/data/test_ray_data.py @@ -9,8 +9,8 @@ ray = pytest.importorskip("ray") # noqa dask = pytest.importorskip("dask") # noqa -from ludwig.data.dataset.ray import (RayDatasetBatcher, # noqa - read_remote_parquet) +from ludwig.data.dataset.ray import RayDatasetBatcher # noqa +from ludwig.data.dataset.ray import read_remote_parquet # Mark the entire module as distributed pytestmark = pytest.mark.distributed diff --git a/tests/ludwig/decoders/test_image_decoder.py b/tests/ludwig/decoders/test_image_decoder.py index a89028c7bb9..2f48591da7b 100644 --- a/tests/ludwig/decoders/test_image_decoder.py +++ b/tests/ludwig/decoders/test_image_decoder.py @@ -1,13 +1,11 @@ import pytest import torch -from ludwig.constants import (ENCODER_OUTPUT, ENCODER_OUTPUT_STATE, HIDDEN, - LOGITS) +from ludwig.constants import ENCODER_OUTPUT, ENCODER_OUTPUT_STATE, HIDDEN, LOGITS from ludwig.decoders.image_decoders import UNetDecoder from ludwig.encoders.image.base import UNetEncoder from ludwig.utils.misc_utils import set_random_seed -from tests.integration_tests.parameter_update_utils import \ - check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import check_module_parameters_updated RANDOM_SEED = 1919 diff --git a/tests/ludwig/decoders/test_llm_decoders.py b/tests/ludwig/decoders/test_llm_decoders.py index c12fc5cf49f..6408efdbb9b 100644 --- a/tests/ludwig/decoders/test_llm_decoders.py +++ b/tests/ludwig/decoders/test_llm_decoders.py @@ -1,8 +1,7 @@ import pytest import torch -from ludwig.constants import (BACKEND, BASE_MODEL, GENERATION, INPUT_FEATURES, - MODEL_TYPE, OUTPUT_FEATURES) +from ludwig.constants import BACKEND, BASE_MODEL, GENERATION, INPUT_FEATURES, MODEL_TYPE, OUTPUT_FEATURES from ludwig.decoders.llm_decoders import TextExtractorDecoder from ludwig.schema.model_config import ModelConfig from tests.integration_tests.utils import text_feature diff --git a/tests/ludwig/decoders/test_sequence_decoder.py b/tests/ludwig/decoders/test_sequence_decoder.py index f5554d7faa8..6abfe66a2f2 100644 --- a/tests/ludwig/decoders/test_sequence_decoder.py +++ b/tests/ludwig/decoders/test_sequence_decoder.py @@ -2,13 +2,15 @@ import torch from ludwig.constants import HIDDEN, LOGITS -from ludwig.decoders.sequence_decoders import (LSTMDecoder, RNNDecoder, - SequenceGeneratorDecoder, - SequenceLSTMDecoder, - SequenceRNNDecoder) +from ludwig.decoders.sequence_decoders import ( + LSTMDecoder, + RNNDecoder, + SequenceGeneratorDecoder, + SequenceLSTMDecoder, + SequenceRNNDecoder, +) from ludwig.utils.misc_utils import set_random_seed -from tests.integration_tests.parameter_update_utils import \ - check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import check_module_parameters_updated RANDOM_SEED = 1919 diff --git a/tests/ludwig/decoders/test_sequence_tagger.py b/tests/ludwig/decoders/test_sequence_tagger.py index dba28b8792e..0a239f0633b 100644 --- a/tests/ludwig/decoders/test_sequence_tagger.py +++ b/tests/ludwig/decoders/test_sequence_tagger.py @@ -4,8 +4,7 @@ from ludwig.constants import HIDDEN, LOGITS from ludwig.decoders.sequence_tagger import SequenceTaggerDecoder from ludwig.utils.misc_utils import set_random_seed -from tests.integration_tests.parameter_update_utils import \ - check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import check_module_parameters_updated RANDOM_SEED = 1919 diff --git a/tests/ludwig/encoders/test_bag_encoders.py b/tests/ludwig/encoders/test_bag_encoders.py index 51d2ffacfb6..89912eda131 100644 --- a/tests/ludwig/encoders/test_bag_encoders.py +++ b/tests/ludwig/encoders/test_bag_encoders.py @@ -6,8 +6,7 @@ from ludwig.constants import ENCODER_OUTPUT from ludwig.encoders.bag_encoders import BagEmbedWeightedEncoder from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import \ - check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import check_module_parameters_updated RANDOM_SEED = 1919 DEVICE = get_torch_device() diff --git a/tests/ludwig/encoders/test_category_encoders.py b/tests/ludwig/encoders/test_category_encoders.py index c7dacf166a5..6080f3f70df 100644 --- a/tests/ludwig/encoders/test_category_encoders.py +++ b/tests/ludwig/encoders/test_category_encoders.py @@ -4,11 +4,9 @@ import torch from ludwig.constants import ENCODER_OUTPUT -from ludwig.encoders.category_encoders import (CategoricalEmbedEncoder, - CategoricalSparseEncoder) +from ludwig.encoders.category_encoders import CategoricalEmbedEncoder, CategoricalSparseEncoder from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import \ - check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import check_module_parameters_updated RANDOM_SEED = 1919 DEVICE = get_torch_device() diff --git a/tests/ludwig/encoders/test_date_encoders.py b/tests/ludwig/encoders/test_date_encoders.py index 4897c7d1216..a2c74be1af9 100644 --- a/tests/ludwig/encoders/test_date_encoders.py +++ b/tests/ludwig/encoders/test_date_encoders.py @@ -6,8 +6,7 @@ from ludwig.encoders.date_encoders import DateEmbed, DateWave from ludwig.utils.misc_utils import set_random_seed from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import \ - check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import check_module_parameters_updated RANDOM_SEED = 1919 DEVICE = get_torch_device() diff --git a/tests/ludwig/encoders/test_h3_encoders.py b/tests/ludwig/encoders/test_h3_encoders.py index 4442c06a167..911ece92604 100644 --- a/tests/ludwig/encoders/test_h3_encoders.py +++ b/tests/ludwig/encoders/test_h3_encoders.py @@ -6,8 +6,7 @@ from ludwig.encoders import h3_encoders from ludwig.utils.misc_utils import set_random_seed from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import \ - check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import check_module_parameters_updated RANDOM_SEED = 1919 DEVICE = get_torch_device() diff --git a/tests/ludwig/encoders/test_image_encoders.py b/tests/ludwig/encoders/test_image_encoders.py index e425fad2c57..c1f74208e3a 100644 --- a/tests/ludwig/encoders/test_image_encoders.py +++ b/tests/ludwig/encoders/test_image_encoders.py @@ -4,30 +4,31 @@ import torch from ludwig.constants import ENCODER_OUTPUT -from ludwig.encoders.image.base import (MLPMixerEncoder, ResNetEncoder, - Stacked2DCNN, UNetEncoder, ViTEncoder) -from ludwig.encoders.image.torchvision import (TVAlexNetEncoder, - TVConvNeXtEncoder, - TVDenseNetEncoder, - TVEfficientNetEncoder, - TVGoogLeNetEncoder, - TVInceptionV3Encoder, - TVMaxVitEncoder, - TVMNASNetEncoder, - TVMobileNetV2Encoder, - TVMobileNetV3Encoder, - TVRegNetEncoder, - TVResNetEncoder, - TVResNeXtEncoder, - TVShuffleNetV2Encoder, - TVSqueezeNetEncoder, - TVSwinTransformerEncoder, - TVVGGEncoder, TVViTEncoder, - TVWideResNetEncoder) +from ludwig.encoders.image.base import MLPMixerEncoder, ResNetEncoder, Stacked2DCNN, UNetEncoder, ViTEncoder +from ludwig.encoders.image.torchvision import ( + TVAlexNetEncoder, + TVConvNeXtEncoder, + TVDenseNetEncoder, + TVEfficientNetEncoder, + TVGoogLeNetEncoder, + TVInceptionV3Encoder, + TVMaxVitEncoder, + TVMNASNetEncoder, + TVMobileNetV2Encoder, + TVMobileNetV3Encoder, + TVRegNetEncoder, + TVResNetEncoder, + TVResNeXtEncoder, + TVShuffleNetV2Encoder, + TVSqueezeNetEncoder, + TVSwinTransformerEncoder, + TVVGGEncoder, + TVViTEncoder, + TVWideResNetEncoder, +) from ludwig.utils.image_utils import torchvision_model_registry from ludwig.utils.misc_utils import set_random_seed -from tests.integration_tests.parameter_update_utils import \ - check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import check_module_parameters_updated RANDOM_SEED = 1919 diff --git a/tests/ludwig/encoders/test_llm_encoders.py b/tests/ludwig/encoders/test_llm_encoders.py index 5ba40e6ef97..56cc6ad4b0e 100644 --- a/tests/ludwig/encoders/test_llm_encoders.py +++ b/tests/ludwig/encoders/test_llm_encoders.py @@ -7,8 +7,7 @@ from ludwig.encoders.text_encoders import LLMEncoder from ludwig.schema.encoders.text_encoders import LLMEncoderConfig -from ludwig.schema.llms.peft import (AdaloraConfig, BaseAdapterConfig, - IA3Config, LoraConfig) +from ludwig.schema.llms.peft import AdaloraConfig, BaseAdapterConfig, IA3Config, LoraConfig from ludwig.utils.llm_utils import get_context_len # Mapping of adapter types to test against and their respective config objects. diff --git a/tests/ludwig/encoders/test_sequence_encoders.py b/tests/ludwig/encoders/test_sequence_encoders.py index 9eb138ed79c..a0349ab6f5f 100644 --- a/tests/ludwig/encoders/test_sequence_encoders.py +++ b/tests/ludwig/encoders/test_sequence_encoders.py @@ -4,15 +4,18 @@ import torch from ludwig.constants import ENCODER_OUTPUT -from ludwig.encoders.sequence_encoders import (ParallelCNN, - SequenceEmbedEncoder, - SequencePassthroughEncoder, - StackedCNN, StackedCNNRNN, - StackedParallelCNN, StackedRNN, - StackedTransformer) +from ludwig.encoders.sequence_encoders import ( + ParallelCNN, + SequenceEmbedEncoder, + SequencePassthroughEncoder, + StackedCNN, + StackedCNNRNN, + StackedParallelCNN, + StackedRNN, + StackedTransformer, +) from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import \ - check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import check_module_parameters_updated DEVICE = get_torch_device() RANDOM_SEED = 1919 diff --git a/tests/ludwig/encoders/test_set_encoders.py b/tests/ludwig/encoders/test_set_encoders.py index 2f175ef6591..742b4b4ed8f 100644 --- a/tests/ludwig/encoders/test_set_encoders.py +++ b/tests/ludwig/encoders/test_set_encoders.py @@ -7,8 +7,7 @@ from ludwig.encoders.set_encoders import SetSparseEncoder from ludwig.utils.misc_utils import set_random_seed from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import \ - check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import check_module_parameters_updated RANDOM_SEED = 1919 DEVICE = get_torch_device() diff --git a/tests/ludwig/encoders/test_text_encoders.py b/tests/ludwig/encoders/test_text_encoders.py index dfc9756fc5a..2bd4c2ab840 100644 --- a/tests/ludwig/encoders/test_text_encoders.py +++ b/tests/ludwig/encoders/test_text_encoders.py @@ -8,20 +8,22 @@ import ludwig.schema.encoders.utils as schema_encoders_utils from ludwig.api import LudwigModel -from ludwig.constants import (ENCODER, ENCODER_OUTPUT, MODEL_ECD, NAME, TEXT, - TRAINER) +from ludwig.constants import ENCODER, ENCODER_OUTPUT, MODEL_ECD, NAME, TEXT, TRAINER from ludwig.encoders import text_encoders from ludwig.error import ConfigValidationError from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME from ludwig.schema.model_config import ModelConfig from ludwig.utils.data_utils import load_json from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import \ - check_module_parameters_updated -from tests.integration_tests.utils import (HF_ENCODERS, LocalTestBackend, - category_feature, - clear_huggingface_cache, - generate_data, text_feature) +from tests.integration_tests.parameter_update_utils import check_module_parameters_updated +from tests.integration_tests.utils import ( + category_feature, + clear_huggingface_cache, + generate_data, + HF_ENCODERS, + LocalTestBackend, + text_feature, +) DEVICE = get_torch_device() RANDOM_SEED = 1919 diff --git a/tests/ludwig/explain/test_util.py b/tests/ludwig/explain/test_util.py index 97996c7f306..08e87b21fa4 100644 --- a/tests/ludwig/explain/test_util.py +++ b/tests/ludwig/explain/test_util.py @@ -6,10 +6,8 @@ from ludwig.api import LudwigModel from ludwig.constants import NAME -from ludwig.explain.util import (get_absolute_module_key_from_submodule, - replace_layer_with_copy) -from tests.integration_tests.utils import (LocalTestBackend, binary_feature, - generate_data, text_feature) +from ludwig.explain.util import get_absolute_module_key_from_submodule, replace_layer_with_copy +from tests.integration_tests.utils import binary_feature, generate_data, LocalTestBackend, text_feature def test_get_absolute_module_key_from_submodule(): diff --git a/tests/ludwig/features/test_audio_feature.py b/tests/ludwig/features/test_audio_feature.py index 3a0c84858f2..2323d1274e6 100644 --- a/tests/ludwig/features/test_audio_feature.py +++ b/tests/ludwig/features/test_audio_feature.py @@ -12,8 +12,7 @@ from ludwig.schema.features.audio_feature import AudioInputFeatureConfig from ludwig.schema.utils import load_config_with_kwargs from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.utils import (audio_feature, category_feature, - generate_data) +from tests.integration_tests.utils import audio_feature, category_feature, generate_data BATCH_SIZE = 2 SEQ_SIZE = 20 diff --git a/tests/ludwig/features/test_binary_feature.py b/tests/ludwig/features/test_binary_feature.py index 6f86c8a9243..851d580fad6 100644 --- a/tests/ludwig/features/test_binary_feature.py +++ b/tests/ludwig/features/test_binary_feature.py @@ -4,10 +4,8 @@ import torch from ludwig.constants import ENCODER, ENCODER_OUTPUT -from ludwig.features.binary_feature import (BinaryInputFeature, - BinaryOutputFeature) -from ludwig.schema.features.binary_feature import (BinaryInputFeatureConfig, - BinaryOutputFeatureConfig) +from ludwig.features.binary_feature import BinaryInputFeature, BinaryOutputFeature +from ludwig.schema.features.binary_feature import BinaryInputFeatureConfig, BinaryOutputFeatureConfig from ludwig.schema.utils import load_config_with_kwargs from ludwig.utils.torch_utils import get_torch_device diff --git a/tests/ludwig/features/test_category_feature.py b/tests/ludwig/features/test_category_feature.py index fec6e26445c..7f982e24b67 100644 --- a/tests/ludwig/features/test_category_feature.py +++ b/tests/ludwig/features/test_category_feature.py @@ -6,8 +6,7 @@ from ludwig.constants import ENCODER, ENCODER_OUTPUT, TYPE from ludwig.features.category_feature import CategoryInputFeature -from ludwig.schema.features.category_feature import \ - ECDCategoryInputFeatureConfig +from ludwig.schema.features.category_feature import ECDCategoryInputFeatureConfig from ludwig.schema.utils import load_config_with_kwargs from ludwig.utils.misc_utils import merge_dict from ludwig.utils.torch_utils import get_torch_device diff --git a/tests/ludwig/features/test_date_feature.py b/tests/ludwig/features/test_date_feature.py index 1e95958ad16..f527379b92b 100644 --- a/tests/ludwig/features/test_date_feature.py +++ b/tests/ludwig/features/test_date_feature.py @@ -6,8 +6,7 @@ import torch from dateutil.parser import parse -from ludwig.constants import (ENCODER_OUTPUT, FILL_WITH_CONST, - MISSING_VALUE_STRATEGY) +from ludwig.constants import ENCODER_OUTPUT, FILL_WITH_CONST, MISSING_VALUE_STRATEGY from ludwig.features import date_feature from ludwig.features.date_feature import DateInputFeature from ludwig.schema.features.date_feature import DateInputFeatureConfig diff --git a/tests/ludwig/features/test_image_feature.py b/tests/ludwig/features/test_image_feature.py index b67650324de..ac01adc0216 100644 --- a/tests/ludwig/features/test_image_feature.py +++ b/tests/ludwig/features/test_image_feature.py @@ -4,13 +4,18 @@ import pytest import torch -from ludwig.constants import (BFILL, CROP_OR_PAD, ENCODER, ENCODER_OUTPUT, - ENCODER_OUTPUT_STATE, INTERPOLATE, LOGITS, TYPE) -from ludwig.features.image_feature import (ImageInputFeature, - ImageOutputFeature, - _ImagePreprocessing) -from ludwig.schema.features.image_feature import (ImageInputFeatureConfig, - ImageOutputFeatureConfig) +from ludwig.constants import ( + BFILL, + CROP_OR_PAD, + ENCODER, + ENCODER_OUTPUT, + ENCODER_OUTPUT_STATE, + INTERPOLATE, + LOGITS, + TYPE, +) +from ludwig.features.image_feature import _ImagePreprocessing, ImageInputFeature, ImageOutputFeature +from ludwig.schema.features.image_feature import ImageInputFeatureConfig, ImageOutputFeatureConfig from ludwig.schema.utils import load_config_with_kwargs from ludwig.utils.misc_utils import merge_dict from ludwig.utils.torch_utils import get_torch_device diff --git a/tests/ludwig/features/test_number_feature.py b/tests/ludwig/features/test_number_feature.py index f9f89f9c2e8..8d710c418fa 100644 --- a/tests/ludwig/features/test_number_feature.py +++ b/tests/ludwig/features/test_number_feature.py @@ -6,7 +6,7 @@ import torch from ludwig.constants import ENCODER_OUTPUT -from ludwig.features.number_feature import NumberInputFeature, _OutlierReplacer +from ludwig.features.number_feature import _OutlierReplacer, NumberInputFeature from ludwig.schema.features.number_feature import ECDNumberInputFeatureConfig from ludwig.schema.utils import load_config_with_kwargs from ludwig.utils.misc_utils import merge_dict diff --git a/tests/ludwig/features/test_sequence_features.py b/tests/ludwig/features/test_sequence_features.py index e64021bb9ea..e4e84d7ef39 100644 --- a/tests/ludwig/features/test_sequence_features.py +++ b/tests/ludwig/features/test_sequence_features.py @@ -5,16 +5,11 @@ import pytest import torch -from ludwig.constants import (ENCODER_OUTPUT, LAST_HIDDEN, LOGITS, SEQUENCE, - TEXT, TYPE) -from ludwig.features.sequence_feature import (SequenceInputFeature, - SequenceOutputFeature, - _SequencePreprocessing) +from ludwig.constants import ENCODER_OUTPUT, LAST_HIDDEN, LOGITS, SEQUENCE, TEXT, TYPE +from ludwig.features.sequence_feature import _SequencePreprocessing, SequenceInputFeature, SequenceOutputFeature from ludwig.features.text_feature import TextInputFeature, TextOutputFeature -from ludwig.schema.features.sequence_feature import ( - SequenceInputFeatureConfig, SequenceOutputFeatureConfig) -from ludwig.schema.features.text_feature import (ECDTextInputFeatureConfig, - ECDTextOutputFeatureConfig) +from ludwig.schema.features.sequence_feature import SequenceInputFeatureConfig, SequenceOutputFeatureConfig +from ludwig.schema.features.text_feature import ECDTextInputFeatureConfig, ECDTextOutputFeatureConfig from ludwig.utils.torch_utils import get_torch_device from tests.integration_tests.utils import ENCODERS, sequence_feature diff --git a/tests/ludwig/features/test_text_feature.py b/tests/ludwig/features/test_text_feature.py index 163255dc368..c3574baccd0 100644 --- a/tests/ludwig/features/test_text_feature.py +++ b/tests/ludwig/features/test_text_feature.py @@ -4,8 +4,7 @@ from transformers import AutoTokenizer from ludwig.backend import LocalBackend -from ludwig.constants import (IGNORE_INDEX_TOKEN_ID, LOGITS, PREDICTIONS, - PROBABILITIES) +from ludwig.constants import IGNORE_INDEX_TOKEN_ID, LOGITS, PREDICTIONS, PROBABILITIES from ludwig.features import text_feature TEST_MODEL_NAME = "hf-internal-testing/tiny-random-OPTForCausalLM" diff --git a/tests/ludwig/features/test_timeseries_feature.py b/tests/ludwig/features/test_timeseries_feature.py index 4200ee6ba07..2f768d16d3f 100644 --- a/tests/ludwig/features/test_timeseries_feature.py +++ b/tests/ludwig/features/test_timeseries_feature.py @@ -5,8 +5,7 @@ from ludwig.constants import ENCODER, ENCODER_OUTPUT, TYPE from ludwig.features.timeseries_feature import TimeseriesInputFeature -from ludwig.schema.features.timeseries_feature import \ - TimeseriesInputFeatureConfig +from ludwig.schema.features.timeseries_feature import TimeseriesInputFeatureConfig from ludwig.schema.utils import load_config_with_kwargs from ludwig.utils.torch_utils import get_torch_device diff --git a/tests/ludwig/hyperopt/test_hyperopt.py b/tests/ludwig/hyperopt/test_hyperopt.py index 78cb6302906..367e2b8027d 100644 --- a/tests/ludwig/hyperopt/test_hyperopt.py +++ b/tests/ludwig/hyperopt/test_hyperopt.py @@ -1,8 +1,7 @@ import pytest from ludwig.constants import INPUT_FEATURES, NAME, OUTPUT_FEATURES, TYPE -from ludwig.hyperopt.utils import (log_warning_if_all_grid_type_parameters, - substitute_parameters) +from ludwig.hyperopt.utils import log_warning_if_all_grid_type_parameters, substitute_parameters from ludwig.schema.model_config import ModelConfig BASE_CONFIG = { diff --git a/tests/ludwig/marshmallow/test_fields_misc.py b/tests/ludwig/marshmallow/test_fields_misc.py index aab14e584a8..824144638ea 100644 --- a/tests/ludwig/marshmallow/test_fields_misc.py +++ b/tests/ludwig/marshmallow/test_fields_misc.py @@ -1,8 +1,7 @@ from typing import Dict, Tuple, Union import pytest -from marshmallow.exceptions import \ - ValidationError as MarshmallowValidationError +from marshmallow.exceptions import ValidationError as MarshmallowValidationError from marshmallow_dataclass import dataclass from ludwig.config_validation.validation import get_validator, validate diff --git a/tests/ludwig/marshmallow/test_fields_optimization.py b/tests/ludwig/marshmallow/test_fields_optimization.py index 87ffcecafa9..59686e5eaa4 100644 --- a/tests/ludwig/marshmallow/test_fields_optimization.py +++ b/tests/ludwig/marshmallow/test_fields_optimization.py @@ -2,8 +2,7 @@ from typing import Optional import pytest -from marshmallow.exceptions import \ - ValidationError as MarshmallowValidationError +from marshmallow.exceptions import ValidationError as MarshmallowValidationError from marshmallow_dataclass import dataclass import ludwig.schema.optimizers as lso diff --git a/tests/ludwig/marshmallow/test_fields_preprocessing.py b/tests/ludwig/marshmallow/test_fields_preprocessing.py index 6c55427a647..a340e3ad561 100644 --- a/tests/ludwig/marshmallow/test_fields_preprocessing.py +++ b/tests/ludwig/marshmallow/test_fields_preprocessing.py @@ -1,12 +1,9 @@ #! /usr/bin/env python -from ludwig.schema.features.preprocessing.binary import \ - BinaryPreprocessingConfig -from ludwig.schema.features.preprocessing.category import \ - CategoryPreprocessingConfig -from ludwig.schema.features.preprocessing.utils import \ - PreprocessingDataclassField +from ludwig.schema.features.preprocessing.binary import BinaryPreprocessingConfig +from ludwig.schema.features.preprocessing.category import CategoryPreprocessingConfig +from ludwig.schema.features.preprocessing.utils import PreprocessingDataclassField def get_marshmallow_from_dataclass_field(dfield): diff --git a/tests/ludwig/marshmallow/test_marshmallow_misc.py b/tests/ludwig/marshmallow/test_marshmallow_misc.py index 18564e7cf23..78ba8326459 100644 --- a/tests/ludwig/marshmallow/test_marshmallow_misc.py +++ b/tests/ludwig/marshmallow/test_marshmallow_misc.py @@ -3,9 +3,7 @@ import ludwig.combiners.combiners as lcc from ludwig.schema.trainer import ECDTrainerConfig -from ludwig.schema.utils import (BaseMarshmallowConfig, - assert_is_a_marshmallow_class, - load_config_with_kwargs) +from ludwig.schema.utils import assert_is_a_marshmallow_class, BaseMarshmallowConfig, load_config_with_kwargs @dataclass diff --git a/tests/ludwig/models/test_trainable_image_layers.py b/tests/ludwig/models/test_trainable_image_layers.py index 5f304514f60..e90b5320095 100644 --- a/tests/ludwig/models/test_trainable_image_layers.py +++ b/tests/ludwig/models/test_trainable_image_layers.py @@ -3,7 +3,7 @@ import pytest import torch -from torchvision.models import ResNet18_Weights, resnet18 +from torchvision.models import resnet18, ResNet18_Weights from ludwig.api import LudwigModel from ludwig.data.dataset_synthesizer import cli_synthesize_dataset diff --git a/tests/ludwig/models/test_training_determinism.py b/tests/ludwig/models/test_training_determinism.py index 3477f1d5466..ff304252dac 100644 --- a/tests/ludwig/models/test_training_determinism.py +++ b/tests/ludwig/models/test_training_determinism.py @@ -7,13 +7,22 @@ from ludwig.api import LudwigModel from ludwig.constants import BATCH_SIZE, EVAL_BATCH_SIZE, TRAINER from ludwig.utils.numerical_test_utils import assert_all_finite -from tests.integration_tests.utils import (audio_feature, bag_feature, - binary_feature, category_feature, - date_feature, generate_data, - h3_feature, image_feature, - number_feature, sequence_feature, - set_feature, text_feature, - timeseries_feature, vector_feature) +from tests.integration_tests.utils import ( + audio_feature, + bag_feature, + binary_feature, + category_feature, + date_feature, + generate_data, + h3_feature, + image_feature, + number_feature, + sequence_feature, + set_feature, + text_feature, + timeseries_feature, + vector_feature, +) @pytest.mark.distributed diff --git a/tests/ludwig/models/test_training_success.py b/tests/ludwig/models/test_training_success.py index a77cc59a3a3..ec7e5e12b96 100644 --- a/tests/ludwig/models/test_training_success.py +++ b/tests/ludwig/models/test_training_success.py @@ -2,8 +2,7 @@ from ludwig.api import LudwigModel from ludwig.constants import BINARY, TRAINER -from tests.integration_tests.utils import (binary_feature, category_feature, - generate_data) +from tests.integration_tests.utils import binary_feature, category_feature, generate_data def generate_data_and_train(config, csv_filename): diff --git a/tests/ludwig/modules/test_attention.py b/tests/ludwig/modules/test_attention.py index 01538800590..59206cdff04 100644 --- a/tests/ludwig/modules/test_attention.py +++ b/tests/ludwig/modules/test_attention.py @@ -1,13 +1,14 @@ import pytest import torch -from ludwig.modules.attention_modules import (FeedForwardAttentionReducer, - MultiHeadSelfAttention, - TransformerBlock, - TransformerStack) +from ludwig.modules.attention_modules import ( + FeedForwardAttentionReducer, + MultiHeadSelfAttention, + TransformerBlock, + TransformerStack, +) from ludwig.utils.misc_utils import set_random_seed -from tests.integration_tests.parameter_update_utils import \ - check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import check_module_parameters_updated RANDOM_SEED = 1919 diff --git a/tests/ludwig/modules/test_convolutional_modules.py b/tests/ludwig/modules/test_convolutional_modules.py index 8303c8e7643..f1296919b46 100644 --- a/tests/ludwig/modules/test_convolutional_modules.py +++ b/tests/ludwig/modules/test_convolutional_modules.py @@ -3,17 +3,21 @@ import pytest import torch -from ludwig.modules.convolutional_modules import (Conv1DLayer, Conv1DStack, - Conv2DLayer, - Conv2DLayerFixedPadding, - Conv2DStack, ParallelConv1D, - ParallelConv1DStack, ResNet, - ResNetBlock, - ResNetBlockLayer, - ResNetBottleneckBlock) +from ludwig.modules.convolutional_modules import ( + Conv1DLayer, + Conv1DStack, + Conv2DLayer, + Conv2DLayerFixedPadding, + Conv2DStack, + ParallelConv1D, + ParallelConv1DStack, + ResNet, + ResNetBlock, + ResNetBlockLayer, + ResNetBottleneckBlock, +) from ludwig.utils.image_utils import get_img_output_shape -from tests.integration_tests.parameter_update_utils import \ - check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import check_module_parameters_updated BATCH_SIZE = 2 SEQ_SIZE = 17 diff --git a/tests/ludwig/modules/test_embedding_modules.py b/tests/ludwig/modules/test_embedding_modules.py index 5b69c8ba935..a3d2ed65866 100644 --- a/tests/ludwig/modules/test_embedding_modules.py +++ b/tests/ludwig/modules/test_embedding_modules.py @@ -3,9 +3,7 @@ import pytest import torch -from ludwig.modules.embedding_modules import (Embed, EmbedSequence, EmbedSet, - EmbedWeighted, - TokenAndPositionEmbedding) +from ludwig.modules.embedding_modules import Embed, EmbedSequence, EmbedSet, EmbedWeighted, TokenAndPositionEmbedding from ludwig.utils.torch_utils import get_torch_device DEVICE = get_torch_device() diff --git a/tests/ludwig/modules/test_encoder.py b/tests/ludwig/modules/test_encoder.py index 96f44de4c32..00cc5b9e739 100644 --- a/tests/ludwig/modules/test_encoder.py +++ b/tests/ludwig/modules/test_encoder.py @@ -22,13 +22,16 @@ from ludwig.data.dataset_synthesizer import build_vocab from ludwig.encoders.base import Encoder from ludwig.encoders.image.base import MLPMixerEncoder, Stacked2DCNN -from ludwig.encoders.sequence_encoders import (ParallelCNN, - SequenceEmbedEncoder, - StackedCNN, StackedCNNRNN, - StackedParallelCNN, StackedRNN) +from ludwig.encoders.sequence_encoders import ( + ParallelCNN, + SequenceEmbedEncoder, + StackedCNN, + StackedCNNRNN, + StackedParallelCNN, + StackedRNN, +) from ludwig.utils.torch_utils import get_torch_device -from tests.integration_tests.parameter_update_utils import \ - check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import check_module_parameters_updated DROPOUT = 0.5 DEVICE = get_torch_device() diff --git a/tests/ludwig/modules/test_loss_modules.py b/tests/ludwig/modules/test_loss_modules.py index cad098a8782..2e11909dfb0 100644 --- a/tests/ludwig/modules/test_loss_modules.py +++ b/tests/ludwig/modules/test_loss_modules.py @@ -9,15 +9,20 @@ from ludwig.features.set_feature import SetOutputFeature from ludwig.features.text_feature import TextOutputFeature from ludwig.modules import loss_modules -from ludwig.schema.features.loss.loss import (BWCEWLossConfig, CORNLossConfig, - HuberLossConfig, MAELossConfig, - MAPELossConfig, MSELossConfig, - RMSELossConfig, RMSPELossConfig, - SigmoidCrossEntropyLossConfig, - SoftmaxCrossEntropyLossConfig) +from ludwig.schema.features.loss.loss import ( + BWCEWLossConfig, + CORNLossConfig, + HuberLossConfig, + MAELossConfig, + MAPELossConfig, + MSELossConfig, + RMSELossConfig, + RMSPELossConfig, + SigmoidCrossEntropyLossConfig, + SoftmaxCrossEntropyLossConfig, +) from ludwig.schema.model_config import ModelConfig -from tests.integration_tests.utils import (category_feature, set_feature, - text_feature) +from tests.integration_tests.utils import category_feature, set_feature, text_feature def from_float(v: float) -> torch.Tensor: diff --git a/tests/ludwig/modules/test_lr_scheduler.py b/tests/ludwig/modules/test_lr_scheduler.py index 77117938a3b..94876250173 100644 --- a/tests/ludwig/modules/test_lr_scheduler.py +++ b/tests/ludwig/modules/test_lr_scheduler.py @@ -3,12 +3,10 @@ import numpy as np from torch.optim import SGD -from ludwig.features.number_feature import (NumberInputFeature, - NumberOutputFeature) +from ludwig.features.number_feature import NumberInputFeature, NumberOutputFeature from ludwig.modules.lr_scheduler import LRScheduler from ludwig.schema.encoders.base import DenseEncoderConfig -from ludwig.schema.features.number_feature import ( - ECDNumberOutputFeatureConfig, NumberInputFeatureConfig) +from ludwig.schema.features.number_feature import ECDNumberOutputFeatureConfig, NumberInputFeatureConfig from ludwig.schema.lr_scheduler import LRSchedulerConfig from ludwig.utils.metric_utils import TrainerMetric from ludwig.utils.trainer_utils import get_new_progress_tracker diff --git a/tests/ludwig/modules/test_metric_modules.py b/tests/ludwig/modules/test_metric_modules.py index c382ef86480..8c305c1bb16 100644 --- a/tests/ludwig/modules/test_metric_modules.py +++ b/tests/ludwig/modules/test_metric_modules.py @@ -3,9 +3,11 @@ from ludwig.distributed import init_dist_strategy from ludwig.modules import metric_modules -from ludwig.schema.features.loss.loss import (BWCEWLossConfig, - SigmoidCrossEntropyLossConfig, - SoftmaxCrossEntropyLossConfig) +from ludwig.schema.features.loss.loss import ( + BWCEWLossConfig, + SigmoidCrossEntropyLossConfig, + SoftmaxCrossEntropyLossConfig, +) # Required for local testing. init_dist_strategy("local") diff --git a/tests/ludwig/modules/test_mlp_mixer_modules.py b/tests/ludwig/modules/test_mlp_mixer_modules.py index bbd6c986250..ac91e4644fb 100644 --- a/tests/ludwig/modules/test_mlp_mixer_modules.py +++ b/tests/ludwig/modules/test_mlp_mixer_modules.py @@ -1,6 +1,6 @@ import pytest -from ludwig.modules.mlp_mixer_modules import MLP, MixerBlock, MLPMixer +from ludwig.modules.mlp_mixer_modules import MixerBlock, MLP, MLPMixer from .test_utils import assert_output_shapes diff --git a/tests/ludwig/modules/test_regex_freezing.py b/tests/ludwig/modules/test_regex_freezing.py index adb99666e2e..7ec39544710 100644 --- a/tests/ludwig/modules/test_regex_freezing.py +++ b/tests/ludwig/modules/test_regex_freezing.py @@ -5,15 +5,23 @@ import pytest from ludwig.api import LudwigModel -from ludwig.constants import (BASE_MODEL, BATCH_SIZE, EPOCHS, GENERATION, - INPUT_FEATURES, MODEL_LLM, MODEL_TYPE, - OUTPUT_FEATURES, TRAINER, TYPE) +from ludwig.constants import ( + BASE_MODEL, + BATCH_SIZE, + EPOCHS, + GENERATION, + INPUT_FEATURES, + MODEL_LLM, + MODEL_TYPE, + OUTPUT_FEATURES, + TRAINER, + TYPE, +) from ludwig.encoders.image.torchvision import TVEfficientNetEncoder from ludwig.schema.trainer import ECDTrainerConfig from ludwig.utils.misc_utils import set_random_seed from ludwig.utils.trainer_utils import freeze_layers_regex -from tests.integration_tests.utils import (category_feature, generate_data, - image_feature, text_feature) +from tests.integration_tests.utils import category_feature, generate_data, image_feature, text_feature RANDOM_SEED = 130 diff --git a/tests/ludwig/modules/test_tabnet_modules.py b/tests/ludwig/modules/test_tabnet_modules.py index 4abc4eec4a3..ea4bf537fb2 100644 --- a/tests/ludwig/modules/test_tabnet_modules.py +++ b/tests/ludwig/modules/test_tabnet_modules.py @@ -3,11 +3,9 @@ import pytest import torch -from ludwig.modules.tabnet_modules import (AttentiveTransformer, FeatureBlock, - FeatureTransformer, TabNet) +from ludwig.modules.tabnet_modules import AttentiveTransformer, FeatureBlock, FeatureTransformer, TabNet from ludwig.utils.entmax import sparsemax -from tests.integration_tests.parameter_update_utils import \ - check_module_parameters_updated +from tests.integration_tests.parameter_update_utils import check_module_parameters_updated RANDOM_SEED = 67 diff --git a/tests/ludwig/schema/hyperopt/test_scheduler.py b/tests/ludwig/schema/hyperopt/test_scheduler.py index 7ef447a208c..363a3268c6f 100644 --- a/tests/ludwig/schema/hyperopt/test_scheduler.py +++ b/tests/ludwig/schema/hyperopt/test_scheduler.py @@ -1,9 +1,8 @@ import pytest from ludwig.schema.hyperopt.scheduler import BaseSchedulerConfig -from ludwig.schema.hyperopt.utils import (register_scheduler_config, - scheduler_config_registry) -from ludwig.schema.utils import ProtectedString, ludwig_dataclass +from ludwig.schema.hyperopt.utils import register_scheduler_config, scheduler_config_registry +from ludwig.schema.utils import ludwig_dataclass, ProtectedString @pytest.fixture( diff --git a/tests/ludwig/schema/hyperopt/test_search_algorithm.py b/tests/ludwig/schema/hyperopt/test_search_algorithm.py index c9db41b1f36..023a14a4533 100644 --- a/tests/ludwig/schema/hyperopt/test_search_algorithm.py +++ b/tests/ludwig/schema/hyperopt/test_search_algorithm.py @@ -1,9 +1,8 @@ import pytest from ludwig.schema.hyperopt.search_algorithm import BaseSearchAlgorithmConfig -from ludwig.schema.hyperopt.utils import (register_search_algorithm_config, - search_algorithm_config_registry) -from ludwig.schema.utils import ProtectedString, ludwig_dataclass +from ludwig.schema.hyperopt.utils import register_search_algorithm_config, search_algorithm_config_registry +from ludwig.schema.utils import ludwig_dataclass, ProtectedString @pytest.fixture( diff --git a/tests/ludwig/schema/test_model_config.py b/tests/ludwig/schema/test_model_config.py index a9fdc6aa3f7..21e2883b989 100644 --- a/tests/ludwig/schema/test_model_config.py +++ b/tests/ludwig/schema/test_model_config.py @@ -5,21 +5,42 @@ import pytest import yaml -from ludwig.constants import (ACTIVE, BASE_MODEL, CLIP, COLUMN, COMBINER, - DECODER, DEFAULT_VALIDATION_METRIC, DEFAULTS, - DEPENDENCIES, ENCODER, HYPEROPT, INPUT_FEATURES, - INPUT_SIZE, LOSS, MODEL_ECD, MODEL_GBM, - MODEL_LLM, MODEL_TYPE, NAME, NUM_CLASSES, - OPTIMIZER, OUTPUT_FEATURES, PREPROCESSING, - PROC_COLUMN, REDUCE_DEPENDENCIES, REDUCE_INPUT, - TIED, TRAINER, TYPE) +from ludwig.constants import ( + ACTIVE, + BASE_MODEL, + CLIP, + COLUMN, + COMBINER, + DECODER, + DEFAULT_VALIDATION_METRIC, + DEFAULTS, + DEPENDENCIES, + ENCODER, + HYPEROPT, + INPUT_FEATURES, + INPUT_SIZE, + LOSS, + MODEL_ECD, + MODEL_GBM, + MODEL_LLM, + MODEL_TYPE, + NAME, + NUM_CLASSES, + OPTIMIZER, + OUTPUT_FEATURES, + PREPROCESSING, + PROC_COLUMN, + REDUCE_DEPENDENCIES, + REDUCE_INPUT, + TIED, + TRAINER, + TYPE, +) from ludwig.error import ConfigValidationError from ludwig.schema.decoders.base import ClassifierConfig from ludwig.schema.encoders.text_encoders import BERTConfig -from ludwig.schema.features.augmentation.image import (RandomBlurConfig, - RandomRotateConfig) -from ludwig.schema.features.image_feature import \ - AUGMENTATION_DEFAULT_OPERATIONS +from ludwig.schema.features.augmentation.image import RandomBlurConfig, RandomRotateConfig +from ludwig.schema.features.image_feature import AUGMENTATION_DEFAULT_OPERATIONS from ludwig.schema.features.number_feature import NumberOutputFeatureConfig from ludwig.schema.features.text_feature import TextOutputFeatureConfig from ludwig.schema.llms.quantization import QuantizationConfig diff --git a/tests/ludwig/utils/entmax/test_losses.py b/tests/ludwig/utils/entmax/test_losses.py index 04fba713100..6c56a7e7c9c 100644 --- a/tests/ludwig/utils/entmax/test_losses.py +++ b/tests/ludwig/utils/entmax/test_losses.py @@ -5,8 +5,7 @@ from torch.autograd import gradcheck from ludwig.constants import IGNORE_INDEX_TOKEN_ID -from ludwig.utils.entmax.losses import (Entmax15Loss, EntmaxBisectLoss, - SparsemaxBisectLoss, SparsemaxLoss) +from ludwig.utils.entmax.losses import Entmax15Loss, EntmaxBisectLoss, SparsemaxBisectLoss, SparsemaxLoss # make data Xs = [torch.randn(4, 10, dtype=torch.float64, requires_grad=True) for _ in range(5)] diff --git a/tests/ludwig/utils/entmax/test_topk.py b/tests/ludwig/utils/entmax/test_topk.py index 1a6da119b4d..d4f7063e1ce 100644 --- a/tests/ludwig/utils/entmax/test_topk.py +++ b/tests/ludwig/utils/entmax/test_topk.py @@ -2,9 +2,12 @@ import torch from torch.autograd import gradcheck -from ludwig.utils.entmax.activations import (Entmax15, Sparsemax, - _entmax_threshold_and_support, - _sparsemax_threshold_and_support) +from ludwig.utils.entmax.activations import ( + _entmax_threshold_and_support, + _sparsemax_threshold_and_support, + Entmax15, + Sparsemax, +) @pytest.mark.parametrize("dim", (0, 1, 2)) diff --git a/tests/ludwig/utils/test_backward_compatibility.py b/tests/ludwig/utils/test_backward_compatibility.py index 2250b93f333..1548b268cec 100644 --- a/tests/ludwig/utils/test_backward_compatibility.py +++ b/tests/ludwig/utils/test_backward_compatibility.py @@ -4,18 +4,36 @@ import pytest -from ludwig.constants import (BATCH_SIZE, BFILL, CLASS_WEIGHTS, DEFAULTS, - EVAL_BATCH_SIZE, EXECUTOR, HYPEROPT, - INPUT_FEATURES, LEARNING_RATE_SCHEDULER, LOSS, - NUMBER, OUTPUT_FEATURES, PREPROCESSING, - SCHEDULER, SPLIT, TRAINER, TYPE) +from ludwig.constants import ( + BATCH_SIZE, + BFILL, + CLASS_WEIGHTS, + DEFAULTS, + EVAL_BATCH_SIZE, + EXECUTOR, + HYPEROPT, + INPUT_FEATURES, + LEARNING_RATE_SCHEDULER, + LOSS, + NUMBER, + OUTPUT_FEATURES, + PREPROCESSING, + SCHEDULER, + SPLIT, + TRAINER, + TYPE, +) from ludwig.schema.model_config import ModelConfig from ludwig.schema.trainer import ECDTrainerConfig from ludwig.utils.backward_compatibility import ( - _update_backend_cache_credentials, _upgrade_encoder_decoder_params, - _upgrade_feature, _upgrade_preprocessing_split, - upgrade_config_dict_to_latest_version, upgrade_missing_value_strategy, - upgrade_model_progress) + _update_backend_cache_credentials, + _upgrade_encoder_decoder_params, + _upgrade_feature, + _upgrade_preprocessing_split, + upgrade_config_dict_to_latest_version, + upgrade_missing_value_strategy, + upgrade_model_progress, +) from ludwig.utils.trainer_utils import TrainerMetric diff --git a/tests/ludwig/utils/test_config_utils.py b/tests/ludwig/utils/test_config_utils.py index 9978d75f57d..c5274f198ff 100644 --- a/tests/ludwig/utils/test_config_utils.py +++ b/tests/ludwig/utils/test_config_utils.py @@ -3,9 +3,20 @@ import pytest -from ludwig.constants import (BASE_MODEL, BINARY, ENCODER, INPUT_FEATURES, - MODEL_ECD, MODEL_GBM, MODEL_LLM, MODEL_TYPE, - NAME, OUTPUT_FEATURES, TEXT, TYPE) +from ludwig.constants import ( + BASE_MODEL, + BINARY, + ENCODER, + INPUT_FEATURES, + MODEL_ECD, + MODEL_GBM, + MODEL_LLM, + MODEL_TYPE, + NAME, + OUTPUT_FEATURES, + TEXT, + TYPE, +) from ludwig.schema.encoders.text_encoders import BERTConfig from ludwig.schema.encoders.utils import get_encoder_cls from ludwig.schema.features.preprocessing.text import TextPreprocessingConfig diff --git a/tests/ludwig/utils/test_data_utils.py b/tests/ludwig/utils/test_data_utils.py index 9da81d3c48a..d7299925f43 100644 --- a/tests/ludwig/utils/test_data_utils.py +++ b/tests/ludwig/utils/test_data_utils.py @@ -24,12 +24,19 @@ from ludwig.api import LudwigModel from ludwig.data.cache.types import CacheableDataframe from ludwig.data.dataset_synthesizer import build_synthetic_dataset_df -from ludwig.utils.data_utils import (PANDAS_DF, NumpyEncoder, - add_sequence_feature_column, - figure_data_format_dataset, get_abs_path, - hash_dict, read_csv, read_html, - read_parquet, sanitize_column_names, - use_credentials) +from ludwig.utils.data_utils import ( + add_sequence_feature_column, + figure_data_format_dataset, + get_abs_path, + hash_dict, + NumpyEncoder, + PANDAS_DF, + read_csv, + read_html, + read_parquet, + sanitize_column_names, + use_credentials, +) from tests.integration_tests.utils import private_param try: diff --git a/tests/ludwig/utils/test_dataframe_utils.py b/tests/ludwig/utils/test_dataframe_utils.py index 7e96245378a..3594c41b96b 100644 --- a/tests/ludwig/utils/test_dataframe_utils.py +++ b/tests/ludwig/utils/test_dataframe_utils.py @@ -2,7 +2,7 @@ import pandas as pd import pytest -from ludwig.backend import LOCAL_BACKEND, create_backend +from ludwig.backend import create_backend, LOCAL_BACKEND from ludwig.utils.dataframe_utils import to_numpy_dataset, to_scalar_df try: diff --git a/tests/ludwig/utils/test_defaults.py b/tests/ludwig/utils/test_defaults.py index 3e5dab1faa8..2872bfc8ffe 100644 --- a/tests/ludwig/utils/test_defaults.py +++ b/tests/ludwig/utils/test_defaults.py @@ -2,21 +2,46 @@ import pytest -from ludwig.constants import (CATEGORY, COMBINER, DECODER, DEFAULTS, - DEPENDENCIES, DROP_ROW, EARLY_STOP, ENCODER, - EXECUTOR, FILL_WITH_MODE, HYPEROPT, - INPUT_FEATURES, LOSS, MISSING_VALUE_STRATEGY, - MODEL_ECD, MODEL_TYPE, OUTPUT_FEATURES, - PREPROCESSING, REDUCE_DEPENDENCIES, REDUCE_INPUT, - SCHEDULER, SUM, TIED, TOP_K, TRAINER, TYPE) +from ludwig.constants import ( + CATEGORY, + COMBINER, + DECODER, + DEFAULTS, + DEPENDENCIES, + DROP_ROW, + EARLY_STOP, + ENCODER, + EXECUTOR, + FILL_WITH_MODE, + HYPEROPT, + INPUT_FEATURES, + LOSS, + MISSING_VALUE_STRATEGY, + MODEL_ECD, + MODEL_TYPE, + OUTPUT_FEATURES, + PREPROCESSING, + REDUCE_DEPENDENCIES, + REDUCE_INPUT, + SCHEDULER, + SUM, + TIED, + TOP_K, + TRAINER, + TYPE, +) from ludwig.schema.model_config import ModelConfig from ludwig.schema.trainer import ECDTrainerConfig -from ludwig.utils.backward_compatibility import \ - upgrade_config_dict_to_latest_version +from ludwig.utils.backward_compatibility import upgrade_config_dict_to_latest_version from ludwig.utils.misc_utils import merge_dict, set_default_values -from tests.integration_tests.utils import (binary_feature, category_feature, - number_feature, sequence_feature, - text_feature, vector_feature) +from tests.integration_tests.utils import ( + binary_feature, + category_feature, + number_feature, + sequence_feature, + text_feature, + vector_feature, +) HYPEROPT_CONFIG = { "parameters": { diff --git a/tests/ludwig/utils/test_fs_utils.py b/tests/ludwig/utils/test_fs_utils.py index 307bff1ee22..fef3e2b3bc3 100644 --- a/tests/ludwig/utils/test_fs_utils.py +++ b/tests/ludwig/utils/test_fs_utils.py @@ -6,9 +6,7 @@ import pytest -from ludwig.utils.fs_utils import (get_fs_and_path, - list_file_names_in_directory, - safe_move_directory) +from ludwig.utils.fs_utils import get_fs_and_path, list_file_names_in_directory, safe_move_directory logger = logging.getLogger(__name__) diff --git a/tests/ludwig/utils/test_hf_utils.py b/tests/ludwig/utils/test_hf_utils.py index cf4b2215fdc..f4a04388b6e 100644 --- a/tests/ludwig/utils/test_hf_utils.py +++ b/tests/ludwig/utils/test_hf_utils.py @@ -6,9 +6,11 @@ from transformers import AlbertModel, BertModel, BertTokenizer from ludwig.encoders.text_encoders import ALBERTEncoder, BERTEncoder -from ludwig.utils.hf_utils import (load_pretrained_hf_model_from_hub, - load_pretrained_hf_model_with_hub_fallback, - upload_folder_to_hfhub) +from ludwig.utils.hf_utils import ( + load_pretrained_hf_model_from_hub, + load_pretrained_hf_model_with_hub_fallback, + upload_folder_to_hfhub, +) @pytest.mark.parametrize( diff --git a/tests/ludwig/utils/test_image_utils.py b/tests/ludwig/utils/test_image_utils.py index f70860421a6..d063614435e 100644 --- a/tests/ludwig/utils/test_image_utils.py +++ b/tests/ludwig/utils/test_image_utils.py @@ -18,12 +18,20 @@ import torch import torchvision.transforms.functional as F -from ludwig.utils.image_utils import (ResizeChannels, crop, crop_or_pad, - get_class_mask_from_image, - get_image_from_class_mask, - get_unique_channels, grayscale, - is_image_score, num_channels_in_image, - pad, read_image_as_tif, resize_image) +from ludwig.utils.image_utils import ( + crop, + crop_or_pad, + get_class_mask_from_image, + get_image_from_class_mask, + get_unique_channels, + grayscale, + is_image_score, + num_channels_in_image, + pad, + read_image_as_tif, + resize_image, + ResizeChannels, +) @pytest.mark.parametrize("pad_fn", [pad, torch.jit.script(pad)]) diff --git a/tests/ludwig/utils/test_llm_utils.py b/tests/ludwig/utils/test_llm_utils.py index b062fdf9a07..a79085ef893 100644 --- a/tests/ludwig/utils/test_llm_utils.py +++ b/tests/ludwig/utils/test_llm_utils.py @@ -5,10 +5,17 @@ from ludwig.constants import LOGITS, PREDICTIONS, PROBABILITIES from ludwig.modules.training_hooks import NEFTuneHook from ludwig.utils.llm_utils import ( - FALLBACK_CONTEXT_LEN, add_left_padding, create_attention_mask, - find_last_matching_index, generate_merged_ids, get_context_len, + add_left_padding, + create_attention_mask, + FALLBACK_CONTEXT_LEN, + find_last_matching_index, + generate_merged_ids, + get_context_len, get_realigned_target_and_prediction_tensors_for_inference, - has_padding_token, pad_target_tensor_for_fine_tuning, remove_left_padding) + has_padding_token, + pad_target_tensor_for_fine_tuning, + remove_left_padding, +) from ludwig.utils.tokenizers import HFTokenizer pytestmark = [pytest.mark.llm] diff --git a/tests/ludwig/utils/test_model_utils.py b/tests/ludwig/utils/test_model_utils.py index 4b25e8893dc..c5acbfca927 100644 --- a/tests/ludwig/utils/test_model_utils.py +++ b/tests/ludwig/utils/test_model_utils.py @@ -2,10 +2,12 @@ import torch from transformers import AutoModelForCausalLM -from ludwig.utils.model_utils import (contains_nan_or_inf_tensors, - extract_tensors, - find_embedding_layer_with_path, - replace_tensors) +from ludwig.utils.model_utils import ( + contains_nan_or_inf_tensors, + extract_tensors, + find_embedding_layer_with_path, + replace_tensors, +) class SampleModel(torch.nn.Module): diff --git a/tests/ludwig/utils/test_normalization.py b/tests/ludwig/utils/test_normalization.py index c4803a0a520..f5ccf691366 100644 --- a/tests/ludwig/utils/test_normalization.py +++ b/tests/ludwig/utils/test_normalization.py @@ -21,8 +21,7 @@ from ludwig.backend import initialize_backend from ludwig.constants import COLUMN, NAME, PROC_COLUMN from ludwig.features.feature_utils import compute_feature_hash -from ludwig.features.number_feature import (NumberFeatureMixin, - numeric_transformation_registry) +from ludwig.features.number_feature import NumberFeatureMixin, numeric_transformation_registry from ludwig.utils.types import DataFrame diff --git a/tests/ludwig/utils/test_tokenizers.py b/tests/ludwig/utils/test_tokenizers.py index b43850963aa..0fa8104ed10 100644 --- a/tests/ludwig/utils/test_tokenizers.py +++ b/tests/ludwig/utils/test_tokenizers.py @@ -1,6 +1,9 @@ -from ludwig.utils.tokenizers import (EnglishLemmatizeFilterTokenizer, - NgramTokenizer, StringSplitTokenizer, - get_tokenizer_from_registry) +from ludwig.utils.tokenizers import ( + EnglishLemmatizeFilterTokenizer, + get_tokenizer_from_registry, + NgramTokenizer, + StringSplitTokenizer, +) def test_ngram_tokenizer(): diff --git a/tests/ludwig/utils/test_torch_utils.py b/tests/ludwig/utils/test_torch_utils.py index 1f3520ba030..f77a6b171c7 100644 --- a/tests/ludwig/utils/test_torch_utils.py +++ b/tests/ludwig/utils/test_torch_utils.py @@ -6,10 +6,13 @@ import pytest import torch -from ludwig.utils.torch_utils import (_get_torch_init_params, - _set_torch_init_params, - initialize_pytorch, sequence_length_2D, - sequence_length_3D) +from ludwig.utils.torch_utils import ( + _get_torch_init_params, + _set_torch_init_params, + initialize_pytorch, + sequence_length_2D, + sequence_length_3D, +) @pytest.mark.parametrize("input_sequence", [[[0, 1, 1], [2, 0, 0], [3, 3, 3]]]) diff --git a/tests/ludwig/utils/test_trainer_utils.py b/tests/ludwig/utils/test_trainer_utils.py index 521c4b9ea2f..763f311bcfc 100644 --- a/tests/ludwig/utils/test_trainer_utils.py +++ b/tests/ludwig/utils/test_trainer_utils.py @@ -6,8 +6,7 @@ from ludwig.constants import AUTO, BATCH_SIZE, COMBINED, LOSS from ludwig.features.category_feature import CategoryOutputFeature from ludwig.features.feature_utils import LudwigFeatureDict -from ludwig.schema.features.category_feature import \ - ECDCategoryOutputFeatureConfig +from ludwig.schema.features.category_feature import ECDCategoryOutputFeatureConfig from ludwig.schema.trainer import ECDTrainerConfig from ludwig.schema.utils import load_config_with_kwargs from ludwig.utils import trainer_utils diff --git a/tests/ludwig/utils/test_upload_utils.py b/tests/ludwig/utils/test_upload_utils.py index d7c2d1c2d42..548713233d4 100644 --- a/tests/ludwig/utils/test_upload_utils.py +++ b/tests/ludwig/utils/test_upload_utils.py @@ -6,8 +6,7 @@ import pytest -from ludwig.globals import (MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, - MODEL_WEIGHTS_FILE_NAME) +from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, MODEL_WEIGHTS_FILE_NAME from ludwig.utils.upload_utils import HuggingFaceHub logger = logging.getLogger(__name__) diff --git a/tests/ludwig/utils/test_version_transformation.py b/tests/ludwig/utils/test_version_transformation.py index 580617a91cb..3cd35409b7b 100644 --- a/tests/ludwig/utils/test_version_transformation.py +++ b/tests/ludwig/utils/test_version_transformation.py @@ -1,5 +1,4 @@ -from ludwig.utils.version_transformation import (VersionTransformation, - VersionTransformationRegistry) +from ludwig.utils.version_transformation import VersionTransformation, VersionTransformationRegistry def test_version_transformation_registry(): diff --git a/tests/regression_tests/automl/scripts/update_golden_types.py b/tests/regression_tests/automl/scripts/update_golden_types.py index bb1edc02c55..12490ab22c3 100644 --- a/tests/regression_tests/automl/scripts/update_golden_types.py +++ b/tests/regression_tests/automl/scripts/update_golden_types.py @@ -3,9 +3,7 @@ import json from ludwig.automl import create_auto_config -from tests.regression_tests.automl.utils import (TEST_DATASET_REGISTRY, - get_dataset_golden_types_path, - get_dataset_object) +from tests.regression_tests.automl.utils import get_dataset_golden_types_path, get_dataset_object, TEST_DATASET_REGISTRY def write_json_files(): diff --git a/tests/regression_tests/automl/test_auto_type_inference.py b/tests/regression_tests/automl/test_auto_type_inference.py index 0e3820252f1..72c2281c7fe 100644 --- a/tests/regression_tests/automl/test_auto_type_inference.py +++ b/tests/regression_tests/automl/test_auto_type_inference.py @@ -2,9 +2,7 @@ import pytest -from tests.regression_tests.automl.utils import (TEST_DATASET_REGISTRY, - get_dataset_golden_types_path, - get_dataset_object) +from tests.regression_tests.automl.utils import get_dataset_golden_types_path, get_dataset_object, TEST_DATASET_REGISTRY try: from ludwig.automl import create_auto_config diff --git a/tests/training_success/configs.py b/tests/training_success/configs.py index d5a7128b930..a96893e82fd 100644 --- a/tests/training_success/configs.py +++ b/tests/training_success/configs.py @@ -1,6 +1,8 @@ from ludwig.config_sampling.explore_schema import ( - combine_configs, combine_configs_for_comparator_combiner, - combine_configs_for_sequence_combiner) + combine_configs, + combine_configs_for_comparator_combiner, + combine_configs_for_sequence_combiner, +) # A generic tabular to text config used to generate synthetic data and train a model on it. TABULAR_TO_TEXT = """ diff --git a/tests/training_success/test_training_success.py b/tests/training_success/test_training_success.py index f851e3b2dc1..f54eb22beaf 100644 --- a/tests/training_success/test_training_success.py +++ b/tests/training_success/test_training_success.py @@ -8,16 +8,16 @@ import yaml from ludwig.api import LudwigModel -from ludwig.config_sampling.explore_schema import (ConfigOption, - combine_configs, - explore_properties) +from ludwig.config_sampling.explore_schema import combine_configs, ConfigOption, explore_properties from ludwig.config_validation.validation import get_schema from ludwig.types import ModelConfigDict -from .configs import (COMBINER_TYPE_TO_COMBINE_FN_MAP, - ECD_CONFIG_SECTION_TO_CONFIG, - FEATURE_TYPE_TO_CONFIG_FOR_DECODER_LOSS, - FEATURE_TYPE_TO_CONFIG_FOR_ENCODER_PREPROCESSING) +from .configs import ( + COMBINER_TYPE_TO_COMBINE_FN_MAP, + ECD_CONFIG_SECTION_TO_CONFIG, + FEATURE_TYPE_TO_CONFIG_FOR_DECODER_LOSS, + FEATURE_TYPE_TO_CONFIG_FOR_ENCODER_PREPROCESSING, +) def defaults_config_generator( From 0ab58587d8e9a6bc6296bd356e24fe69f0c57117 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Mon, 2 Dec 2024 14:52:09 -0800 Subject: [PATCH 39/67] testing matplotlib version --- pyproject.toml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 720dfd5e2db..5ecc0dbb6c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -154,9 +154,8 @@ serve = [ tree = ["hummingbird-ml>=0.4.8", "lightgbm", "lightgbm-ray"] viz = [ "hiplot", - "matplotlib>3.4; python_version > '3.6'", - #"matplotlib>3.4,<3.9.0; python_version > '3.6'", - "matplotlib>=3.0,<3.4; python_version <= '3.6'", + #"matplotlib>3.4; python_version > '3.6'", + "matplotlib==3.9.3", "ptitprince", "seaborn>=0.7,<0.12", ] From 3dea80f0b52efd079979f0f7754bcf1ed096f125 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Sun, 8 Dec 2024 14:16:58 -0800 Subject: [PATCH 40/67] bumped scipy version and added matplotlib to default dependencies --- pyproject.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5ecc0dbb6c2..bdc2934a345 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,9 @@ dependencies = [ "rich~=12.4.4", "sacremoses", "scikit-learn==1.3", - "scipy>=0.18", + "matplotlib==3.9.3,!=3.4.3", + #"scipy>=0.18", + "scipy==1.14.1", "sentencepiece", "spacy", "tabulate>=0.7", @@ -154,7 +156,6 @@ serve = [ tree = ["hummingbird-ml>=0.4.8", "lightgbm", "lightgbm-ray"] viz = [ "hiplot", - #"matplotlib>3.4; python_version > '3.6'", "matplotlib==3.9.3", "ptitprince", "seaborn>=0.7,<0.12", From 5a140a80415d4a56c625bb4b6732c1474752245a Mon Sep 17 00:00:00 2001 From: Saikat Kanjilal Date: Sun, 8 Dec 2024 21:12:47 -0800 Subject: [PATCH 41/67] updated toml file for getting past matplotlib errors --- ludwig/utils/visualization_utils.py | 2 +- pyproject.toml | 21 ++++++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/ludwig/utils/visualization_utils.py b/ludwig/utils/visualization_utils.py index 54e37ef6da3..54a9b39192b 100644 --- a/ludwig/utils/visualization_utils.py +++ b/ludwig/utils/visualization_utils.py @@ -21,7 +21,7 @@ import numpy as np import pandas as pd -import ptitprince as pt +#import ptitprince as pt from packaging import version from ludwig.constants import SPACE, TRAINING, VALIDATION diff --git a/pyproject.toml b/pyproject.toml index 8953743c5f7..89582c5ec50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,22 +27,31 @@ keywords = [ dependencies = [ "absl-py", "bitsandbytes<0.41.0", + "captum", "Cython>=0.25,<1.0", + "cloudpickle", "dataclasses-json", "datasets", + "fastapi", "filelock", "fsspec[http]<=2023.10.0", "getdaft==0.1.20", "gpustat", "h5py>=2.6,!=3.0.0", + "httpx", "html5lib", + "hummingbird", + "hummingbird.ml", "imagecodecs", "jsonschema>=4.5.0,<4.7", "kaggle", + "lightgbm", "lxml", "marshmallow", "marshmallow-dataclass==8.5.4", "marshmallow-jsonschema", + "matplotlib>=3.5,<3.8", + "mlflow", "nltk", "numpy==1.26", "openpyxl>=3.0.7", @@ -50,6 +59,7 @@ dependencies = [ "pandas", "protobuf", "psutil", + "ptitprince", "py-cpuinfo==9.0.0", "pyarrow<15.0.0", "pydantic<2.0", @@ -80,6 +90,9 @@ dependencies = [ "xlwt", "tifffile==2024.9.20", "onnx", + "wget", + "starlette", + "uvicorn" ] [project.optional-dependencies] @@ -153,10 +166,9 @@ serve = [ tree = ["hummingbird-ml>=0.4.8", "lightgbm", "lightgbm-ray"] viz = [ "hiplot", - "matplotlib>3.4,<3.9.0; python_version > '3.6'", - "matplotlib>=3.0,<3.4; python_version <= '3.6'", + "matplotlib", "ptitprince", - "seaborn>=0.7,<0.12", + "seaborn", ] [project.urls] @@ -196,6 +208,9 @@ ignore = [ "E501", ] +[tool.hatch.envs.test] +dependencies = ["cloudpickle","matplotlib"] + [tool.hatch.envs.lint] dependencies = ["flake8", "flake8-pyproject"] From cbc3d800ee563abee5cb96f00ca494c1c7c3794c Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Mon, 13 Jan 2025 18:11:44 -0800 Subject: [PATCH 42/67] commented out combinatorial tests --- .github/workflows/pytest.yml | 73 ++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index f7b32b937ee..a86f1daf2d4 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -22,7 +22,8 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - python-version: ["3.10", "3.11", "3.12"] + #python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.11"] test-markers: ["not distributed"] #["not distributed", "distributed"] env: @@ -256,44 +257,50 @@ jobs: # run: | # pytest -vs --durations 100 -m "llm" --junitxml pytest.xml tests - combinatorial-tests: - name: Combinatorial Tests - runs-on: ubuntu-latest - timeout-minutes: 60 - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.11 - uses: actions/setup-python@v5 - with: - python-version: "3.11" + #COMMENTED OUT COMBINATORIAL TEST ######################################## - - name: Setup Linux - if: runner.os == 'linux' - run: | - sudo apt-get update && sudo apt-get install -y cmake libsndfile1 + # combinatorial-tests: + # name: Combinatorial Tests + # runs-on: ubuntu-latest - - name: Setup macOS - if: runner.os == 'macOS' - run: | - brew install libuv + # timeout-minutes: 60 + # steps: + # - uses: actions/checkout@v2 + # - name: Set up Python 3.11 + # uses: actions/setup-python@v5 + # with: + # python-version: "3.11" - - name: Install dependencies - run: | - python --version - pip --version - python -m pip install -U pip - pip install '.[test]' - pip list - shell: bash + # - name: Setup Linux + # if: runner.os == 'linux' + # run: | + # sudo apt-get update && sudo apt-get install -y cmake libsndfile1 - - name: Testing combinatorial config generation code - run: | - pytest -vs --durations 100 -m "combinatorial" --junitxml pytest.xml tests/ludwig/config_sampling + # - name: Setup macOS + # if: runner.os == 'macOS' + # run: | + # brew install libuv + + # - name: Install dependencies + # run: | + # python --version + # pip --version + # python -m pip install -U pip + # pip install '.[test]' + # pip list + # shell: bash + + # - name: Testing combinatorial config generation code + # run: | + # pytest -vs --durations 100 -m "combinatorial" --junitxml pytest.xml tests/ludwig/config_sampling + + # - name: Combinatorial Tests + # run: | + # pytest -rx --durations 100 -m "combinatorial" --junitxml pytest.xml tests/training_success + + #COMMENTED OUT COMBINATORIAL TEST ######################################## - - name: Combinatorial Tests - run: | - pytest -rx --durations 100 -m "combinatorial" --junitxml pytest.xml tests/training_success test-minimal-install: name: Test Minimal Install From 56d53029c10efbe130897b9f8c4b9c1b7d24b943 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Wed, 15 Jan 2025 16:48:40 -0800 Subject: [PATCH 43/67] added GPy step to pytest jobs --- .github/workflows/pytest.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index a86f1daf2d4..2dbd68857a9 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -62,7 +62,12 @@ jobs: - name: Setup Linux if: runner.os == 'linux' run: | - sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev libopenblas-dev + sudo apt-get update && sudo apt-get install -y build-essential cmake liblapack-dev gfortran libsndfile1 wget libsox-dev libopenblas-dev + + - name: Install GPy + run: | + python -m pip install -U pip + pip install GPy=1.10.0 - name: Setup macOS if: runner.os == 'macOS' From 5148870a94acef0de5d8a33f7243082ce992609a Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Wed, 15 Jan 2025 16:50:04 -0800 Subject: [PATCH 44/67] fixed gpy typo --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 2dbd68857a9..bf45abe197b 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -67,7 +67,7 @@ jobs: - name: Install GPy run: | python -m pip install -U pip - pip install GPy=1.10.0 + pip install GPy==1.10.0 - name: Setup macOS if: runner.os == 'macOS' From 17f7846de7c4b77b5be05a5c5c8370531e1ca7e0 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Wed, 15 Jan 2025 17:24:57 -0800 Subject: [PATCH 45/67] added download longintrepr file --- .github/workflows/pytest.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index bf45abe197b..f7a9b91c0be 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -64,6 +64,12 @@ jobs: run: | sudo apt-get update && sudo apt-get install -y build-essential cmake liblapack-dev gfortran libsndfile1 wget libsox-dev libopenblas-dev + - name: Download longintrepr.h + run: | + mkdir -p /usr/include/python3.11 + curl -o /usr/include/python3.11/longintrepr.h https://raw.githubusercontent.com/python/cpython/refs/heads/main/Include/cpython/longintrepr.h + + - name: Install GPy run: | python -m pip install -U pip From 7f3975d9c630d1448bf2ace4a971491db9c5fe4f Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Wed, 15 Jan 2025 17:26:47 -0800 Subject: [PATCH 46/67] added sudo privs to longint file --- .github/workflows/pytest.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index f7a9b91c0be..1b6ad1dc550 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -66,8 +66,8 @@ jobs: - name: Download longintrepr.h run: | - mkdir -p /usr/include/python3.11 - curl -o /usr/include/python3.11/longintrepr.h https://raw.githubusercontent.com/python/cpython/refs/heads/main/Include/cpython/longintrepr.h + sudo mkdir -p /usr/include/python3.11 + sudo curl -o /usr/include/python3.11/longintrepr.h https://raw.githubusercontent.com/python/cpython/refs/heads/main/Include/cpython/longintrepr.h - name: Install GPy From 1081593012c03918131659735861ef992a636d9f Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Wed, 15 Jan 2025 17:37:04 -0800 Subject: [PATCH 47/67] added cython .29.35 trying to fix GPy error --- .github/workflows/pytest.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 1b6ad1dc550..7558fb8cb0f 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -73,6 +73,7 @@ jobs: - name: Install GPy run: | python -m pip install -U pip + pip install Cython==0.29.35 pip install GPy==1.10.0 - name: Setup macOS From a596ee697d717b6e1b4d4ed1fd7981b60ef87c2f Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Mon, 20 Jan 2025 21:26:41 -0800 Subject: [PATCH 48/67] testing install dependency line --- .github/workflows/pytest.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 7558fb8cb0f..727fbf141d5 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -70,11 +70,11 @@ jobs: sudo curl -o /usr/include/python3.11/longintrepr.h https://raw.githubusercontent.com/python/cpython/refs/heads/main/Include/cpython/longintrepr.h - - name: Install GPy - run: | - python -m pip install -U pip - pip install Cython==0.29.35 - pip install GPy==1.10.0 + # - name: Install GPy + # run: | + # python -m pip install -U pip + # pip install Cython==0.29.35 + # pip install GPy==1.10.0 - name: Setup macOS if: runner.os == 'macOS' @@ -100,7 +100,8 @@ jobs: python -m pip install -U pip cmake --version - pip install --prefer-binary '.[test]' + # #pip install --prefer-binary '.[test]' + pip install --prefer-binary . pip list shell: bash From f37c512297aeffbee9f013bf2566d36903cd3578 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Mon, 20 Jan 2025 21:29:16 -0800 Subject: [PATCH 49/67] fix yml issue in actions --- .github/workflows/pytest.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 727fbf141d5..01e25484c3d 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -99,8 +99,6 @@ jobs: pip --version python -m pip install -U pip cmake --version - - # #pip install --prefer-binary '.[test]' pip install --prefer-binary . pip list shell: bash From 6d2195fcb98b0c8639df53bde7bb88aad1e7f935 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Mon, 20 Jan 2025 21:39:02 -0800 Subject: [PATCH 50/67] debugging GPy error --- .github/workflows/pytest.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 01e25484c3d..9cc4ea6d79d 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -103,6 +103,23 @@ jobs: pip list shell: bash + - name: Incremental dependency installation + run: | + pip install bayesian-optimization + pip install optuna + pip install ax-platform + pip install scikit-optimize + pip install --prefer-binary .[test] + shell: bash + + - name: Debug dependency tree + run: | + python -m pip install pipdeptree + pipdeptree > dependency-tree.txt + cat dependency-tree.txt + shell: bash + + - name: Unit Tests run: | RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod and not llm" --junitxml pytest.xml tests/ludwig From 10b85c154de691cdb3a2dd2f332f1d467d7a7aeb Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Wed, 22 Jan 2025 15:48:11 -0800 Subject: [PATCH 51/67] added logging for successful installs --- .github/workflows/pytest.yml | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 9cc4ea6d79d..5e0745a05d9 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -106,10 +106,30 @@ jobs: - name: Incremental dependency installation run: | pip install bayesian-optimization + + echo "-------------------------------" + echo "done with bayesian-optimization" + echo "-------------------------------" + pip install optuna + + echo "-------------------------------" + echo "done with optuna" + echo "-------------------------------" + pip install ax-platform + + echo "-------------------------------" + echo "done with ax-platform" + echo "-------------------------------" + pip install scikit-optimize - pip install --prefer-binary .[test] + + echo "-------------------------------" + echo "scikit-optimize" + echo "-------------------------------" + + shell: bash - name: Debug dependency tree From 67156016c54e5fa93deaccfd52a6f9fdc8759955 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Wed, 22 Jan 2025 15:58:54 -0800 Subject: [PATCH 52/67] added logging for more GPy debug tests --- .github/workflows/pytest.yml | 39 +++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 5e0745a05d9..d74924e456f 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -105,31 +105,38 @@ jobs: - name: Incremental dependency installation run: | - pip install bayesian-optimization + pip install pytest + echo "Installed pytest successfully." - echo "-------------------------------" - echo "done with bayesian-optimization" - echo "-------------------------------" + pip install pytest-timeout + echo "Installed pytest-timeout successfully." - pip install optuna + pip install pytest-cov + echo "Installed pytest-cov successfully." - echo "-------------------------------" - echo "done with optuna" - echo "-------------------------------" + pip install tifffile + echo "Installed tifffile successfully." - pip install ax-platform + pip install wget + echo "Installed wget successfully." - echo "-------------------------------" - echo "done with ax-platform" - echo "-------------------------------" + pip install six>=1.13.0 + echo "Installed six>=1.13.0 successfully." - pip install scikit-optimize + pip install aim + echo "Installed aim successfully." - echo "-------------------------------" - echo "scikit-optimize" - echo "-------------------------------" + pip install wandb + echo "Installed wandb successfully." + pip install comet_ml + echo "Installed comet_ml successfully." + pip install mlflow + echo "Installed mlflow successfully." + + pip install sqlachemy<2 + echo "Installed sqlachemy<2 successfully." shell: bash - name: Debug dependency tree From 67c593cf1315b4574cd41db0c6189bdcf576c9cb Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Wed, 22 Jan 2025 16:06:46 -0800 Subject: [PATCH 53/67] added more tests --- .github/workflows/pytest.yml | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index d74924e456f..4df238caab0 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -135,8 +135,29 @@ jobs: pip install mlflow echo "Installed mlflow successfully." - pip install sqlachemy<2 - echo "Installed sqlachemy<2 successfully." + pip install sqlalchemy<2 + echo "Installed sqlalchemy<2 successfully." + + pip install hpbandster + echo "Installed hpbandster successfully." + + pip install ConfigSpace==0.7.1 + echo "Installed ConfigSpace==0.7.1 successfully." + + pip install flaml[blendsearch] + echo "Installed flaml[blendsearch] successfully." + + pip install HEBO + echo "Installed HEBO successfully." + + pip install nevergrad + echo "Installed nevergrad successfully." + + pip install zoopy + echo "Installed zoopy successfully." + + pip install s3fs>=2022.8.2 + echo "Installed s3fs>=2022.8.2 successfully." shell: bash - name: Debug dependency tree From db2204f94477f71c5f333b04b7051f985831e6ef Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Wed, 22 Jan 2025 16:16:38 -0800 Subject: [PATCH 54/67] testing sqlalchemy --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 4df238caab0..3df8df127bd 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -135,7 +135,7 @@ jobs: pip install mlflow echo "Installed mlflow successfully." - pip install sqlalchemy<2 + pip install "sqlalchemy<2" echo "Installed sqlalchemy<2 successfully." pip install hpbandster From b1202325937920c5ac3eead297142561526aa6c6 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Wed, 22 Jan 2025 16:22:41 -0800 Subject: [PATCH 55/67] zoopt typo --- .github/workflows/pytest.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 3df8df127bd..c1d8fa2f103 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -153,8 +153,8 @@ jobs: pip install nevergrad echo "Installed nevergrad successfully." - pip install zoopy - echo "Installed zoopy successfully." + pip install zoopt + echo "Installed zoopt successfully." pip install s3fs>=2022.8.2 echo "Installed s3fs>=2022.8.2 successfully." From 6a78456b4fda5d307cfd0f188ca3fdd42dab4611 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Wed, 22 Jan 2025 16:37:51 -0800 Subject: [PATCH 56/67] added distributed, explain, extra tests --- .github/workflows/pytest.yml | 46 ++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index c1d8fa2f103..efe1a627eb4 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -160,6 +160,52 @@ jobs: echo "Installed s3fs>=2022.8.2 successfully." shell: bash + + - name: Install distributed, explain, and extra dependencies + run: | + # Distributed dependencies + pip install awscli + echo "Installed distributed dependency: awscli successfully." + + pip install "dask[dataframe]<2023.4.0" + echo "Installed distributed dependency: dask[dataframe]<2023.4.0 successfully." + + pip install "deepspeed!=0.11.0,<0.13.0" + echo "Installed distributed dependency: deepspeed!=0.11.0,<0.13.0 successfully." + + pip install "getdaft[ray]==0.1.20" + echo "Installed distributed dependency: getdaft[ray]==0.1.20 successfully." + + pip install GPUtil + echo "Installed distributed dependency: GPUtil successfully." + + pip install pyarrow + echo "Installed distributed dependency: pyarrow successfully." + + pip install "ray[default,data,serve,tune]==2.3.1" + echo "Installed distributed dependency: ray[default,data,serve,tune]==2.3.1 successfully." + + pip install tblib + echo "Installed distributed dependency: tblib successfully." + + pip install "tensorboardX<2.3" + echo "Installed distributed dependency: tensorboardX<2.3 successfully." + + # Explain dependencies + pip install captum + echo "Installed explain dependency: captum successfully." + + # Extra dependencies + pip install "horovod[pytorch]>=0.24.0,!=0.26.0" + echo "Installed extra dependency: horovod[pytorch]>=0.24.0,!=0.26.0 successfully." + + pip install "modin[ray]" + echo "Installed extra dependency: modin[ray] successfully." + + pip install "predibase>=2023.10.2" + echo "Installed extra dependency: predibase>=2023.10.2 successfully." + + - name: Debug dependency tree run: | python -m pip install pipdeptree From a86c1e77c7c23e1aa8dbe11d528001d381ef0405 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Wed, 22 Jan 2025 16:51:14 -0800 Subject: [PATCH 57/67] added final dependencies --- .github/workflows/pytest.yml | 70 ++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index efe1a627eb4..de6be214e0f 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -206,6 +206,76 @@ jobs: echo "Installed extra dependency: predibase>=2023.10.2 successfully." + - name: Install hyperopt, llm, serve, tree, and viz dependencies + run: | + # Hyperopt dependencies + pip install hyperopt + echo "Installed hyperopt successfully." + + pip install "ray[default,tune]>=2.0.0" + echo "Installed hyperopt dependency: ray[default,tune]>=2.0.0 successfully." + + # LLM dependencies + pip install accelerate + echo "Installed llm dependency: accelerate successfully." + + pip install faiss-cpu + echo "Installed llm dependency: faiss-cpu successfully." + + pip install loralib + echo "Installed llm dependency: loralib successfully." + + pip install "peft>=0.10.0" + echo "Installed llm dependency: peft>=0.10.0 successfully." + + pip install sentence-transformers + echo "Installed llm dependency: sentence-transformers successfully." + + # Serve dependencies + pip install cartonml-nightly + echo "Installed serve dependency: cartonml-nightly successfully." + + pip install fastapi + echo "Installed serve dependency: fastapi successfully." + + pip install httpx + echo "Installed serve dependency: httpx successfully." + + pip install "neuropod==0.3.0rc6 ; platform_system != 'Windows' and python_version < '3.9'" + echo "Installed serve dependency: neuropod==0.3.0rc6 successfully (if applicable)." + + pip install python-multipart + echo "Installed serve dependency: python-multipart successfully." + + pip install uvicorn + echo "Installed serve dependency: uvicorn successfully." + + pip install starlette + echo "Installed serve dependency: starlette successfully." + + # Tree dependencies + pip install "hummingbird-ml>=0.4.8" + echo "Installed tree dependency: hummingbird-ml>=0.4.8 successfully." + + pip install lightgbm + echo "Installed tree dependency: lightgbm successfully." + + pip install lightgbm-ray + echo "Installed tree dependency: lightgbm-ray successfully." + + # Viz dependencies + pip install hiplot + echo "Installed viz dependency: hiplot successfully." + + pip install "matplotlib==3.9.3" + echo "Installed viz dependency: matplotlib==3.9.3 successfully." + + pip install ptitprince + echo "Installed viz dependency: ptitprince successfully." + + pip install "seaborn>=0.7,<0.12" + echo "Installed viz dependency: seaborn>=0.7,<0.12 successfully." + - name: Debug dependency tree run: | python -m pip install pipdeptree From 87e0a646b8be8b1b1701c257a9240eec7b4f8482 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Thu, 23 Jan 2025 15:25:34 -0800 Subject: [PATCH 58/67] added test install all --- .github/workflows/pytest.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index de6be214e0f..f85cee2b95f 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -276,6 +276,10 @@ jobs: pip install "seaborn>=0.7,<0.12" echo "Installed viz dependency: seaborn>=0.7,<0.12 successfully." + - name: Test install all + run : | + pip install --prefer-binary . + - name: Debug dependency tree run: | python -m pip install pipdeptree From fbcb7e13e173f16c1c95697505f6b5ad81397641 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Thu, 23 Jan 2025 15:36:27 -0800 Subject: [PATCH 59/67] testing install all --- .github/workflows/pytest.yml | 243 ++++++++++++++++++----------------- 1 file changed, 122 insertions(+), 121 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index f85cee2b95f..787f8b7649d 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -99,186 +99,187 @@ jobs: pip --version python -m pip install -U pip cmake --version - pip install --prefer-binary . + # pip install --prefer-binary . pip list shell: bash - - name: Incremental dependency installation - run: | - pip install pytest - echo "Installed pytest successfully." + # - name: Incremental dependency installation + # run: | + # pip install pytest + # echo "Installed pytest successfully." - pip install pytest-timeout - echo "Installed pytest-timeout successfully." + # pip install pytest-timeout + # echo "Installed pytest-timeout successfully." - pip install pytest-cov - echo "Installed pytest-cov successfully." + # pip install pytest-cov + # echo "Installed pytest-cov successfully." - pip install tifffile - echo "Installed tifffile successfully." + # pip install tifffile + # echo "Installed tifffile successfully." - pip install wget - echo "Installed wget successfully." + # pip install wget + # echo "Installed wget successfully." - pip install six>=1.13.0 - echo "Installed six>=1.13.0 successfully." + # pip install six>=1.13.0 + # echo "Installed six>=1.13.0 successfully." - pip install aim - echo "Installed aim successfully." + # pip install aim + # echo "Installed aim successfully." - pip install wandb - echo "Installed wandb successfully." + # pip install wandb + # echo "Installed wandb successfully." - pip install comet_ml - echo "Installed comet_ml successfully." + # pip install comet_ml + # echo "Installed comet_ml successfully." - pip install mlflow - echo "Installed mlflow successfully." + # pip install mlflow + # echo "Installed mlflow successfully." - pip install "sqlalchemy<2" - echo "Installed sqlalchemy<2 successfully." + # pip install "sqlalchemy<2" + # echo "Installed sqlalchemy<2 successfully." - pip install hpbandster - echo "Installed hpbandster successfully." + # pip install hpbandster + # echo "Installed hpbandster successfully." - pip install ConfigSpace==0.7.1 - echo "Installed ConfigSpace==0.7.1 successfully." + # pip install ConfigSpace==0.7.1 + # echo "Installed ConfigSpace==0.7.1 successfully." - pip install flaml[blendsearch] - echo "Installed flaml[blendsearch] successfully." + # pip install flaml[blendsearch] + # echo "Installed flaml[blendsearch] successfully." - pip install HEBO - echo "Installed HEBO successfully." + # pip install HEBO + # echo "Installed HEBO successfully." - pip install nevergrad - echo "Installed nevergrad successfully." + # pip install nevergrad + # echo "Installed nevergrad successfully." - pip install zoopt - echo "Installed zoopt successfully." + # pip install zoopt + # echo "Installed zoopt successfully." - pip install s3fs>=2022.8.2 - echo "Installed s3fs>=2022.8.2 successfully." - shell: bash + # pip install s3fs>=2022.8.2 + # echo "Installed s3fs>=2022.8.2 successfully." + # shell: bash - - name: Install distributed, explain, and extra dependencies - run: | - # Distributed dependencies - pip install awscli - echo "Installed distributed dependency: awscli successfully." + # - name: Install distributed, explain, and extra dependencies + # run: | + # # Distributed dependencies + # pip install awscli + # echo "Installed distributed dependency: awscli successfully." - pip install "dask[dataframe]<2023.4.0" - echo "Installed distributed dependency: dask[dataframe]<2023.4.0 successfully." + # pip install "dask[dataframe]<2023.4.0" + # echo "Installed distributed dependency: dask[dataframe]<2023.4.0 successfully." - pip install "deepspeed!=0.11.0,<0.13.0" - echo "Installed distributed dependency: deepspeed!=0.11.0,<0.13.0 successfully." + # pip install "deepspeed!=0.11.0,<0.13.0" + # echo "Installed distributed dependency: deepspeed!=0.11.0,<0.13.0 successfully." - pip install "getdaft[ray]==0.1.20" - echo "Installed distributed dependency: getdaft[ray]==0.1.20 successfully." + # pip install "getdaft[ray]==0.1.20" + # echo "Installed distributed dependency: getdaft[ray]==0.1.20 successfully." - pip install GPUtil - echo "Installed distributed dependency: GPUtil successfully." + # pip install GPUtil + # echo "Installed distributed dependency: GPUtil successfully." - pip install pyarrow - echo "Installed distributed dependency: pyarrow successfully." + # pip install pyarrow + # echo "Installed distributed dependency: pyarrow successfully." - pip install "ray[default,data,serve,tune]==2.3.1" - echo "Installed distributed dependency: ray[default,data,serve,tune]==2.3.1 successfully." + # pip install "ray[default,data,serve,tune]==2.3.1" + # echo "Installed distributed dependency: ray[default,data,serve,tune]==2.3.1 successfully." - pip install tblib - echo "Installed distributed dependency: tblib successfully." + # pip install tblib + # echo "Installed distributed dependency: tblib successfully." - pip install "tensorboardX<2.3" - echo "Installed distributed dependency: tensorboardX<2.3 successfully." + # pip install "tensorboardX<2.3" + # echo "Installed distributed dependency: tensorboardX<2.3 successfully." - # Explain dependencies - pip install captum - echo "Installed explain dependency: captum successfully." + # # Explain dependencies + # pip install captum + # echo "Installed explain dependency: captum successfully." - # Extra dependencies - pip install "horovod[pytorch]>=0.24.0,!=0.26.0" - echo "Installed extra dependency: horovod[pytorch]>=0.24.0,!=0.26.0 successfully." + # # Extra dependencies + # pip install "horovod[pytorch]>=0.24.0,!=0.26.0" + # echo "Installed extra dependency: horovod[pytorch]>=0.24.0,!=0.26.0 successfully." - pip install "modin[ray]" - echo "Installed extra dependency: modin[ray] successfully." + # pip install "modin[ray]" + # echo "Installed extra dependency: modin[ray] successfully." - pip install "predibase>=2023.10.2" - echo "Installed extra dependency: predibase>=2023.10.2 successfully." + # pip install "predibase>=2023.10.2" + # echo "Installed extra dependency: predibase>=2023.10.2 successfully." - - name: Install hyperopt, llm, serve, tree, and viz dependencies - run: | - # Hyperopt dependencies - pip install hyperopt - echo "Installed hyperopt successfully." + # - name: Install hyperopt, llm, serve, tree, and viz dependencies + # run: | + # # Hyperopt dependencies + # pip install hyperopt + # echo "Installed hyperopt successfully." - pip install "ray[default,tune]>=2.0.0" - echo "Installed hyperopt dependency: ray[default,tune]>=2.0.0 successfully." + # pip install "ray[default,tune]>=2.0.0" + # echo "Installed hyperopt dependency: ray[default,tune]>=2.0.0 successfully." - # LLM dependencies - pip install accelerate - echo "Installed llm dependency: accelerate successfully." + # # LLM dependencies + # pip install accelerate + # echo "Installed llm dependency: accelerate successfully." - pip install faiss-cpu - echo "Installed llm dependency: faiss-cpu successfully." + # pip install faiss-cpu + # echo "Installed llm dependency: faiss-cpu successfully." - pip install loralib - echo "Installed llm dependency: loralib successfully." + # pip install loralib + # echo "Installed llm dependency: loralib successfully." - pip install "peft>=0.10.0" - echo "Installed llm dependency: peft>=0.10.0 successfully." + # pip install "peft>=0.10.0" + # echo "Installed llm dependency: peft>=0.10.0 successfully." - pip install sentence-transformers - echo "Installed llm dependency: sentence-transformers successfully." + # pip install sentence-transformers + # echo "Installed llm dependency: sentence-transformers successfully." - # Serve dependencies - pip install cartonml-nightly - echo "Installed serve dependency: cartonml-nightly successfully." + # # Serve dependencies + # pip install cartonml-nightly + # echo "Installed serve dependency: cartonml-nightly successfully." - pip install fastapi - echo "Installed serve dependency: fastapi successfully." + # pip install fastapi + # echo "Installed serve dependency: fastapi successfully." - pip install httpx - echo "Installed serve dependency: httpx successfully." + # pip install httpx + # echo "Installed serve dependency: httpx successfully." - pip install "neuropod==0.3.0rc6 ; platform_system != 'Windows' and python_version < '3.9'" - echo "Installed serve dependency: neuropod==0.3.0rc6 successfully (if applicable)." + # pip install "neuropod==0.3.0rc6 ; platform_system != 'Windows' and python_version < '3.9'" + # echo "Installed serve dependency: neuropod==0.3.0rc6 successfully (if applicable)." - pip install python-multipart - echo "Installed serve dependency: python-multipart successfully." + # pip install python-multipart + # echo "Installed serve dependency: python-multipart successfully." - pip install uvicorn - echo "Installed serve dependency: uvicorn successfully." + # pip install uvicorn + # echo "Installed serve dependency: uvicorn successfully." - pip install starlette - echo "Installed serve dependency: starlette successfully." + # pip install starlette + # echo "Installed serve dependency: starlette successfully." - # Tree dependencies - pip install "hummingbird-ml>=0.4.8" - echo "Installed tree dependency: hummingbird-ml>=0.4.8 successfully." + # # Tree dependencies + # pip install "hummingbird-ml>=0.4.8" + # echo "Installed tree dependency: hummingbird-ml>=0.4.8 successfully." - pip install lightgbm - echo "Installed tree dependency: lightgbm successfully." + # pip install lightgbm + # echo "Installed tree dependency: lightgbm successfully." - pip install lightgbm-ray - echo "Installed tree dependency: lightgbm-ray successfully." + # pip install lightgbm-ray + # echo "Installed tree dependency: lightgbm-ray successfully." - # Viz dependencies - pip install hiplot - echo "Installed viz dependency: hiplot successfully." + # # Viz dependencies + # pip install hiplot + # echo "Installed viz dependency: hiplot successfully." - pip install "matplotlib==3.9.3" - echo "Installed viz dependency: matplotlib==3.9.3 successfully." + # pip install "matplotlib==3.9.3" + # echo "Installed viz dependency: matplotlib==3.9.3 successfully." - pip install ptitprince - echo "Installed viz dependency: ptitprince successfully." + # pip install ptitprince + # echo "Installed viz dependency: ptitprince successfully." - pip install "seaborn>=0.7,<0.12" - echo "Installed viz dependency: seaborn>=0.7,<0.12 successfully." + # pip install "seaborn>=0.7,<0.12" + # echo "Installed viz dependency: seaborn>=0.7,<0.12 successfully." - name: Test install all run : | - pip install --prefer-binary . + pip install .[dev,test,benchmarking,distributed,explain,extra,hyperopt,llm,serve,tree,viz] + - name: Debug dependency tree run: | From 10c85375ea6be4a912774b3bca3704d567040c1e Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Thu, 23 Jan 2025 15:40:40 -0800 Subject: [PATCH 60/67] fix yaml issue --- .github/workflows/pytest.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 787f8b7649d..5819324d267 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -99,7 +99,6 @@ jobs: pip --version python -m pip install -U pip cmake --version - # pip install --prefer-binary . pip list shell: bash From db68442b8fa1d66c85594faac5a23bb038ebb0d5 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Thu, 23 Jan 2025 15:54:10 -0800 Subject: [PATCH 61/67] testing each group of dependencies --- .github/workflows/pytest.yml | 269 +++++++++++++++++++---------------- 1 file changed, 147 insertions(+), 122 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 5819324d267..d056ce6eccb 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -102,182 +102,207 @@ jobs: pip list shell: bash - # - name: Incremental dependency installation - # run: | - # pip install pytest - # echo "Installed pytest successfully." + - name: Install Dependencies for Group: dev + run: | + pip install flake8 + echo "Installed flake8 successfully." - # pip install pytest-timeout - # echo "Installed pytest-timeout successfully." + pip install flake8-pyproject + echo "Installed flake8-pyproject successfully." - # pip install pytest-cov - # echo "Installed pytest-cov successfully." + pip install pre-commit + echo "Installed pre-commit successfully." - # pip install tifffile - # echo "Installed tifffile successfully." + pip install setuptools + echo "Installed setuptools successfully." - # pip install wget - # echo "Installed wget successfully." + - name: Install Dependencies for Group: test + run: | + pip install pytest + echo "Installed pytest successfully." - # pip install six>=1.13.0 - # echo "Installed six>=1.13.0 successfully." + pip install pytest-timeout + echo "Installed pytest-timeout successfully." - # pip install aim - # echo "Installed aim successfully." + pip install pytest-cov + echo "Installed pytest-cov successfully." - # pip install wandb - # echo "Installed wandb successfully." + pip install tifffile + echo "Installed tifffile successfully." - # pip install comet_ml - # echo "Installed comet_ml successfully." + pip install wget + echo "Installed wget successfully." - # pip install mlflow - # echo "Installed mlflow successfully." + pip install six>=1.13.0 + echo "Installed six successfully." - # pip install "sqlalchemy<2" - # echo "Installed sqlalchemy<2 successfully." + pip install aim + echo "Installed aim successfully." - # pip install hpbandster - # echo "Installed hpbandster successfully." + pip install wandb + echo "Installed wandb successfully." - # pip install ConfigSpace==0.7.1 - # echo "Installed ConfigSpace==0.7.1 successfully." + pip install comet_ml + echo "Installed comet_ml successfully." - # pip install flaml[blendsearch] - # echo "Installed flaml[blendsearch] successfully." + pip install mlflow + echo "Installed mlflow successfully." - # pip install HEBO - # echo "Installed HEBO successfully." + pip install sqlalchemy<2 + echo "Installed sqlalchemy successfully." - # pip install nevergrad - # echo "Installed nevergrad successfully." + pip install hpbandster + echo "Installed hpbandster successfully." - # pip install zoopt - # echo "Installed zoopt successfully." + pip install ConfigSpace==0.7.1 + echo "Installed ConfigSpace successfully." - # pip install s3fs>=2022.8.2 - # echo "Installed s3fs>=2022.8.2 successfully." - # shell: bash + pip install ax-platform + echo "Installed ax-platform successfully." + pip install bayesian-optimization + echo "Installed bayesian-optimization successfully." - # - name: Install distributed, explain, and extra dependencies - # run: | - # # Distributed dependencies - # pip install awscli - # echo "Installed distributed dependency: awscli successfully." + pip install flaml[blendsearch] + echo "Installed flaml[blendsearch] successfully." - # pip install "dask[dataframe]<2023.4.0" - # echo "Installed distributed dependency: dask[dataframe]<2023.4.0 successfully." + pip install HEBO + echo "Installed HEBO successfully." - # pip install "deepspeed!=0.11.0,<0.13.0" - # echo "Installed distributed dependency: deepspeed!=0.11.0,<0.13.0 successfully." + pip install nevergrad + echo "Installed nevergrad successfully." - # pip install "getdaft[ray]==0.1.20" - # echo "Installed distributed dependency: getdaft[ray]==0.1.20 successfully." + pip install optuna + echo "Installed optuna successfully." - # pip install GPUtil - # echo "Installed distributed dependency: GPUtil successfully." + pip install scikit-optimize + echo "Installed scikit-optimize successfully." - # pip install pyarrow - # echo "Installed distributed dependency: pyarrow successfully." + pip install zoopt + echo "Installed zoopt successfully." - # pip install "ray[default,data,serve,tune]==2.3.1" - # echo "Installed distributed dependency: ray[default,data,serve,tune]==2.3.1 successfully." + - name: Install Dependencies for Group: benchmarking + run: | + pip install s3fs>=2022.8.2 + echo "Installed s3fs successfully." - # pip install tblib - # echo "Installed distributed dependency: tblib successfully." + - name: Install Dependencies for Group: distributed + run: | + pip install awscli + echo "Installed awscli successfully." - # pip install "tensorboardX<2.3" - # echo "Installed distributed dependency: tensorboardX<2.3 successfully." + pip install dask[dataframe]<2023.4.0 + echo "Installed dask[dataframe] successfully." - # # Explain dependencies - # pip install captum - # echo "Installed explain dependency: captum successfully." + pip install deepspeed!=0.11.0,<0.13.0 + echo "Installed deepspeed successfully." - # # Extra dependencies - # pip install "horovod[pytorch]>=0.24.0,!=0.26.0" - # echo "Installed extra dependency: horovod[pytorch]>=0.24.0,!=0.26.0 successfully." + pip install getdaft[ray]==0.1.20 + echo "Installed getdaft[ray] successfully." - # pip install "modin[ray]" - # echo "Installed extra dependency: modin[ray] successfully." + pip install GPUtil + echo "Installed GPUtil successfully." - # pip install "predibase>=2023.10.2" - # echo "Installed extra dependency: predibase>=2023.10.2 successfully." + pip install pyarrow + echo "Installed pyarrow successfully." + pip install ray[default,data,serve,tune]==2.3.1 + echo "Installed ray[default,data,serve,tune] successfully." - # - name: Install hyperopt, llm, serve, tree, and viz dependencies - # run: | - # # Hyperopt dependencies - # pip install hyperopt - # echo "Installed hyperopt successfully." + pip install tblib + echo "Installed tblib successfully." + + pip install tensorboardX<2.3 + echo "Installed tensorboardX successfully." + + - name: Install Dependencies for Group: explain + run: | + pip install captum + echo "Installed captum successfully." + + - name: Install Dependencies for Group: extra + run: | + pip install horovod[pytorch]>=0.24.0,!=0.26.0 + echo "Installed horovod[pytorch] successfully." + + pip install modin[ray] + echo "Installed modin[ray] successfully." + + pip install predibase>=2023.10.2 + echo "Installed predibase successfully." - # pip install "ray[default,tune]>=2.0.0" - # echo "Installed hyperopt dependency: ray[default,tune]>=2.0.0 successfully." + - name: Install Dependencies for Group: hyperopt + run: | + pip install hyperopt + echo "Installed hyperopt successfully." - # # LLM dependencies - # pip install accelerate - # echo "Installed llm dependency: accelerate successfully." + pip install ray[default,tune]>=2.0.0 + echo "Installed ray[default,tune] successfully." - # pip install faiss-cpu - # echo "Installed llm dependency: faiss-cpu successfully." + - name: Install Dependencies for Group: llm + run: | + pip install accelerate + echo "Installed accelerate successfully." - # pip install loralib - # echo "Installed llm dependency: loralib successfully." + pip install faiss-cpu + echo "Installed faiss-cpu successfully." - # pip install "peft>=0.10.0" - # echo "Installed llm dependency: peft>=0.10.0 successfully." + pip install loralib + echo "Installed loralib successfully." - # pip install sentence-transformers - # echo "Installed llm dependency: sentence-transformers successfully." + pip install peft>=0.10.0 + echo "Installed peft successfully." - # # Serve dependencies - # pip install cartonml-nightly - # echo "Installed serve dependency: cartonml-nightly successfully." + pip install sentence-transformers + echo "Installed sentence-transformers successfully." - # pip install fastapi - # echo "Installed serve dependency: fastapi successfully." + - name: Install Dependencies for Group: serve + run: | + pip install cartonml-nightly + echo "Installed cartonml-nightly successfully." - # pip install httpx - # echo "Installed serve dependency: httpx successfully." + pip install fastapi + echo "Installed fastapi successfully." - # pip install "neuropod==0.3.0rc6 ; platform_system != 'Windows' and python_version < '3.9'" - # echo "Installed serve dependency: neuropod==0.3.0rc6 successfully (if applicable)." + pip install httpx + echo "Installed httpx successfully." - # pip install python-multipart - # echo "Installed serve dependency: python-multipart successfully." + pip install "neuropod==0.3.0rc6 ; platform_system != 'Windows' and python_version < '3.9'" + echo "Installed neuropod successfully." - # pip install uvicorn - # echo "Installed serve dependency: uvicorn successfully." + pip install python-multipart + echo "Installed python-multipart successfully." - # pip install starlette - # echo "Installed serve dependency: starlette successfully." + pip install uvicorn + echo "Installed uvicorn successfully." - # # Tree dependencies - # pip install "hummingbird-ml>=0.4.8" - # echo "Installed tree dependency: hummingbird-ml>=0.4.8 successfully." + pip install starlette + echo "Installed starlette successfully." - # pip install lightgbm - # echo "Installed tree dependency: lightgbm successfully." + - name: Install Dependencies for Group: tree + run: | + pip install hummingbird-ml>=0.4.8 + echo "Installed hummingbird-ml successfully." - # pip install lightgbm-ray - # echo "Installed tree dependency: lightgbm-ray successfully." + pip install lightgbm + echo "Installed lightgbm successfully." - # # Viz dependencies - # pip install hiplot - # echo "Installed viz dependency: hiplot successfully." + pip install lightgbm-ray + echo "Installed lightgbm-ray successfully." - # pip install "matplotlib==3.9.3" - # echo "Installed viz dependency: matplotlib==3.9.3 successfully." + - name: Install Dependencies for Group: viz + run: | + pip install hiplot + echo "Installed hiplot successfully." - # pip install ptitprince - # echo "Installed viz dependency: ptitprince successfully." + pip install matplotlib==3.9.3 + echo "Installed matplotlib successfully." - # pip install "seaborn>=0.7,<0.12" - # echo "Installed viz dependency: seaborn>=0.7,<0.12 successfully." + pip install ptitprince + echo "Installed ptitprince successfully." - - name: Test install all - run : | - pip install .[dev,test,benchmarking,distributed,explain,extra,hyperopt,llm,serve,tree,viz] + pip install seaborn>=0.7,<0.12 + echo "Installed seaborn successfully." - name: Debug dependency tree From 6c45530ea69125727c72b400e7324039ae749325 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Thu, 23 Jan 2025 16:01:27 -0800 Subject: [PATCH 62/67] fixed yaml issue --- .github/workflows/pytest.yml | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index d056ce6eccb..14b8936da00 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -102,8 +102,10 @@ jobs: pip list shell: bash - - name: Install Dependencies for Group: dev + - name: Install Dependencies for dev run: | + echo "starting dev" + pip install flake8 echo "Installed flake8 successfully." @@ -116,8 +118,9 @@ jobs: pip install setuptools echo "Installed setuptools successfully." - - name: Install Dependencies for Group: test + - name: Install Dependencies for test run: | + echo "starting test" pip install pytest echo "Installed pytest successfully." @@ -181,13 +184,15 @@ jobs: pip install zoopt echo "Installed zoopt successfully." - - name: Install Dependencies for Group: benchmarking + - name: Install Dependencies for benchmarking run: | + echo "starting benchmarking" pip install s3fs>=2022.8.2 echo "Installed s3fs successfully." - - name: Install Dependencies for Group: distributed + - name: Install Dependencies for distributed run: | + echo "starting distributed" pip install awscli echo "Installed awscli successfully." @@ -215,13 +220,15 @@ jobs: pip install tensorboardX<2.3 echo "Installed tensorboardX successfully." - - name: Install Dependencies for Group: explain + - name: Install Dependencies for explain run: | + echo "starting explain" pip install captum echo "Installed captum successfully." - - name: Install Dependencies for Group: extra + - name: Install Dependencies for extra run: | + echo "starting extra" pip install horovod[pytorch]>=0.24.0,!=0.26.0 echo "Installed horovod[pytorch] successfully." @@ -231,16 +238,18 @@ jobs: pip install predibase>=2023.10.2 echo "Installed predibase successfully." - - name: Install Dependencies for Group: hyperopt + - name: Install Dependencies for hyperopt run: | + echo "starting hyperopt" pip install hyperopt echo "Installed hyperopt successfully." pip install ray[default,tune]>=2.0.0 echo "Installed ray[default,tune] successfully." - - name: Install Dependencies for Group: llm + - name: Install Dependencies for llm run: | + echo "starting llm" pip install accelerate echo "Installed accelerate successfully." @@ -256,8 +265,9 @@ jobs: pip install sentence-transformers echo "Installed sentence-transformers successfully." - - name: Install Dependencies for Group: serve + - name: Install Dependencies for serve run: | + echo "starting serve" pip install cartonml-nightly echo "Installed cartonml-nightly successfully." @@ -279,8 +289,9 @@ jobs: pip install starlette echo "Installed starlette successfully." - - name: Install Dependencies for Group: tree + - name: Install Dependencies for tree run: | + echo "starting tree" pip install hummingbird-ml>=0.4.8 echo "Installed hummingbird-ml successfully." @@ -290,8 +301,9 @@ jobs: pip install lightgbm-ray echo "Installed lightgbm-ray successfully." - - name: Install Dependencies for Group: viz + - name: Install Dependencies for viz run: | + echo "starting viz" pip install hiplot echo "Installed hiplot successfully." From 7025eb96882cd36065ee5cebbdc36c70da988a94 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Thu, 23 Jan 2025 16:14:22 -0800 Subject: [PATCH 63/67] fixed quote issue --- .github/workflows/pytest.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 14b8936da00..9f96fff1e43 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -151,7 +151,7 @@ jobs: pip install mlflow echo "Installed mlflow successfully." - pip install sqlalchemy<2 + pip install "sqlalchemy<2" echo "Installed sqlalchemy successfully." pip install hpbandster @@ -196,10 +196,10 @@ jobs: pip install awscli echo "Installed awscli successfully." - pip install dask[dataframe]<2023.4.0 + pip install "dask[dataframe]<2023.4.0" echo "Installed dask[dataframe] successfully." - pip install deepspeed!=0.11.0,<0.13.0 + pip install "deepspeed!=0.11.0,<0.13.0" echo "Installed deepspeed successfully." pip install getdaft[ray]==0.1.20 @@ -217,7 +217,7 @@ jobs: pip install tblib echo "Installed tblib successfully." - pip install tensorboardX<2.3 + pip install "tensorboardX<2.3" echo "Installed tensorboardX successfully." - name: Install Dependencies for explain @@ -313,7 +313,7 @@ jobs: pip install ptitprince echo "Installed ptitprince successfully." - pip install seaborn>=0.7,<0.12 + pip install "seaborn>=0.7,<0.12" echo "Installed seaborn successfully." From 1363dc2fcd5b308a4cd79db1ec22d793ababad01 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Thu, 23 Jan 2025 16:37:12 -0800 Subject: [PATCH 64/67] testing base dependencies --- .github/workflows/pytest.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 9f96fff1e43..1e4658de20c 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -98,6 +98,7 @@ jobs: python --version pip --version python -m pip install -U pip + pip install . cmake --version pip list shell: bash From c3f082372397d6f09433241c5ffbdda88478fd4c Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Thu, 23 Jan 2025 16:57:16 -0800 Subject: [PATCH 65/67] testing GPy issue --- .github/workflows/pytest.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 1e4658de20c..d670f75f0b7 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -317,6 +317,10 @@ jobs: pip install "seaborn>=0.7,<0.12" echo "Installed seaborn successfully." + - name: test weird GPy issue + run: | + pip install '.[test]' + shell: bash - name: Debug dependency tree run: | From d148e4aa1945f8909d332e758c5dada8ddf46d39 Mon Sep 17 00:00:00 2001 From: ethanreidel Date: Thu, 23 Jan 2025 17:32:10 -0800 Subject: [PATCH 66/67] script to check dependencies --- .github/workflows/pytest.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index d670f75f0b7..ce2ca43673b 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -319,7 +319,13 @@ jobs: - name: test weird GPy issue run: | - pip install '.[test]' + # Extract and install each dependency individually + for dep in $(python -c "import tomllib; print(' '.join(tomllib.load(open('pyproject.toml', 'rb'))['project']['optional-dependencies']['test']))"); do + pip install "$dep" && echo "Installed $dep successfully." + done + + # Install the core package + pip install . && echo "Core package installed successfully." shell: bash - name: Debug dependency tree From 063fe3bc7370bf73e63f005b60a7b01162d1fa1e Mon Sep 17 00:00:00 2001 From: "m.habedank" Date: Fri, 7 Feb 2025 09:45:29 +0100 Subject: [PATCH 67/67] removed version statement for matplotlib --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a867001829b..ed9d340f4ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,7 +71,7 @@ dependencies = [ "rich~=12.4.4", "sacremoses", "scikit-learn==1.3", - "matplotlib==3.9.3,!=3.4.3", + "matplotlib", #"scipy>=0.18", "scipy==1.14.1", "sentencepiece",