Skip to content
8 changes: 8 additions & 0 deletions mindone/transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,14 @@
RobertaModel,
RobertaPreTrainedModel,
)
from .models.segformer import (
SegformerDecodeHead,
SegformerForImageClassification,
SegformerForSemanticSegmentation,
SegformerImageProcessor,
SegformerModel,
SegformerPreTrainedModel,
)
from .models.siglip import (
SiglipForImageClassification,
SiglipModel,
Expand Down
1 change: 1 addition & 0 deletions mindone/transformers/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
qwen2_audio,
qwen2_vl,
roberta,
segformer,
siglip,
smolvlm,
speecht5,
Expand Down
4 changes: 4 additions & 0 deletions mindone/transformers/models/auto/configuration_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
("clip_vision_model", "CLIPVisionConfig"),
("deberta", "DebertaConfig"),
("deberta-v2", "DebertaV2Config"),
("detr", "DetrConfig"),
("dpt", "DPTConfig"),
("gemma", "GemmaConfig"),
("granite", "GraniteConfig"),
Expand Down Expand Up @@ -100,6 +101,7 @@
("roberta", "RobertaConfig"),
("recurrent_gemma", "RecurrentGemmaConfig"),
("rembert", "RemBertConfig"),
("segformer", "SegformerConfig"),
("swin", "SwinConfig"),
("siglip", "SiglipConfig"),
("siglip_vision_model", "SiglipVisionConfig"),
Expand Down Expand Up @@ -141,6 +143,7 @@
("clip_vision_model", "CLIPVisionModel"),
("deberta", "DeBERTa"),
("deberta-v2", "DeBERTa-v2"),
("detr", "DETR"),
("dpt", "DPT"),
("gemma", "Gemma"),
("granite", "Granite"),
Expand Down Expand Up @@ -194,6 +197,7 @@
("qwen2_vl", "Qwen2VL"),
("recurrent_gemma", "RecurrentGemma"),
("rembert", "RemBERT"),
("segformer", "SegFormer"),
("swin", "Swin Transformer"),
("siglip", "SigLIP"),
("siglip_vision_model", "SiglipVisionModel"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
("llava_next", ("LlavaNextImageProcessor",)),
("llava_next_video", ("LlavaNextVideoImageProcessor",)),
("llava_onevision", ("LlavaOnevisionImageProcessor",)),
("segformer", ("SegformerImageProcessor",)),
]
)

Expand Down
40 changes: 36 additions & 4 deletions mindone/transformers/models/auto/modeling_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
("deberta", "DebertaModel"),
("opt", "OPTModel"),
("deberta-v2", "DebertaV2Model"),
("detr", "DetrModel"),
("dpt", "DPTModel"),
("gemma", "GemmaModel"),
("m2m_100", "M2M100Model"),
Expand Down Expand Up @@ -96,6 +97,8 @@
("qwen2_vl", "Qwen2VLModel"),
("roberta", "RobertaModel"),
("rembert", "RemBertModel"),
("resnet", "ResNetModel"),
("segformer", "SegformerModel"),
("siglip", "SiglipModel"),
("siglip_vision_model", "SiglipVisionModel"),
("smolvlm", "SmolVLMModel"),
Expand Down Expand Up @@ -215,14 +218,15 @@
[
# Model for Image mapping
("bit", "BitModel"),
("siglip_vision_model", "SiglipVisionModel"),
("detr", "DetrModel"),
("dpt", "DPTModel"),
("glpn", "GLPNModel"),
("hiera", "HieraModel"),
("hubert", "HubertModel"),
("ijepa", "IJepaModel"),
("imagegpt", "ImageGPTModel"),
("levit", "LevitModel"),
("segformer", "SegformerModel"),
("siglip_vision_model", "SiglipVisionModel"),
("vit", "ViTModel"),
]
Expand Down Expand Up @@ -253,18 +257,46 @@
"levit",
("LevitForImageClassification", "LevitForImageClassificationWithTeacher"),
),
("resnet", "ResNetForImageClassification"),
("segformer", "SegformerForImageClassification"),
("siglip", "SiglipForImageClassification"),
("vit", "ViTForImageClassification"),
]
)

MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES = OrderedDict()
# Maps a short model identifier (e.g. "detr") to the name, given as a string,
# of the class registered for image segmentation. Currently holds one entry.
# NOTE(review): confirm that "DetrForSegmentation" actually resolves in this
# package; the class itself is not visible from this part of the code.
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES = OrderedDict(
[
# Do not add new models here; this class will be deprecated in the future.
# Model for Image Segmentation mapping
("detr", "DetrForSegmentation"),
]
)

MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES = OrderedDict()
# Maps a short model identifier (e.g. "segformer") to the name, given as a
# string, of the class registered for semantic segmentation.
# NOTE(review): the values are plain strings, so nothing here verifies that the
# named classes exist. Confirm each one (BEiT, Data2VecVision, DPT,
# MobileNetV2, MobileViT, MobileViTV2, Segformer, UperNet) is implemented and
# exported by this package, or that the lookup tolerates missing names.
MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES = OrderedDict(
[
# Model for Semantic Segmentation mapping
("beit", "BeitForSemanticSegmentation"),
("data2vec-vision", "Data2VecVisionForSemanticSegmentation"),
("dpt", "DPTForSemanticSegmentation"),
("mobilenet_v2", "MobileNetV2ForSemanticSegmentation"),
("mobilevit", "MobileViTForSemanticSegmentation"),
("mobilevitv2", "MobileViTV2ForSemanticSegmentation"),
("segformer", "SegformerForSemanticSegmentation"),
("upernet", "UperNetForSemanticSegmentation"),
]
)

# Empty mapping: no instance-segmentation classes are registered here.
MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES = OrderedDict()

MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES = OrderedDict()
# Maps a short model identifier (e.g. "mask2former") to the name, given as a
# string, of the class registered for universal segmentation.
# NOTE(review): the values are plain strings, so nothing here verifies that the
# named classes exist; confirm each one is implemented and exported by this
# package, or that the lookup tolerates missing names.
MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES = OrderedDict(
[
# Model for Universal Segmentation mapping
("detr", "DetrForSegmentation"),
("mask2former", "Mask2FormerForUniversalSegmentation"),
# NOTE(review): the class name says "InstanceSegmentation" although it is
# registered under universal segmentation; presumably intentional, confirm.
("maskformer", "MaskFormerForInstanceSegmentation"),
("oneformer", "OneFormerForUniversalSegmentation"),
]
)

# Empty mapping: no video-classification classes are registered here.
MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES = OrderedDict()

Expand Down
18 changes: 18 additions & 0 deletions mindone/transformers/models/segformer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# This code is adapted from https://github.com/huggingface/transformers
# with modifications to run transformers on mindspore.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .image_processing_segformer import *
from .modeling_segformer import *
Loading