@@ -24,13 +24,17 @@ class DeepLabV3(SegmentationModel):
2424 encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and
2525 other pretrained weights (see table with available weights for each encoder_name)
2626 decoder_channels: A number of convolution filters in ASPP module. Default is 256
27+ encoder_output_stride: Downsampling factor for last encoder features (see original paper for explanation)
28+ decoder_atrous_rates: Dilation rates for ASPP module (should be an iterable of 3 integer values)
29+ decoder_aspp_separable: Use separable convolutions in ASPP module. Default is False
30+ decoder_aspp_dropout: Dropout rate applied in the ASPP module projection layer. Default is 0.5
2731 in_channels: A number of input channels for the model, default is 3 (RGB images)
2832 classes: A number of classes for output mask (or you can think as a number of channels of output mask)
2933 activation: An activation function to apply after the final convolution layer.
3034 Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**,
3135 **callable** and **None**.
3236 Default is **None**
33- upsampling: Final upsampling factor. Default is 8 to preserve input-output spatial shape identity
37+ upsampling: Final upsampling factor (should have the same value as ``encoder_output_stride`` to preserve input-output spatial shape identity). Default is **None**, in which case ``encoder_output_stride`` is used.
3438 aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is built
3539 on top of encoder if **aux_params** is not **None** (default). Supported params:
3640 - classes (int): A number of classes
@@ -51,11 +55,15 @@ def __init__(
5155 encoder_name : str = "resnet34" ,
5256 encoder_depth : int = 5 ,
5357 encoder_weights : Optional [str ] = "imagenet" ,
58+ encoder_output_stride : Literal [8 , 16 ] = 8 ,
5459 decoder_channels : int = 256 ,
60+ decoder_atrous_rates : Iterable [int ] = (12 , 24 , 36 ),
61+ decoder_aspp_separable : bool = False ,
62+ decoder_aspp_dropout : float = 0.5 ,
5563 in_channels : int = 3 ,
5664 classes : int = 1 ,
5765 activation : Optional [str ] = None ,
58- upsampling : int = 8 ,
66+ upsampling : Optional [ int ] = None ,
5967 aux_params : Optional [dict ] = None ,
6068 ):
6169 super ().__init__ ()
@@ -65,19 +73,23 @@ def __init__(
6573 in_channels = in_channels ,
6674 depth = encoder_depth ,
6775 weights = encoder_weights ,
68- output_stride = 8 ,
76+ output_stride = encoder_output_stride ,
6977 )
7078
7179 self .decoder = DeepLabV3Decoder (
72- in_channels = self .encoder .out_channels [- 1 ], out_channels = decoder_channels
80+ in_channels = self .encoder .out_channels [- 1 ],
81+ out_channels = decoder_channels ,
82+ atrous_rates = decoder_atrous_rates ,
83+ aspp_separable = decoder_aspp_separable ,
84+ aspp_dropout = decoder_aspp_dropout ,
7385 )
7486
7587 self .segmentation_head = SegmentationHead (
7688 in_channels = self .decoder .out_channels ,
7789 out_channels = classes ,
7890 activation = activation ,
7991 kernel_size = 1 ,
80- upsampling = upsampling ,
92+ upsampling = encoder_output_stride if upsampling is None else upsampling ,
8193 )
8294
8395 if aux_params is not None :
@@ -102,8 +114,9 @@ class DeepLabV3Plus(SegmentationModel):
102114 encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and
103115 other pretrained weights (see table with available weights for each encoder_name)
104116 encoder_output_stride: Downsampling factor for last encoder features (see original paper for explanation)
105- decoder_atrous_rates: Dilation rates for ASPP module (should be a tuple of 3 integer values)
106117 decoder_atrous_rates: Dilation rates for ASPP module (should be an iterable of 3 integer values)
118+ decoder_aspp_separable: Use separable convolutions in ASPP module. Default is True
119+ decoder_aspp_dropout: Dropout rate applied in the ASPP module projection layer. Default is 0.5
107120 decoder_channels: A number of convolution filters in ASPP module. Default is 256
108121 in_channels: A number of input channels for the model, default is 3 (RGB images)
109122 classes: A number of classes for output mask (or you can think as a number of channels of output mask)
@@ -134,8 +147,9 @@ def __init__(
134147 encoder_weights : Optional [str ] = "imagenet" ,
135148 encoder_output_stride : Literal [8 , 16 ] = 16 ,
136149 decoder_channels : int = 256 ,
137- decoder_atrous_rates : tuple = (12 , 24 , 36 ),
138150 decoder_atrous_rates : Iterable [int ] = (12 , 24 , 36 ),
151+ decoder_aspp_separable : bool = True ,
152+ decoder_aspp_dropout : float = 0.5 ,
139153 in_channels : int = 3 ,
140154 classes : int = 1 ,
141155 activation : Optional [str ] = None ,
@@ -157,6 +171,8 @@ def __init__(
157171 out_channels = decoder_channels ,
158172 atrous_rates = decoder_atrous_rates ,
159173 output_stride = encoder_output_stride ,
174+ aspp_separable = decoder_aspp_separable ,
175+ aspp_dropout = decoder_aspp_dropout ,
160176 )
161177
162178 self .segmentation_head = SegmentationHead (
0 commit comments