-
Notifications
You must be signed in to change notification settings - Fork 87
fix(examples/hunyuanvideo-i2v): update to MS 2.6.0 and MS 2.7.0 #1385
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -319,7 +319,7 @@ def encode( | |||||||||
| if model_return_dict: | ||||||||||
| last_hidden_state = outputs.hidden_states[-(hidden_state_skip_layer + 1)] | ||||||||||
| else: | ||||||||||
| last_hidden_state = outputs[2][-(hidden_state_skip_layer + 1)] | ||||||||||
| last_hidden_state = outputs[1][-(hidden_state_skip_layer + 1)] | ||||||||||
|
Comment on lines
321
to
+322
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The index If the structure of the
Suggested change
|
||||||||||
| # last_hidden_state = outputs[0][-(hidden_state_skip_layer + 1)] | ||||||||||
| # Real last hidden state already has layer norm applied. So here we only apply it | ||||||||||
| # for intermediate layers. | ||||||||||
|
|
@@ -331,7 +331,7 @@ def encode( | |||||||||
| outputs_hidden_states = outputs.hidden_states | ||||||||||
| else: | ||||||||||
| last_hidden_state = outputs[self.key_idx] | ||||||||||
| outputs_hidden_states = outputs[2] if len(outputs) >= 3 else None # TODO: double-check if use t5 | ||||||||||
| outputs_hidden_states = outputs[1] if len(outputs) >= 2 else None # TODO: double-check if use t5 | ||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Similar to the previous comment, the index
Suggested change
|
||||||||||
|
|
||||||||||
| # Remove hidden states of instruction tokens, only keep prompt tokens. | ||||||||||
| if self.use_template: | ||||||||||
|
|
||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,9 @@ | |
|
|
||
| import mindspore as ms | ||
| from mindspore import Parameter, Tensor, mint, nn | ||
| from mindspore.common.initializer import initializer | ||
|
|
||
| from mindone.diffusers.models.layers_compat import group_norm | ||
|
|
||
|
|
||
| class LayerNorm(nn.Cell): | ||
|
|
@@ -36,3 +39,78 @@ def construct(self, x: Tensor): | |
| x, self.normalized_shape, self.weight.to(x.dtype), self.bias.to(x.dtype), self.eps | ||
| ) | ||
| return x | ||
|
|
||
|
|
||
| class GroupNorm(nn.Cell): | ||
| r"""Applies Group Normalization over a mini-batch of inputs. | ||
|
|
||
| This layer implements the operation as described in | ||
| the paper `Group Normalization <https://arxiv.org/abs/1803.08494>`__ | ||
|
|
||
| .. math:: | ||
| y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta | ||
|
|
||
| The input channels are separated into :attr:`num_groups` groups, each containing | ||
| ``num_channels / num_groups`` channels. :attr:`num_channels` must be divisible by | ||
| :attr:`num_groups`. The mean and standard-deviation are calculated | ||
| separately over the each group. :math:`\gamma` and :math:`\beta` are learnable | ||
| per-channel affine transform parameter vectors of size :attr:`num_channels` if | ||
| :attr:`affine` is ``True``. | ||
|
|
||
| This layer uses statistics computed from input data in both training and | ||
| evaluation modes. | ||
|
|
||
| Args: | ||
| num_groups (int): number of groups to separate the channels into | ||
| num_channels (int): number of channels expected in input | ||
| eps: a value added to the denominator for numerical stability. Default: 1e-5 | ||
| affine: a boolean value that when set to ``True``, this module | ||
| has learnable per-channel affine parameters initialized to ones (for weights) | ||
| and zeros (for biases). Default: ``True``. | ||
|
|
||
| Shape: | ||
| - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}` | ||
| - Output: :math:`(N, C, *)` (same shape as input) | ||
|
|
||
| Examples:: | ||
|
|
||
| >>> input = mint.randn(20, 6, 10, 10) | ||
| >>> # Separate 6 channels into 3 groups | ||
| >>> m = GroupNorm(3, 6) | ||
| >>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm) | ||
| >>> m = GroupNorm(6, 6) | ||
| >>> # Put all 6 channels into a single group (equivalent with LayerNorm) | ||
| >>> m = GroupNorm(1, 6) | ||
| >>> # Activating the module | ||
| >>> output = m(input) | ||
| """ | ||
|
Comment on lines
+44
to
+86
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
|
|
||
| num_groups: int | ||
| num_channels: int | ||
| eps: float | ||
| affine: bool | ||
|
|
||
| def __init__(self, num_groups: int, num_channels: int, eps: float = 1e-5, affine: bool = True, dtype=ms.float32): | ||
| super().__init__() | ||
| if num_channels % num_groups != 0: | ||
| raise ValueError("num_channels must be divisible by num_groups") | ||
|
|
||
| self.num_groups = num_groups | ||
| self.num_channels = num_channels | ||
| self.eps = eps | ||
| self.affine = affine | ||
| weight = initializer("ones", num_channels, dtype=dtype) | ||
| bias = initializer("zeros", num_channels, dtype=dtype) | ||
| if self.affine: | ||
| self.weight = Parameter(weight, name="weight") | ||
| self.bias = Parameter(bias, name="bias") | ||
| else: | ||
| self.weight = None | ||
| self.bias = None | ||
|
|
||
| def construct(self, x: Tensor): | ||
| if self.affine: | ||
| x = group_norm(x, self.num_groups, self.weight.to(x.dtype), self.bias.to(x.dtype), self.eps) | ||
| else: | ||
| x = group_norm(x, self.num_groups, self.weight, self.bias, self.eps) | ||
| return x | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -20,15 +20,15 @@ | |||||
| # ============================================================================== | ||||||
| from typing import Optional, Tuple, Union | ||||||
|
|
||||||
| from hyvideo.utils.modules_utils import LayerNorm | ||||||
| from hyvideo.utils.modules_utils import GroupNorm, LayerNorm | ||||||
|
|
||||||
| import mindspore as ms | ||||||
| import mindspore.mint.nn.functional as F | ||||||
| from mindspore import mint, nn, ops | ||||||
|
|
||||||
| from mindone.diffusers.models.activations import get_activation | ||||||
| from mindone.diffusers.models.attention_processor import Attention, SpatialNorm | ||||||
| from mindone.diffusers.models.normalization import AdaGroupNorm, GroupNorm, RMSNorm | ||||||
| from mindone.diffusers.models.normalization import AdaGroupNorm, RMSNorm | ||||||
| from mindone.diffusers.utils import logging | ||||||
|
|
||||||
| logger = logging.get_logger(__name__) # pylint: disable=invalid-name | ||||||
|
|
@@ -412,7 +412,7 @@ def __init__( | |||||
| conv_3d_out_channels = conv_3d_out_channels or out_channels | ||||||
| self.conv2 = CausalConv3d(out_channels, conv_3d_out_channels, kernel_size=3, stride=1) | ||||||
|
|
||||||
| self.nonlinearity = get_activation(non_linearity)() | ||||||
| self.nonlinearity = get_activation(non_linearity) | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The
Suggested change
|
||||||
|
|
||||||
| self.upsample = self.downsample = None | ||||||
| if self.up: | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add CANN prefix
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed,