
Commit 1294f5e

Authored by qhzhuang and Your Name
fix pyboost bug, adapt to new orangePi image, disable multi thread (#1969)
Co-authored-by: Your Name <[email protected]>
1 parent: fd860f0 · commit: 1294f5e

File tree: 11 files changed, +94 −201 lines

llm/inference/janus_pro/generation.py (+4 −1)

@@ -1,6 +1,8 @@
 import os
 import PIL.Image
 import mindspore
+from mindspore._c_expression import disable_multi_thread
+disable_multi_thread()
 import mindspore as ms
 import numpy as np
 from mindnlp.core import ops

@@ -76,7 +78,7 @@ def generate(
     generated_tokens = ops.zeros(parallel_size, image_token_num_per_image, dtype=ms.int32)

     for i in range(image_token_num_per_image):
-        print(str(i)+'='*60)
+        print(f"generating token {i}")
         outputs = mmgpt.language_model.model(inputs_embeds=inputs_embeds, use_cache=True, past_key_values=outputs.past_key_values if i != 0 else None)
         hidden_states = outputs.last_hidden_state  # (parallel_size*2, len(input_ids), 2048)

@@ -95,6 +97,7 @@ def generate(
         # print("img_embeds.shape:", img_embeds.shape)
         # print("img_embeds.dtype:", img_embeds.dtype)
         inputs_embeds = img_embeds.unsqueeze(dim=1)  # (parallel_size*2, 2048)
+        print("generated one token")

     if image_token_num_per_image==576:
         dec = mmgpt.gen_vision_model.decode_code(generated_tokens.astype(ms.int32), shape=[parallel_size, 8, img_size//patch_size, img_size//patch_size])
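For context on the threading change above: both changed scripts call disable_multi_thread() immediately after the first mindspore import, before any other framework modules load or any op runs. A minimal sketch of that initialization order, using only the import path that appears in this diff:

import mindspore
from mindspore._c_expression import disable_multi_thread

# Pin the runtime to single-threaded execution before anything else
# touches the framework, matching the placement in both scripts.
disable_multi_thread()

from mindnlp.core import ops  # later imports see the single-threaded runtime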

llm/inference/janus_pro/janus/models/modeling_vlm.py (+2 −2)

@@ -264,8 +264,8 @@ def prepare_inputs_embeds(
         # replace with the image embeddings
         # 627 576
         # inputs_embeds[images_seq_mask] = images_embeds[images_emb_mask]
-        print("inputs_embeds:", inputs_embeds.shape)
-        print("images_embeds[images_emb_mask].dtype", images_embeds[images_emb_mask].dtype)
+        # print("inputs_embeds:", inputs_embeds.shape)
+        # print("images_embeds[images_emb_mask].dtype", images_embeds[images_emb_mask].dtype)
         print("inputs_embeds.dtype", inputs_embeds.dtype)
         padding_size = images_seq_mask.shape[1] - images_emb_mask.shape[1]
         padding = Tensor(np.full((images_seq_mask.shape[0], padding_size), False), dtype=images_emb_mask.dtype)

llm/inference/janus_pro/understanding.py (+5 −1)

@@ -1,11 +1,14 @@
 import mindspore
+from mindspore._c_expression import disable_multi_thread
+disable_multi_thread()
 from mindnlp.transformers import AutoModelForCausalLM
 from janus.models import MultiModalityCausalLM, VLChatProcessor
 from janus.utils.io import load_pil_images
 from mindnlp.configs import set_pyboost, use_pyboost
 from mindnlp.core import nn, Tensor
 from mindnlp.core import no_grad

+
 from mindnlp.configs import use_pyboost, set_pyboost
 print('use_pyboost:', use_pyboost())  # defaults to False here
 mindspore.set_context(

@@ -27,7 +30,8 @@
     model_path, trust_remote_code=True, ms_dtype=mindspore.float16
 )
 print('loaded processor and ckpt ')
-question = 'describe this image'
+# question = 'describe this image'
+question = 'what is the animal in the image'
 image = "./inpain_model_cat.png"
 conversation = [
     {

mindnlp/core/nn/functional.py (+12 −11)

@@ -169,13 +169,16 @@ def drop_and_mask(keep_prob, seed=None):

 dense_ = ops.Dense()
 def linear(input, weight, bias=None):
-    input = input.to(mindspore.float16)
-    weight = weight.to(mindspore.float16)
-    if bias is not None:
-        bias = bias.to(mindspore.float16)
-        return dense_(input, weight) + bias
-    return dense_(input, weight)
-
+    if ON_ORANGE_PI:
+        input = input.to(mindspore.float16)
+        weight = weight.to(mindspore.float16)
+        if bias is not None:
+            bias = bias.to(mindspore.float16)
+            return dense_(input, weight) + bias
+        return dense_(input, weight)
+    if use_pyboost():
+        return mindspore.mint.nn.functional.linear(input, weight, bias)
+    return dense_(input, weight, bias)


 def binary_cross_entropy_with_logits(input, target, weight=None, reduction='mean', pos_weight=None):

@@ -479,8 +482,8 @@ def addcmul(input, tensor1, tensor2, value=1):
     return input + value*tensor1*tensor2

 def group_norm(input, num_groups, weight=None, bias=None, eps=1e-5):
-    # if use_pyboost():
-    #     return mindspore.mint.nn.functional.group_norm(input, num_groups, weight, bias, eps)
+    if use_pyboost():
+        return mindspore.mint.nn.functional.group_norm(input, num_groups, weight, bias, eps)

     input_shape = input.shape
     N = input_shape[0]

@@ -491,8 +494,6 @@ def group_norm(input, num_groups, weight=None, bias=None, eps=1e-5):
     affine_param_shape = [1] * input.ndim
     affine_param_shape[1] = C
     affine_param_shape = tuple(affine_param_shape)
-    print(affine_param_shape)
-    print(out.shape)
     if weight is not None and bias is not None:
         # out = bias.view(affine_param_shape).addcmul(out, weight.view(affine_param_shape), 1)
         out = addcmul(bias.view(affine_param_shape), out, weight.view(affine_param_shape), 1)
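The linear, group_norm, cat, and bmm changes in this commit all share one dispatch shape: an OrangePi float16 fast path, then a pyboost-gated mindspore.mint call, then the legacy ops fallback. A hedged sketch of that pattern applied to a hypothetical op; `softsign` is only an illustration, and importing ON_ORANGE_PI from mindnlp.configs is an assumption about where the flag lives, not something this diff confirms:

import mindspore
from mindspore import ops
from mindnlp.configs import use_pyboost   # real helper used throughout the diffs
from mindnlp.configs import ON_ORANGE_PI  # assumed export location for this flag

# Feature-detect the mint kernel once, as the diff does for cat/bmm.
has_softsign = hasattr(mindspore.mint.nn.functional, 'softsign')

def softsign(input):
    if ON_ORANGE_PI:
        # OrangePi path: cast to float16 before dispatch, as linear/bmm do
        input = input.to(mindspore.float16)
    if use_pyboost() and has_softsign:
        # pyboost path, re-enabled by this commit for cat/bmm/group_norm
        return mindspore.mint.nn.functional.softsign(input)
    # legacy fallback
    return ops.softsign(input)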

mindnlp/core/nn/modules/linear.py (−12)

@@ -10,16 +10,6 @@
 from .. import functional as F
 from ... import ops

-
-def contains_nan_or_inf(tensor, info):
-    tensor = tensor.astype(mindspore.float16)
-    havenan = mops.isnan(tensor).any()
-    haveinf = mops.isinf(tensor).any()
-    if haveinf:
-        print(info+'haveinf')
-    if havenan:
-        print(info+'havenan')
-
 class Linear(Module):
     r"""Applies a linear transformation to the incoming data: :math:`y = Ax + b`

@@ -74,8 +64,6 @@ def forward(self, input):
             self.weight = Parameter(self.weight.astype(mindspore.float16))
         if self.bias is not None and self.bias.dtype == mindspore.float32:
             self.bias = Parameter(self.bias.astype(mindspore.float16))
-        print("self.weight.dtype:", self.weight.dtype)
-        contains_nan_or_inf(input, 'Linear.input ')
         return F.linear(input, self.weight, self.bias)

     def __repr__(self):

mindnlp/core/nn/modules/module.py (+1 −1)

@@ -1225,7 +1225,7 @@ def train(self, mode=True):
             Module: self
         """
         if ON_ORANGE_PI:
-            set_pyboost(not mode)
+            set_pyboost(False)
         self.training = mode
         for module in self.children():
            module.train(mode)
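train() previously flipped pyboost on and off with the training mode; on OrangePi it is now pinned off unconditionally. A small usage sketch with the real mindnlp.configs helpers shown elsewhere in this commit:

from mindnlp.configs import set_pyboost, use_pyboost

set_pyboost(False)        # what train() now does whenever ON_ORANGE_PI is set
assert not use_pyboost()  # downstream ops then take the non-mint fallback paths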

mindnlp/core/ops/array.py (+2 −2)

@@ -21,8 +21,8 @@ def argwhere(input):
 # cat
 has_cat = hasattr(mindspore.mint, 'cat')
 def cat(tensors, dim=0):
-    # if use_pyboost() and has_cat:
-    #     return mindspore.mint.cat(tensors, dim)
+    if use_pyboost() and has_cat:
+        return mindspore.mint.cat(tensors, dim)
     return ops.cat(tensors, dim)

 # concat

mindnlp/core/ops/blas.py (+2 −2)

@@ -31,8 +31,8 @@ def bmm(input, other):
     if ON_ORANGE_PI:
         input = input.to(mindspore.float16)
         other = other.to(mindspore.float16)
-    # if use_pyboost() and has_bmm:
-    #     return mindspore.mint.bmm(input, other)
+    if use_pyboost() and has_bmm:
+        return mindspore.mint.bmm(input, other)
     return ops.bmm(input, other)

 # chain_matmul

mindnlp/transformers/cache_utils.py (+1 −2)

@@ -378,8 +378,7 @@ def update(
             self.key_cache[layer_idx] = key_states
             self.value_cache[layer_idx] = value_states
         else:
-            self.key_cache[layer_idx] = ops.cat(
-                [self.key_cache[layer_idx].astype(mindspore.float16), key_states.astype(mindspore.float16)], dim=-2)
+            self.key_cache[layer_idx] = ops.cat([self.key_cache[layer_idx].astype(mindspore.float16), key_states.astype(mindspore.float16)], dim=-2)
             self.value_cache[layer_idx] = ops.cat([self.value_cache[layer_idx], value_states], dim=-2)

         return self.key_cache[layer_idx], self.value_cache[layer_idx]
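The reflowed cat call appends new key states along dim=-2, the sequence axis of a cached entry laid out as (batch, heads, seq_len, head_dim). A toy illustration of that shape arithmetic with mindnlp.core.ops; the shapes here are made up for the example:

import numpy as np
import mindspore
from mindnlp.core import ops

# A cached key tensor covering 5 tokens, plus the key states for 1 new token.
cached = mindspore.Tensor(np.zeros((1, 8, 5, 64), np.float16))
new = mindspore.Tensor(np.zeros((1, 8, 1, 64), np.float16))

merged = ops.cat([cached, new], dim=-2)
print(merged.shape)  # (1, 8, 6, 64): the sequence axis grew by one token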
