import os
import PIL.Image
import mindspore
import mindspore as ms
import numpy as np
from mindnlp.core import ops
from mindnlp.transformers import AutoModelForCausalLM
from janus.models import MultiModalityCausalLM, VLChatProcessor

from mindnlp.configs import use_pyboost, set_pyboost

# disable mindnlp's pyboost kernel path for this run
set_pyboost(False)
print('use_pyboost:', use_pyboost())
mindspore.set_context(
    mode=mindspore.PYNATIVE_MODE,
    # max_device_memory="15GB",
    pynative_synchronize=True,  # synchronous execution so Ascend errors surface at the failing op
    device_target="Ascend",
    # mode=mindspore.GRAPH_MODE,
    # jit_config={"jit_level":"O2"},
    ascend_config={"precision_mode": "allow_mix_precision"})
print(mindspore.get_context("mode"))
# specify the path to the model
model_path = "/home/HwHiAiUser/Janus-Pro-1B"
vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
tokenizer = vl_chat_processor.tokenizer

vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
    model_path, trust_remote_code=True, ms_dtype=mindspore.float16
)
print('loaded processor and ckpt')


conversation = [
    {
        "role": "<|User|>",
        "content": "A stunning princess from kabul in red, white traditional clothing, blue eyes, brown hair",
        # "content": "sun under blue sky",
    },
    {"role": "<|Assistant|>", "content": ""},
]

sft_format = vl_chat_processor.apply_sft_template_for_multi_turn_prompts(
    conversations=conversation,
    sft_format=vl_chat_processor.sft_format,
    system_prompt="",
)
prompt = sft_format + vl_chat_processor.image_start_tag
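# The prompt ends with the processor's image_start_tag, signalling the model to
# begin emitting image tokens instead of text after the conversation prefix.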
from mindnlp.core import no_grad

# @torch.inference_mode() in the PyTorch original; no_grad plays the same role here
with no_grad():
    def generate(
        mmgpt: MultiModalityCausalLM,
        vl_chat_processor: VLChatProcessor,
        prompt: str,
        temperature: float = 1,
        parallel_size: int = 1,  # 16,
        cfg_weight: float = 5,
        # image_token_num_per_image: int = 8,  # 576,
        image_token_num_per_image: int = 576,
        img_size: int = 384,
        patch_size: int = 16,
    ):
        input_ids = vl_chat_processor.tokenizer.encode(prompt)
        input_ids = ms.Tensor(input_ids, dtype=ms.int64)

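        # Build two token streams per sample for classifier-free guidance:
        # even rows keep the full prompt (conditional), odd rows keep only the
        # first and last tokens and pad the rest (unconditional).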
        tokens = ops.zeros(parallel_size * 2, len(input_ids), dtype=ms.int32)
        for i in range(parallel_size * 2):
            tokens[i, :] = input_ids
            if i % 2 != 0:
                tokens[i, 1:-1] = vl_chat_processor.pad_id

        inputs_embeds = mmgpt.language_model.get_input_embeddings()(tokens)  # (parallel_size*2, len(input_ids), 2048)

        generated_tokens = ops.zeros(parallel_size, image_token_num_per_image, dtype=ms.int32)

        # Autoregressive decoding: one image token per step, reusing the KV cache.
        for i in range(image_token_num_per_image):
            print(str(i) + '=' * 60)
            outputs = mmgpt.language_model.model(
                inputs_embeds=inputs_embeds,
                use_cache=True,
                past_key_values=outputs.past_key_values if i != 0 else None,  # no cache on the first step
            )
            hidden_states = outputs.last_hidden_state  # (parallel_size*2, seq_len, 2048)

            logits = mmgpt.gen_head(hidden_states[:, -1, :])  # last position only => (parallel_size*2, vocab_size)
            logit_cond = logits[0::2, :]
            logit_uncond = logits[1::2, :]

            # classifier-free guidance: push the conditional logits away from the unconditional ones
            logits = logit_uncond + cfg_weight * (logit_cond - logit_uncond)
            probs = ops.softmax(logits / temperature, dim=-1)

            next_token = ops.multinomial(probs, num_samples=1)  # (parallel_size, 1)
            generated_tokens[:, i] = next_token.squeeze(axis=-1)

            # duplicate each sampled token for the conditional and unconditional streams
            next_token = ops.cat([next_token.unsqueeze(dim=1), next_token.unsqueeze(dim=1)], dim=1).view(-1)  # (parallel_size*2,)
            img_embeds = mmgpt.prepare_gen_img_embeds(next_token)  # (parallel_size*2, 2048)
            # print("img_embeds.shape:", img_embeds.shape)
            # print("img_embeds.dtype:", img_embeds.dtype)
            inputs_embeds = img_embeds.unsqueeze(dim=1)  # (parallel_size*2, 1, 2048)

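        # The sampled IDs are VQ codebook indices; decode_code reshapes them into
        # an (img_size // patch_size) x (img_size // patch_size) = 24x24 latent grid
        # (576 positions, 8 latent channels) and runs the VQ decoder to get pixels.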
        if image_token_num_per_image == 576:
            dec = mmgpt.gen_vision_model.decode_code(
                generated_tokens.astype(ms.int32),
                shape=[parallel_size, 8, img_size // patch_size, img_size // patch_size])
        else:
            # fewer than 576 tokens were generated: tile the last token to fill the 24x24 grid
            pad_last_token = generated_tokens[:, -1].unsqueeze(dim=1).tile((1, 576 - image_token_num_per_image))
            cat_generated_tokens = ops.cat([generated_tokens, pad_last_token], dim=1)
            print("cat_generated_tokens.shape:", cat_generated_tokens.shape)  # (1, 576)
            dec = mmgpt.gen_vision_model.decode_code(
                cat_generated_tokens.astype(ms.int32),
                shape=[parallel_size, 8, img_size // patch_size, img_size // patch_size])
        dec = dec.astype(ms.float32).asnumpy().transpose(0, 2, 3, 1)  # NCHW -> NHWC

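        # The decoder outputs values in [-1, 1]; rescale them to [0, 255] for uint8 images.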
        dec = np.clip((dec + 1) / 2 * 255, 0, 255)

        visual_img = np.zeros((parallel_size, img_size, img_size, 3), dtype=np.uint8)
        visual_img[:, :, :] = dec

        os.makedirs('generated_samples', exist_ok=True)
        for i in range(parallel_size):
            save_path = os.path.join('generated_samples', "img_{}.jpg".format(i))
            PIL.Image.fromarray(visual_img[i]).save(save_path)

    generate(
        vl_gpt,
        vl_chat_processor,
        prompt,
    )
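# With the defaults above (parallel_size=1, img_size=384), this writes one
# 384x384 image to generated_samples/img_0.jpg.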