Skip to content

Commit 4dd843d

Browse files
committed
Merge branch 'master' into asset-management
2 parents 46fdd63 + dd611a7 commit 4dd843d

File tree

145 files changed

+15028
-3199
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

145 files changed

+15028
-3199
lines changed
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Workflow that installs the project's dependencies and runs the pytest suite
# under tests/execution.
name: Execution Tests
2+
3+
# Triggers: pushes to, and pull requests targeting, the main or master branch.
on:
4+
push:
5+
branches: [ main, master ]
6+
pull_request:
7+
branches: [ main, master ]
8+
9+
jobs:
10+
test:
11+
strategy:
12+
matrix:
13+
# The job is instantiated once per operating system in this list.
os: [ubuntu-latest, windows-latest, macos-latest]
14+
runs-on: ${{ matrix.os }}
15+
# A failure of this job does not fail the overall workflow run.
continue-on-error: true
16+
steps:
17+
- uses: actions/checkout@v4
18+
- name: Set up Python
19+
uses: actions/setup-python@v4
20+
with:
21+
python-version: '3.12'
22+
# Installs PyTorch from the CPU wheel index, then the project and unit-test
# requirements files.
- name: Install requirements
23+
run: |
24+
python -m pip install --upgrade pip
25+
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
26+
pip install -r requirements.txt
27+
pip install -r tests-unit/requirements.txt
28+
- name: Run Execution Tests
29+
run: |
30+
python -m pytest tests/execution -v --skip-timing-checks

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,18 +65,18 @@ See what ComfyUI can do with the [example workflows](https://comfyanonymous.gith
6565
- [Flux](https://comfyanonymous.github.io/ComfyUI_examples/flux/)
6666
- [Lumina Image 2.0](https://comfyanonymous.github.io/ComfyUI_examples/lumina2/)
6767
- [HiDream](https://comfyanonymous.github.io/ComfyUI_examples/hidream/)
68-
- [Cosmos Predict2](https://comfyanonymous.github.io/ComfyUI_examples/cosmos_predict2/)
6968
- [Qwen Image](https://comfyanonymous.github.io/ComfyUI_examples/qwen_image/)
69+
- [Hunyuan Image 2.1](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_image/)
7070
- Image Editing Models
7171
- [Omnigen 2](https://comfyanonymous.github.io/ComfyUI_examples/omnigen/)
7272
- [Flux Kontext](https://comfyanonymous.github.io/ComfyUI_examples/flux/#flux-kontext-image-editing-model)
7373
- [HiDream E1.1](https://comfyanonymous.github.io/ComfyUI_examples/hidream/#hidream-e11)
74+
- [Qwen Image Edit](https://comfyanonymous.github.io/ComfyUI_examples/qwen_image/#edit-model)
7475
- Video Models
7576
- [Stable Video Diffusion](https://comfyanonymous.github.io/ComfyUI_examples/video/)
7677
- [Mochi](https://comfyanonymous.github.io/ComfyUI_examples/mochi/)
7778
- [LTX-Video](https://comfyanonymous.github.io/ComfyUI_examples/ltxv/)
7879
- [Hunyuan Video](https://comfyanonymous.github.io/ComfyUI_examples/hunyuan_video/)
79-
- [Nvidia Cosmos](https://comfyanonymous.github.io/ComfyUI_examples/cosmos/) and [Cosmos Predict2](https://comfyanonymous.github.io/ComfyUI_examples/cosmos_predict2/)
8080
- [Wan 2.1](https://comfyanonymous.github.io/ComfyUI_examples/wan/)
8181
- [Wan 2.2](https://comfyanonymous.github.io/ComfyUI_examples/wan22/)
8282
- Audio Models
@@ -191,7 +191,7 @@ comfy install
191191

192192
## Manual Install (Windows, Linux)
193193

194-
python 3.13 is supported but using 3.12 is recommended because some custom nodes and their dependencies might not support it yet.
194+
Python 3.13 is very well supported. If you have trouble with some custom node dependencies, you can try 3.12.
195195

196196
Git clone this repo.
197197

app/user_manager.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -363,10 +363,17 @@ async def post_userdata(request):
363363
if not overwrite and os.path.exists(path):
364364
return web.Response(status=409, text="File already exists")
365365

366-
body = await request.read()
366+
try:
367+
body = await request.read()
367368

368-
with open(path, "wb") as f:
369-
f.write(body)
369+
with open(path, "wb") as f:
370+
f.write(body)
371+
except OSError as e:
372+
logging.warning(f"Error saving file '{path}': {e}")
373+
return web.Response(
374+
status=400,
375+
reason="Invalid filename. Please avoid special characters like :\\/*?\"<>|"
376+
)
370377

371378
user_path = self.get_request_user_filepath(request, None)
372379
if full_info:
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
from .wav2vec2 import Wav2Vec2Model
2+
from .whisper import WhisperLargeV3
3+
import comfy.model_management
4+
import comfy.ops
5+
import comfy.utils
6+
import logging
7+
import torchaudio
8+
9+
10+
class AudioEncoderModel():
    """Wrap a wav2vec2 or Whisper audio encoder together with its model patcher.

    The network is constructed on the text-encoder offload device and is
    loaded through ``comfy.model_management.load_model_gpu`` each time
    :meth:`encode_audio` is called.
    """

    def __init__(self, config):
        """Build the encoder described by ``config``.

        Args:
            config: Mapping with a ``"model_type"`` entry (``"wav2vec2"`` or
                ``"whisper3"``). Every other entry is forwarded as a keyword
                argument to the model constructor, together with ``dtype``,
                ``device`` and ``operations``. The mapping is copied and is
                not modified.

        Raises:
            KeyError: If ``config`` has no ``"model_type"`` entry.
            RuntimeError: If ``model_type`` is not a supported value.
        """
        # Work on a copy so the caller's config keeps its "model_type" entry.
        model_config = dict(config)
        model_type = model_config.pop("model_type")
        if model_type not in ("wav2vec2", "whisper3"):
            # Fail here with a clear message instead of later with an
            # AttributeError on a never-assigned self.model.
            raise RuntimeError("ERROR: audio encoder model_type not supported: {}".format(model_type))

        # Imported explicitly because the module-level imports do not include
        # comfy.model_patcher.
        from comfy.model_patcher import ModelPatcher

        self.load_device = comfy.model_management.text_encoder_device()
        offload_device = comfy.model_management.text_encoder_offload_device()
        self.dtype = comfy.model_management.text_encoder_dtype(self.load_device)
        model_config.update({
            "dtype": self.dtype,
            "device": offload_device,
            "operations": comfy.ops.manual_cast
        })

        if model_type == "wav2vec2":
            self.model = Wav2Vec2Model(**model_config)
        else:  # "whisper3" (validated above)
            self.model = WhisperLargeV3(**model_config)
        self.model.eval()
        self.patcher = ModelPatcher(self.model, load_device=self.load_device, offload_device=offload_device)
        # Input audio is resampled to this rate before being encoded.
        self.model_sample_rate = 16000

    def load_sd(self, sd):
        """Load ``sd`` into the wrapped model non-strictly and return the result of ``load_state_dict``."""
        return self.model.load_state_dict(sd, strict=False)

    def get_sd(self):
        """Return the wrapped model's state dict."""
        return self.model.state_dict()

    def encode_audio(self, audio, sample_rate):
        """Resample ``audio`` to the model rate and run it through the encoder.

        Args:
            audio: Audio tensor. Its third dimension is reported as the sample
                count, so it needs at least three dimensions (presumably
                batch, channels, samples; confirm against callers).
            sample_rate: Sample rate of ``audio``.

        Returns:
            dict with ``"encoded_audio"`` (first model output),
            ``"encoded_audio_all_layers"`` (second model output) and
            ``"audio_samples"`` (``shape[2]`` of the resampled audio).
        """
        comfy.model_management.load_model_gpu(self.patcher)
        audio = torchaudio.functional.resample(audio, sample_rate, self.model_sample_rate)
        out, all_layers = self.model(audio.to(self.load_device))
        outputs = {}
        outputs["encoded_audio"] = out
        outputs["encoded_audio_all_layers"] = all_layers
        # Taken after resampling, so this is the count at model_sample_rate.
        outputs["audio_samples"] = audio.shape[2]
        return outputs
46+
47+
48+
def load_audio_encoder_from_sd(sd, prefix=""):
    """Detect the audio encoder type from a state dict and load it.

    A leading ``"wav2vec2."`` key prefix is stripped first. The checkpoint is
    treated as wav2vec2 when it contains ``"encoder.layer_norm.bias"`` (the
    length of that tensor selects the large or base preset) and as Whisper
    when it contains ``"model.encoder.embed_positions.weight"`` (the
    ``"model."`` prefix is then stripped as well).

    Args:
        sd: State dict of the audio encoder checkpoint.
        prefix: Accepted but not read by this function.

    Returns:
        The constructed ``AudioEncoderModel`` with ``sd`` loaded into it.
        Missing and unexpected keys are logged as warnings.

    Raises:
        RuntimeError: If the checkpoint layout is not recognised, or a
            wav2vec2 checkpoint has an unsupported embedding size.
    """
    # wav2vec2 presets keyed by the embedding size found in the checkpoint.
    wav2vec2_presets = {
        1024: {  # large
            "model_type": "wav2vec2",
            "embed_dim": 1024,
            "num_heads": 16,
            "num_layers": 24,
            "conv_norm": True,
            "conv_bias": True,
            "do_normalize": True,
            "do_stable_layer_norm": True
        },
        768: {  # base
            "model_type": "wav2vec2",
            "embed_dim": 768,
            "num_heads": 12,
            "num_layers": 12,
            "conv_norm": False,
            "conv_bias": False,
            "do_normalize": False,  # chinese-wav2vec2-base has this False
            "do_stable_layer_norm": False
        },
    }

    sd = comfy.utils.state_dict_prefix_replace(sd, {"wav2vec2.": ""})

    if "encoder.layer_norm.bias" in sd:  # wav2vec2
        embed_dim = sd["encoder.layer_norm.bias"].shape[0]
        config = wav2vec2_presets.get(embed_dim)
        if config is None:
            raise RuntimeError("ERROR: audio encoder file is invalid or unsupported embed_dim: {}".format(embed_dim))
    elif "model.encoder.embed_positions.weight" in sd:
        sd = comfy.utils.state_dict_prefix_replace(sd, {"model.": ""})
        config = {
            "model_type": "whisper3",
        }
    else:
        raise RuntimeError("ERROR: audio encoder not supported.")

    encoder = AudioEncoderModel(config)
    missing, unexpected = encoder.load_sd(sd)
    if len(missing) != 0:
        logging.warning("missing audio encoder: {}".format(missing))
    if len(unexpected) != 0:
        logging.warning("unexpected audio encoder: {}".format(unexpected))

    return encoder

0 commit comments

Comments
 (0)