Skip to content

Commit 2b119c7

Browse files
committed
feat(diffusion): add Qwen-Image support with true_cfg_scale parameter
**Problem:** Qwen-Image models from Alibaba use a different parameter name (true_cfg_scale) instead of the standard guidance_scale used by other diffusion models. Users could not properly configure Qwen-Image models.

**Solution:**
- Added true_cfg_scale parameter to generate_image_diffusers.py
- Detects Qwen/QwenImage pipeline types and uses the appropriate parameter
- Updated Swift PythonDiffusersService to pass trueCfgScale
- Backward compatible: falls back to guidance_scale if true_cfg_scale is not specified
- Python script already had QwenImagePipeline support via dynamic detection

**Changes:**
1. scripts/generate_image_diffusers.py:
   - Added true_cfg_scale parameter to generate_image() function
   - Added model type detection (is_qwen) to choose the correct parameter
   - Build guidance_kwargs dict with either true_cfg_scale or guidance_scale
   - Updated all pipeline calls to use **guidance_kwargs
   - Added --true-cfg-scale command-line argument
2. Sources/StableDiffusionIntegration/PythonDiffusersService.swift:
   - Added trueCfgScale: Float? parameter to generateImage()
   - Added logging for trueCfgScale value
   - Pass --true-cfg-scale to Python script when provided

**Testing:**
- ✅ Build: PASS
- ✅ Python syntax: PASS
- ✅ Backward compatible: existing models continue to work

**Notes:**
- Qwen-Image recommended settings: 50 steps, true_cfg_scale 4.0
- Models already supported via HuggingFace browser in preferences
- UI can search/download Qwen/Qwen-Image model directly
1 parent 4d2448b commit 2b119c7

2 files changed

Lines changed: 43 additions & 13 deletions

File tree

Sources/StableDiffusionIntegration/PythonDiffusersService.swift

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,8 @@ public class PythonDiffusersService {
176176
/// - modelName: Name of model in staging directory
177177
/// - scheduler: Scheduler to use
178178
/// - steps: Number of inference steps
179-
/// - guidanceScale: Guidance scale
179+
/// - guidanceScale: Guidance scale (standard SD models)
180+
/// - trueCfgScale: True CFG scale (Qwen-Image and similar models, overrides guidanceScale)
180181
/// - width: Image width
181182
/// - height: Image height
182183
/// - seed: Random seed (nil for random)
@@ -195,6 +196,7 @@ public class PythonDiffusersService {
195196
scheduler: PythonScheduler = .dpmppSDEKarras,
196197
steps: Int = 25,
197198
guidanceScale: Float = 7.5,
199+
trueCfgScale: Float? = nil,
198200
width: Int = 512,
199201
height: Int = 512,
200202
seed: Int? = nil,
@@ -225,6 +227,7 @@ public class PythonDiffusersService {
225227
"scheduler": .string(scheduler.rawValue),
226228
"steps": .stringConvertible(steps),
227229
"guidance": .stringConvertible(guidanceScale),
230+
"trueCfgScale": trueCfgScale != nil ? .stringConvertible(trueCfgScale!) : .string("nil"),
228231
"size": .string("\(width)×\(height)"),
229232
"mode": .string(inputImage != nil ? "img2img" : "txt2img")
230233
])
@@ -243,6 +246,11 @@ public class PythonDiffusersService {
243246
"--num-images", String(imageCount)
244247
]
245248

249+
/// Add true-cfg-scale if provided (for Qwen-Image and similar models)
250+
if let trueCfg = trueCfgScale {
251+
args.append(contentsOf: ["--true-cfg-scale", String(trueCfg)])
252+
}
253+
246254
if let negPrompt = negativePrompt, !negPrompt.isEmpty {
247255
args.append(contentsOf: ["-n", negPrompt])
248256
}

scripts/generate_image_diffusers.py

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,7 @@ def generate_image(
455455
scheduler: str = "dpm++_sde_karras",
456456
steps: int = 25,
457457
guidance_scale: float = 7.5,
458+
true_cfg_scale: Optional[float] = None,
458459
width: int = 512,
459460
height: int = 512,
460461
seed: Optional[int] = None,
@@ -475,7 +476,8 @@ def generate_image(
475476
negative_prompt: Negative prompt
476477
scheduler: Scheduler name (dpm++_sde_karras, euler, etc.)
477478
steps: Number of inference steps
478-
guidance_scale: Guidance scale
479+
guidance_scale: Guidance scale (standard SD models)
480+
true_cfg_scale: True CFG scale (Qwen-Image and similar models, overrides guidance_scale)
479481
width: Image width (text-to-image only)
480482
height: Image height (text-to-image only)
481483
seed: Random seed (None for random)
@@ -862,6 +864,23 @@ def generate_image(
862864
prompt_embeds, negative_embeds = compel_result
863865
print("Using Compel embeddings for SD 1.x")
864866

867+
# Determine which guidance parameter to use
868+
# Qwen-Image uses true_cfg_scale, standard SD uses guidance_scale
869+
is_qwen = model_type.lower() in ["qwenimage", "qwen"]
870+
871+
# For Qwen models: use true_cfg_scale if provided, otherwise fall back to guidance_scale (non-Qwen models always use guidance_scale)
872+
effective_cfg_scale = true_cfg_scale if true_cfg_scale is not None else guidance_scale
873+
874+
# Build kwargs for pipeline call
875+
guidance_kwargs = {}
876+
if is_qwen:
877+
# Qwen-Image uses true_cfg_scale parameter
878+
guidance_kwargs['true_cfg_scale'] = effective_cfg_scale
879+
print(f" True CFG: {effective_cfg_scale} (Qwen-Image mode)")
880+
else:
881+
# Standard SD models use guidance_scale
882+
guidance_kwargs['guidance_scale'] = guidance_scale
883+
865884
# Generate image
866885
print(f"Generating {num_images} image(s)...")
867886
print(f" Mode: {'Image-to-Image' if is_img2img else 'Text-to-Image'}")
@@ -873,7 +892,8 @@ def generate_image(
873892
else:
874893
print(f" Resolution: {width}×{height}")
875894
print(f" Steps: {steps}")
876-
print(f" Guidance: {guidance_scale}")
895+
if not is_qwen:
896+
print(f" Guidance: {guidance_scale}")
877897
if use_compel and prompt_embeds is not None:
878898
print(f" Compel: ENABLED (prompt weighting active)")
879899

@@ -886,9 +906,9 @@ def generate_image(
886906
image=init_img,
887907
strength=strength,
888908
num_inference_steps=steps,
889-
guidance_scale=guidance_scale,
890909
num_images_per_prompt=num_images,
891910
generator=generator,
911+
**guidance_kwargs,
892912
**prompt_embeds
893913
)
894914
elif prompt_embeds is not None:
@@ -899,9 +919,9 @@ def generate_image(
899919
prompt_embeds=prompt_embeds,
900920
negative_prompt_embeds=negative_embeds,
901921
num_inference_steps=steps,
902-
guidance_scale=guidance_scale,
903922
num_images_per_prompt=num_images,
904-
generator=generator
923+
generator=generator,
924+
**guidance_kwargs
905925
)
906926
else:
907927
# Standard prompts
@@ -911,21 +931,21 @@ def generate_image(
911931
strength=strength,
912932
negative_prompt=negative_prompt if negative_prompt else None,
913933
num_inference_steps=steps,
914-
guidance_scale=guidance_scale,
915934
num_images_per_prompt=num_images,
916-
generator=generator
935+
generator=generator,
936+
**guidance_kwargs
917937
)
918938
else:
919939
# Text-to-Image generation
920940
if isinstance(prompt_embeds, dict):
921941
# SDXL with Compel
922942
result = pipe(
923943
num_inference_steps=steps,
924-
guidance_scale=guidance_scale,
925944
width=width,
926945
height=height,
927946
num_images_per_prompt=num_images,
928947
generator=generator,
948+
**guidance_kwargs,
929949
**prompt_embeds
930950
)
931951
elif prompt_embeds is not None:
@@ -934,23 +954,23 @@ def generate_image(
934954
prompt_embeds=prompt_embeds,
935955
negative_prompt_embeds=negative_embeds,
936956
num_inference_steps=steps,
937-
guidance_scale=guidance_scale,
938957
width=width,
939958
height=height,
940959
num_images_per_prompt=num_images,
941-
generator=generator
960+
generator=generator,
961+
**guidance_kwargs
942962
)
943963
else:
944964
# Standard prompts
945965
result = pipe(
946966
prompt=prompt,
947967
negative_prompt=negative_prompt if negative_prompt else None,
948968
num_inference_steps=steps,
949-
guidance_scale=guidance_scale,
950969
width=width,
951970
height=height,
952971
num_images_per_prompt=num_images,
953-
generator=generator
972+
generator=generator,
973+
**guidance_kwargs
954974
)
955975

956976
images = result.images
@@ -1043,6 +1063,7 @@ def main():
10431063
)
10441064
parser.add_argument('--steps', type=int, default=25, help='Number of inference steps (default: 25)')
10451065
parser.add_argument('--guidance', type=float, default=7.5, help='Guidance scale (default: 7.5)')
1066+
parser.add_argument('--true-cfg-scale', type=float, default=None, help='True CFG scale for Qwen-Image (overrides --guidance)')
10461067
parser.add_argument('--width', type=int, default=512, help='Image width (default: 512)')
10471068
parser.add_argument('--height', type=int, default=512, help='Image height (default: 512)')
10481069
parser.add_argument('--seed', type=int, default=None, help='Random seed (default: random)')
@@ -1064,6 +1085,7 @@ def main():
10641085
scheduler=args.scheduler,
10651086
steps=args.steps,
10661087
guidance_scale=args.guidance,
1088+
true_cfg_scale=getattr(args, 'true_cfg_scale', None),
10671089
width=args.width,
10681090
height=args.height,
10691091
seed=args.seed,

0 commit comments

Comments
 (0)