diff --git a/.gitmodules b/.gitmodules index 496c7d22f..30109eddc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,5 +1,5 @@ [submodule "ggml"] - path = ggml + path = ggml url = https://github.com/tetherto/qvac-ext-ggml.git branch = 2026-06-06 [submodule "examples/server/frontend"] diff --git a/CMakeLists.txt b/CMakeLists.txt index 2804bad46..242b84292 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,11 @@ if (MSVC) $<$:/utf-8> $<$:/MP> $<$:/utf-8> + # stable-diffusion.cpp is a large translation unit; with the LTX-2 + # additions it exceeds the COFF 2^16 section limit, so MSVC needs + # /bigobj (clang/gcc have no equivalent limit). Fatal error C1128. + $<$:/bigobj> + $<$:/bigobj> ) endif() @@ -298,13 +303,28 @@ endif() # Only add ggml if it hasn't been added yet if (NOT TARGET ggml) if (SD_USE_SYSTEM_GGML) - find_package(ggml REQUIRED) + # System ggml (e.g. the qvac-ext-ggml vcpkg port). The port exports + # GGML_MAX_NAME=128 as a PUBLIC/INTERFACE compile definition on + # ggml::ggml, so consumers inherit it automatically (no need for the + # add_definitions() above under system ggml). + find_package(ggml CONFIG) if (NOT ggml_FOUND) - message(FATAL_ERROR "System-installed GGML library not found.") + message(FATAL_ERROR + "SD_USE_SYSTEM_GGML is ON but no system GGML was found. Provide ggml " + "via the qvac-ext-ggml vcpkg port (or any package exporting the " + "ggml::ggml CMake target) and configure with the vcpkg toolchain " + "file, e.g. -DCMAKE_TOOLCHAIN_FILE=/scripts/buildsystems/vcpkg.cmake") endif() add_library(ggml ALIAS ggml::ggml) - else() + elseif (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/ggml/CMakeLists.txt") + # Vendored submodule (default): standalone builds with no external tooling. add_subdirectory(ggml) + else() + message(FATAL_ERROR + "ggml not found. 
Either initialize the bundled submodule " + "(git submodule update --init ggml, or clone with --recursive), or " + "build against system ggml with -DSD_USE_SYSTEM_GGML=ON together with " + "the vcpkg toolchain file (qvac-ext-ggml port).") endif() endif() diff --git a/docs/build.md b/docs/build.md index d33f9329a..616817326 100644 --- a/docs/build.md +++ b/docs/build.md @@ -3,19 +3,46 @@ ## Get the Code ``` -git clone --recursive https://github.com/leejet/stable-diffusion.cpp -cd stable-diffusion.cpp +git clone --recursive https://github.com/tetherto/qvac-ext-stable-diffusion.cpp +cd qvac-ext-stable-diffusion.cpp ``` -- If you have already cloned the repository, you can use the following command to update the repository to the latest code. +- If you have already cloned the repository, you can use the following command to update the repository to the latest code and fetch the submodules. ``` -cd stable-diffusion.cpp -git pull origin master -git submodule init -git submodule update +cd qvac-ext-stable-diffusion.cpp +git pull +git submodule update --init --recursive ``` +## GGML dependency (vendored submodule or system / vcpkg) + +`ggml` can be provided in two ways: + +- **Vendored submodule (default).** `SD_USE_SYSTEM_GGML` defaults to `OFF`, and + CMake builds the bundled `ggml` submodule + ([qvac-ext-ggml](https://github.com/tetherto/qvac-ext-ggml)) via + `add_subdirectory(ggml)`. This is the no-extra-tooling path for building the + repository standalone — just clone with `--recursive` (or run + `git submodule update --init --recursive`) and the plain `cmake ..` invocations + below work as-is. +- **System / vcpkg ggml.** Pass `-DSD_USE_SYSTEM_GGML=ON` and configure with the + vcpkg toolchain file so CMake resolves the `ggml::ggml` target from the + qvac-ext-ggml vcpkg port: + + ```shell + mkdir build && cd build + cmake .. -DSD_USE_SYSTEM_GGML=ON -DCMAKE_TOOLCHAIN_FILE=<vcpkg-root>/scripts/buildsystems/vcpkg.cmake + cmake --build . 
--config Release + ``` + +The qvac-ext-ggml port is built with `GGML_MAX_NAME=128` and exports it as a +PUBLIC compile definition, so system-ggml consumers inherit it automatically; for +vendored builds the in-tree `add_definitions(-DGGML_MAX_NAME=128)` applies. + +The GPU backend flags below (`-DSD_METAL=ON`, `-DSD_CUDA=ON`, ...) apply to both +paths. + ## WebP and WebM Support in Examples The example applications (`examples/cli` and `examples/server`) use `libwebp` to support WebP image I/O, and `examples/cli` can also use `libwebm` for `.webm` video output. Both are enabled by default. WebM output currently reuses `libwebp` to encode each frame as VP8 before muxing with `libwebm`. diff --git a/examples/common/common.cpp b/examples/common/common.cpp index 0ecc72dc6..6ca3b5e36 100644 --- a/examples/common/common.cpp +++ b/examples/common/common.cpp @@ -808,6 +808,7 @@ sd_ctx_params_t SDContextParams::to_sd_ctx_params_t(bool vae_decode_only, bool f stream_layers, backend.c_str(), params_backend.c_str(), + SD_BACKEND_PREF_GPU, // qvac: default to GPU; honored only when --backend is unset }; return sd_ctx_params; } diff --git a/src/ggml_extend.hpp b/src/ggml_extend.hpp index 085c80b5c..da2afd7d0 100644 --- a/src/ggml_extend.hpp +++ b/src/ggml_extend.hpp @@ -1338,8 +1338,17 @@ __STATIC_INLINE__ ggml_tensor* ggml_ext_attention_ext(ggml_context* ctx, } k_in = ggml_cast(ctx, k_in, GGML_TYPE_F16); - auto v_fused = ggml_rope_flux(ctx, v_in, nullptr); - if (ggml_backend_supports_op(backend, v_fused)) { + // ggml_rope_flux(ctx, v_in, nullptr): the null position tensor means NO + // rotation is applied — V is never RoPE-rotated (only q/k are). With a + // null pe the fused kernel degenerates to exactly the permute(0,2,1,3) + + // reshape_3d layout transform in the else branch below; we use it purely + // as a fused-kernel fast path for that reshape. 
Gate it on the same + // GGML_ROPE_FLUX_DISABLE switch as the q/k fused path in rope.hpp so the + // whole fused-RoPE kernel family can be turned off together for + // debugging / backend bring-up. + static const bool rope_flux_disabled = std::getenv("GGML_ROPE_FLUX_DISABLE") != nullptr; + ggml_tensor* v_fused = rope_flux_disabled ? nullptr : ggml_rope_flux(ctx, v_in, nullptr); + if (v_fused != nullptr && ggml_backend_supports_op(backend, v_fused)) { v_in = v_fused; } else { v_in = ggml_ext_cont(ctx, ggml_permute(ctx, v_in, 0, 2, 1, 3)); diff --git a/src/ggml_graph_cut.cpp b/src/ggml_graph_cut.cpp index 61234eaf2..d22f24230 100644 --- a/src/ggml_graph_cut.cpp +++ b/src/ggml_graph_cut.cpp @@ -12,7 +12,6 @@ #include "ggml-backend.h" #include "util.h" -#include "../ggml/src/ggml-impl.h" namespace sd::ggml_graph_cut { @@ -31,8 +30,8 @@ namespace sd::ggml_graph_cut { static int graph_leaf_index(ggml_cgraph* gf, const ggml_tensor* tensor) { GGML_ASSERT(gf != nullptr); GGML_ASSERT(tensor != nullptr); - for (int i = 0; i < gf->n_leafs; ++i) { - if (gf->leafs[i] == tensor) { + for (int i = 0; i < ggml_graph_n_leafs(gf); ++i) { + if (ggml_graph_leaf(gf, i) == tensor) { return i; } } @@ -293,15 +292,15 @@ namespace sd::ggml_graph_cut { int leaf_count(ggml_cgraph* gf) { GGML_ASSERT(gf != nullptr); - return gf->n_leafs; + return ggml_graph_n_leafs(gf); } ggml_tensor* leaf_tensor(ggml_cgraph* gf, int leaf_index) { GGML_ASSERT(gf != nullptr); - if (leaf_index < 0 || leaf_index >= gf->n_leafs) { + if (leaf_index < 0 || leaf_index >= ggml_graph_n_leafs(gf)) { return nullptr; } - return gf->leafs[leaf_index]; + return ggml_graph_leaf(gf, leaf_index); } ggml_backend_buffer_t tensor_buffer(const ggml_tensor* tensor) { @@ -333,14 +332,14 @@ namespace sd::ggml_graph_cut { bool plan_matches_graph(ggml_cgraph* gf, const Plan& plan) { GGML_ASSERT(gf != nullptr); - if (ggml_graph_n_nodes(gf) != plan.n_nodes || gf->n_leafs != plan.n_leafs) { + if (ggml_graph_n_nodes(gf) != plan.n_nodes || 
ggml_graph_n_leafs(gf) != plan.n_leafs) { return false; } for (const auto& input_shape_ref : plan.input_shapes) { - if (input_shape_ref.leaf_index < 0 || input_shape_ref.leaf_index >= gf->n_leafs) { + if (input_shape_ref.leaf_index < 0 || input_shape_ref.leaf_index >= ggml_graph_n_leafs(gf)) { return false; } - ggml_tensor* leaf = gf->leafs[input_shape_ref.leaf_index]; + ggml_tensor* leaf = ggml_graph_leaf(gf, input_shape_ref.leaf_index); if (leaf == nullptr || input_shape_ref.type != leaf->type) { return false; } @@ -373,7 +372,7 @@ namespace sd::ggml_graph_cut { } return ggml_graph_node(gf, input_ref.node_index); } - if (input_ref.leaf_index < 0 || input_ref.leaf_index >= gf->n_leafs) { + if (input_ref.leaf_index < 0 || input_ref.leaf_index >= ggml_graph_n_leafs(gf)) { return nullptr; } return leaf_tensor(gf, input_ref.leaf_index); @@ -459,8 +458,7 @@ namespace sd::ggml_graph_cut { if (current_input == nullptr) { continue; } - GGML_ASSERT(segment_graph->n_leafs < segment_graph->size); - segment_graph->leafs[segment_graph->n_leafs++] = current_input; + ggml_graph_add_leaf(segment_graph, current_input); } for (int output_node_index : segment.output_node_indices) { @@ -518,9 +516,9 @@ namespace sd::ggml_graph_cut { return plan; } plan.n_nodes = n_nodes; - plan.n_leafs = gf->n_leafs; - for (int i = 0; i < gf->n_leafs; ++i) { - ggml_tensor* leaf = gf->leafs[i]; + plan.n_leafs = ggml_graph_n_leafs(gf); + for (int i = 0; i < ggml_graph_n_leafs(gf); ++i) { + ggml_tensor* leaf = ggml_graph_leaf(gf, i); if (is_params_tensor(params_tensor_set, leaf)) { continue; }