Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[submodule "ggml"]
path = ggml
path = ggml
url = https://github.com/tetherto/qvac-ext-ggml.git
branch = 2026-06-06
[submodule "examples/server/frontend"]
Expand Down
26 changes: 23 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ if (MSVC)
$<$<COMPILE_LANGUAGE:C>:/utf-8>
$<$<COMPILE_LANGUAGE:CXX>:/MP>
$<$<COMPILE_LANGUAGE:CXX>:/utf-8>
# stable-diffusion.cpp is a large translation unit; with the LTX-2
# additions it exceeds the default COFF limit of ~2^16 sections per object
# file, which MSVC reports as fatal error C1128. /bigobj raises that limit
# (clang/gcc targeting ELF or Mach-O have no equivalent limit).
$<$<COMPILE_LANGUAGE:C>:/bigobj>
$<$<COMPILE_LANGUAGE:CXX>:/bigobj>
)
endif()

Expand Down Expand Up @@ -298,13 +303,28 @@ endif()
# Only add ggml if it hasn't been added yet
if (NOT TARGET ggml)
if (SD_USE_SYSTEM_GGML)
find_package(ggml REQUIRED)
# System ggml (e.g. the qvac-ext-ggml vcpkg port). The port exports
# GGML_MAX_NAME=128 as a PUBLIC/INTERFACE compile definition on
# ggml::ggml, so consumers inherit it automatically; the add_definitions()
# above is not needed when building against system ggml.
find_package(ggml CONFIG)
if (NOT ggml_FOUND)
message(FATAL_ERROR "System-installed GGML library not found.")
message(FATAL_ERROR
"SD_USE_SYSTEM_GGML is ON but no system GGML was found. Provide ggml "
"via the qvac-ext-ggml vcpkg port (or any package exporting the "
"ggml::ggml CMake target) and configure with the vcpkg toolchain "
"file, e.g. -DCMAKE_TOOLCHAIN_FILE=<vcpkg>/scripts/buildsystems/vcpkg.cmake")
endif()
add_library(ggml ALIAS ggml::ggml)
else()
elseif (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/ggml/CMakeLists.txt")
# Vendored submodule (default): standalone builds with no external tooling.
add_subdirectory(ggml)
else()
message(FATAL_ERROR
"ggml not found. Either initialize the bundled submodule "
"(git submodule update --init ggml, or clone with --recursive), or "
"build against system ggml with -DSD_USE_SYSTEM_GGML=ON together with "
"the vcpkg toolchain file (qvac-ext-ggml port).")
endif()
endif()

Expand Down
41 changes: 34 additions & 7 deletions docs/build.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,46 @@
## Get the Code

```
git clone --recursive https://github.com/leejet/stable-diffusion.cpp
cd stable-diffusion.cpp
git clone --recursive https://github.com/tetherto/qvac-ext-stable-diffusion.cpp
cd qvac-ext-stable-diffusion.cpp
```

- If you have already cloned the repository, you can use the following command to update the repository to the latest code.
- If you have already cloned the repository, you can use the following command to update the repository to the latest code and fetch the submodules.

```
cd stable-diffusion.cpp
git pull origin master
git submodule init
git submodule update
cd qvac-ext-stable-diffusion.cpp
git pull
git submodule update --init --recursive
```

## GGML dependency (vendored submodule or system / vcpkg)

`ggml` can be provided in two ways:

- **Vendored submodule (default).** `SD_USE_SYSTEM_GGML` defaults to `OFF`, and
CMake builds the bundled `ggml` submodule
([qvac-ext-ggml](https://github.com/tetherto/qvac-ext-ggml)) via
`add_subdirectory(ggml)`. This is the no-extra-tooling path for building the
repository standalone — just clone with `--recursive` (or run
`git submodule update --init --recursive`) and the plain `cmake ..` invocations
below work as-is.
- **System / vcpkg ggml.** Pass `-DSD_USE_SYSTEM_GGML=ON` and configure with the
vcpkg toolchain file so CMake resolves the `ggml::ggml` target from the
qvac-ext-ggml vcpkg port:

```shell
mkdir build && cd build
cmake .. -DSD_USE_SYSTEM_GGML=ON -DCMAKE_TOOLCHAIN_FILE=<vcpkg>/scripts/buildsystems/vcpkg.cmake
cmake --build . --config Release
```

The qvac-ext-ggml port is built with `GGML_MAX_NAME=128` and exports it as a
PUBLIC compile definition, so system-ggml consumers inherit it automatically; for
vendored builds the in-tree `add_definitions(-DGGML_MAX_NAME=128)` applies.

The GPU backend flags below (`-DSD_METAL=ON`, `-DSD_CUDA=ON`, ...) apply to both
paths.

## WebP and WebM Support in Examples

The example applications (`examples/cli` and `examples/server`) use `libwebp` to support WebP image I/O, and `examples/cli` can also use `libwebm` for `.webm` video output. Both are enabled by default. WebM output currently reuses `libwebp` to encode each frame as VP8 before muxing with `libwebm`.
Comment thread
aegioscy marked this conversation as resolved.
Expand Down
1 change: 1 addition & 0 deletions examples/common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -808,6 +808,7 @@ sd_ctx_params_t SDContextParams::to_sd_ctx_params_t(bool vae_decode_only, bool f
stream_layers,
backend.c_str(),
params_backend.c_str(),
SD_BACKEND_PREF_GPU, // qvac: default to GPU; honored only when --backend is unset
};
return sd_ctx_params;
}
Expand Down
13 changes: 11 additions & 2 deletions src/ggml_extend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1338,8 +1338,17 @@ __STATIC_INLINE__ ggml_tensor* ggml_ext_attention_ext(ggml_context* ctx,
}
k_in = ggml_cast(ctx, k_in, GGML_TYPE_F16);

auto v_fused = ggml_rope_flux(ctx, v_in, nullptr);
if (ggml_backend_supports_op(backend, v_fused)) {
// ggml_rope_flux(ctx, v_in, nullptr): the null position tensor means NO
// rotation is applied — V is never RoPE-rotated (only q/k are). With a
// null pe the fused kernel degenerates to exactly the permute(0,2,1,3) +
// reshape_3d layout transform in the else branch below; we use it purely
// as a fused-kernel fast path for that reshape. Gate it on the same
// GGML_ROPE_FLUX_DISABLE switch as the q/k fused path in rope.hpp so the
// whole fused-RoPE kernel family can be turned off together for
// debugging / backend bring-up.
static const bool rope_flux_disabled = std::getenv("GGML_ROPE_FLUX_DISABLE") != nullptr;
ggml_tensor* v_fused = rope_flux_disabled ? nullptr : ggml_rope_flux(ctx, v_in, nullptr);
if (v_fused != nullptr && ggml_backend_supports_op(backend, v_fused)) {
v_in = v_fused;
} else {
v_in = ggml_ext_cont(ctx, ggml_permute(ctx, v_in, 0, 2, 1, 3));
Expand Down
28 changes: 13 additions & 15 deletions src/ggml_graph_cut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include "ggml-backend.h"
#include "util.h"

#include "../ggml/src/ggml-impl.h"

namespace sd::ggml_graph_cut {

Expand All @@ -31,8 +30,8 @@ namespace sd::ggml_graph_cut {
static int graph_leaf_index(ggml_cgraph* gf, const ggml_tensor* tensor) {
GGML_ASSERT(gf != nullptr);
GGML_ASSERT(tensor != nullptr);
for (int i = 0; i < gf->n_leafs; ++i) {
if (gf->leafs[i] == tensor) {
for (int i = 0; i < ggml_graph_n_leafs(gf); ++i) {
if (ggml_graph_leaf(gf, i) == tensor) {
return i;
}
}
Expand Down Expand Up @@ -293,15 +292,15 @@ namespace sd::ggml_graph_cut {

int leaf_count(ggml_cgraph* gf) {
GGML_ASSERT(gf != nullptr);
return gf->n_leafs;
return ggml_graph_n_leafs(gf);
}

ggml_tensor* leaf_tensor(ggml_cgraph* gf, int leaf_index) {
GGML_ASSERT(gf != nullptr);
if (leaf_index < 0 || leaf_index >= gf->n_leafs) {
if (leaf_index < 0 || leaf_index >= ggml_graph_n_leafs(gf)) {
return nullptr;
}
return gf->leafs[leaf_index];
return ggml_graph_leaf(gf, leaf_index);
}

ggml_backend_buffer_t tensor_buffer(const ggml_tensor* tensor) {
Expand Down Expand Up @@ -333,14 +332,14 @@ namespace sd::ggml_graph_cut {

bool plan_matches_graph(ggml_cgraph* gf, const Plan& plan) {
GGML_ASSERT(gf != nullptr);
if (ggml_graph_n_nodes(gf) != plan.n_nodes || gf->n_leafs != plan.n_leafs) {
if (ggml_graph_n_nodes(gf) != plan.n_nodes || ggml_graph_n_leafs(gf) != plan.n_leafs) {
return false;
}
for (const auto& input_shape_ref : plan.input_shapes) {
if (input_shape_ref.leaf_index < 0 || input_shape_ref.leaf_index >= gf->n_leafs) {
if (input_shape_ref.leaf_index < 0 || input_shape_ref.leaf_index >= ggml_graph_n_leafs(gf)) {
return false;
}
ggml_tensor* leaf = gf->leafs[input_shape_ref.leaf_index];
ggml_tensor* leaf = ggml_graph_leaf(gf, input_shape_ref.leaf_index);
if (leaf == nullptr || input_shape_ref.type != leaf->type) {
return false;
}
Expand Down Expand Up @@ -373,7 +372,7 @@ namespace sd::ggml_graph_cut {
}
return ggml_graph_node(gf, input_ref.node_index);
}
if (input_ref.leaf_index < 0 || input_ref.leaf_index >= gf->n_leafs) {
if (input_ref.leaf_index < 0 || input_ref.leaf_index >= ggml_graph_n_leafs(gf)) {
return nullptr;
}
return leaf_tensor(gf, input_ref.leaf_index);
Expand Down Expand Up @@ -459,8 +458,7 @@ namespace sd::ggml_graph_cut {
if (current_input == nullptr) {
continue;
}
GGML_ASSERT(segment_graph->n_leafs < segment_graph->size);
segment_graph->leafs[segment_graph->n_leafs++] = current_input;
ggml_graph_add_leaf(segment_graph, current_input);
}

for (int output_node_index : segment.output_node_indices) {
Expand Down Expand Up @@ -518,9 +516,9 @@ namespace sd::ggml_graph_cut {
return plan;
}
plan.n_nodes = n_nodes;
plan.n_leafs = gf->n_leafs;
for (int i = 0; i < gf->n_leafs; ++i) {
ggml_tensor* leaf = gf->leafs[i];
plan.n_leafs = ggml_graph_n_leafs(gf);
for (int i = 0; i < ggml_graph_n_leafs(gf); ++i) {
ggml_tensor* leaf = ggml_graph_leaf(gf, i);
if (is_params_tensor(params_tensor_set, leaf)) {
continue;
}
Expand Down
Loading