Skip to content

Commit 749b7d4

Browse files
unamedkr and claude committed
Fix Windows CI test failures + apply PR #12 bug fixes
Windows CI fixes:
- tq_ops.c: pthread_cond_wait must use SleepConditionVariableSRW (not SleepConditionVariableCS), since pthread_mutex_t is mapped to SRWLOCK. Using the CS variant on an SRW lock caused a deadlock in the test_ops thread pool.
- test_tqm.cpp, test_gguf_moe.cpp: replace hardcoded /tmp paths with CWD-relative names on Windows.
- CMakeLists.txt: bump TIMEOUT to 600s on MSVC for slow tests (test_multihash_dim64, test_ops, test_unbiased, test_cumulative_error). The MSVC release build lacks the auto-vectorization that the GCC/Clang builds rely on.

Apply functional fixes from PR #12 (cherry-picked without reformatting):
- tq_qjl.c: add a dim > 0 guard before the src[dim-1] NaN check.
- tq_uniform.c: heap-allocate the Q8 query buffer (was a 512B stack array).
- tq_transformer.c: NULL-check the key_cache and value_cache calloc results.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent a4a2b43 commit 749b7d4

8 files changed

Lines changed: 29 additions & 4 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,4 @@ tq_run.dSYM/
6161
.claude/worktrees/
6262
.claude/worktrees/
6363
docs/assets/hero_backup.png
64+
build_nomt/

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,11 @@ if(TQ_BUILD_TESTS)
229229
target_link_libraries(${test_name} turboquant GTest::gtest_main)
230230
add_test(NAME ${test_name} COMMAND ${test_name})
231231

232+
# Slow tests on MSVC (no auto-vectorization, weaker codegen) need more time
233+
if(MSVC AND ${test_name} MATCHES "test_(multihash_dim64|ops|unbiased|cumulative_error)")
234+
set_tests_properties(${test_name} PROPERTIES TIMEOUT 600)
235+
endif()
236+
232237
# Pass Metal availability to test targets
233238
if(TQ_BUILD_METAL AND APPLE)
234239
target_compile_definitions(${test_name} PRIVATE TQ_HAS_METAL=1)

src/core/tq_qjl.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ void tq_qjl_quantize_ref(const float* src, void* dst, int n) {
5454
if (dim > TQ_BK_QJL) dim = TQ_BK_QJL;
5555

5656
/* Quick NaN check on first and last element */
57-
if (src[0] != src[0] || src[dim-1] != src[dim-1]) {
57+
if (dim > 0 && (src[0] != src[0] || src[dim-1] != src[dim-1])) {
5858
memset(block, 0, sizeof(*block));
5959
return;
6060
}

src/core/tq_uniform.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#include "turboquant/turboquant.h"
1111
#include <math.h>
12+
#include <stdlib.h>
1213
#include <string.h>
1314
#include <float.h>
1415

@@ -191,8 +192,10 @@ void tq_quantize_query_q8(const float* query, int8_t* q8_out,
191192
*/
192193
void tq_uniform_4b_attention_int_ref(const float* query, const void* kv,
193194
float* scores, int seq_len, int head_dim) {
194-
/* Step 1: Quantize query to Q8 (once, amortized over seq_len) */
195-
int8_t q8[512]; /* max head_dim supported */
195+
/* Step 1: Quantize query to Q8 (once, amortized over seq_len).
196+
* Heap-allocate to avoid stack overflow on large head_dim. */
197+
int8_t* q8 = (int8_t*)malloc((size_t)head_dim * sizeof(int8_t));
198+
if (!q8) { for (int s = 0; s < seq_len; s++) scores[s] = 0.0f; return; }
196199
float q_scale, q_sum;
197200
tq_quantize_query_q8(query, q8, &q_scale, &q_sum, head_dim);
198201

@@ -225,6 +228,7 @@ void tq_uniform_4b_attention_int_ref(const float* query, const void* kv,
225228
}
226229
scores[s] = score;
227230
}
231+
free(q8);
228232
}
229233

230234
/* ---------- Uniform 4-bit attention (dequantize + dot product) ---------- */

src/engine/tq_ops.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ typedef CONDITION_VARIABLE pthread_cond_t;
2323
#define pthread_mutex_unlock(m) LeaveCriticalSection(m)
2424
#define pthread_mutex_destroy(m) DeleteCriticalSection(m)
2525
#define pthread_cond_init(c, a) InitializeConditionVariable(c)
26-
#define pthread_cond_wait(c, m) SleepConditionVariableCS(c, m, INFINITE)
26+
/* Note: cond_wait uses SRW variant because mutex is SRWLOCK (see below) */
27+
#define pthread_cond_wait(c, m) SleepConditionVariableSRW(c, m, INFINITE, 0)
2728
#define pthread_cond_broadcast(c) WakeAllConditionVariable(c)
2829
#define pthread_cond_destroy(c) ((void)0)
2930
/* __thread → __declspec(thread) */

src/engine/tq_transformer.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ tq_state_t* tq_create_state_ex(const tq_model_config_t* config, tq_type kv_type,
216216
#else
217217
s->key_cache = (float*)calloc(1, kv_total_bytes);
218218
#endif
219+
if (!s->key_cache) { free(s); return NULL; }
219220

220221
/* Value cache quantization: Q4 or Q2 for aggressive V compression.
221222
* When value_quant_bits > 0, V is stored quantized instead of FP16/FP32.
@@ -265,6 +266,7 @@ tq_state_t* tq_create_state_ex(const tq_model_config_t* config, tq_type kv_type,
265266
#else
266267
s->value_cache = (float*)calloc((size_t)n_layers * kv_layer_size, sizeof(float));
267268
#endif
269+
if (!s->value_cache) { free(s->key_cache); free(s); return NULL; }
268270
s->value_cache_qs = NULL;
269271
s->value_cache_scales = NULL;
270272
s->kv_cache_size = (size_t)n_layers * kv_layer_size * sizeof(float);

tests/test_gguf_moe.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,11 @@ TEST(ModelConfig, MoEFields) {
420420

421421
TEST(GGUF, MagicDetection) {
422422
/* Write a tiny file with GGUF magic but invalid content */
423+
#ifdef _WIN32
424+
const char* tmppath = "test_gguf_magic.gguf";
425+
#else
423426
const char* tmppath = "/tmp/test_gguf_magic.gguf";
427+
#endif
424428
FILE* f = fopen(tmppath, "wb");
425429
ASSERT_NE(f, nullptr);
426430
uint32_t magic = 0x46554747; /* "GGUF" as LE uint32 */

tests/test_tqm.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,11 @@ TEST(TQM, SaveLoadRoundtrip) {
175175
ASSERT_EQ(model.use_q4_weights, 1);
176176

177177
/* Save to temp file */
178+
#ifdef _WIN32
179+
const char* tmp_path = "test_tqm_roundtrip.tqm";
180+
#else
178181
const char* tmp_path = "/tmp/test_tqm_roundtrip.tqm";
182+
#endif
179183
int ret = tq_save_tqm(&model, NULL, tmp_path);
180184
ASSERT_EQ(ret, 0);
181185

@@ -259,7 +263,11 @@ TEST(TQM, SaveLoadRoundtrip) {
259263
* ============================================================ */
260264
TEST(TQM, AutoDetect) {
261265
/* Write a minimal TQM header to a temp file */
266+
#ifdef _WIN32
267+
const char* tmp_path = "test_tqm_autodetect.tqm";
268+
#else
262269
const char* tmp_path = "/tmp/test_tqm_autodetect.tqm";
270+
#endif
263271
FILE* f = fopen(tmp_path, "wb");
264272
ASSERT_NE(f, nullptr);
265273

0 commit comments

Comments
 (0)