Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,12 @@ void CpuThread::run() {

Cubiomes *cubiomes = cubiomes_create(large_biomes);

while (!should_stop()) {
while (true) {
GpuOutput input;
{
std::unique_lock lock(inputs.mutex);
if (inputs.queue.empty()) {
if (should_stop()) break;
lock.unlock();
std::this_thread::sleep_for(std::chrono::seconds(1));
continue;
Expand Down
24 changes: 16 additions & 8 deletions src/gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1861,31 +1861,39 @@ void GpuThread::run() {
std::printf("\n");
std::printf("start_seed = %" PRIi64 "\n", start_seed);

uint64_t total_inputs = stage_filter_seeds.total_inputs * stage_filter_seeds.inputs_multiplier;
uint64_t total_outputs = filter_2.back().stage.total_outputs;

// Find max input value for dynamic column width
uint64_t max_inputs = total_inputs;
uint64_t gradvecs_1_inputs = stage_filter_gradvecs_1.total_inputs * stage_filter_gradvecs_1.inputs_multiplier;
if (gradvecs_1_inputs > max_inputs) max_inputs = gradvecs_1_inputs;
int inputs_width = std::max(12, (int)std::snprintf(nullptr, 0, "%" PRIu64, max_inputs));
int outputs_width = 12;

double kernel_total_time = 0;
for (auto &stage : stage_stats) {
uint64_t scaled_total_inputs = stage.total_inputs * stage.inputs_multiplier;
auto [scaled_input_speed, input_speed_unit] = scale_si(scaled_total_inputs / stage.total_time);
auto [scaled_output_speed, output_speed_unit] = scale_si(stage.total_outputs / stage.total_time);
std::printf("%-20s - %9.3f ms | %7.3f %% | %12" PRIu64 " -> %12" PRIu64
std::printf("%-20s - %9.3f ms | %7.3f %% | %*" PRIu64 " -> %*" PRIu64
" | 1 in %11.3f | %7.3f %cips | %7.3f %cops\n",
stage.name.c_str(), stage.total_time * 1e3,
stage.total_time / host_total_time * 100.0,
scaled_total_inputs, stage.total_outputs,
inputs_width, scaled_total_inputs, outputs_width, stage.total_outputs,
(double)scaled_total_inputs / stage.total_outputs,
scaled_input_speed, input_speed_unit, scaled_output_speed,
output_speed_unit);
kernel_total_time += stage.total_time;
}

uint64_t total_inputs = stage_filter_seeds.total_inputs * stage_filter_seeds.inputs_multiplier;
uint64_t total_outputs = filter_2.back().stage.total_outputs;
auto [scaled_input_speed, input_speed_unit] = scale_si(total_inputs / host_total_time);
auto [scaled_output_speed, output_speed_unit] = scale_si(total_outputs / host_total_time);
std::printf(
"total - %9.3f ms | %7.3f %% | %12" PRIu64
" -> %12" PRIu64 " | | %7.3f %cips | %7.3f %cops\n",
host_total_time * 1e3, kernel_total_time / host_total_time * 100.0,
total_inputs, total_outputs, scaled_input_speed, input_speed_unit,
"%-20s - %9.3f ms | %7.3f %% | %*" PRIu64
" -> %*" PRIu64 " | | %7.3f %cips | %7.3f %cops\n",
"total", host_total_time * 1e3, kernel_total_time / host_total_time * 100.0,
inputs_width, total_inputs, outputs_width, total_outputs, scaled_input_speed, input_speed_unit,
scaled_output_speed, output_speed_unit);

size_t gpu_outputs_size;
Expand Down
52 changes: 50 additions & 2 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,24 @@
#include <cstring>
#include <cinttypes>
#include <cstdio>
#include <csignal>
#include <chrono>
#include <optional>
#include <charconv>
#include <algorithm>
#include <random>
#include <unistd.h>

static std::atomic_bool running{true};
static std::atomic_uint32_t signal_count{0};

static void signal_handler(int) {
uint32_t count = signal_count.fetch_add(1, std::memory_order_relaxed) + 1;
running.store(false, std::memory_order_relaxed);
if (count >= 2) {
_exit(130);
}
}

#ifdef NO_GPU
constexpr bool no_gpu = true;
Expand Down Expand Up @@ -244,7 +257,10 @@ int main_inner(int argc, char **argv) {
}
#endif

for (size_t i = 0;; i++) {
std::signal(SIGINT, signal_handler);
std::signal(SIGTERM, signal_handler);

for (size_t i = 0; running.load(std::memory_order_relaxed); i++) {
if (threads != 0) {
std::lock_guard lock(cpu_outputs.mutex);
while (!cpu_outputs.queue.empty()) {
Expand All @@ -264,6 +280,23 @@ int main_inner(int argc, char **argv) {
std::this_thread::sleep_for(std::chrono::seconds(1));
}

size_t pending_cpu_inputs = 0;
{
std::lock_guard lock(gpu_outputs.mutex);
pending_cpu_inputs = gpu_outputs.queue.size();
}
size_t pending_cpu_outputs = 0;
{
std::lock_guard lock(cpu_outputs.mutex);
pending_cpu_outputs = cpu_outputs.queue.size();
}

std::printf("\nShutting down...\n");
Comment thread
Likqez marked this conversation as resolved.
if (pending_cpu_inputs > 0 || pending_cpu_outputs > 0) {
std::printf("Pending CPU input queue: %zu, pending CPU output queue: %zu\n", pending_cpu_inputs, pending_cpu_outputs);
}
std::printf("Press Ctrl+C again to force exit immediately.\n");

#ifndef NO_GPU
for (auto &thread : gpu_threads) {
(*thread).stop();
Expand All @@ -279,19 +312,34 @@ int main_inner(int argc, char **argv) {
(*client_thread).stop();
}
if (server_thread) {
(*server_thread).stop();
(*server_thread).shutdown();
}
#endif

#ifndef NO_GPU
std::printf("Waiting for GPU batches to finish...\n");
for (auto &thread : gpu_threads) {
Comment thread
Likqez marked this conversation as resolved.
(*thread).join();
}
{
uint64_t total_seeds_checked = seed_range.pos.load(std::memory_order_relaxed) - start_seed;
std::printf("Start seed: %" PRIi64 ", Total seeds checked: %" PRIu64 "\n", start_seed, total_seeds_checked);
}
#endif
#ifndef NO_CPU
for (auto &thread : cpu_threads) {
(*thread).join();
}
{
std::lock_guard lock(cpu_outputs.mutex);
while (!cpu_outputs.queue.empty()) {
auto output = cpu_outputs.queue.front();
cpu_outputs.queue.pop();
std::printf("%" PRIi64 " at %" PRIi32 " %" PRIi32 " with %" PRIi32 "\n", output.seed, output.x, output.z, output.score);
std::fprintf(output_file, "%" PRIi64 " %" PRIi32 " %" PRIi32 " %" PRIi32 "\n", output.seed, output.x, output.z, output.score);
std::fflush(output_file);
}
}
#endif
#ifndef NO_NET
if (client_thread) {
Expand Down
9 changes: 9 additions & 0 deletions src/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ void ServerThread::run() {

try {
asio::io_context io_context;
io_ctx.store(&io_context, std::memory_order_release);

asio::ip::tcp::resolver resolver(io_context);
auto endpoints = resolver.resolve(listen_address.host, listen_address.service);
Expand All @@ -140,8 +141,16 @@ void ServerThread::run() {
tcp_server server(io_context, endpoint, outputs);

io_context.run();

io_ctx.store(nullptr, std::memory_order_release);
} catch (std::exception &e) {
std::fprintf(stderr, "Uncaught exception on server thread: %s\n", e.what());
std::abort();
}
}

void ServerThread::shutdown() {
stop();
auto *ctx = io_ctx.load(std::memory_order_acquire);
if (ctx) ctx->stop();
}
4 changes: 4 additions & 0 deletions src/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@

#include "common.h"

namespace asio { class io_context; }

struct ServerThread: Thread<ServerThread> {
HostService &listen_address;
GpuOutputs &outputs;
std::atomic<asio::io_context*> io_ctx{nullptr};

ServerThread(HostService &listen_address, GpuOutputs &outputs);

void run();
void shutdown();
};