-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathMakefile
More file actions
183 lines (148 loc) · 6.81 KB
/
Makefile
File metadata and controls
183 lines (148 loc) · 6.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# MIT License
# Copyright (c) Meta Platforms, Inc. and affiliates.
# See LICENSE file for details.
# Name of the tool; the output shared library is named after it.
PROJECT := cutracer

# Toolchain. CXX is only assigned (to an empty value) when nothing else has
# defined it, and is forwarded to nvcc as the host compiler via -ccbin.
CXX ?=
PTXAS = ptxas
BIN2C = bin2c
NVCC = nvcc -ccbin=$(CXX) -D_FORCE_INLINES
# Toolchain version gates.
#
# nvcc: parse the release number out of `nvcc --version` and let bc compare it
# with NVCC_VER_REQ. A result of 0 (comparison false) aborts the build.
NVCC_VER_REQ := 10.1
NVCC_VER := $(shell $(NVCC) --version | grep release | cut -f2 -d, | cut -f3 -d' ')
NVCC_VER_CHECK := $(shell echo "$(NVCC_VER) >= $(NVCC_VER_REQ)" | bc)
ifeq ($(NVCC_VER_CHECK),0)
  $(error ERROR: nvcc version >= $(NVCC_VER_REQ) required to compile an nvbit tool! Instrumented applications can still use lower versions of nvcc.)
endif

# ptxas: when the reported release is below PTXAS_VER_ADD_FLAG (bc prints 0),
# pass an explicit -maxrregcount; for any other result no extra flag is added.
PTXAS_VER_ADD_FLAG := 12.3
PTXAS_VER := $(shell $(PTXAS) --version | grep release | cut -f2 -d, | cut -f3 -d' ')
PTXAS_VER_CHECK := $(shell echo "$(PTXAS_VER) >= $(PTXAS_VER_ADD_FLAG)" | bc)
ifneq ($(PTXAS_VER_CHECK),0)
  MAXRREGCOUNT_FLAG :=
else
  MAXRREGCOUNT_FLAG := -maxrregcount=24
endif
# Optimisation level: DEBUG=1 selects an unoptimised build, any other value
# (or none) selects -O3. Debug symbols (-g) are emitted in both cases.
ifneq ($(DEBUG),1)
  DEBUG_FLAGS := -O3 -g
else
  DEBUG_FLAGS := -g -O0
endif
# Project layout: sources, headers, and the two output directories.
SRC_DIR     := src
INCLUDE_DIR := include
OBJ_DIR     := obj
LIB_DIR     := lib

# NVBit location and the header search path shared by every compile rule.
NVBIT_PATH = ./third_party/nvbit/core
INCLUDES = -I$(NVBIT_PATH) -I./$(INCLUDE_DIR) -I./third_party
# Libraries
#
# How zstd gets linked:
#   * RHEL / CentOS / Fedora   -> static  (their libzstd.a is PIC-compatible)
#   * Ubuntu / Debian / others -> dynamic (their libzstd.a lacks -fPIC)
#   * STATIC_ZSTD=1 or DYNAMIC_ZSTD=1 overrides the default
# -lpthread is required because zstd uses POSIX threads internally.

# Distribution identifiers sourced from /etc/os-release; both stay empty when
# sourcing the file fails (the echo is skipped and errors are discarded).
OS_ID      := $(shell . /etc/os-release 2>/dev/null && echo $$ID)
OS_ID_LIKE := $(shell . /etc/os-release 2>/dev/null && echo $$ID_LIKE)

# IS_RHEL_LIKE is 1 when "rhel" occurs in ID or ID_LIKE, or when ID contains
# one of the names listed below; otherwise it is empty.
rhel_family_ids := centos fedora rocky almalinux
IS_RHEL_LIKE := $(if $(strip $(findstring rhel,$(OS_ID) $(OS_ID_LIKE)) $(foreach id,$(rhel_family_ids),$(findstring $(id),$(OS_ID)))),1,)
# Locate a static libzstd.a. Candidates are tried in order, first hit wins:
#   1. libzstd.a inside the libdir that pkg-config reports for libzstd,
#      if that file exists;
#   2. whatever `$(CC) -print-file-name=libzstd.a` prints, unless the output
#      is just the bare name "libzstd.a" (presumably the compiler's way of
#      saying it found no such file — confirm for non-GCC compilers).
# Expands to nothing when neither candidate is found.
# The helpers are recursive variables so nothing runs until the function is
# called, and the compiler probe is skipped when pkg-config already succeeded.
zstd_pkgconfig_candidate = $(wildcard $(shell pkg-config --variable=libdir libzstd 2>/dev/null)/libzstd.a)
zstd_cc_probe = $(shell $(CC) -print-file-name=libzstd.a 2>/dev/null)
zstd_cc_candidate = $(if $(filter-out libzstd.a,$(zstd_cc_probe)),$(zstd_cc_probe),)
define find_static_zstd
$(or $(zstd_pkgconfig_candidate),$(zstd_cc_candidate))
endef
# Step 1: decide how zstd is linked. Precedence: DYNAMIC_ZSTD, then
# STATIC_ZSTD, then the RHEL-like default (static); everything else
# (Ubuntu, Debian, ...) is dynamic because their libzstd.a is not built
# with -fPIC and therefore cannot be linked into a .so.
ifdef DYNAMIC_ZSTD
  zstd_link_mode := dynamic
else ifdef STATIC_ZSTD
  zstd_link_mode := static
  zstd_missing_msg := ERROR: libzstd.a not found. Install with: dnf install libzstd-static (RHEL/Fedora) or apt install libzstd-dev (Ubuntu/Debian)
else ifdef IS_RHEL_LIKE
  zstd_link_mode := static
  zstd_missing_msg := ERROR: libzstd.a not found. Install with: dnf install libzstd-static
else
  zstd_link_mode := dynamic
endif

# Step 2: resolve ZSTD_LIB. Static mode needs the archive path and stops the
# build with the mode-specific message when no archive can be located.
ifeq ($(zstd_link_mode),static)
  ZSTD_LIB := $(call find_static_zstd)
  ifeq ($(ZSTD_LIB),)
    $(error $(zstd_missing_msg))
  endif
else
  ZSTD_LIB := -lzstd
endif
# Link inputs: NVBit, zstd (archive path or -lzstd, as selected above), then
# the system libraries.
LIBS  = -L$(NVBIT_PATH) -lnvbit
LIBS += $(ZSTD_LIB)
LIBS += -lpthread -ldl
# Library search path derived from the nvcc found on PATH: the bin/nvcc
# suffix of its location is swapped for lib64.
NVCC_PATH = -L $(subst bin/nvcc,lib64,$(shell which nvcc | tr -s /))
# inject_funcs.cu is compiled by a dedicated rule, so it is named explicitly
# and kept out of the generic CUDA source list.
INJECT_FUNCS_SRC := $(SRC_DIR)/inject_funcs.cu
INJECT_FUNCS_OBJ := $(OBJ_DIR)/inject_funcs.o

# Top-level sources
CPP_SRCS := $(wildcard $(SRC_DIR)/*.cpp)
CU_SRCS  := $(filter-out $(INJECT_FUNCS_SRC),$(wildcard $(SRC_DIR)/*.cu))

# Internal fb/ sources. The wildcard is empty when fb/ does not exist, which
# leaves every FB_* list below empty as well. inject_funcs_fb.cu is split off
# because it needs the special inject flags.
FB_SRC_DIR          := $(SRC_DIR)/fb
FB_CU_SRCS          := $(wildcard $(FB_SRC_DIR)/*.cu)
FB_INJECT_FUNCS_SRC := $(FB_SRC_DIR)/inject_funcs_fb.cu
FB_REGULAR_CU_SRCS  := $(filter-out $(FB_INJECT_FUNCS_SRC),$(FB_CU_SRCS))

# Object files: src/<name>.{cu,cpp} -> obj/<name>.o, src/fb/<name>.cu -> obj/fb_<name>.o
REGULAR_OBJS        := $(addprefix $(OBJ_DIR)/,$(addsuffix .o,$(basename $(notdir $(CU_SRCS)))))
CPP_OBJS            := $(addprefix $(OBJ_DIR)/,$(addsuffix .o,$(basename $(notdir $(CPP_SRCS)))))
FB_REGULAR_OBJS     := $(addprefix $(OBJ_DIR)/fb_,$(addsuffix .o,$(basename $(notdir $(FB_REGULAR_CU_SRCS)))))
FB_INJECT_FUNCS_OBJ := $(if $(wildcard $(FB_INJECT_FUNCS_SRC)),$(OBJ_DIR)/fb_inject_funcs_fb.o,)
FB_OBJS             := $(FB_REGULAR_OBJS) $(FB_INJECT_FUNCS_OBJ)

# Everything that goes into the final link (regular + inject_funcs + cpp + fb)
OBJS := $(REGULAR_OBJS) $(INJECT_FUNCS_OBJ) $(CPP_OBJS) $(FB_OBJS)
# Tool-function kernels (precompiled fatbin for flush_channel): every .cu in
# tool_func/ maps to a .fatbin, and every .fatbin to a bin2c-generated .c.
TOOL_FUNC_DIR     := $(SRC_DIR)/tool_func
TOOL_FUNC_SOURCES := $(wildcard $(TOOL_FUNC_DIR)/*.cu)
TOOL_FUNC_FATBINS := $(patsubst %.cu,%.fatbin,$(TOOL_FUNC_SOURCES))
TOOL_FUNC_BIN2CS  := $(patsubst %.fatbin,%.c,$(TOOL_FUNC_FATBINS))

# GPU architecture passed to nvcc as -arch=...; override with `make ARCH=<value>`.
ARCH ?= all

# Final artifact: the tool's shared library.
NVBIT_TOOL = $(LIB_DIR)/$(PROJECT).so
# Main targets

# Default goal: build the tool library, then print where it was written.
# `dirs` is kept as a prerequisite so existing `make all` / `make dirs`
# invocations behave as before, but the build no longer relies on it for
# correctness (see the order-only prerequisites below).
all: dirs $(NVBIT_TOOL)
	@echo ""
	@echo "✅ Build successful! Output: $(NVBIT_TOOL)"
	@echo ""

# Convenience target: create both output directories.
dirs: $(OBJ_DIR) $(LIB_DIR)

# Create an output directory on demand.
$(OBJ_DIR):
	mkdir -p $@

$(LIB_DIR):
	mkdir -p $@

# Every object needs $(OBJ_DIR) and the library needs $(LIB_DIR) to exist
# before its recipe runs. Declaring that on the targets themselves (instead
# of only through `all: dirs ...`) keeps `make -j` from starting a compile
# before mkdir has finished, and lets `make $(NVBIT_TOOL)` work on a clean
# tree. The prerequisites are order-only (after `|`) so that a directory's
# changing timestamp never forces a rebuild.
$(OBJS): | $(OBJ_DIR)
$(NVBIT_TOOL): | $(LIB_DIR)
# Link step: nvcc combines all objects with NVBit, zstd and the CUDA
# libraries into the shared tool library. libnvbit.a is listed as a
# prerequisite so a changed NVBit archive triggers a relink.
$(NVBIT_TOOL): $(OBJS) $(NVBIT_PATH)/libnvbit.a
	$(NVCC) -arch=$(ARCH) $(DEBUG_FLAGS) $(OBJS) $(LIBS) $(NVCC_PATH) \
		-Wno-deprecated-gpu-targets -lcuda -lcudart_static \
		-shared -Xcompiler -rdynamic -o $@
# tool_func/<name>.cu → tool_func/<name>.fatbin (nvcc -fatbin)
$(TOOL_FUNC_DIR)/%.fatbin: $(TOOL_FUNC_DIR)/%.cu
	$(NVCC) -arch=$(ARCH) $(INCLUDES) $(DEBUG_FLAGS) \
		-Wno-deprecated-gpu-targets -fatbin $< -o $@
# Convert *.fatbin → *.c (embedded as a C byte array via bin2c). The array
# name passed to bin2c is the fatbin's file name without directory or
# extension, plus the suffix "_bin".
#
# The output is captured through a shell redirect, so the shell creates $@
# before bin2c has written anything. If bin2c fails, remove the partial file
# and fail the recipe; otherwise the truncated file's fresh timestamp would
# make later runs treat it as up to date.
$(TOOL_FUNC_DIR)/%.c: $(TOOL_FUNC_DIR)/%.fatbin
	$(BIN2C) -c --name $(basename $(notdir $<))_bin $< > $@ || { rm -f $@; exit 1; }
# Regular CUDA sources (everything in src/*.cu except inject_funcs.cu).
# The bin2c outputs are prerequisites so that
# #include "tool_func/flush_channel.c" is available when these files compile.
# The flags live in a recursive variable so ARCH and DEBUG_FLAGS are read when
# the recipe runs.
regular_cu_flags = -dc -c -std=c++17 $(INCLUDES) -Xptxas -cloning=no -Wno-deprecated-gpu-targets -Xcompiler -Wall -arch=$(ARCH) $(DEBUG_FLAGS) -Xcompiler -fPIC
$(REGULAR_OBJS): $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cu $(TOOL_FUNC_BIN2CS)
	$(NVCC) $(regular_cu_flags) $< -o $@
# inject_funcs.cu gets its own flag set: the optional register cap selected by
# the ptxas version check, plus -Xptxas -astoolspatch and
# --keep-device-functions. Note that $(DEBUG_FLAGS) is not passed here.
$(INJECT_FUNCS_OBJ): $(INJECT_FUNCS_SRC)
	$(NVCC) $(INCLUDES) $(MAXRREGCOUNT_FLAG) -Wno-deprecated-gpu-targets \
		-Xptxas -astoolspatch --keep-device-functions -arch=$(ARCH) \
		-Xcompiler -Wall -Xcompiler -fPIC -c $< -o $@
# Host-only C++ sources: src/<name>.cpp → obj/<name>.o, compiled with $(CXX)
# as position-independent code.
host_cxx_flags = -std=c++17 $(INCLUDES) -Wall $(DEBUG_FLAGS) -fPIC
$(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp
	$(CXX) $(host_cxx_flags) -c $< -o $@
# Internal fb/ CUDA sources: src/fb/<name>.cu → obj/fb_<name>.o. This rule is
# only exercised when fb/ exists, since FB_OBJS is empty otherwise.
fb_cu_flags = -dc -c -std=c++17 $(INCLUDES) -Xptxas -cloning=no -Wno-deprecated-gpu-targets -Xcompiler -Wall -arch=$(ARCH) $(DEBUG_FLAGS) -Xcompiler -fPIC
$(OBJ_DIR)/fb_%.o: $(FB_SRC_DIR)/%.cu
	$(NVCC) $(fb_cu_flags) $< -o $@
# inject_funcs_fb.cu is built with the same flags as inject_funcs.cu (NVBit
# device functions). This explicit rule takes precedence over the generic
# fb_%.o pattern rule for this one object.
$(OBJ_DIR)/fb_inject_funcs_fb.o: $(FB_INJECT_FUNCS_SRC)
	$(NVCC) $(INCLUDES) $(MAXRREGCOUNT_FLAG) -Wno-deprecated-gpu-targets \
		-Xptxas -astoolspatch --keep-device-functions -arch=$(ARCH) \
		-Xcompiler -Wall -Xcompiler -fPIC -c $< -o $@
# Remove both output directories and the generated fatbin / bin2c files.
# Every *.c under tool_func/ is deleted, so that directory must only hold
# generated C files.
clean:
	rm -rf $(OBJ_DIR) $(LIB_DIR) \
		$(TOOL_FUNC_DIR)/*.fatbin $(TOOL_FUNC_DIR)/*.c

# These targets are commands, not files.
.PHONY: all clean dirs