|
@@ -15,7 +15,9 @@
 #include "node_realm-inl.h"
 #include "node_shadow_realm.h"
 #include "node_snapshot_builder.h"
+#include "node_v8.h"
 #include "node_v8_platform-inl.h"
+#include "node_handle_address.h"
 #include "node_wasm_web_api.h"
 #include "uv.h"
 #ifdef NODE_ENABLE_VTUNE_PROFILING
@@ -191,11 +193,147 @@ void DebuggingArrayBufferAllocator::RegisterPointerInternal(void* data, |
   allocations_[data] = size;
 }
 
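+// Attributes each ArrayBuffer backing-store allocation to the label set that
+// is active at allocation time, keyed by the serialized labels.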
+void* ProfilingArrayBufferAllocator::Allocate(size_t size) {
+  void* ret = NodeArrayBufferAllocator::Allocate(size);
+  if (ret != nullptr && enabled_.load(std::memory_order_acquire)) {
+    LabelPairs labels = FindCurrentLabels();
+    if (!labels.empty()) {
+      std::string key = SerializeLabels(labels);
+      Mutex::ScopedLock lock(mutex_);
+      allocations_[ret] = {key, size};
+      auto& entry = per_label_bytes_[key];
+      if (entry.labels.empty()) entry.labels = std::move(labels);
+      entry.bytes += static_cast<int64_t>(size);
+    }
+  }
+  return ret;
+}
+
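+// Same bookkeeping as Allocate(), for the uninitialized-allocation path.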
+void* ProfilingArrayBufferAllocator::AllocateUninitialized(size_t size) {
+  void* ret = NodeArrayBufferAllocator::AllocateUninitialized(size);
+  if (ret != nullptr && enabled_.load(std::memory_order_acquire)) {
+    LabelPairs labels = FindCurrentLabels();
+    if (!labels.empty()) {
+      std::string key = SerializeLabels(labels);
+      Mutex::ScopedLock lock(mutex_);
+      allocations_[ret] = {key, size};
+      auto& entry = per_label_bytes_[key];
+      if (entry.labels.empty()) entry.labels = std::move(labels);
+      entry.bytes += static_cast<int64_t>(size);
+    }
+  }
+  return ret;
+}
+
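+// Undoes the per-label accounting recorded when the buffer was allocated;
+// pointers that were never recorded (profiling off, or unlabeled) are skipped.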
+void ProfilingArrayBufferAllocator::Free(void* data, size_t size) {
+  if (enabled_.load(std::memory_order_acquire)) {
+    Mutex::ScopedLock lock(mutex_);
+    auto it = allocations_.find(data);
+    if (it != allocations_.end()) {
+      auto label_it = per_label_bytes_.find(it->second.first);
+      if (label_it != per_label_bytes_.end()) {
+        label_it->second.bytes -= static_cast<int64_t>(it->second.second);
+      }
+      allocations_.erase(it);
+    }
+  }
+  NodeArrayBufferAllocator::Free(data, size);
+}
+
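+// Called when heap profiling starts: records the isolate, the shared label
+// map, and the calling thread (treated as the main thread from then on).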
+void ProfilingArrayBufferAllocator::Enable(
+    v8::Isolate* isolate,
+    std::unordered_map<uintptr_t,
+                       v8_utils::HeapProfileLabelEntry>* label_map) {
+  Mutex::ScopedLock lock(mutex_);
+  isolate_ = isolate;
+  label_map_ = label_map;
+  main_thread_id_ = std::this_thread::get_id();
+  enabled_.store(true, std::memory_order_release);
+}
+
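+// Clears the enabled flag first, then drops all bookkeeping state under the
+// lock.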
+void ProfilingArrayBufferAllocator::Disable() {
+  enabled_.store(false, std::memory_order_release);
+  Mutex::ScopedLock lock(mutex_);
+  allocations_.clear();
+  per_label_bytes_.clear();
+  isolate_ = nullptr;
+  label_map_ = nullptr;
+}
+
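+// Returns a snapshot of the labels that currently have live (bytes > 0)
+// external allocations, taken under the lock so it is consistent with
+// concurrent Allocate()/Free() calls.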
+std::vector<ProfilingArrayBufferAllocator::LabeledBytes>
+ProfilingArrayBufferAllocator::GetPerLabelBytes() const {
+  Mutex::ScopedLock lock(mutex_);
+  std::vector<LabeledBytes> result;
+  for (const auto& [key, entry] : per_label_bytes_) {
+    if (entry.bytes > 0) {
+      result.push_back(entry);
+    }
+  }
+  return result;
+}
+
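+// Flattens the key/value pairs into a single NUL-delimited string so the
+// label set can be used as a hash-map key.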
+std::string ProfilingArrayBufferAllocator::SerializeLabels(
+    const LabelPairs& labels) {
+  std::string key;
+  for (const auto& [k, v] : labels) {
+    if (!key.empty()) key += '\0';
+    key += k;
+    key += '\0';
+    key += v;
+  }
+  return key;
+}
+
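+// Resolves the current continuation-preserved embedder data (CPED) object to
+// its recorded label set: an O(1) lookup by tagged address, with an identity
+// scan as fallback in case GC moved the object.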
+ProfilingArrayBufferAllocator::LabelPairs
+ProfilingArrayBufferAllocator::FindCurrentLabels() {
+  // Skip non-main-thread allocations (SharedArrayBuffer from workers).
+  if (std::this_thread::get_id() != main_thread_id_) return {};
+  if (isolate_ == nullptr || label_map_ == nullptr) return {};
+
+  // Read the CPED via the public V8 API. This is safe because:
+  // 1. The ArrayBuffer allocator runs in a normal JS context, not during GC.
+  // 2. A HandleScope is always active during JS execution.
+  v8::Local<v8::Value> cped =
+      isolate_->GetContinuationPreservedEmbedderData();
+  if (cped.IsEmpty() || cped->IsUndefined() || cped->IsNull()) return {};
+
+  uintptr_t addr = GetLocalAddress(cped);
+  if (addr == 0) return {};  // Smi::zero(): no ALS context.
+
+  // O(1) lookup by tagged object address.
+  auto it = label_map_->find(addr);
+  if (it != label_map_->end() && !it->second.labels.empty()) {
+    return it->second.labels;
+  }
+
+  // Slow path: GC may have moved the object. Scan by identity.
+  for (const auto& [key, entry] : *label_map_) {
+    if (!entry.context_key.IsEmpty() && entry.context_key == cped) {
+      if (!entry.labels.empty()) {
+        // Copy the labels before extract() invalidates the reference.
+        auto result = entry.labels;
+        // Rehash under the new address for future fast-path hits.
+        auto node = label_map_->extract(key);
+        node.key() = addr;
+        label_map_->insert(std::move(node));
+        return result;
+      }
+      break;
+    }
+  }
+
+  return {};
+}
+
 std::unique_ptr<ArrayBufferAllocator> ArrayBufferAllocator::Create(bool debug) {
   if (debug || per_process::cli_options->debug_arraybuffer_allocations)
     return std::make_unique<DebuggingArrayBufferAllocator>();
-  else
-    return std::make_unique<NodeArrayBufferAllocator>();
+  // Always use ProfilingArrayBufferAllocator so that per-label external memory
+  // tracking is available when the sampling heap profiler is started via
+  // v8.startSamplingHeapProfiler(). When profiling is disabled (the default),
+  // the only overhead is a single atomic load (enabled_.load()) on each
+  // Allocate/Free; no hash-map lookups or CPED reads occur.
+  return std::make_unique<ProfilingArrayBufferAllocator>();
 }
 
 ArrayBufferAllocator* CreateArrayBufferAllocator() {
|