From ebf37a54c899ab75dfcd3b03002c3eb7f17af5d3 Mon Sep 17 00:00:00 2001 From: Raahul Kalyaan Jakka Date: Mon, 27 Oct 2025 13:09:01 -0700 Subject: [PATCH] Changing Backend Tensor initialization (#5055) Summary: X-link: https://github.com/facebookresearch/FBGEMM/pull/2065 **Context:** Currently, RocksDB stores data in row-wise format. To enable optimizer offloading for the kernel, we will append the optimizer state to its corresponding row. During initialization, we need to randomly initialize the weights, while the optimizer values need to be initialized to zero when optimizer offloading is enabled. **In this diff:** We add two new arguments: 1. enable_optimizer_offloading: This flag enables initializing the last optimizer_D columns to zero 2. optimizer_D: The number of columns in the table that need to be initialized to zero. This set of columns represents the optimizer values (w/wo padding). **Scenarios:** 1. Optimizer_offloading is False: max_D = Dimensions of weights only, optimizer_D = 0 2. 
Optimizer_offloading is True: max_D = Dimension of weights (w_D) + optimizers (o_D) optimizer_D = dimensions of optimizers (o_D) initialize o_D columns with zero Differential Revision: D85157732 --- .../ssd_table_batched_embeddings.h | 34 +++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/fbgemm_gpu/src/ssd_split_embeddings_cache/ssd_table_batched_embeddings.h b/fbgemm_gpu/src/ssd_split_embeddings_cache/ssd_table_batched_embeddings.h index 47e1d29893..c440eec69b 100644 --- a/fbgemm_gpu/src/ssd_split_embeddings_cache/ssd_table_batched_embeddings.h +++ b/fbgemm_gpu/src/ssd_split_embeddings_cache/ssd_table_batched_embeddings.h @@ -121,7 +121,9 @@ class EmbeddingRocksDB : public kv_db::EmbeddingKVDB { std::optional table_dims = std::nullopt, std::optional hash_size_cumsum = std::nullopt, int64_t flushing_block_size = 2000000000 /*2GB*/, - bool disable_random_init = false) + bool disable_random_init = false, + bool enable_optimizer_offloading = false, + int64_t optimizer_D = 0) : kv_db::EmbeddingKVDB( num_shards, max_D, @@ -266,7 +268,9 @@ class EmbeddingRocksDB : public kv_db::EmbeddingKVDB { uniform_init_lower, uniform_init_upper, row_storage_bitwidth, - disable_random_init); + disable_random_init, + enable_optimizer_offloading, + optimizer_D); executor_ = std::make_unique(num_shards); ro_.verify_checksums = false; ro_.async_io = true; @@ -421,19 +425,29 @@ class EmbeddingRocksDB : public kv_db::EmbeddingKVDB { float uniform_init_lower, float uniform_init_upper, int64_t row_storage_bitwidth, - bool disable_random_init) { + bool disable_random_init, + bool enable_optimizer_offloading = false, + int64_t optimizer_D = 0) { for (auto i = 0; i < num_shards; ++i) { auto* gen = at::check_generator( at::detail::getDefaultCPUGenerator()); { std::lock_guard lock(gen->mutex_); - initializers_.push_back( - std::make_unique( - gen->random64(), - max_D, - uniform_init_lower, - uniform_init_upper, - row_storage_bitwidth)); + auto initializer = 
std::make_unique( + gen->random64(), + max_D, + uniform_init_lower, + uniform_init_upper, + row_storage_bitwidth); + + // When Optimizer offloading is enabled, we want to initialize the last + // optimizer_D columns(optimizer values) to zero + if (enable_optimizer_offloading) { + auto& tensor = initializer->row_storage_; + tensor.index({"...", at::indexing::Slice(max_D - optimizer_D, max_D)}) + .zero_(); + } + initializers_.push_back(std::move(initializer)); } } disable_random_init_ = disable_random_init;