From dddd9537ec70f3171637fa5b75b3aca8d74d244f Mon Sep 17 00:00:00 2001
From: Hang Qu
Date: Thu, 6 Nov 2025 22:09:21 -0800
Subject: [PATCH] Update embedding_forward_quantized_cpu_template.cpp to use
 initialized output memory instead of uninitialized (#5054)

Summary:
X-link: https://github.com/facebookresearch/FBGEMM/pull/2064

We observed that when the output tensor's memory is left uninitialized, the output may contain garbage values, because some of that memory is never written by the kernel. The proposed fix is a quick workaround; a more efficient solution would be to zero-fill only the untouched memory.

Reviewed By: sryap

Differential Revision: D85447298
---
 .../inference/embedding_forward_quantized_cpu_template.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fbgemm_gpu/codegen/inference/embedding_forward_quantized_cpu_template.cpp b/fbgemm_gpu/codegen/inference/embedding_forward_quantized_cpu_template.cpp
index 1a2942071c..00a9b944c1 100644
--- a/fbgemm_gpu/codegen/inference/embedding_forward_quantized_cpu_template.cpp
+++ b/fbgemm_gpu/codegen/inference/embedding_forward_quantized_cpu_template.cpp
@@ -210,6 +210,9 @@ Tensor int_nbit_split_embedding{{ "_nobag" if nobag else "" }}_codegen_forward_{
     total_adjusted_D += T * kINT8QparamsBytes;
   }
   output = at::empty({B, total_adjusted_D}, dev_weights.options().dtype(getScalarType(o_dtype)).pinned_memory(pinned_memory));
+  if (!output_is_int8 && !output_is_int4) {
+    output.fill_(0);
+  }
   {% else %}
   constexpr int kINT8QparamsBytes = 4; // no bag int8 output aligns with fbgemm weights storage size and layout
   constexpr int kINT4QparamsElems = 8; // scale + bias takes 4 bytes which are 8 int4 elements
@@ -220,6 +223,9 @@ Tensor int_nbit_split_embedding{{ "_nobag" if nobag else "" }}_codegen_forward_{
     adjusted_D += kINT4QparamsElems;
   }
   output = at::empty({total_L, adjusted_D}, dev_weights.options().dtype(getScalarType(o_dtype)).pinned_memory(pinned_memory));
+  if (!output_is_int8 && !output_is_int4) {
+    output.fill_(0);
+  }
   {% endif %}
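
---
For reviewers unfamiliar with the failure mode, below is a minimal standalone sketch (not part of the patch) of why `at::empty` output can read back as garbage when a kernel does not write every element, and how a `fill_(0)` after allocation works around it. The `partially_write` helper and the shapes are hypothetical stand-ins for the embedding lookup; only the `at::empty` / `fill_(0)` usage mirrors the patched code.

```cpp
// Minimal sketch (assumed example, not from the patch): an at::empty buffer
// that is only partially written returns stale memory for untouched rows,
// while zero-filling it first makes those rows deterministic.
#include <ATen/ATen.h>
#include <iostream>

// Hypothetical stand-in for the embedding kernel: it writes only row 0,
// leaving row 1 untouched (e.g. an empty bag).
void partially_write(at::Tensor& out) {
  out[0].fill_(1.0f);
}

int main() {
  // Uninitialized allocation: row 1 holds whatever bytes were in the buffer.
  at::Tensor garbage_out = at::empty({2, 4}, at::kFloat);
  partially_write(garbage_out);

  // Workaround used by the patch: allocate, then fill_(0) before the kernel,
  // so untouched rows read back as zeros instead of stale memory.
  at::Tensor safe_out = at::empty({2, 4}, at::kFloat);
  safe_out.fill_(0);
  partially_write(safe_out);

  std::cout << "uninitialized row 1: " << garbage_out[1] << "\n";
  std::cout << "zero-filled row 1:   " << safe_out[1] << "\n";
  return 0;
}
```

Note that in the patch itself the fill is guarded by `!output_is_int8 && !output_is_int4`, so it is applied only for non-INT8/INT4 output dtypes.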