pytorch · jeffdaily · Apr 26, 2025 · May 2, 2025 · May 5, 2025 · May 6, 2025
diff --git a/external/hipify_torch b/external/hipify_torch
diff --git a/fbgemm_gpu/include/fbgemm_gpu/utils/cuda_prelude.cuh b/fbgemm_gpu/include/fbgemm_gpu/utils/cuda_prelude.cuh
@@ -14,9 +14,9 @@
 
 #ifdef __HIP_PLATFORM_AMD__
 #include <ATen/cuda/CUDAGeneratorImpl.h>
+#include <ATen/cuda/CUDAContext.h>
 #include <ATen/cuda/PhiloxUtils.cuh>
-
-#include <ATen/hip/impl/HIPGuardImplMasqueradingAsCUDA.h> // @manual
+#include <c10/cuda/CUDAGuard.h>
 #else
 #include <ATen/cuda/CUDAGraphsUtils.cuh>
 #endif
@@ -25,15 +25,9 @@
 namespace {
 
 inline int get_device_sm_cnt_() {
-#ifdef __HIP_PLATFORM_AMD__
-  hipDeviceProp_t deviceProp;
-  hipGetDeviceProperties(&deviceProp, c10::hip::current_device());
-  return deviceProp.multiProcessorCount;
-#else
   cudaDeviceProp* deviceProp =
       at::cuda::getDeviceProperties(c10::cuda::current_device());
   return deviceProp->multiProcessorCount;
-#endif
 }
 
 } // namespace

diff --git a/fbgemm_gpu/include/fbgemm_gpu/utils/kernel_launcher.cuh b/fbgemm_gpu/include/fbgemm_gpu/utils/kernel_launcher.cuh
@@ -229,16 +229,7 @@ struct KernelLauncher {
       // transformation.
 
       auto& launch_registry =
-#ifdef __HIPCC__
-          // CUDAKernelLaunchRegistry has only been recently added to Torch
-          // HIPify mappings, so wrap this with USE_ROCM until the mappings land
-          // in PyTorch OSS.
-          //
-          // TODO: Remove when CUDAKernelLaunchRegistry lands in the nightlies
-          c10::hip::HIPKernelLaunchRegistry::get_singleton_ref();
-#else
           c10::cuda::CUDAKernelLaunchRegistry::get_singleton_ref();
-#endif
 
       // If barrier isolation is enabled, synchronize the stream first before
       // launching the kernel.  This has roughly the same effect as setting