[cp] Add cudnn attention support to Context Parallel
[ghstack-poisoned]
XilunWu committed Jan 17, 2025
1 parent 95677cb commit a89cf3c
Showing 2 changed files with 3 additions and 2 deletions.
2 changes: 1 addition & 1 deletion torchtitan/models/llama/__init__.py

@@ -14,7 +14,7 @@
     "debugmodel": ModelArgs(dim=256, n_layers=8, n_heads=16, rope_theta=500000),
     "8B": ModelArgs(
         dim=4096,
-        n_layers=32,
+        n_layers=1,
         n_heads=32,
         n_kv_heads=8,
         ffn_dim_multiplier=1.3,
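The hunk above shrinks the "8B" flavor from 32 transformer layers to one, presumably to keep local runs cheap while testing the new backend. A minimal sketch of how such a flavor registry is built, using a stand-in dataclass (the real torchtitan ModelArgs has more fields; only the field names shown in the diff come from the source):

# Stand-in for torchtitan's ModelArgs; field names match the diff,
# defaults and everything else here are illustrative assumptions.
from dataclasses import dataclass
from typing import Optional

@dataclass
class ModelArgs:
    dim: int = 4096
    n_layers: int = 32
    n_heads: int = 32
    n_kv_heads: Optional[int] = None
    ffn_dim_multiplier: Optional[float] = None
    rope_theta: float = 10000.0

# The "8B" entry as of this commit: one layer instead of 32.
configs = {
    "debugmodel": ModelArgs(dim=256, n_layers=8, n_heads=16, rope_theta=500000),
    "8B": ModelArgs(dim=4096, n_layers=1, n_heads=32, n_kv_heads=8,
                    ffn_dim_multiplier=1.3),
}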
3 changes: 2 additions & 1 deletion torchtitan/utils.py

@@ -212,7 +212,8 @@ def context(cp_context: Optional[Generator[None, None, None]] = None):
             # TODO (xilunwu): support cuDNN backend
             stack.enter_context(
                 sdpa_kernel(
-                    [SDPBackend.FLASH_ATTENTION, SDPBackend.EFFICIENT_ATTENTION]
+                    # [SDPBackend.FLASH_ATTENTION, SDPBackend.EFFICIENT_ATTENTION]
+                    [SDPBackend.CUDNN_ATTENTION]
                 )
             )
             stack.enter_context(cp_context)
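The swap above pins PyTorch's SDPA dispatcher to the cuDNN fused-attention backend inside the Context Parallel region, replacing the previous flash/efficient allow-list. A standalone sketch of the same mechanism (shapes, dtype, and device are illustrative assumptions, not from the diff):

# Pin scaled_dot_product_attention to the cuDNN backend, as the hunk does.
import torch
import torch.nn.functional as F
from torch.nn.attention import SDPBackend, sdpa_kernel

q = torch.randn(2, 8, 128, 64, device="cuda", dtype=torch.bfloat16)
k = torch.randn_like(q)
v = torch.randn_like(q)

# Within this context, SDPA may only dispatch to the cuDNN fused-attention
# kernel; if cuDNN cannot serve the inputs, the call errors out instead of
# silently falling back to another backend.
with sdpa_kernel([SDPBackend.CUDNN_ATTENTION]):
    out = F.scaled_dot_product_attention(q, k, v, is_causal=True)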
