Skip to content

Commit

Permalink
Implement Quad Control intrinsics (#5981)
Browse files Browse the repository at this point in the history
  • Loading branch information
fairywreath authored Jan 18, 2025
1 parent 87a0816 commit a85c350
Show file tree
Hide file tree
Showing 18 changed files with 632 additions and 124 deletions.
21 changes: 21 additions & 0 deletions docs/user-guide/a3-02-reference-capability-atoms.md
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,12 @@ Extensions
`SPV_EXT_demote_to_helper_invocation`
> Represents the SPIR-V extension for demoting to helper invocation.
`SPV_KHR_maximal_reconvergence`
> Represents the SPIR-V extension for maximal reconvergence.
`SPV_KHR_quad_control`
> Represents the SPIR-V extension for quad group control.
`SPV_KHR_fragment_shader_barycentric`
> Represents the SPIR-V extension for fragment shader barycentric.
Expand Down Expand Up @@ -503,6 +509,12 @@ Extensions
`spvDemoteToHelperInvocation`
> Represents the SPIR-V capability for demoting to helper invocation.
`spvMaximalReconvergenceKHR`
> Represents the SPIR-V capability for maximal reconvergence.
`spvQuadControlKHR`
> Represents the SPIR-V capability for quad group control.
`GL_EXT_buffer_reference`
> Represents the GL_EXT_buffer_reference extension.
Expand All @@ -515,6 +527,12 @@ Extensions
`GL_EXT_demote_to_helper_invocation`
> Represents the GL_EXT_demote_to_helper_invocation extension.
`GL_EXT_maximal_reconvergence`
> Represents the GL_EXT_maximal_reconvergence extension.
`GL_EXT_shader_quad_control`
> Represents the GL_EXT_shader_quad_control extension.
`GL_EXT_fragment_shader_barycentric`
> Represents the GL_EXT_fragment_shader_barycentric extension.
Expand Down Expand Up @@ -1078,6 +1096,9 @@ Compound Capabilities
`helper_lane`
> Capabilities required to enable helper-lane demotion
`quad_control`
> Capabilities required to enable quad group control
`breakpoint`
> Capabilities required to enable shader breakpoints
Expand Down
21 changes: 21 additions & 0 deletions source/slang/core.meta.slang
Original file line number Diff line number Diff line change
Expand Up @@ -3244,6 +3244,12 @@ __Addr<T> __getLegalizedSPIRVGlobalParamAddr(T val);
__intrinsic_op($(kIROp_RequireComputeDerivative))
void __requireComputeDerivative();

// Marker intrinsic: a call to this function is represented by the
// `RequireMaximallyReconverges` IR op. It takes no arguments and returns nothing.
__intrinsic_op($(kIROp_RequireMaximallyReconverges))
void __requireMaximallyReconverges();

// Marker intrinsic: a call to this function is represented by the
// `RequireQuadDerivatives` IR op. It takes no arguments and returns nothing.
__intrinsic_op($(kIROp_RequireQuadDerivatives))
void __requireQuadDerivatives();

//@ public:
/// @category misc_types
enum MemoryOrder
Expand Down Expand Up @@ -3978,6 +3984,21 @@ attribute_syntax [DerivativeGroupQuad] : DerivativeGroupQuadAttribute;
__attributeTarget(FuncDecl)
attribute_syntax [DerivativeGroupLinear] : DerivativeGroupLinearAttribute;

/// Emits `MaximallyReconvergesKHR` execution mode when producing SPIR-V.
/// This attribute has no effect on other targets.
__attributeTarget(FuncDecl)
attribute_syntax [MaximallyReconverges] : MaximallyReconvergesAttribute;

/// Emits `QuadDerivativesKHR` execution mode when producing SPIR-V.
/// This attribute has no effect on other targets.
__attributeTarget(FuncDecl)
attribute_syntax [QuadDerivatives] : QuadDerivativesAttribute;

/// Emits `RequireFullQuadsKHR` execution mode when producing SPIR-V.
/// This attribute has no effect on other targets.
__attributeTarget(FuncDecl)
attribute_syntax [RequireFullQuads] : RequireFullQuadsAttribute;

__generic<T>
typealias NodePayloadPtr = Ptr<T, $( (uint64_t)AddressSpace::NodePayloadAMDX)>;

57 changes: 57 additions & 0 deletions source/slang/hlsl.meta.slang
Original file line number Diff line number Diff line change
Expand Up @@ -15687,6 +15687,63 @@ bool IsHelperLane()
}
}

//
// Quad Control intrinsics
//
// For SPIRV and GLSL targets, the behavior is taken from Vulkan's `VK_KHR_shader_quad_control` spec.
// QuadAny/QuadAll will map to OpGroupNonUniformQuadAnyKHR/OpGroupNonUniformQuadAllKHR, and using either
// of these functions will result in the QuadDerivativesKHR execution mode being used. If
// MaximallyReconvergesKHR is not already specified by other means, it will be added when using either
// of QuadAny/QuadAll.
//

//@public:
/// Returns true if `expr` is true in any lane of the current quad.
///
/// Per-target lowering: HLSL `QuadAny`, GLSL `subgroupQuadAny`, Metal `quad_any`,
/// SPIR-V `OpGroupNonUniformQuadAnyKHR`.
///
/// @param expr Boolean condition supplied by the calling lane.
/// @return The result of the quad-wide "any" vote over `expr`.
__glsl_extension(GL_KHR_shader_subgroup_vote)
__glsl_extension(GL_EXT_maximal_reconvergence)
__glsl_extension(GL_EXT_shader_quad_control)
[ForceInline]
[require(glsl_hlsl_metal_spirv, quad_control)]
bool QuadAny(bool expr)
{
// Both requirement markers are emitted unconditionally, ahead of the target switch,
// so every use of this function records the maximal-reconvergence and
// quad-derivatives requirements.
__requireMaximallyReconverges();
__requireQuadDerivatives();
__target_switch
{
case hlsl: __intrinsic_asm "QuadAny";
case glsl: __intrinsic_asm "subgroupQuadAny";
case metal: __intrinsic_asm "quad_any";
case spirv:
// SPIR-V has no named intrinsic here; the vote opcode is emitted inline.
return spirv_asm
{
result:$$bool = OpGroupNonUniformQuadAnyKHR $expr;
};
}
}

//@public:
/// Returns true if `expr` is true in all lanes of the current quad.
///
/// Per-target lowering: HLSL `QuadAll`, GLSL `subgroupQuadAll`, Metal `quad_all`,
/// SPIR-V `OpGroupNonUniformQuadAllKHR`.
///
/// @param expr Boolean condition supplied by the calling lane.
/// @return The result of the quad-wide "all" vote over `expr`.
__glsl_extension(GL_KHR_shader_subgroup_vote)
__glsl_extension(GL_EXT_maximal_reconvergence)
__glsl_extension(GL_EXT_shader_quad_control)
[ForceInline]
[require(glsl_hlsl_metal_spirv, quad_control)]
bool QuadAll(bool expr)
{
// Both requirement markers are emitted unconditionally, ahead of the target switch,
// so every use of this function records the maximal-reconvergence and
// quad-derivatives requirements.
__requireMaximallyReconverges();
__requireQuadDerivatives();
__target_switch
{
case hlsl: __intrinsic_asm "QuadAll";
case glsl: __intrinsic_asm "subgroupQuadAll";
case metal: __intrinsic_asm "quad_all";
case spirv:
// SPIR-V has no named intrinsic here; the vote opcode is emitted inline.
return spirv_asm
{
result:$$bool = OpGroupNonUniformQuadAllKHR $expr;
};
}
}

// `typedef`s to help with the fact that HLSL has been sorta-kinda case insensitive at various points
//@hidden:
typedef Texture2D texture2D;
Expand Down
15 changes: 15 additions & 0 deletions source/slang/slang-ast-modifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -1664,6 +1664,21 @@ class DerivativeGroupLinearAttribute : public Attribute
SLANG_AST_CLASS(DerivativeGroupLinearAttribute)
};

/// AST node for the `[MaximallyReconverges]` attribute. The attribute's
/// stdlib declaration documents it as emitting the `MaximallyReconvergesKHR`
/// execution mode when producing SPIR-V.
class MaximallyReconvergesAttribute : public Attribute
{
SLANG_AST_CLASS(MaximallyReconvergesAttribute)
};

/// AST node for the `[QuadDerivatives]` attribute. The attribute's
/// stdlib declaration documents it as emitting the `QuadDerivativesKHR`
/// execution mode when producing SPIR-V.
class QuadDerivativesAttribute : public Attribute
{
SLANG_AST_CLASS(QuadDerivativesAttribute)
};

/// AST node for the `[RequireFullQuads]` attribute. The attribute's
/// stdlib declaration documents it as emitting the `RequireFullQuadsKHR`
/// execution mode when producing SPIR-V.
class RequireFullQuadsAttribute : public Attribute
{
SLANG_AST_CLASS(RequireFullQuadsAttribute)
};

/// A `[payload]` attribute indicates that a `struct` type will be used as
/// a ray payload for `TraceRay()` calls, and thus also as input/output
/// for shaders in the ray tracing pipeline that might be invoked for
Expand Down
33 changes: 33 additions & 0 deletions source/slang/slang-capabilities.capdef
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,14 @@ def SPV_EXT_mesh_shader : _spirv_1_4;
/// [EXT]
def SPV_EXT_demote_to_helper_invocation : _spirv_1_4;

/// Represents the SPIR-V extension for maximal reconvergence.
/// [EXT]
def SPV_KHR_maximal_reconvergence : _spirv_1_0;

/// Represents the SPIR-V extension for quad group control.
/// [EXT]
def SPV_KHR_quad_control : _spirv_1_3;

/// Represents the SPIR-V extension for fragment shader barycentric.
/// [EXT]
def SPV_KHR_fragment_shader_barycentric : _spirv_1_0;
Expand Down Expand Up @@ -654,6 +662,14 @@ def spvDemoteToHelperInvocationEXT : SPV_EXT_demote_to_helper_invocation;
/// [EXT]
def spvDemoteToHelperInvocation : spvDemoteToHelperInvocationEXT;

/// Represents the SPIR-V capability for maximal reconvergence.
/// [EXT]
def spvMaximalReconvergenceKHR : SPV_KHR_maximal_reconvergence;

/// Represents the SPIR-V capability for quad group control.
/// [EXT]
def spvQuadControlKHR : SPV_KHR_quad_control;

// The following capabilities all pertain to how ray tracing shaders are translated
// to GLSL, where there are two different extensions that can provide the core
// functionality of `TraceRay` and the related operations.
Expand Down Expand Up @@ -691,6 +707,8 @@ def _GL_EXT_shader_image_load_store : _GLSL_130;
def _GL_EXT_shader_realtime_clock : glsl;
def _GL_EXT_texture_query_lod : glsl;
def _GL_EXT_texture_shadow_lod : _GLSL_130;
def _GL_EXT_maximal_reconvergence : _GLSL_140;
def _GL_EXT_shader_quad_control : _GLSL_140;

def _GL_ARB_derivative_control : _GLSL_400;
def _GL_ARB_fragment_shader_interlock : _GLSL_450;
Expand Down Expand Up @@ -746,6 +764,14 @@ alias GL_EXT_debug_printf = _GL_EXT_debug_printf | SPV_KHR_non_semantic_info;
/// [EXT]
alias GL_EXT_demote_to_helper_invocation = _GL_EXT_demote_to_helper_invocation | spvDemoteToHelperInvocationEXT;

/// Represents the GL_EXT_maximal_reconvergence extension.
/// [EXT]
alias GL_EXT_maximal_reconvergence = _GL_EXT_maximal_reconvergence | spvMaximalReconvergenceKHR;

/// Represents the GL_EXT_shader_quad_control extension.
/// [EXT]
alias GL_EXT_shader_quad_control = _GL_EXT_shader_quad_control | spvQuadControlKHR;

/// Represents the GL_EXT_fragment_shader_barycentric extension.
/// [EXT]
alias GL_EXT_fragment_shader_barycentric = _GL_EXT_fragment_shader_barycentric | spvFragmentBarycentricKHR;
Expand Down Expand Up @@ -1925,6 +1951,13 @@ alias helper_lane = _sm_6_0 + fragment
| metal + fragment
;

/// Capabilities required to enable quad group control
/// [Compound]
alias quad_control = _sm_6_7
| GL_EXT_shader_quad_control + GL_EXT_maximal_reconvergence + GL_KHR_shader_subgroup_vote
| metal
;

/// Capabilities required to enable shader breakpoints
/// [Compound]
alias breakpoint = GL_EXT_debug_printf | hlsl | _cuda_sm_8_0 | cpp;
Expand Down
8 changes: 0 additions & 8 deletions source/slang/slang-emit-c-like.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,6 @@ void CLikeSourceEmitter::emitPreModuleImpl()
m_writer->emit("\n");
}
}
// Post-module hook: writes the text held in
// `m_requiredAfter.requireComputeDerivatives`, followed by a newline,
// and does nothing when that text is empty.
void CLikeSourceEmitter::emitPostModuleImpl()
{
    const auto& trailingText = m_requiredAfter.requireComputeDerivatives;
    if (trailingText.getLength() <= 0)
        return;

    m_writer->emit(trailingText);
    m_writer->emit("\n");
}

//
// Types
Expand Down
6 changes: 0 additions & 6 deletions source/slang/slang-emit-c-like.h
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,6 @@ class CLikeSourceEmitter : public SourceEmitterBase
void emitFrontMatter(TargetRequest* targetReq) { emitFrontMatterImpl(targetReq); }

void emitPreModule() { emitPreModuleImpl(); }
void emitPostModule() { emitPostModuleImpl(); }
void emitModule(IRModule* module, DiagnosticSink* sink)
{
m_irModule = module;
Expand Down Expand Up @@ -555,7 +554,6 @@ class CLikeSourceEmitter : public SourceEmitterBase
/// For example on targets that don't have built in vector/matrix support, this is where
/// the appropriate generated declarations occur.
virtual void emitPreModuleImpl();
virtual void emitPostModuleImpl();

virtual void emitSimpleTypeAndDeclaratorImpl(IRType* type, DeclaratorInfo* declarator);
void emitSimpleTypeAndDeclarator(IRType* type, DeclaratorInfo* declarator)
Expand Down Expand Up @@ -736,10 +734,6 @@ class CLikeSourceEmitter : public SourceEmitterBase
Dictionary<IRInst*, String> m_mapInstToName;

OrderedHashSet<IRStringLit*> m_requiredPreludes;
struct RequiredAfter
{
String requireComputeDerivatives;
} m_requiredAfter;

Dictionary<const char*, IRStringLit*> m_builtinPreludes;
};
Expand Down
Loading

0 comments on commit a85c350

Please sign in to comment.