shader: Implement VOTE
This commit is contained in:
parent
d40faa1db0
commit
3d07cef009
|
@ -15,6 +15,7 @@ add_library(shader_recompiler STATIC
|
|||
backend/spirv/emit_spirv_memory.cpp
|
||||
backend/spirv/emit_spirv_select.cpp
|
||||
backend/spirv/emit_spirv_undefined.cpp
|
||||
backend/spirv/emit_spirv_vote.cpp
|
||||
environment.h
|
||||
exception.h
|
||||
file_environment.cpp
|
||||
|
@ -122,6 +123,7 @@ add_library(shader_recompiler STATIC
|
|||
frontend/maxwell/translate/impl/select_source_with_predicate.cpp
|
||||
frontend/maxwell/translate/impl/texture_fetch.cpp
|
||||
frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
|
||||
frontend/maxwell/translate/impl/vote.cpp
|
||||
frontend/maxwell/translate/translate.cpp
|
||||
frontend/maxwell/translate/translate.h
|
||||
ir_opt/collect_shader_info_pass.cpp
|
||||
|
|
|
@ -259,6 +259,10 @@ void EmitContext::DefineInputs(const Info& info, Stage stage) {
|
|||
if (info.uses_local_invocation_id) {
|
||||
local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId);
|
||||
}
|
||||
if (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote) {
|
||||
subgroup_local_invocation_id =
|
||||
DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId);
|
||||
}
|
||||
if (info.loads_position) {
|
||||
const bool is_fragment{stage != Stage::Fragment};
|
||||
const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord};
|
||||
|
|
|
@ -82,6 +82,7 @@ public:
|
|||
|
||||
Id workgroup_id{};
|
||||
Id local_invocation_id{};
|
||||
Id subgroup_local_invocation_id{};
|
||||
Id instance_id{};
|
||||
Id instance_index{};
|
||||
Id base_instance{};
|
||||
|
@ -96,7 +97,7 @@ public:
|
|||
std::array<Id, 32> output_generics{};
|
||||
|
||||
std::array<Id, 8> frag_color{};
|
||||
Id frag_depth {};
|
||||
Id frag_depth{};
|
||||
|
||||
std::vector<Id> interfaces;
|
||||
|
||||
|
|
|
@ -224,6 +224,15 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
|
|||
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
|
||||
ctx.AddCapability(spv::Capability::DrawParameters);
|
||||
}
|
||||
if (info.uses_subgroup_vote && profile.support_vote) {
|
||||
ctx.AddExtension("SPV_KHR_shader_ballot");
|
||||
ctx.AddCapability(spv::Capability::SubgroupBallotKHR);
|
||||
if (!profile.warp_size_potentially_larger_than_guest) {
|
||||
// vote ops are only used when not taking the long path
|
||||
ctx.AddExtension("SPV_KHR_subgroup_vote");
|
||||
ctx.AddCapability(spv::Capability::SubgroupVoteKHR);
|
||||
}
|
||||
}
|
||||
// TODO: Track this usage
|
||||
ctx.AddCapability(spv::Capability::ImageGatherExtended);
|
||||
}
|
||||
|
|
|
@ -346,5 +346,9 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
|
|||
Id coords, Id dref, Id bias_lc, Id offset);
|
||||
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
||||
Id coords, Id dref, Id lod_lc, Id offset);
|
||||
Id EmitVoteAll(EmitContext& ctx, Id pred);
|
||||
Id EmitVoteAny(EmitContext& ctx, Id pred);
|
||||
Id EmitVoteEqual(EmitContext& ctx, Id pred);
|
||||
Id EmitSubgroupBallot(EmitContext& ctx, Id pred);
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
namespace {
|
||||
Id LargeWarpBallot(EmitContext& ctx, Id ballot) {
|
||||
const Id shift{ctx.Constant(ctx.U32[1], 5)};
|
||||
const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
|
||||
return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
Id EmitVoteAll(EmitContext& ctx, Id pred) {
|
||||
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
return ctx.OpSubgroupAllKHR(ctx.U1, pred);
|
||||
}
|
||||
const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
|
||||
const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
|
||||
const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
|
||||
const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
|
||||
return ctx.OpIEqual(ctx.U1, lhs, active_mask);
|
||||
}
|
||||
|
||||
Id EmitVoteAny(EmitContext& ctx, Id pred) {
|
||||
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
return ctx.OpSubgroupAnyKHR(ctx.U1, pred);
|
||||
}
|
||||
const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
|
||||
const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
|
||||
const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
|
||||
const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
|
||||
return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value);
|
||||
}
|
||||
|
||||
Id EmitVoteEqual(EmitContext& ctx, Id pred) {
|
||||
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred);
|
||||
}
|
||||
const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
|
||||
const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
|
||||
const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
|
||||
const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)};
|
||||
return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value),
|
||||
ctx.OpIEqual(ctx.U1, lhs, active_mask));
|
||||
}
|
||||
|
||||
Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
|
||||
const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)};
|
||||
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
|
||||
return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U);
|
||||
}
|
||||
return LargeWarpBallot(ctx, ballot);
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
|
@ -1444,4 +1444,20 @@ F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coor
|
|||
return Inst<F32>(op, Flags{info}, handle, coords, dref, lod_lc, offset);
|
||||
}
|
||||
|
||||
U1 IREmitter::VoteAll(const U1& value) {
|
||||
return Inst<U1>(Opcode::VoteAll, value);
|
||||
}
|
||||
|
||||
U1 IREmitter::VoteAny(const U1& value) {
|
||||
return Inst<U1>(Opcode::VoteAny, value);
|
||||
}
|
||||
|
||||
U1 IREmitter::VoteEqual(const U1& value) {
|
||||
return Inst<U1>(Opcode::VoteEqual, value);
|
||||
}
|
||||
|
||||
U32 IREmitter::SubgroupBallot(const U1& value) {
|
||||
return Inst<U32>(Opcode::SubgroupBallot, value);
|
||||
}
|
||||
|
||||
} // namespace Shader::IR
|
||||
|
|
|
@ -234,6 +234,11 @@ public:
|
|||
const Value& offset, const F32& lod_clamp,
|
||||
TextureInstInfo info);
|
||||
|
||||
[[nodiscard]] U1 VoteAll(const U1& value);
|
||||
[[nodiscard]] U1 VoteAny(const U1& value);
|
||||
[[nodiscard]] U1 VoteEqual(const U1& value);
|
||||
[[nodiscard]] U32 SubgroupBallot(const U1& value);
|
||||
|
||||
private:
|
||||
IR::Block::iterator insertion_point;
|
||||
|
||||
|
|
|
@ -355,3 +355,9 @@ OPCODE(ImageSampleImplicitLod, F32x4, U32,
|
|||
OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
|
||||
OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
|
||||
OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
|
||||
|
||||
// Vote operations
|
||||
OPCODE(VoteAll, U1, U1, )
|
||||
OPCODE(VoteAny, U1, U1, )
|
||||
OPCODE(VoteEqual, U1, U1, )
|
||||
OPCODE(SubgroupBallot, U32, U1, )
|
||||
|
|
|
@ -417,10 +417,6 @@ void TranslatorVisitor::VMNMX(u64) {
|
|||
ThrowNotImplemented(Opcode::VMNMX);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::VOTE(u64) {
|
||||
ThrowNotImplemented(Opcode::VOTE);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::VOTE_vtg(u64) {
|
||||
ThrowNotImplemented(Opcode::VOTE_vtg);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
enum class VoteOp : u64 {
|
||||
ALL,
|
||||
ANY,
|
||||
EQ,
|
||||
};
|
||||
|
||||
[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) {
|
||||
switch (vote_op) {
|
||||
case VoteOp::ALL:
|
||||
return ir.VoteAll(pred);
|
||||
case VoteOp::ANY:
|
||||
return ir.VoteAny(pred);
|
||||
case VoteOp::EQ:
|
||||
return ir.VoteEqual(pred);
|
||||
default:
|
||||
throw NotImplementedException("Invalid VOTE op {}", vote_op);
|
||||
}
|
||||
}
|
||||
|
||||
void Vote(TranslatorVisitor& v, u64 insn) {
|
||||
union {
|
||||
u64 insn;
|
||||
BitField<0, 8, IR::Reg> dest_reg;
|
||||
BitField<39, 3, IR::Pred> pred_a;
|
||||
BitField<42, 1, u64> neg_pred_a;
|
||||
BitField<45, 3, IR::Pred> pred_b;
|
||||
BitField<48, 2, VoteOp> vote_op;
|
||||
} const vote{insn};
|
||||
|
||||
const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)};
|
||||
v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op));
|
||||
v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred));
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void TranslatorVisitor::VOTE(u64 insn) {
|
||||
Vote(*this, insn);
|
||||
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
|
@ -359,6 +359,12 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
|||
inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
|
||||
break;
|
||||
}
|
||||
case IR::Opcode::VoteAll:
|
||||
case IR::Opcode::VoteAny:
|
||||
case IR::Opcode::VoteEqual:
|
||||
case IR::Opcode::SubgroupBallot:
|
||||
info.uses_subgroup_vote = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -19,6 +19,8 @@ struct Profile {
|
|||
bool support_fp16_signed_zero_nan_preserve{};
|
||||
bool support_fp32_signed_zero_nan_preserve{};
|
||||
bool support_fp64_signed_zero_nan_preserve{};
|
||||
bool support_vote{};
|
||||
bool warp_size_potentially_larger_than_guest{};
|
||||
|
||||
// FClamp is broken and OpFMax + OpFMin should be used instead
|
||||
bool has_broken_spirv_clamp{};
|
||||
|
|
|
@ -80,6 +80,7 @@ struct Info {
|
|||
bool uses_sampled_1d{};
|
||||
bool uses_sparse_residency{};
|
||||
bool uses_demote_to_helper_invocation{};
|
||||
bool uses_subgroup_vote{};
|
||||
|
||||
IR::Type used_constant_buffer_types{};
|
||||
|
||||
|
|
|
@ -36,13 +36,18 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip
|
|||
descriptor_update_template = std::move(tuple.descriptor_update_template);
|
||||
descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout);
|
||||
|
||||
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
|
||||
.pNext = nullptr,
|
||||
.requiredSubgroupSize = GuestWarpSize,
|
||||
};
|
||||
pipeline = device.GetLogical().CreateComputePipeline({
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stage{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
|
||||
.flags = 0,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.module = *spv_module,
|
||||
|
|
|
@ -455,6 +455,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
|
|||
float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
|
||||
.support_fp64_signed_zero_nan_preserve =
|
||||
float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
|
||||
.support_vote = true,
|
||||
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
|
||||
.has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
|
||||
};
|
||||
}
|
||||
|
|
|
@ -737,6 +737,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
|
|||
subgroup_properties.maxSubgroupSize >= GuestWarpSize) {
|
||||
extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
|
||||
guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages;
|
||||
ext_subgroup_size_control = true;
|
||||
}
|
||||
} else {
|
||||
is_warp_potentially_bigger = true;
|
||||
|
|
|
@ -193,6 +193,11 @@ public:
|
|||
return ext_shader_viewport_index_layer;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_subgroup_size_control.
|
||||
bool IsExtSubgroupSizeControlSupported() const {
|
||||
return ext_subgroup_size_control;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_transform_feedback.
|
||||
bool IsExtTransformFeedbackSupported() const {
|
||||
return ext_transform_feedback;
|
||||
|
@ -297,6 +302,7 @@ private:
|
|||
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
|
||||
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
|
||||
bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
|
||||
bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control.
|
||||
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
|
||||
bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
|
||||
bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
|
||||
|
|
Loading…
Reference in New Issue