shader: Implement FSWZADD

This commit is contained in:
ameerj 2021-03-28 22:23:45 -04:00
parent 34aba9627a
commit 6c51f49632
14 changed files with 87 additions and 4 deletions

View File

@ -89,6 +89,7 @@ add_library(shader_recompiler STATIC
frontend/maxwell/translate/impl/floating_point_multiply.cpp
frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
frontend/maxwell/translate/impl/half_floating_point_add.cpp
frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
frontend/maxwell/translate/impl/half_floating_point_helper.cpp

View File

@ -393,6 +393,14 @@ void EmitContext::DefineInputs(const Info& info) {
subgroup_local_invocation_id =
DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId);
}
if (info.uses_fswzadd) {
const Id f32_one{Constant(F32[1], 1.0f)};
const Id f32_minus_one{Constant(F32[1], -1.0f)};
const Id f32_zero{Constant(F32[1], 0.0f)};
fswzadd_lut_a = ConstantComposite(F32[4], f32_minus_one, f32_one, f32_minus_one, f32_zero);
fswzadd_lut_b =
ConstantComposite(F32[4], f32_minus_one, f32_minus_one, f32_one, f32_minus_one);
}
if (info.loads_position) {
const bool is_fragment{stage != Stage::Fragment};
const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord};

View File

@ -103,6 +103,8 @@ public:
Id vertex_index{};
Id base_vertex{};
Id front_face{};
Id fswzadd_lut_a{};
Id fswzadd_lut_b{};
Id local_memory{};

View File

@ -397,5 +397,6 @@ Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clam
Id segmentation_mask);
Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp,
Id segmentation_mask);
/// Emits the SPIR-V sequence for the FSwizzleAdd IR opcode and returns the Id of the F32 result.
/// op_a and op_b are the two F32 operands; swizzle is a U32 value from which each invocation
/// extracts its own 2-bit selector.
Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle);
} // namespace Shader::Backend::SPIRV

View File

@ -132,4 +132,20 @@ Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id
return SelectValue(ctx, in_range, value, src_thread_id);
}
/// Emits SPIR-V for FSwizzleAdd.
///
/// Each invocation takes the low two bits of its SubgroupLocalInvocationId, doubles them to get a
/// bit offset, and extracts the 2-bit selector found at that offset in `swizzle`. The selector
/// indexes ctx.fswzadd_lut_a and ctx.fswzadd_lut_b, and the result is
/// op_a * lut_a[selector] + op_b * lut_b[selector].
///
/// @param ctx     Emit context providing types, constants and the lookup vectors.
/// @param op_a    F32 operand scaled by the entry from fswzadd_lut_a.
/// @param op_b    F32 operand scaled by the entry from fswzadd_lut_b.
/// @param swizzle U32 value holding the packed 2-bit selectors.
/// @return Id of the F32 sum.
Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) {
    const Id two_bit_mask{ctx.Constant(ctx.U32[1], 3)};

    // Bit offset into `swizzle`: 2 * (invocation id & 3).
    const Id invocation_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
    const Id low_bits{ctx.OpBitwiseAnd(ctx.U32[1], invocation_id, two_bit_mask)};
    const Id bit_offset{
        ctx.OpShiftLeftLogical(ctx.U32[1], low_bits, ctx.Constant(ctx.U32[1], 1))};

    // Selector for this invocation: (swizzle >> bit_offset) & 3.
    const Id shifted_swizzle{ctx.OpShiftRightLogical(ctx.U32[1], swizzle, bit_offset)};
    const Id selector{ctx.OpBitwiseAnd(ctx.U32[1], shifted_swizzle, two_bit_mask)};

    // Look up the per-operand multipliers.
    const Id scale_a{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_a, selector)};
    const Id scale_b{ctx.OpVectorExtractDynamic(ctx.F32[1], ctx.fswzadd_lut_b, selector)};

    const Id scaled_a{ctx.OpFMul(ctx.F32[1], op_a, scale_a)};
    const Id scaled_b{ctx.OpFMul(ctx.F32[1], op_b, scale_b)};
    return ctx.OpFAdd(ctx.F32[1], scaled_a, scaled_b);
}
} // namespace Shader::Backend::SPIRV

View File

@ -1602,4 +1602,7 @@ U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, cons
const IR::U32& seg_mask) {
return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask);
}
/// Emits an FSwizzleAdd instruction with operands (a, b, swizzle) and attaches `control` to it
/// as the instruction flags. Returns the F32 result value.
F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) {
    const Flags fp_flags{control};
    return Inst<F32>(Opcode::FSwizzleAdd, fp_flags, a, b, swizzle);
}
} // namespace Shader::IR

View File

@ -277,6 +277,8 @@ public:
const IR::U32& seg_mask);
[[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index,
const IR::U32& clamp, const IR::U32& seg_mask);
/// Emits an FSwizzleAdd instruction from two F32 operands and a U32 swizzle value.
/// `control` carries the floating-point control flags and defaults to a value-initialized
/// FpControl.
[[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle,
FpControl control = {});
private:
IR::Block::iterator insertion_point;

View File

@ -408,3 +408,4 @@ OPCODE(ShuffleIndex, U32, U32,
OPCODE(ShuffleUp, U32, U32, U32, U32, U32, )
OPCODE(ShuffleDown, U32, U32, U32, U32, U32, )
OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, )
OPCODE(FSwizzleAdd, F32, F32, F32, U32, )

View File

@ -0,0 +1,44 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
/// Translates the FSWZADD instruction into an IR FSwizzleAdd.
///
/// The two float sources are read with GetFloatReg8 and GetFloatReg20, the 8-bit swizzle is taken
/// from bits 28-35 as an immediate, and the result is written to the destination register.
/// Rounding and FTZ bits are forwarded through IR::FpControl.
///
/// @throws NotImplementedException when the NDV bit or the CC bit is set.
void TranslatorVisitor::FSWZADD(u64 insn) {
    // Bit-level view of the instruction encoding.
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<28, 8, u64> swizzle;
        BitField<38, 1, u64> ndv;
        BitField<39, 2, FpRounding> round;
        BitField<44, 1, u64> ftz;
        BitField<47, 1, u64> cc;
    } const encoding{insn};

    if (encoding.ndv != 0) {
        throw NotImplementedException("FSWZADD NDV");
    }

    const IR::F32 operand_a{GetFloatReg8(insn)};
    const IR::F32 operand_b{GetFloatReg20(insn)};
    const u32 swizzle_bits{static_cast<u32>(encoding.swizzle)};
    const IR::U32 swizzle_imm{ir.Imm32(swizzle_bits)};

    const IR::FmzMode flush_mode{encoding.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None};
    const IR::FpControl control{
        .no_contraction{false},
        .rounding{CastFpRounding(encoding.round)},
        .fmz_mode{flush_mode},
    };

    const IR::F32 sum{ir.FSwizzleAdd(operand_a, operand_b, swizzle_imm, control)};
    F(encoding.dest_reg, sum);

    // Condition-code output is rejected only after the result has been written.
    if (encoding.cc != 0) {
        throw NotImplementedException("FSWZADD CC");
    }
}
} // namespace Shader::Maxwell

View File

@ -91,6 +91,10 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
return X(reg.index);
}
/// Returns the value produced by GetReg8(insn), bit-cast to IR::F32.
IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) {
    const IR::U32 raw_value{GetReg8(insn)};
    return ir.BitCast<IR::F32>(raw_value);
}
/// Returns the value produced by GetReg20(insn), bit-cast to IR::F32.
IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) {
    const IR::U32 raw_value{GetReg20(insn)};
    return ir.BitCast<IR::F32>(raw_value);
}

View File

@ -353,6 +353,7 @@ public:
[[nodiscard]] IR::U32 GetReg8(u64 insn);
[[nodiscard]] IR::U32 GetReg20(u64 insn);
[[nodiscard]] IR::U32 GetReg39(u64 insn);
[[nodiscard]] IR::F32 GetFloatReg8(u64 insn);
[[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
[[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
[[nodiscard]] IR::F64 GetDoubleReg20(u64 insn);

View File

@ -89,10 +89,6 @@ void TranslatorVisitor::FCHK_imm(u64) {
ThrowNotImplemented(Opcode::FCHK_imm);
}
// Placeholder handler for FSWZADD: ignores the instruction word and forwards the opcode to
// ThrowNotImplemented.
void TranslatorVisitor::FSWZADD(u64) {
ThrowNotImplemented(Opcode::FSWZADD);
}
// Placeholder handler for GETCRSPTR: ignores the instruction word and forwards the opcode to
// ThrowNotImplemented.
void TranslatorVisitor::GETCRSPTR(u64) {
ThrowNotImplemented(Opcode::GETCRSPTR);
}

View File

@ -389,6 +389,9 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::SubgroupBallot:
info.uses_subgroup_vote = true;
break;
case IR::Opcode::FSwizzleAdd:
info.uses_fswzadd = true;
break;
default:
break;
}

View File

@ -94,6 +94,7 @@ struct Info {
bool uses_sparse_residency{};
bool uses_demote_to_helper_invocation{};
bool uses_subgroup_vote{};
bool uses_fswzadd{};
IR::Type used_constant_buffer_types{};