Merge pull request #3312 from ReinUsesLisp/atoms-u32

shader/memory: Implement ATOMS.ADD.U32
This commit is contained in:
bunnei 2020-01-18 00:54:07 -05:00 committed by GitHub
commit 15163edaaa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 74 additions and 3 deletions

View File

@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 {
Trunc = 11, Trunc = 11,
}; };
/// Operation selector for atomic memory instructions.
/// Used as the value type of the 4-bit `atoms.operation` field of Instruction.
/// NOTE(review): the numeric values are presumably the raw hardware encodings; do not renumber.
/// The ATOMS decoder currently only handles Add and reports every other value as unimplemented.
enum class AtomicOp : u64 {
Add = 0,
Min = 1,
Max = 2,
Inc = 3,
Dec = 4,
And = 5,
Or = 6,
Xor = 7,
// NOTE(review): presumably "exchange" - confirm against the ISA documentation.
Exch = 8,
};
enum class UniformType : u64 { enum class UniformType : u64 {
UnsignedByte = 0, UnsignedByte = 0,
SignedByte = 1, SignedByte = 1,
@ -236,6 +248,13 @@ enum class StoreType : u64 {
Bits128 = 6, Bits128 = 6,
}; };
/// Operand type selector for atomic memory instructions.
/// Used as the value type of the 2-bit `atoms.type` field of Instruction.
/// NOTE(review): names suggest unsigned/signed 32- and 64-bit integers; confirm against the ISA.
/// The ATOMS decoder currently only handles U32 and reports every other value as unimplemented.
enum class AtomicType : u64 {
U32 = 0,
S32 = 1,
U64 = 2,
S64 = 3,
};
enum class IMinMaxExchange : u64 { enum class IMinMaxExchange : u64 {
None = 0, None = 0,
XLo = 1, XLo = 1,
@ -938,6 +957,16 @@ union Instruction {
BitField<46, 2, u64> cache_mode; BitField<46, 2, u64> cache_mode;
} stg; } stg;
/// Encoding of the ATOMS instruction fields.
union {
    BitField<52, 4, AtomicOp> operation; ///< Atomic operation to perform (bits 52-55)
    BitField<28, 2, AtomicType> type;    ///< Operand type (bits 28-29)
    BitField<30, 22, s64> offset;        ///< Signed immediate offset (bits 30-51)

    /// Returns the immediate offset scaled by four.
    /// NOTE(review): the field is presumably encoded in 32-bit words, making the result a byte
    /// offset - confirm against the ISA documentation.
    s32 GetImmediateOffset() const {
        // Multiply instead of shifting: left-shifting a negative signed value is undefined
        // behaviour before C++20, and this field is signed. The result is identical for every
        // representable value and always fits in 32 bits (22-bit field times four).
        return static_cast<s32>(offset.Value() * 4);
    }
} atoms;
union { union {
BitField<32, 1, PhysicalAttributeDirection> direction; BitField<32, 1, PhysicalAttributeDirection> direction;
BitField<47, 3, AttributeSize> size; BitField<47, 3, AttributeSize> size;
@ -1659,9 +1688,10 @@ public:
ST_A, ST_A,
ST_L, ST_L,
ST_S, ST_S,
ST, // Store in generic memory ST, // Store in generic memory
STG, // Store in global memory STG, // Store in global memory
AL2P, // Transforms attribute memory into physical memory ATOMS, // Atomic operation on shared memory
AL2P, // Transforms attribute memory into physical memory
TEX, TEX,
TEX_B, // Texture Load Bindless TEX_B, // Texture Load Bindless
TXQ, // Texture Query TXQ, // Texture Query
@ -1964,6 +1994,7 @@ private:
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("101-------------", Id::ST, Type::Memory, "ST"), INST("101-------------", Id::ST, Type::Memory, "ST"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
INST("110000----111---", Id::TEX, Type::Texture, "TEX"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),

View File

@ -1856,6 +1856,16 @@ private:
Type::Uint}; Type::Uint};
} }
/// Emits a GLSL atomic function call on the shared memory array.
/// @tparam opname Suffix appended to "atomic" to form the GLSL function name.
/// @tparam type   Type the value operand is converted to and the type of the returned expression.
/// @param operation Operation whose first operand is a shared memory node and whose second
///                  operand is the value passed to the atomic function.
/// @return Expression of the form `atomic<opname>(smem[<address> >> 2], <value>)`.
template <const std::string_view& opname, Type type>
Expression Atomic(Operation operation) {
    // The smem array is only expected to be used from compute shaders.
    ASSERT(stage == ShaderType::Compute);

    // The destination operand has to hold a shared memory node.
    auto& shared_memory = std::get<SmemNode>(*operation[0]);

    // NOTE(review): the ">> 2" presumably turns a byte address into a 32-bit word index.
    return Expression{fmt::format("atomic{}(smem[{} >> 2], {})", opname,
                                  Visit(shared_memory.GetAddress()).AsInt(),
                                  Visit(operation[1]).As(type)),
                      type};
}
Expression Branch(Operation operation) { Expression Branch(Operation operation) {
const auto target = std::get_if<ImmediateNode>(&*operation[0]); const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target); UNIMPLEMENTED_IF(!target);
@ -2194,6 +2204,8 @@ private:
&GLSLDecompiler::AtomicImage<Func::Xor>, &GLSLDecompiler::AtomicImage<Func::Xor>,
&GLSLDecompiler::AtomicImage<Func::Exchange>, &GLSLDecompiler::AtomicImage<Func::Exchange>,
&GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
&GLSLDecompiler::Branch, &GLSLDecompiler::Branch,
&GLSLDecompiler::BranchIndirect, &GLSLDecompiler::BranchIndirect,
&GLSLDecompiler::PushFlowStack, &GLSLDecompiler::PushFlowStack,

View File

@ -1796,6 +1796,11 @@ private:
return {}; return {};
} }
/// Placeholder handler for the UAtomicAdd operation, which this decompiler does not implement.
/// Reports the operation through UNIMPLEMENTED() and returns a default-constructed Expression.
Expression UAtomicAdd(Operation) {
UNIMPLEMENTED();
return {};
}
Expression Branch(Operation operation) { Expression Branch(Operation operation) {
const auto& target = std::get<ImmediateNode>(*operation[0]); const auto& target = std::get<ImmediateNode>(*operation[0]);
OpStore(jmp_to, Constant(t_uint, target.GetValue())); OpStore(jmp_to, Constant(t_uint, target.GetValue()));
@ -2373,6 +2378,8 @@ private:
&SPIRVDecompiler::AtomicImageXor, &SPIRVDecompiler::AtomicImageXor,
&SPIRVDecompiler::AtomicImageExchange, &SPIRVDecompiler::AtomicImageExchange,
&SPIRVDecompiler::UAtomicAdd,
&SPIRVDecompiler::Branch, &SPIRVDecompiler::Branch,
&SPIRVDecompiler::BranchIndirect, &SPIRVDecompiler::BranchIndirect,
&SPIRVDecompiler::PushFlowStack, &SPIRVDecompiler::PushFlowStack,

View File

@ -16,6 +16,8 @@
namespace VideoCommon::Shader { namespace VideoCommon::Shader {
using Tegra::Shader::AtomicOp;
using Tegra::Shader::AtomicType;
using Tegra::Shader::Attribute; using Tegra::Shader::Attribute;
using Tegra::Shader::Instruction; using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode; using Tegra::Shader::OpCode;
@ -333,6 +335,23 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
} }
break; break;
} }
case OpCode::Id::ATOMS: {
    // Only the unsigned 32-bit add variant is decoded; anything else is reported.
    UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
                         static_cast<int>(instr.atoms.operation.Value()));
    UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}",
                         static_cast<int>(instr.atoms.type.Value()));

    // Effective address: register gpr8 plus the instruction's immediate offset.
    const s32 immediate_offset = instr.atoms.GetImmediateOffset();
    Node base = GetRegister(instr.gpr8);
    Node effective_address =
        Operation(OperationCode::IAdd, std::move(base), Immediate(immediate_offset));

    // Atomically add register gpr20 to the shared memory location and store the operation's
    // result in register gpr0.
    Node destination = GetSharedMemory(std::move(effective_address));
    Node addend = GetRegister(instr.gpr20);
    SetRegister(bb, instr.gpr0,
                Operation(OperationCode::UAtomicAdd, std::move(destination), std::move(addend)));
    break;
}
case OpCode::Id::AL2P: { case OpCode::Id::AL2P: {
// Ignore al2p.direction since we don't care about it. // Ignore al2p.direction since we don't care about it.

View File

@ -162,6 +162,8 @@ enum class OperationCode {
AtomicImageXor, /// (MetaImage, int[N] coords) -> void AtomicImageXor, /// (MetaImage, int[N] coords) -> void
AtomicImageExchange, /// (MetaImage, int[N] coords) -> void AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
UAtomicAdd, /// (smem, uint) -> uint
Branch, /// (uint branch_target) -> void Branch, /// (uint branch_target) -> void
BranchIndirect, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void
PushFlowStack, /// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void