Merge pull request #3312 from ReinUsesLisp/atoms-u32

shader/memory: Implement ATOMS.ADD.U32
2020-01-18 00:54:07 -05:00 · 2020-01-18 00:54:07 -05:00 · 15163edaaa
commit 15163edaaa
parent 3cce5056ff 63ba41a26d
5 changed files with 74 additions and 3 deletions
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 {
    Trunc = 11,
 };
 /// Operation selector for atomic instructions; it is the value type of the `operation` bit
 /// field in the `atoms` member of Instruction.
 /// NOTE(review): the ATOMS decoder visible in this change only handles Add and reports every
 /// other value through UNIMPLEMENTED_IF_MSG.
 enum class AtomicOp : u64 {
    Add = 0,
    Min = 1,
    Max = 2,
    Inc = 3,
    Dec = 4,
    And = 5,
    Or = 6,
    Xor = 7,
    Exch = 8,
 };
 enum class UniformType : u64 {
    UnsignedByte = 0,
    SignedByte = 1,
@ -236,6 +248,13 @@ enum class StoreType : u64 {
    Bits128 = 6,
 };
 /// Operand type selector for atomic instructions; it is the value type of the `type` bit field
 /// in the `atoms` member of Instruction.
 /// NOTE(review): names suggest unsigned/signed 32-bit and 64-bit integers - confirm against the
 /// hardware encoding. The ATOMS decoder visible in this change only handles U32.
 enum class AtomicType : u64 {
    U32 = 0,
    S32 = 1,
    U64 = 2,
    S64 = 3,
 };
 enum class IMinMaxExchange : u64 {
    None = 0,
    XLo = 1,
@ -938,6 +957,16 @@ union Instruction {
        BitField<46, 2, u64> cache_mode;
    } stg;
    /// Fields of the ATOMS instruction encoding (atomic operation on shared memory).
    /// NOTE(review): BitField arguments are presumably <first bit, bit count, value type>; with
    /// that reading the three fields below occupy disjoint bit ranges - confirm against BitField.
    union {
        BitField<52, 4, AtomicOp> operation; ///< Which atomic operation to perform
        BitField<28, 2, AtomicType> type;    ///< Operand type of the operation
        BitField<30, 22, s64> offset;        ///< Signed immediate offset, before scaling

        /// Returns the immediate offset field shifted left by two bits (i.e. multiplied by four),
        /// narrowed to s32.
        /// NOTE(review): presumably the field counts 32-bit words and the result is in bytes -
        /// confirm.
        s32 GetImmediateOffset() const {
            return static_cast<s32>(offset << 2);
        }
    } atoms;
    union {
        BitField<32, 1, PhysicalAttributeDirection> direction;
        BitField<47, 3, AttributeSize> size;
@ -1659,9 +1688,10 @@ public:
        ST_A,
        ST_L,
        ST_S,
-        ST,   // Store in generic memory
+        ST,    // Store in generic memory
-        STG,  // Store in global memory
+        STG,   // Store in global memory
-        AL2P, // Transforms attribute memory into physical memory
+        ATOMS, // Atomic operation on shared memory
        AL2P,  // Transforms attribute memory into physical memory
        TEX,
        TEX_B,  // Texture Load Bindless
        TXQ,    // Texture Query
@ -1964,6 +1994,7 @@ private:
            INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
            INST("101-------------", Id::ST, Type::Memory, "ST"),
            INST("1110111011011---", Id::STG, Type::Memory, "STG"),
            INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
            INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
            INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
            INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@ -1856,6 +1856,16 @@ private:
                Type::Uint};
    }
    /// Builds the GLSL expression for an atomic operation on the `smem` array.
    ///
    /// @tparam opname Text appended to "atomic" to form the called GLSL function name.
    /// @tparam type   Type the value operand is converted to, and the type of the result.
    /// @param operation Operand 0 must hold a SmemNode (the target address); operand 1 is the
    ///                  value passed to the atomic function.
    /// @return Expression of the form `atomic<opname>(smem[<address> >> 2], <value>)`.
    template <const std::string_view& opname, Type type>
    Expression Atomic(Operation operation) {
        // This path asserts that the shader being decompiled is a compute shader.
        ASSERT(stage == ShaderType::Compute);
        auto& smem = std::get<SmemNode>(*operation[0]);
        // The address is shifted right by two before indexing.
        // NOTE(review): presumably it is a byte address and smem holds 32-bit words - confirm.
        return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
                            Visit(operation[1]).As(type)),
                type};
    }
    Expression Branch(Operation operation) {
        const auto target = std::get_if<ImmediateNode>(&*operation[0]);
        UNIMPLEMENTED_IF(!target);
@ -2194,6 +2204,8 @@ private:
        &GLSLDecompiler::AtomicImage<Func::Xor>,
        &GLSLDecompiler::AtomicImage<Func::Exchange>,
        &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
        &GLSLDecompiler::Branch,
        &GLSLDecompiler::BranchIndirect,
        &GLSLDecompiler::PushFlowStack,
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@ -1796,6 +1796,11 @@ private:
        return {};
    }
    /// Handler for OperationCode::UAtomicAdd in the SPIR-V decompiler.
    /// Not implemented yet: reports UNIMPLEMENTED() and returns a default-constructed Expression.
    /// The operation argument is ignored.
    Expression UAtomicAdd(Operation) {
        UNIMPLEMENTED();
        return {};
    }
    Expression Branch(Operation operation) {
        const auto& target = std::get<ImmediateNode>(*operation[0]);
        OpStore(jmp_to, Constant(t_uint, target.GetValue()));
@ -2373,6 +2378,8 @@ private:
        &SPIRVDecompiler::AtomicImageXor,
        &SPIRVDecompiler::AtomicImageExchange,
        &SPIRVDecompiler::UAtomicAdd,
        &SPIRVDecompiler::Branch,
        &SPIRVDecompiler::BranchIndirect,
        &SPIRVDecompiler::PushFlowStack,
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@ -16,6 +16,8 @@
 namespace VideoCommon::Shader {
 using Tegra::Shader::AtomicOp;
 using Tegra::Shader::AtomicType;
 using Tegra::Shader::Attribute;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
@ -333,6 +335,23 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        }
        break;
    }
    case OpCode::Id::ATOMS: {
        // Atomic operation on shared memory. Only the Add operation on U32 operands is handled;
        // any other combination is reported as unimplemented, with the raw field value logged.
        UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
                             static_cast<int>(instr.atoms.operation.Value()));
        UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}",
                             static_cast<int>(instr.atoms.type.Value()));
        // Effective address = value of register gpr8 + scaled immediate offset.
        const s32 offset = instr.atoms.GetImmediateOffset();
        Node address = GetRegister(instr.gpr8);
        address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset));
        // Wrap the address in a shared memory node; this is the atomic's target operand.
        Node memory = GetSharedMemory(std::move(address));
        // Register gpr20 supplies the value operand of the atomic add.
        Node data = GetRegister(instr.gpr20);
        Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data));
        // The result of the atomic operation is written to register gpr0.
        SetRegister(bb, instr.gpr0, std::move(value));
        break;
    }
    case OpCode::Id::AL2P: {
        // Ignore al2p.direction since we don't care about it.
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@ -162,6 +162,8 @@ enum class OperationCode {
    AtomicImageXor,      /// (MetaImage, int[N] coords) -> void
    AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
    UAtomicAdd, /// (smem, uint) -> uint
    Branch,         /// (uint branch_target) -> void
    BranchIndirect, /// (uint branch_target) -> void
    PushFlowStack,  /// (uint branch_target) -> void