From e09c1fbc1f5868b1bff54a69a58fd6d788c54251 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 16 Dec 2019 04:09:24 -0300 Subject: [PATCH] shader/texture: Implement TLD4.PTP --- src/video_core/engines/shader_bytecode.h | 12 +-- .../renderer_opengl/gl_shader_decompiler.cpp | 84 ++++++++++++------- src/video_core/shader/decode/texture.cpp | 74 ++++++++++++---- src/video_core/shader/node.h | 1 + src/video_core/shader/shader_ir.h | 5 +- 5 files changed, 120 insertions(+), 56 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index d6a2cc8b8..1cb0ac0c2 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1239,7 +1239,7 @@ union Instruction { BitField<35, 1, u64> ndv_flag; BitField<49, 1, u64> nodep_flag; BitField<50, 1, u64> dc_flag; - BitField<54, 2, u64> info; + BitField<54, 2, u64> offset_mode; BitField<56, 2, u64> component; bool UsesMiscMode(TextureMiscMode mode) const { @@ -1251,9 +1251,9 @@ union Instruction { case TextureMiscMode::DC: return dc_flag != 0; case TextureMiscMode::AOFFI: - return info == 1; + return offset_mode == 1; case TextureMiscMode::PTP: - return info == 2; + return offset_mode == 2; default: break; } @@ -1265,7 +1265,7 @@ union Instruction { BitField<35, 1, u64> ndv_flag; BitField<49, 1, u64> nodep_flag; BitField<50, 1, u64> dc_flag; - BitField<33, 2, u64> info; + BitField<33, 2, u64> offset_mode; BitField<37, 2, u64> component; bool UsesMiscMode(TextureMiscMode mode) const { @@ -1277,9 +1277,9 @@ union Instruction { case TextureMiscMode::DC: return dc_flag != 0; case TextureMiscMode::AOFFI: - return info == 1; + return offset_mode == 1; case TextureMiscMode::PTP: - return info == 2; + return offset_mode == 2; default: break; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 6b4b8ff67..83a6769ae 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -48,10 +48,10 @@ class ExprDecompiler; enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; -struct TextureAoffi {}; +struct TextureOffset {}; struct TextureDerivates {}; using TextureArgument = std::pair; -using TextureIR = std::variant; +using TextureIR = std::variant; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); @@ -1089,6 +1089,8 @@ private: std::string expr = "texture" + function_suffix; if (!meta->aoffi.empty()) { expr += "Offset"; + } else if (!meta->ptp.empty()) { + expr += "Offsets"; } expr += '(' + GetSampler(meta->sampler) + ", "; expr += coord_constructors.at(count + (has_array ? 1 : 0) + @@ -1117,8 +1119,12 @@ private: for (const auto& variant : extras) { if (const auto argument = std::get_if(&variant)) { expr += GenerateTextureArgument(*argument); - } else if (std::holds_alternative(variant)) { - expr += GenerateTextureAoffi(meta->aoffi); + } else if (std::holds_alternative(variant)) { + if (!meta->aoffi.empty()) { + expr += GenerateTextureAoffi(meta->aoffi); + } else if (!meta->ptp.empty()) { + expr += GenerateTexturePtp(meta->ptp); + } } else if (std::holds_alternative(variant)) { expr += GenerateTextureDerivates(meta->derivates); } else { @@ -1159,6 +1165,20 @@ private: return expr; } + std::string ReadTextureOffset(const Node& value) { + if (const auto immediate = std::get_if(&*value)) { + // Inline the string as an immediate integer in GLSL (AOFFI arguments are required + // to be constant by the standard). + return std::to_string(static_cast(immediate->GetValue())); + } else if (device.HasVariableAoffi()) { + // Avoid using variable AOFFI on unsupported devices. + return Visit(value).AsInt(); + } else { + // Insert 0 on devices not supporting variable AOFFI. + return "0"; + } + } + std::string GenerateTextureAoffi(const std::vector& aoffi) { if (aoffi.empty()) { return {}; @@ -1169,18 +1189,7 @@ private: expr += '('; for (std::size_t index = 0; index < aoffi.size(); ++index) { - const auto operand{aoffi.at(index)}; - if (const auto immediate = std::get_if(&*operand)) { - // Inline the string as an immediate integer in GLSL (AOFFI arguments are required - // to be constant by the standard). - expr += std::to_string(static_cast(immediate->GetValue())); - } else if (device.HasVariableAoffi()) { - // Avoid using variable AOFFI on unsupported devices. - expr += Visit(operand).AsInt(); - } else { - // Insert 0 on devices not supporting variable AOFFI. - expr += '0'; - } + expr += ReadTextureOffset(aoffi.at(index)); if (index + 1 < aoffi.size()) { expr += ", "; } @@ -1190,6 +1199,20 @@ private: return expr; } + std::string GenerateTexturePtp(const std::vector& ptp) { + static constexpr std::size_t num_vectors = 4; + ASSERT(ptp.size() == num_vectors * 2); + + std::string expr = ", ivec2[]("; + for (std::size_t vector = 0; vector < num_vectors; ++vector) { + const bool has_next = vector + 1 < num_vectors; + expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)), + ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : ""); + } + expr += ')'; + return expr; + } + std::string GenerateTextureDerivates(const std::vector& derivates) { if (derivates.empty()) { return {}; @@ -1688,7 +1711,7 @@ private: ASSERT(meta); std::string expr = GenerateTexture( - operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}}); + operation, "", {TextureOffset{}, TextureArgument{Type::Float, meta->bias}}); if (meta->sampler.IsShadow()) { expr = "vec4(" + expr + ')'; } @@ -1700,7 +1723,7 @@ private: ASSERT(meta); std::string expr = GenerateTexture( - operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}}); + operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}}); if (meta->sampler.IsShadow()) { expr = "vec4(" + expr + ')'; } @@ -1708,21 +1731,19 @@ private: } Expression TextureGather(Operation operation) { - const auto meta = std::get_if(&operation.GetMeta()); - ASSERT(meta); + const auto& meta = std::get(operation.GetMeta()); - const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; - if (meta->sampler.IsShadow()) { - return {GenerateTexture(operation, "Gather", {TextureAoffi{}}, true) + - GetSwizzle(meta->element), - Type::Float}; + const auto type = meta.sampler.IsShadow() ? Type::Float : Type::Int; + const bool separate_dc = meta.sampler.IsShadow(); + + std::vector ir; + if (meta.sampler.IsShadow()) { + ir = {TextureOffset{}}; } else { - return {GenerateTexture(operation, "Gather", - {TextureAoffi{}, TextureArgument{type, meta->component}}, - false) + - GetSwizzle(meta->element), - Type::Float}; + ir = {TextureOffset{}, TextureArgument{type, meta.component}}; } + return {GenerateTexture(operation, "Gather", ir, separate_dc) + GetSwizzle(meta.element), + Type::Float}; } Expression TextureQueryDimensions(Operation operation) { @@ -1793,7 +1814,8 @@ private: const auto meta = std::get_if(&operation.GetMeta()); ASSERT(meta); - std::string expr = GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureAoffi{}}); + std::string expr = + GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}}); return {std::move(expr) + GetSwizzle(meta->element), Type::Float}; } diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 6b6458993..dd8ff851e 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -91,18 +91,17 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { case OpCode::Id::TLD4: { UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), "NDV is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), - "PTP is not implemented"); - const auto texture_type = instr.tld4.texture_type.Value(); const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) : instr.tld4.UsesMiscMode(TextureMiscMode::DC); const bool is_array = instr.tld4.array != 0; const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI) : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); - WriteTexInstructionFloat( - bb, instr, - GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless)); + const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP) + : instr.tld4.UsesMiscMode(TextureMiscMode::PTP); + WriteTexInstructionFloat(bb, instr, + GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, + is_ptp, is_bindless)); break; } case OpCode::Id::TLD4S: { @@ -145,7 +144,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, component, element}; + MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -194,7 +193,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, {}, {}, {}, derivates, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, {}, derivates, {}, {}, {}, element}; values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); } @@ -234,7 +233,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { if (!instr.txq.IsComponentEnabled(element)) { continue; } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; const Node value = Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); @@ -303,7 +302,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { continue; } auto params = coords; - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); SetTemporary(bb, indexer++, value); } @@ -542,7 +541,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, for (u32 element = 0; element < values.size(); ++element) { auto copy_coords = coords; - MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, bias, lod, {}, element}; + MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element}; values[element] = Operation(read_method, meta, std::move(copy_coords)); } @@ -639,7 +638,9 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, } Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array, bool is_aoffi, bool is_bindless) { + bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) { + ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time"); + const std::size_t coord_count = GetCoordCount(texture_type); // If enabled arrays index is always stored in the gpr8 field @@ -665,12 +666,15 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de return values; } - std::vector aoffi; + std::vector aoffi, ptp; if (is_aoffi) { aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); + } else if (is_ptp) { + ptp = GetPtpCoordinates( + {GetRegister(parameter_register++), GetRegister(parameter_register++)}); } - Node dc{}; + Node dc; if (depth_compare) { dc = GetRegister(parameter_register++); } @@ -680,8 +684,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{*sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, component, - element}; + MetaTexture meta{ + *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -714,7 +718,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, array_register, {}, {}, {}, {}, lod, {}, element}; + MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } @@ -759,7 +763,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, array, {}, {}, {}, {}, lod, {}, element}; + MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } return values; @@ -824,4 +828,38 @@ std::vector ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor return aoffi; } +std::vector ShaderIR::GetPtpCoordinates(std::array ptp_regs) { + static constexpr u32 num_entries = 8; + + std::vector ptp; + ptp.reserve(num_entries); + + const auto global_size = static_cast(global_code.size()); + const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size); + const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size); + if (!low || !high) { + for (u32 entry = 0; entry < num_entries; ++entry) { + const u32 reg = entry / 4; + const u32 offset = entry % 4; + const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6); + const Node condition = + Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32)); + const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64)); + ptp.push_back(Operation(OperationCode::Select, condition, negative, value)); + } + return ptp; + } + + const u64 immediate = (static_cast(*high) << 32) | static_cast(*low); + for (u32 entry = 0; entry < num_entries; ++entry) { + s32 value = (immediate >> (entry * 8)) & 0b111111; + if (value >= 32) { + value -= 64; + } + ptp.push_back(Immediate(value)); + } + + return ptp; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index abd40f582..4d2f4d6a8 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -374,6 +374,7 @@ struct MetaTexture { Node array; Node depth_compare; std::vector aoffi; + std::vector ptp; std::vector derivates; Node bias; Node lod; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 04ae5f822..baed06ccd 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -350,7 +350,8 @@ private: bool is_array); Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, - bool depth_compare, bool is_array, bool is_aoffi, bool is_bindless); + bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp, + bool is_bindless); Node4 GetTldCode(Tegra::Shader::Instruction instr); @@ -363,6 +364,8 @@ private: std::vector GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); + std::vector GetPtpCoordinates(std::array ptp_regs); + Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, std::vector coords, Node array, Node depth_compare, u32 bias_offset, std::vector aoffi,