From c921e496eb47de49a4d6ce62527581b966dca259 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 3 Jan 2020 16:16:29 -0400 Subject: [PATCH 01/16] GPU: Implement guest driver profile and deduce texture handler sizes. --- src/video_core/CMakeLists.txt | 2 + .../engines/const_buffer_engine_interface.h | 3 ++ src/video_core/engines/kepler_compute.cpp | 4 ++ src/video_core/engines/kepler_compute.h | 2 + src/video_core/engines/maxwell_3d.cpp | 4 ++ src/video_core/engines/maxwell_3d.h | 2 + src/video_core/guest_driver.cpp | 34 +++++++++++++++++ src/video_core/guest_driver.h | 37 +++++++++++++++++++ src/video_core/rasterizer_interface.h | 8 ++++ src/video_core/shader/const_buffer_locker.h | 8 ++++ src/video_core/shader/decode.cpp | 21 +++++++++++ src/video_core/shader/shader_ir.cpp | 1 + src/video_core/shader/shader_ir.h | 1 + 13 files changed, 127 insertions(+) create mode 100644 src/video_core/guest_driver.cpp create mode 100644 src/video_core/guest_driver.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index ccfed4f2e..04a25da4f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -29,6 +29,8 @@ add_library(video_core STATIC gpu_synch.h gpu_thread.cpp gpu_thread.h + guest_driver.cpp + guest_driver.h macro_interpreter.cpp macro_interpreter.h memory_manager.cpp diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index 44b8b8d22..c29156e34 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_type.h" +#include "video_core/guest_driver.h" #include "video_core/textures/texture.h" namespace Tegra::Engines { @@ -106,6 +107,8 @@ public: virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, u64 offset) const = 0; virtual u32 GetBoundBuffer() const = 0; + + virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; }; } // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 110406f2f..f177ae938 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -94,6 +94,10 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con return result; } +VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { + return rasterizer.AccessGuestDriverProfile(); +} + void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 4ef3e0613..99c82a9af 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -218,6 +218,8 @@ public: return regs.tex_cb_index; } + VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; + private: Core::System& system; VideoCore::RasterizerInterface& rasterizer; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 58dfa8033..8167864c0 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -784,4 +784,8 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b return result; } +VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { + return rasterizer.AccessGuestDriverProfile(); +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ee79260fc..08ef95410 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1306,6 +1306,8 @@ public: return regs.tex_cb_index; } + VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; + /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than /// we've seen used. using MacroMemory = std::array; diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp new file mode 100644 index 000000000..b1ac254ff --- /dev/null +++ b/src/video_core/guest_driver.cpp @@ -0,0 +1,34 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/guest_driver.h" + +namespace VideoCore { + +void GuestDriverProfile::DeduceTextureHandlerSize(std::vector&& bound_offsets) { + if (texture_handler_size_deduced) { + return; + } + std::size_t size = bound_offsets.size(); + if (size < 2) { + return; + } + std::sort(bound_offsets.begin(), bound_offsets.end(), + [](const u32& a, const u32& b) { return a < b; }); + u32 min_val = 0xFFFFFFFF; // set to highest possible 32 bit integer; + for (std::size_t i = 1; i < size; i++) { + if (bound_offsets[i] == bound_offsets[i - 1]) { + continue; + } + const u32 new_min = bound_offsets[i] - bound_offsets[i - 1]; + min_val = std::min(min_val, new_min); + } + if (min_val > 2) { + return; + } + texture_handler_size_deduced = true; + texture_handler_size = sizeof(u32) * min_val; +} + +} // namespace VideoCore diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h new file mode 100644 index 000000000..f64f043af --- /dev/null +++ b/src/video_core/guest_driver.h @@ -0,0 +1,37 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include + +#include "common/common_types.h" + +namespace VideoCore { + +/** + * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect + * information necessary for impossible to avoid HLE methods like shader tracks. + */ +class GuestDriverProfile { +public: + u32 GetTextureHandlerSize() const { + return texture_handler_size; + } + + bool TextureHandlerSizeKnown() const { + return texture_handler_size_deduced; + } + + void DeduceTextureHandlerSize(std::vector&& bound_offsets); + +private: + // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily + // use 4 bytes instead. Thus, certain drivers may squish the size. + static constexpr u32 default_texture_handler_size = 8; + u32 texture_handler_size{default_texture_handler_size}; + bool texture_handler_size_deduced{}; +}; + +} // namespace VideoCore diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 5b0eca9e2..149f79af3 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -9,6 +9,7 @@ #include "common/common_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" +#include "video_core/guest_driver.h" namespace Tegra { class MemoryManager; @@ -78,5 +79,12 @@ public: /// Initialize disk cached resources for the game being emulated virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, const DiskResourceLoadCallback& callback = {}) {} + + GuestDriverProfile& AccessGuestDriverProfile() { + return guest_driver_profile; + } + +private: + GuestDriverProfile guest_driver_profile{}; }; } // namespace VideoCore diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index d32e2d657..78d9d7037 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -10,6 +10,7 @@ #include "common/hash.h" #include "video_core/engines/const_buffer_engine_interface.h" #include "video_core/engines/shader_type.h" +#include "video_core/guest_driver.h" namespace VideoCommon::Shader { @@ -71,6 +72,13 @@ public: return bindless_samplers; } + VideoCore::GuestDriverProfile* AccessGuestDriverProfile() { + if (engine) { + return &(engine->AccessGuestDriverProfile()); + } + return nullptr; + } + private: const Tegra::Engines::ShaderType stage; Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 22c3e5120..aed35a9b8 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -315,4 +315,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { return pc + 1; } +void ShaderIR::PostDecode() { + // Deduce texture handler size if needed + auto* gpu_driver = locker.AccessGuestDriverProfile(); + if (gpu_driver) { + if (!gpu_driver->TextureHandlerSizeKnown() && used_samplers.size() > 1) { + u32 count{}; + std::vector bound_offsets; + for (const auto& sampler : used_samplers) { + if (sampler.IsBindless()) { + continue; + } + count++; + bound_offsets.emplace_back(sampler.GetOffset()); + } + if (count > 1) { + gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); + } + } + } +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 31eecb3f4..a186e22b2 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet ConstBufferLocker& locker) : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { Decode(); + PostDecode(); } ShaderIR::~ShaderIR() = default; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ba1db4c11..92c24247d 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -191,6 +191,7 @@ private: }; void Decode(); + void PostDecode(); NodeBlock DecodeRange(u32 begin, u32 end); void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); From 1e4b6bef6f1b278bdc99170b76f33179a2eff26f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 3 Jan 2020 18:15:24 -0400 Subject: [PATCH 02/16] Shader_IR: Store Bound buffer on Shader Usage --- .../renderer_opengl/gl_shader_cache.cpp | 4 +++- .../renderer_opengl/gl_shader_disk_cache.cpp | 12 ++++++++---- .../renderer_opengl/gl_shader_disk_cache.h | 1 + src/video_core/shader/const_buffer_locker.cpp | 17 +++++++++++++++++ src/video_core/shader/const_buffer_locker.h | 12 ++++++++++++ 5 files changed, 41 insertions(+), 5 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3c5bdd377..489eb143c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -214,6 +214,7 @@ std::unique_ptr MakeLocker(Core::System& system, ShaderType s } void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { + locker.SetBoundBuffer(usage.bound_buffer); for (const auto& key : usage.keys) { const auto [buffer, offset] = key.first; locker.InsertKey(buffer, offset, key.second); @@ -418,7 +419,8 @@ bool CachedShader::EnsureValidLockerVariant() { ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, const ConstBufferLocker& locker) const { - return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(), + return ShaderDiskCacheUsage{unique_identifier, variant, + locker.GetBoundBuffer(), locker.GetKeys(), locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; } diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index cf874a09a..1fc204f6f 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -53,7 +53,7 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler{}; }; -constexpr u32 NativeVersion = 11; +constexpr u32 NativeVersion = 12; // Making sure sizes doesn't change by accident static_assert(sizeof(ProgramVariant) == 20); @@ -186,7 +186,8 @@ ShaderDiskCacheOpenGL::LoadTransferable() { u32 num_bound_samplers{}; u32 num_bindless_samplers{}; if (file.ReadArray(&usage.unique_identifier, 1) != 1 || - file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || + file.ReadArray(&usage.variant, 1) != 1 || + file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || file.ReadArray(&num_bindless_samplers, 1) != 1) { LOG_ERROR(Render_OpenGL, error_loading); @@ -281,7 +282,9 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { u32 num_bindless_samplers{}; ShaderDiskCacheUsage usage; if (!LoadObjectFromPrecompiled(usage.unique_identifier) || - !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) || + !LoadObjectFromPrecompiled(usage.variant) || + !LoadObjectFromPrecompiled(usage.bound_buffer) || + !LoadObjectFromPrecompiled(num_keys) || !LoadObjectFromPrecompiled(num_bound_samplers) || !LoadObjectFromPrecompiled(num_bindless_samplers)) { return {}; @@ -393,6 +396,7 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || + file.WriteObject(usage.bound_buffer) != 1 || file.WriteObject(static_cast(usage.keys.size())) != 1 || file.WriteObject(static_cast(usage.bound_samplers.size())) != 1 || file.WriteObject(static_cast(usage.bindless_samplers.size())) != 1) { @@ -447,7 +451,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p }; if (!SaveObjectToPrecompiled(usage.unique_identifier) || - !SaveObjectToPrecompiled(usage.variant) || + !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) || !SaveObjectToPrecompiled(static_cast(usage.keys.size())) || !SaveObjectToPrecompiled(static_cast(usage.bound_samplers.size())) || !SaveObjectToPrecompiled(static_cast(usage.bindless_samplers.size()))) { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 69a2fbdda..ef2371f6d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -79,6 +79,7 @@ static_assert(std::is_trivially_copyable_v); struct ShaderDiskCacheUsage { u64 unique_identifier{}; ProgramVariant variant; + u32 bound_buffer{}; VideoCommon::Shader::KeyMap keys; VideoCommon::Shader::BoundSamplerMap bound_samplers; VideoCommon::Shader::BindlessSamplerMap bindless_samplers; diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index a4a0319eb..0638be8cb 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp @@ -66,6 +66,18 @@ std::optional ConstBufferLocker::ObtainBindle return value; } +std::optional ConstBufferLocker::ObtainBoundBuffer() { + if (bound_buffer_saved) { + return bound_buffer; + } + if (!engine) { + return std::nullopt; + } + bound_buffer_saved = true; + bound_buffer = engine->GetBoundBuffer(); + return bound_buffer; +} + void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { keys.insert_or_assign({buffer, offset}, value); } @@ -78,6 +90,11 @@ void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDes bindless_samplers.insert_or_assign({buffer, offset}, sampler); } +void ConstBufferLocker::SetBoundBuffer(u32 buffer) { + bound_buffer_saved = true; + bound_buffer = buffer; +} + bool ConstBufferLocker::IsConsistent() const { if (!engine) { return false; diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 78d9d7037..f26cce2ce 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -41,6 +41,8 @@ public: std::optional ObtainBindlessSampler(u32 buffer, u32 offset); + std::optional ObtainBoundBuffer(); + /// Inserts a key. void InsertKey(u32 buffer, u32 offset, u32 value); @@ -50,6 +52,10 @@ public: /// Inserts a bindless sampler key. void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); + /// Set the bound buffer for this locker. + + void SetBoundBuffer(u32 buffer); + /// Checks keys and samplers against engine's current const buffers. Returns true if they are /// the same value, false otherwise; bool IsConsistent() const; @@ -72,6 +78,10 @@ public: return bindless_samplers; } + u32 GetBoundBuffer() const { + return bound_buffer; + } + VideoCore::GuestDriverProfile* AccessGuestDriverProfile() { if (engine) { return &(engine->AccessGuestDriverProfile()); @@ -85,6 +95,8 @@ private: KeyMap keys; BoundSamplerMap bound_samplers; BindlessSamplerMap bindless_samplers; + bool bound_buffer_saved{}; + u32 bound_buffer{}; }; } // namespace VideoCommon::Shader From 74aa7de5e3905a9438f8839fa273024edd118f19 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 4 Jan 2020 12:30:11 -0400 Subject: [PATCH 03/16] Guest_driver: Correct compiling errors in GCC. --- src/video_core/guest_driver.cpp | 4 +++- src/video_core/guest_driver.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp index b1ac254ff..55b9bd021 100644 --- a/src/video_core/guest_driver.cpp +++ b/src/video_core/guest_driver.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include "video_core/guest_driver.h" namespace VideoCore { @@ -28,7 +30,7 @@ void GuestDriverProfile::DeduceTextureHandlerSize(std::vector&& bound_offse return; } texture_handler_size_deduced = true; - texture_handler_size = sizeof(u32) * min_val; + texture_handler_size = min_texture_handler_size * min_val; } } // namespace VideoCore diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h index f64f043af..7687a0434 100644 --- a/src/video_core/guest_driver.h +++ b/src/video_core/guest_driver.h @@ -27,6 +27,8 @@ public: void DeduceTextureHandlerSize(std::vector&& bound_offsets); private: + // Minimum size of texture handler any driver can use. + static constexpr u32 min_texture_handler_size = 4; // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily // use 4 bytes instead. Thus, certain drivers may squish the size. static constexpr u32 default_texture_handler_size = 8; From dc5cfa8d287757dede737553b6f1f8521971c6e2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 5 Jan 2020 12:08:39 -0400 Subject: [PATCH 04/16] Shader_IR: Address Feedback --- src/video_core/guest_driver.cpp | 7 ++-- src/video_core/guest_driver.h | 2 +- src/video_core/shader/const_buffer_locker.h | 1 - src/video_core/shader/decode.cpp | 42 ++++++++++++--------- 4 files changed, 30 insertions(+), 22 deletions(-) diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp index 55b9bd021..1ded52905 100644 --- a/src/video_core/guest_driver.cpp +++ b/src/video_core/guest_driver.cpp @@ -1,8 +1,9 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include +#include #include "video_core/guest_driver.h" @@ -12,13 +13,13 @@ void GuestDriverProfile::DeduceTextureHandlerSize(std::vector&& bound_offse if (texture_handler_size_deduced) { return; } - std::size_t size = bound_offsets.size(); + const std::size_t size = bound_offsets.size(); if (size < 2) { return; } std::sort(bound_offsets.begin(), bound_offsets.end(), [](const u32& a, const u32& b) { return a < b; }); - u32 min_val = 0xFFFFFFFF; // set to highest possible 32 bit integer; + u32 min_val = UINT_MAX; for (std::size_t i = 1; i < size; i++) { if (bound_offsets[i] == bound_offsets[i - 1]) { continue; diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h index 7687a0434..e08588ee9 100644 --- a/src/video_core/guest_driver.h +++ b/src/video_core/guest_driver.h @@ -1,4 +1,4 @@ -// Copyright 2019 yuzu Emulator Project +// Copyright 2020 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index f26cce2ce..c7b72fa5e 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -53,7 +53,6 @@ public: void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); /// Set the bound buffer for this locker. - void SetBoundBuffer(u32 buffer); /// Checks keys and samplers against engine's current const buffers. Returns true if they are diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index aed35a9b8..c702c7629 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -315,25 +315,33 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { return pc + 1; } -void ShaderIR::PostDecode() { - // Deduce texture handler size if needed - auto* gpu_driver = locker.AccessGuestDriverProfile(); - if (gpu_driver) { - if (!gpu_driver->TextureHandlerSizeKnown() && used_samplers.size() > 1) { - u32 count{}; - std::vector bound_offsets; - for (const auto& sampler : used_samplers) { - if (sampler.IsBindless()) { - continue; - } - count++; - bound_offsets.emplace_back(sampler.GetOffset()); - } - if (count > 1) { - gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); - } +void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, + std::list& used_samplers) { + if (gpu_driver == nullptr) { + LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); + return; + } + if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { + return; + } + u32 count{}; + std::vector bound_offsets; + for (const auto& sampler : used_samplers) { + if (sampler.IsBindless()) { + continue; } + count++; + bound_offsets.emplace_back(sampler.GetOffset()); + } + if (count > 1) { + gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); } } +void ShaderIR::PostDecode() { + // Deduce texture handler size if needed + auto* gpu_driver = locker.AccessGuestDriverProfile(); + DeduceTextureHandlerSize(gpu_driver, used_samplers); +} + } // namespace VideoCommon::Shader From b97608ca646962a6f5a217b9477bdd86eed5e48f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 Jan 2020 10:28:29 -0400 Subject: [PATCH 05/16] Shader_IR: Allow constant access of guest driver. --- src/video_core/engines/const_buffer_engine_interface.h | 1 + src/video_core/engines/kepler_compute.cpp | 4 ++++ src/video_core/engines/kepler_compute.h | 2 ++ src/video_core/engines/maxwell_3d.cpp | 4 ++++ src/video_core/engines/maxwell_3d.h | 2 ++ src/video_core/rasterizer_interface.h | 4 ++++ src/video_core/shader/const_buffer_locker.h | 2 +- 7 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index c29156e34..d56a47710 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h @@ -109,6 +109,7 @@ public: virtual u32 GetBoundBuffer() const = 0; virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; + virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0; }; } // namespace Tegra::Engines diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index f177ae938..4b824aa4e 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -98,6 +98,10 @@ VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { return rasterizer.AccessGuestDriverProfile(); } +const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { + return rasterizer.AccessGuestDriverProfile(); +} + void KeplerCompute::ProcessLaunch() { const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 99c82a9af..eeb79c56f 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -220,6 +220,8 @@ public: VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; + const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; + private: Core::System& system; VideoCore::RasterizerInterface& rasterizer; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 8167864c0..7cea146f0 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -788,4 +788,8 @@ VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { return rasterizer.AccessGuestDriverProfile(); } +const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { + return rasterizer.AccessGuestDriverProfile(); +} + } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 08ef95410..8808bbf76 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1308,6 +1308,8 @@ public: VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; + const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; + /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than /// we've seen used. using MacroMemory = std::array; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 149f79af3..1b0cc56f1 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -84,6 +84,10 @@ public: return guest_driver_profile; } + const GuestDriverProfile& AccessGuestDriverProfile() const { + return guest_driver_profile; + } + private: GuestDriverProfile guest_driver_profile{}; }; diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index c7b72fa5e..f5655ac64 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -81,7 +81,7 @@ public: return bound_buffer; } - VideoCore::GuestDriverProfile* AccessGuestDriverProfile() { + VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { if (engine) { return &(engine->AccessGuestDriverProfile()); } From 64496f24569ecc23ebbb816725f27142867b1468 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 Jan 2020 11:46:36 -0400 Subject: [PATCH 06/16] Shader_IR: Address Feedback --- src/video_core/guest_driver.cpp | 9 ++-- src/video_core/guest_driver.h | 11 ++--- src/video_core/rasterizer_interface.h | 2 + src/video_core/shader/const_buffer_locker.h | 2 +- src/video_core/shader/decode.cpp | 48 ++++++++++----------- 5 files changed, 37 insertions(+), 35 deletions(-) diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp index 1ded52905..6adef459e 100644 --- a/src/video_core/guest_driver.cpp +++ b/src/video_core/guest_driver.cpp @@ -3,7 +3,7 @@ // Refer to the license.txt file included. #include -#include +#include #include "video_core/guest_driver.h" @@ -17,10 +17,9 @@ void GuestDriverProfile::DeduceTextureHandlerSize(std::vector&& bound_offse if (size < 2) { return; } - std::sort(bound_offsets.begin(), bound_offsets.end(), - [](const u32& a, const u32& b) { return a < b; }); - u32 min_val = UINT_MAX; - for (std::size_t i = 1; i < size; i++) { + std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{}); + u32 min_val = std::numeric_limits::max(); + for (std::size_t i = 1; i < size; ++i) { if (bound_offsets[i] == bound_offsets[i - 1]) { continue; } diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h index e08588ee9..0a9a826b6 100644 --- a/src/video_core/guest_driver.h +++ b/src/video_core/guest_driver.h @@ -12,10 +12,13 @@ namespace VideoCore { /** * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect - * information necessary for impossible to avoid HLE methods like shader tracks. + * information necessary for impossible to avoid HLE methods like shader tracks as they are + * Entscheidungsproblems. */ class GuestDriverProfile { public: + void DeduceTextureHandlerSize(std::vector&& bound_offsets); + u32 GetTextureHandlerSize() const { return texture_handler_size; } @@ -24,16 +27,14 @@ public: return texture_handler_size_deduced; } - void DeduceTextureHandlerSize(std::vector&& bound_offsets); - private: // Minimum size of texture handler any driver can use. static constexpr u32 min_texture_handler_size = 4; // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily // use 4 bytes instead. Thus, certain drivers may squish the size. static constexpr u32 default_texture_handler_size = 8; - u32 texture_handler_size{default_texture_handler_size}; - bool texture_handler_size_deduced{}; + u32 texture_handler_size = default_texture_handler_size; + bool texture_handler_size_deduced = false; }; } // namespace VideoCore diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 1b0cc56f1..c586cd6fe 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -80,10 +80,12 @@ public: virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, const DiskResourceLoadCallback& callback = {}) {} + /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. GuestDriverProfile& AccessGuestDriverProfile() { return guest_driver_profile; } + /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. const GuestDriverProfile& AccessGuestDriverProfile() const { return guest_driver_profile; } diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index f5655ac64..fd1bb476a 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -83,7 +83,7 @@ public: VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { if (engine) { - return &(engine->AccessGuestDriverProfile()); + return &engine->AccessGuestDriverProfile(); } return nullptr; } diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index c702c7629..507614d59 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -33,6 +33,29 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { return (absolute_offset % SchedPeriod) == 0; } +void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, + std::list& used_samplers) { + if (gpu_driver == nullptr) { + LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); + return; + } + if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { + return; + } + u32 count{}; + std::vector bound_offsets; + for (const auto& sampler : used_samplers) { + if (sampler.IsBindless()) { + continue; + } + ++count; + bound_offsets.emplace_back(sampler.GetOffset()); + } + if (count > 1) { + gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); + } +} + } // Anonymous namespace class ASTDecoder { @@ -315,32 +338,9 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { return pc + 1; } -void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, - std::list& used_samplers) { - if (gpu_driver == nullptr) { - LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); - return; - } - if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { - return; - } - u32 count{}; - std::vector bound_offsets; - for (const auto& sampler : used_samplers) { - if (sampler.IsBindless()) { - continue; - } - count++; - bound_offsets.emplace_back(sampler.GetOffset()); - } - if (count > 1) { - gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); - } -} - void ShaderIR::PostDecode() { // Deduce texture handler size if needed - auto* gpu_driver = locker.AccessGuestDriverProfile(); + auto gpu_driver = locker.AccessGuestDriverProfile(); DeduceTextureHandlerSize(gpu_driver, used_samplers); } From 603c861532ed1a89254556ff84bbe121bd700765 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 5 Jan 2020 15:23:24 -0400 Subject: [PATCH 07/16] Shader_IR: Implement initial code for tracking indexed samplers. --- src/video_core/shader/node.h | 48 +++++++++++++++++ src/video_core/shader/node_helper.h | 6 +++ src/video_core/shader/shader_ir.h | 3 ++ src/video_core/shader/track.cpp | 82 +++++++++++++++++++++++++++++ 4 files changed, 139 insertions(+) diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 075c7d07c..b370df8f9 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -230,6 +230,12 @@ using Node = std::shared_ptr; using Node4 = std::array; using NodeBlock = std::vector; +class BindlessSamplerNode; +class ArraySamplerNode; + +using TrackSamplerData = std::variant; +using TrackSampler = std::shared_ptr; + class Sampler { public: /// This constructor is for bound samplers @@ -288,6 +294,48 @@ private: bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. }; +/// Represents a tracked bindless sampler into a direct const buffer +class ArraySamplerNode final { +public: + explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var) + : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {} + + u32 GetIndex() const { + return index; + } + + u32 GetBaseOffset() const { + return base_offset; + } + + u32 GetIndexVar() const { + return bindless_var; + } + +private: + u32 index; + u32 base_offset; + u32 bindless_var; +}; + +/// Represents a tracked bindless sampler into a direct const buffer +class BindlessSamplerNode final { +public: + explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {} + + u32 GetIndex() const { + return index; + } + + u32 GetOffset() const { + return offset; + } + +private: + u32 index; + u32 offset; +}; + class Image final { public: /// This constructor is for bound images diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h index 0c2aa749b..11231bbea 100644 --- a/src/video_core/shader/node_helper.h +++ b/src/video_core/shader/node_helper.h @@ -45,6 +45,12 @@ Node MakeNode(Args&&... args) { return std::make_shared(T(std::forward(args)...)); } +template +TrackSampler MakeTrackSampler(Args&&... args) { + static_assert(std::is_convertible_v); + return std::make_shared(T(std::forward(args)...)); +} + template Node Operation(OperationCode code, Args&&... args) { if constexpr (sizeof...(args) == 0) { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 92c24247d..d85f14c97 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -388,6 +388,9 @@ private: std::tuple TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; + std::tuple TrackSampler(Node tracked, const NodeBlock& code, + s64 cursor) const; + std::optional TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; std::pair TrackRegister(const GprNode* tracked, const NodeBlock& code, diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 165c79330..69a677394 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -8,6 +8,7 @@ #include "common/common_types.h" #include "video_core/shader/node.h" +#include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" namespace VideoCommon::Shader { @@ -37,6 +38,87 @@ std::pair FindOperation(const NodeBlock& code, s64 cursor, } } // Anonymous namespace +std::optional> DecoupleIndirectRead(const OperationNode& operation) { + if (operation.GetCode() != OperationCode::UAdd) { + return std::nullopt; + } + Node gpr{}; + Node offset{}; + if (operation.GetOperandsCount() != 2) { + return std::nullopt; + } + for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { + Node operand = operation[i]; + if (std::holds_alternative(*operand)) { + offset = operation[i]; + } else if (std::holds_alternative(*operand)) { + gpr = operation[i]; + } + } + if (offset && gpr) { + return {std::make_pair(gpr, offset)}; + } + return std::nullopt; +} + +std::tuple ShaderIR::TrackSampler(Node tracked, const NodeBlock& code, + s64 cursor) const { + if (const auto cbuf = std::get_if(&*tracked)) { + // Constant buffer found, test if it's an immediate + const auto offset = cbuf->GetOffset(); + if (const auto immediate = std::get_if(&*offset)) { + auto track = + MakeTrackSampler(cbuf->GetIndex(), immediate->GetValue()); + return {tracked, track}; + } else if (const auto operation = std::get_if(&*offset)) { + auto bound_buffer = locker.ObtainBoundBuffer(); + if (!bound_buffer) { + return {}; + } + if (*bound_buffer != cbuf->GetIndex()) { + return {}; + } + auto pair = DecoupleIndirectRead(*operation); + if (!pair) { + return {}; + } + auto [gpr, base_offset] = *pair; + const auto offset_inm = std::get_if(&*base_offset); + // TODO Implement Bindless Index custom variable + auto track = + MakeTrackSampler(cbuf->GetIndex(), offset_inm->GetValue(), 0); + return {tracked, track}; + } + return {}; + } + if (const auto gpr = std::get_if(&*tracked)) { + if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { + return {}; + } + // Reduce the cursor in one to avoid infinite loops when the instruction sets the same + // register that it uses as operand + const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); + if (!source) { + return {}; + } + return TrackSampler(source, code, new_cursor); + } + if (const auto operation = std::get_if(&*tracked)) { + for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { + if (auto found = TrackSampler((*operation)[i - 1], code, cursor); std::get<0>(found)) { + // Cbuf found in operand. + return found; + } + } + return {}; + } + if (const auto conditional = std::get_if(&*tracked)) { + const auto& conditional_code = conditional->GetCode(); + return TrackSampler(tracked, conditional_code, static_cast(conditional_code.size())); + } + return {}; +} + std::tuple ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const { if (const auto cbuf = std::get_if(&*tracked)) { From f4603d23c551ece65cd205a850a31a84531daf43 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 5 Jan 2020 15:53:22 -0400 Subject: [PATCH 08/16] Shader_IR: Setup Indexed Samplers on the IR --- src/video_core/shader/decode/texture.cpp | 66 +++++++++++++++++------- 1 file changed, 46 insertions(+), 20 deletions(-) diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 0b567e39d..886650d9e 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -389,31 +389,57 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, std::optional sampler_info) { const Node sampler_register = GetRegister(reg); - const auto [base_sampler, buffer, offset] = - TrackCbuf(sampler_register, global_code, static_cast(global_code.size())); - ASSERT(base_sampler != nullptr); - if (base_sampler == nullptr) { + const auto [base_node, tracked_sampler_info] = + TrackSampler(sampler_register, global_code, static_cast(global_code.size())); + ASSERT(base_node != nullptr); + if (base_node == nullptr) { return nullptr; } - const auto info = GetSamplerInfo(sampler_info, offset, buffer); + if (const auto bindless_sampler_info = + std::get_if(&*tracked_sampler_info)) { + const u32 buffer = bindless_sampler_info->GetIndex(); + const u32 offset = bindless_sampler_info->GetOffset(); + const auto info = GetSamplerInfo(sampler_info, offset, buffer); - // If this sampler has already been used, return the existing mapping. - const auto it = - std::find_if(used_samplers.begin(), used_samplers.end(), - [buffer = buffer, offset = offset](const Sampler& entry) { - return entry.GetBuffer() == buffer && entry.GetOffset() == offset; - }); - if (it != used_samplers.end()) { - ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && - it->IsShadow() == info.is_shadow); - return &*it; + // If this sampler has already been used, return the existing mapping. + const auto it = + std::find_if(used_samplers.begin(), used_samplers.end(), + [buffer = buffer, offset = offset](const Sampler& entry) { + return entry.GetBuffer() == buffer && entry.GetOffset() == offset; + }); + if (it != used_samplers.end()) { + ASSERT(it->IsBindless() && it->GetType() == info.type && + it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow); + return &*it; + } + + // Otherwise create a new mapping for this sampler + const auto next_index = static_cast(used_samplers.size()); + return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, + info.is_shadow, info.is_buffer); + } else if (const auto array_sampler_info = + std::get_if(&*tracked_sampler_info)) { + const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; + const auto info = GetSamplerInfo(sampler_info, base_offset); + + // If this sampler has already been used, return the existing mapping. + const auto it = std::find_if( + used_samplers.begin(), used_samplers.end(), + [base_offset](const Sampler& entry) { return entry.GetOffset() == base_offset; }); + if (it != used_samplers.end()) { + ASSERT(!it->IsBindless() && it->GetType() == info.type && + it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow && + it->IsBuffer() == info.is_buffer); + return &*it; + } + + // Otherwise create a new mapping for this sampler + const auto next_index = static_cast(used_samplers.size()); + return &used_samplers.emplace_back(next_index, base_offset, info.type, info.is_array, + info.is_shadow, info.is_buffer); } - - // Otherwise create a new mapping for this sampler - const auto next_index = static_cast(used_samplers.size()); - return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, - info.is_shadow, info.is_buffer); + return nullptr; } void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { From 037ea431ceb93e93274fdcf9fb724819639d04fd Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 5 Jan 2020 18:36:21 -0400 Subject: [PATCH 09/16] Shader_IR: deduce size of indexed samplers --- src/video_core/shader/decode.cpp | 36 ++++++++++++++++++++++++ src/video_core/shader/decode/texture.cpp | 9 +++--- src/video_core/shader/node.h | 22 ++++++++++++--- src/video_core/shader/shader_ir.h | 1 + 4 files changed, 60 insertions(+), 8 deletions(-) diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 507614d59..dd2f68a3e 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -56,6 +56,29 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, } } +std::optional TryDeduceSamplerSize(Sampler& sampler_to_deduce, + VideoCore::GuestDriverProfile* gpu_driver, + std::list& used_samplers) { + if (gpu_driver == nullptr) { + LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); + return std::nullopt; + } + const u32 base_offset = sampler_to_deduce.GetOffset(); + u32 max_offset{UINT_MAX}; + for (const auto& sampler : used_samplers) { + if (sampler.IsBindless()) { + continue; + } + if (sampler.GetOffset() > base_offset) { + max_offset = std::min(sampler.GetOffset(), max_offset); + } + } + if (max_offset == UINT_MAX) { + return std::nullopt; + } + return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); +} + } // Anonymous namespace class ASTDecoder { @@ -342,6 +365,19 @@ void ShaderIR::PostDecode() { // Deduce texture handler size if needed auto gpu_driver = locker.AccessGuestDriverProfile(); DeduceTextureHandlerSize(gpu_driver, used_samplers); + // Deduce Indexed Samplers + if (uses_indexed_samplers) { + for (auto& sampler : used_samplers) { + if (sampler.IsIndexed()) { + auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers); + if (size) { + sampler.SetSize(*size); + } else { + sampler.SetSize(1); + } + } + } + } } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 886650d9e..e7c38f5d6 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -383,7 +383,7 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, // Otherwise create a new mapping for this sampler const auto next_index = static_cast(used_samplers.size()); return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, - info.is_buffer); + info.is_buffer, false); } const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, @@ -417,7 +417,7 @@ const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, // Otherwise create a new mapping for this sampler const auto next_index = static_cast(used_samplers.size()); return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, - info.is_shadow, info.is_buffer); + info.is_shadow, info.is_buffer, false); } else if (const auto array_sampler_info = std::get_if(&*tracked_sampler_info)) { const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; @@ -430,14 +430,15 @@ const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, if (it != used_samplers.end()) { ASSERT(!it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow && - it->IsBuffer() == info.is_buffer); + it->IsBuffer() == info.is_buffer && it->IsIndexed()); return &*it; } + uses_indexed_samplers = true; // Otherwise create a new mapping for this sampler const auto next_index = static_cast(used_samplers.size()); return &used_samplers.emplace_back(next_index, base_offset, info.type, info.is_array, - info.is_shadow, info.is_buffer); + info.is_shadow, info.is_buffer, true); } return nullptr; } diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index b370df8f9..2f29b9506 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -240,15 +240,15 @@ class Sampler { public: /// This constructor is for bound samplers constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow, bool is_buffer) + bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, - is_buffer{is_buffer} {} + is_buffer{is_buffer}, is_indexed{is_indexed} {} /// This constructor is for bindless samplers constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, - bool is_array, bool is_shadow, bool is_buffer) + bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, - is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {} + is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {} constexpr u32 GetIndex() const { return index; @@ -282,16 +282,30 @@ public: return is_bindless; } + constexpr bool IsIndexed() const { + return is_indexed; + } + + constexpr u32 Size() const { + return size; + } + + void SetSize(u32 new_size) { + size = new_size; + } + private: u32 index{}; ///< Emulated index given for the this sampler. u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). + u32 size{}; ///< Size of the sampler if indexed. Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. bool is_buffer{}; ///< Whether the texture is a texture buffer without sampler. bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. + bool is_indexed{}; ///< Whether this sampler is an indexed array of textures. }; /// Represents a tracked bindless sampler into a direct const buffer diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index d85f14c97..121528346 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -435,6 +435,7 @@ private: bool uses_instance_id{}; bool uses_vertex_id{}; bool uses_warps{}; + bool uses_indexed_samplers{}; Tegra::Shader::Header header; }; From 2b02f29a2ddfe40639ea0f855bdf257beca59e65 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 6 Jan 2020 11:43:13 -0400 Subject: [PATCH 10/16] GL Backend: Introduce indexed samplers into the GL backend --- .../renderer_opengl/gl_rasterizer.cpp | 34 +++++++++++++++---- .../renderer_opengl/gl_shader_decompiler.cpp | 15 ++++++-- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c428f06e4..362942e09 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -55,16 +55,20 @@ namespace { template Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - Tegra::Engines::ShaderType shader_type) { + Tegra::Engines::ShaderType shader_type, + std::size_t index = 0) { if (entry.IsBindless()) { const Tegra::Texture::TextureHandle tex_handle = engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); return engine.GetTextureInfo(tex_handle); } + const auto& gpu_profile = engine.AccessGuestDriverProfile(); + const u32 offset = + entry.GetOffset() + static_cast(index * gpu_profile.GetTextureHandlerSize()); if constexpr (std::is_same_v) { - return engine.GetStageTexture(shader_type, entry.GetOffset()); + return engine.GetStageTexture(shader_type, offset); } else { - return engine.GetTexture(entry.GetOffset()); + return engine.GetTexture(offset); } } @@ -942,8 +946,15 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& u32 binding = device.GetBaseBindings(stage_index).sampler; for (const auto& entry : shader->GetShaderEntries().samplers) { const auto shader_type = static_cast(stage_index); - const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); - SetupTexture(binding++, texture, entry); + if (!entry.IsIndexed()) { + const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); + SetupTexture(binding++, texture, entry); + } else { + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); + SetupTexture(binding++, texture, entry); + } + } } } @@ -952,8 +963,17 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { const auto& compute = system.GPU().KeplerCompute(); u32 binding = 0; for (const auto& entry : kernel->GetShaderEntries().samplers) { - const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); - SetupTexture(binding++, texture, entry); + if (!entry.IsIndexed()) { + const auto texture = + GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); + SetupTexture(binding++, texture, entry); + } else { + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = + GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i); + SetupTexture(binding++, texture, entry); + } + } } } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2996aaf08..4b35396f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -655,7 +655,8 @@ private: u32 binding = device.GetBaseBindings(stage).sampler; for (const auto& sampler : ir.GetSamplers()) { const std::string name = GetSampler(sampler); - const std::string description = fmt::format("layout (binding = {}) uniform", binding++); + const std::string description = fmt::format("layout (binding = {}) uniform", binding); + binding += sampler.IsIndexed() ? sampler.Size() : 1; std::string sampler_type = [&]() { if (sampler.IsBuffer()) { @@ -682,7 +683,11 @@ private: sampler_type += "Shadow"; } - code.AddLine("{} {} {};", description, sampler_type, name); + if (!sampler.IsIndexed()) { + code.AddLine("{} {} {};", description, sampler_type, name); + } else { + code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.Size()); + } } if (!ir.GetSamplers().empty()) { code.AddNewLine(); @@ -1099,7 +1104,11 @@ private: } else if (!meta->ptp.empty()) { expr += "Offsets"; } - expr += '(' + GetSampler(meta->sampler) + ", "; + if (!meta->sampler.IsIndexed()) { + expr += '(' + GetSampler(meta->sampler) + ", "; + } else { + expr += '(' + GetSampler(meta->sampler) + "[0], "; + } expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow && !separate_dc ? 1 : 0) - 1); expr += '('; From 3c34678627eeb1b48375cf70ec38b72691fedd1e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 Jan 2020 14:53:46 -0400 Subject: [PATCH 11/16] Shader_IR: Implement Injectable Custom Variables to the IR. --- .../renderer_opengl/gl_shader_decompiler.cpp | 20 +++++++++++++++++++ .../renderer_vulkan/vk_shader_decompiler.cpp | 16 +++++++++++++++ src/video_core/shader/node.h | 17 +++++++++++++++- src/video_core/shader/shader_ir.cpp | 9 +++++++++ src/video_core/shader/shader_ir.h | 9 +++++++++ 5 files changed, 70 insertions(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 4b35396f9..8b413ae9a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -391,6 +391,7 @@ public: DeclareVertex(); DeclareGeometry(); DeclareRegisters(); + DeclareCustomVariables(); DeclarePredicates(); DeclareLocalMemory(); DeclareInternalFlags(); @@ -503,6 +504,16 @@ private: } } + void DeclareCustomVariables() { + const u32 cv_num = ir.GetCustomVariablesAmount(); + for (u32 i = 0; i < cv_num; ++i) { + code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); + } + if (cv_num > 0) { + code.AddNewLine(); + } + } + void DeclarePredicates() { const auto& predicates = ir.GetPredicates(); for (const auto pred : predicates) { @@ -780,6 +791,11 @@ private: return {GetRegister(index), Type::Float}; } + if (const auto cv = std::get_if(&*node)) { + const u32 index = cv->GetIndex(); + return {GetCustomVariable(index), Type::Float}; + } + if (const auto immediate = std::get_if(&*node)) { const u32 value = immediate->GetValue(); if (value < 10) { @@ -2250,6 +2266,10 @@ private: return GetDeclarationWithSuffix(index, "gpr"); } + std::string GetCustomVariable(u32 index) const { + return GetDeclarationWithSuffix(index, "custom_var"); + } + std::string GetPredicate(Tegra::Shader::Pred pred) const { return GetDeclarationWithSuffix(static_cast(pred), "pred"); } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index dd6d2ef03..bf797dad3 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -353,6 +353,7 @@ private: DeclareFragment(); DeclareCompute(); DeclareRegisters(); + DeclareCustomVariables(); DeclarePredicates(); DeclareLocalMemory(); DeclareSharedMemory(); @@ -587,6 +588,15 @@ private: } } + void DeclareCustomVariables() { + const u32 cv_num = ir.GetCustomVariablesAmount(); + for (u32 i = 0; i < cv_num; ++i) { + const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); + Name(id, fmt::format("custom_var_{}", i)); + custom_variables.emplace(i, AddGlobalVariable(id)); + } + } + void DeclarePredicates() { for (const auto pred : ir.GetPredicates()) { const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); @@ -974,6 +984,11 @@ private: return {OpLoad(t_float, registers.at(index)), Type::Float}; } + if (const auto cv = std::get_if(&*node)) { + const u32 index = cv->GetIndex(); + return {OpLoad(t_float, custom_variables.at(index)), Type::Float}; + } + if (const auto immediate = std::get_if(&*node)) { return {Constant(t_uint, immediate->GetValue()), Type::Uint}; } @@ -2505,6 +2520,7 @@ private: Id out_vertex{}; Id in_vertex{}; std::map registers; + std::map custom_variables; std::map predicates; std::map flow_variables; Id local_memory{}; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 2f29b9506..db06767f6 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -212,6 +212,7 @@ enum class MetaStackClass { class OperationNode; class ConditionalNode; class GprNode; +class CustomVarNode; class ImmediateNode; class InternalFlagNode; class PredicateNode; @@ -223,7 +224,7 @@ class SmemNode; class GmemNode; class CommentNode; -using NodeData = std::variant; using Node = std::shared_ptr; @@ -550,6 +551,20 @@ private: Tegra::Shader::Register index{}; }; +/// A custom variable +class CustomVarNode final { +public: + explicit constexpr CustomVarNode(u32 index) : index{index} {} + + u32 GetIndex() const { + return index; + } + +private: + u32 index{}; +}; + + /// A 32-bits value that represents an immediate value class ImmediateNode final { public: diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index a186e22b2..94972d57f 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -39,6 +39,10 @@ Node ShaderIR::GetRegister(Register reg) { return MakeNode(reg); } +Node ShaderIR::GetCustomVariable(u32 id) { + return MakeNode(id); +} + Node ShaderIR::GetImmediate19(Instruction instr) { return Immediate(instr.alu.GetImm20_19()); } @@ -453,4 +457,9 @@ std::size_t ShaderIR::DeclareAmend(Node new_amend) { return id; } +u32 ShaderIR::NewCustomVariable() { + const u32 id = num_custom_variables++; + return id; +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 121528346..2fe14e815 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -180,6 +180,10 @@ public: return amend_code[index]; } + u32 GetCustomVariablesAmount() const { + return num_custom_variables; + } + private: friend class ASTDecoder; @@ -236,6 +240,8 @@ private: /// Generates a node for a passed register. Node GetRegister(Tegra::Shader::Register reg); + /// Generates a node for a custom variable + Node GetCustomVariable(u32 id); /// Generates a node representing a 19-bit immediate value Node GetImmediate19(Tegra::Shader::Instruction instr); /// Generates a node representing a 32-bit immediate value @@ -403,6 +409,8 @@ private: /// Register new amending code and obtain the reference id. std::size_t DeclareAmend(Node new_amend); + u32 NewCustomVariable(); + const ProgramCode& program_code; const u32 main_offset; const CompilerSettings settings; @@ -418,6 +426,7 @@ private: NodeBlock global_code; ASTManager program_manager{true, true}; std::vector amend_code; + u32 num_custom_variables{}; std::set used_registers; std::set used_predicates; From 7c530e06661d760eb6366724d109468423363072 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 Jan 2020 17:45:12 -0400 Subject: [PATCH 12/16] Shader_IR: Propagate bindless index into the GL compiler. --- .../renderer_opengl/gl_shader_decompiler.cpp | 2 +- src/video_core/shader/decode/texture.cpp | 40 +++++++++++-------- src/video_core/shader/node.h | 2 +- src/video_core/shader/shader_ir.h | 5 +-- src/video_core/shader/track.cpp | 29 ++++++++++++-- 5 files changed, 54 insertions(+), 24 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 8b413ae9a..df681bdcb 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1123,7 +1123,7 @@ private: if (!meta->sampler.IsIndexed()) { expr += '(' + GetSampler(meta->sampler) + ", "; } else { - expr += '(' + GetSampler(meta->sampler) + "[0], "; + expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], "; } expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow && !separate_dc ? 1 : 0) - 1); diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index e7c38f5d6..31b09b18c 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -144,7 +144,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element}; + MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, + {}, {}, component, element, {}}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -167,9 +168,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { const auto derivate_reg = instr.gpr20.Value(); const auto texture_type = instr.txd.texture_type.Value(); const auto coord_count = GetCoordCount(texture_type); - + Node index_var{}; const Sampler* sampler = - is_bindless ? GetBindlessSampler(base_reg, {{texture_type, is_array, false}}) + is_bindless ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}}) : GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; if (sampler == nullptr) { @@ -200,7 +201,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element}; + MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element, index_var}; values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); } @@ -215,8 +216,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { // TODO: The new commits on the texture refactor, change the way samplers work. // Sadly, not all texture instructions specify the type of texture their sampler // uses. This must be fixed at a later instance. + Node index_var{}; const Sampler* sampler = - is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler); + is_bindless ? GetBindlessSampler(instr.gpr8, index_var) : GetSampler(instr.sampler); if (sampler == nullptr) { u32 indexer = 0; @@ -240,7 +242,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { if (!instr.txq.IsComponentEnabled(element)) { continue; } - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; const Node value = Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); @@ -266,8 +268,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { auto texture_type = instr.tmml.texture_type.Value(); const bool is_array = instr.tmml.array != 0; + Node index_var{}; const Sampler* sampler = - is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler); + is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler); if (sampler == nullptr) { u32 indexer = 0; @@ -309,7 +312,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { continue; } auto params = coords; - MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; + MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); SetTemporary(bb, indexer++, value); } @@ -386,7 +389,7 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, info.is_buffer, false); } -const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, +const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var, std::optional sampler_info) { const Node sampler_register = GetRegister(reg); const auto [base_node, tracked_sampler_info] = @@ -421,6 +424,7 @@ const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, } else if (const auto array_sampler_info = std::get_if(&*tracked_sampler_info)) { const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; + index_var = GetCustomVariable(array_sampler_info->GetIndexVar()); const auto info = GetSamplerInfo(sampler_info, base_offset); // If this sampler has already been used, return the existing mapping. @@ -526,8 +530,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, "This method is not supported."); const SamplerInfo info{texture_type, is_array, is_shadow, false}; - const Sampler* sampler = - is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info); + Node index_var{}; + const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, index_var, info) + : GetSampler(instr.sampler, info); Node4 values; if (sampler == nullptr) { for (u32 element = 0; element < values.size(); ++element) { @@ -575,7 +580,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, for (u32 element = 0; element < values.size(); ++element) { auto copy_coords = coords; - MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element}; + MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, + lod, {}, element, index_var}; values[element] = Operation(read_method, meta, std::move(copy_coords)); } @@ -690,7 +696,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de u64 parameter_register = instr.gpr20.Value(); const SamplerInfo info{texture_type, is_array, depth_compare, false}; - const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info) + Node index_var{}; + const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, index_var, info) : GetSampler(instr.sampler, info); Node4 values; if (sampler == nullptr) { @@ -719,7 +726,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; MetaTexture meta{ - *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element}; + *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, + index_var}; values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); } @@ -752,7 +760,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element}; + MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } @@ -802,7 +810,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is Node4 values; for (u32 element = 0; element < values.size(); ++element) { auto coords_copy = coords; - MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element}; + MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}}; values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); } return values; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index db06767f6..d75453458 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -445,6 +445,7 @@ struct MetaTexture { Node lod; Node component{}; u32 element{}; + Node index{}; }; struct MetaImage { @@ -564,7 +565,6 @@ private: u32 index{}; }; - /// A 32-bits value that represents an immediate value class ImmediateNode final { public: diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 2fe14e815..0421dac0c 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -328,7 +328,7 @@ private: std::optional sampler_info = std::nullopt); /// Accesses a texture sampler for a bindless texture. - const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, + const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var, std::optional sampler_info = std::nullopt); /// Accesses an image. @@ -394,8 +394,7 @@ private: std::tuple TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; - std::tuple TrackSampler(Node tracked, const NodeBlock& code, - s64 cursor) const; + std::tuple TrackSampler(Node tracked, const NodeBlock& code, s64 cursor); std::optional TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 69a677394..d449b625e 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -61,8 +61,19 @@ std::optional> DecoupleIndirectRead(const OperationNode& o return std::nullopt; } +bool AmendNodeCv(std::size_t amend_index, Node node) { + if (const auto operation = std::get_if(&*node)) { + operation->SetAmendIndex(amend_index); + return true; + } else if (const auto conditional = std::get_if(&*node)) { + conditional->SetAmendIndex(amend_index); + return true; + } + return false; +} + std::tuple ShaderIR::TrackSampler(Node tracked, const NodeBlock& code, - s64 cursor) const { + s64 cursor) { if (const auto cbuf = std::get_if(&*tracked)) { // Constant buffer found, test if it's an immediate const auto offset = cbuf->GetOffset(); @@ -84,9 +95,21 @@ std::tuple ShaderIR::TrackSampler(Node tracked, const NodeBl } auto [gpr, base_offset] = *pair; const auto offset_inm = std::get_if(&*base_offset); + auto gpu_driver = locker.AccessGuestDriverProfile(); + if (gpu_driver == nullptr) { + return {}; + } + const u32 bindless_cv = NewCustomVariable(); + const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, + Immediate(gpu_driver->GetTextureHandlerSize())); + + const Node cv_node = GetCustomVariable(bindless_cv); + Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); + const std::size_t amend_index = DeclareAmend(amend_op); + AmendNodeCv(amend_index, code[cursor]); // TODO Implement Bindless Index custom variable - auto track = - MakeTrackSampler(cbuf->GetIndex(), offset_inm->GetValue(), 0); + auto track = MakeTrackSampler(cbuf->GetIndex(), + offset_inm->GetValue(), bindless_cv); return {tracked, track}; } return {}; From 37b8504faaeca9aaffd67649f5a026a900743431 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 7 Jan 2020 18:56:03 -0400 Subject: [PATCH 13/16] Shader_IR: Correct Custom Variable assignment. --- src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 2 ++ src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index df681bdcb..2f2bb07a4 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1336,6 +1336,8 @@ private: const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), Type::Uint}; + } else if (const auto cv = std::get_if(&*dest)) { + target = {GetCustomVariable(cv->GetIndex()), Type::Float}; } else { UNREACHABLE_MSG("Assign called without a proper target"); } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index bf797dad3..130060369 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1361,6 +1361,8 @@ private: target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset), Type::Float}; + } else if (const auto cv = std::get_if(&*dest)) { + target = {custom_variables.at(cv->GetIndex()), Type::Float}; } else { UNIMPLEMENTED(); } From 3919b7b8a935174c91927bc0a312cbfee2971583 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 Jan 2020 12:13:05 -0400 Subject: [PATCH 14/16] Shader_IR: Corrections, styling and extras. --- src/video_core/shader/decode.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index dd2f68a3e..d4a10eee5 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include #include #include @@ -64,7 +65,7 @@ std::optional TryDeduceSamplerSize(Sampler& sampler_to_deduce, return std::nullopt; } const u32 base_offset = sampler_to_deduce.GetOffset(); - u32 max_offset{UINT_MAX}; + u32 max_offset{std::numeric_limits::max()}; for (const auto& sampler : used_samplers) { if (sampler.IsBindless()) { continue; @@ -73,7 +74,7 @@ std::optional TryDeduceSamplerSize(Sampler& sampler_to_deduce, max_offset = std::min(sampler.GetOffset(), max_offset); } } - if (max_offset == UINT_MAX) { + if (max_offset == std::numeric_limits::max()) { return std::nullopt; } return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); @@ -373,6 +374,7 @@ void ShaderIR::PostDecode() { if (size) { sampler.SetSize(*size); } else { + LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); sampler.SetSize(1); } } From 806f5691430b86640d64d4c5ae77c5e1dac1625a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 8 Jan 2020 15:59:21 -0400 Subject: [PATCH 15/16] Shader_IR: Change name of TrackSampler function so it does not confuse with the type. --- src/video_core/shader/decode/texture.cpp | 2 +- src/video_core/shader/shader_ir.h | 3 ++- src/video_core/shader/track.cpp | 12 +++++++----- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 31b09b18c..6da9668fe 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -393,7 +393,7 @@ const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, Node& i std::optional sampler_info) { const Node sampler_register = GetRegister(reg); const auto [base_node, tracked_sampler_info] = - TrackSampler(sampler_register, global_code, static_cast(global_code.size())); + TrackBindlessSampler(sampler_register, global_code, static_cast(global_code.size())); ASSERT(base_node != nullptr); if (base_node == nullptr) { return nullptr; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 0421dac0c..43672b41c 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -394,7 +394,8 @@ private: std::tuple TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; - std::tuple TrackSampler(Node tracked, const NodeBlock& code, s64 cursor); + std::tuple TrackBindlessSampler(Node tracked, const NodeBlock& code, + s64 cursor); std::optional TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index d449b625e..4db721f69 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -72,8 +72,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) { return false; } -std::tuple ShaderIR::TrackSampler(Node tracked, const NodeBlock& code, - s64 cursor) { +std::tuple ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, + s64 cursor) { if (const auto cbuf = std::get_if(&*tracked)) { // Constant buffer found, test if it's an immediate const auto offset = cbuf->GetOffset(); @@ -124,11 +124,12 @@ std::tuple ShaderIR::TrackSampler(Node tracked, const NodeBl if (!source) { return {}; } - return TrackSampler(source, code, new_cursor); + return TrackBindlessSampler(source, code, new_cursor); } if (const auto operation = std::get_if(&*tracked)) { for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { - if (auto found = TrackSampler((*operation)[i - 1], code, cursor); std::get<0>(found)) { + if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor); + std::get<0>(found)) { // Cbuf found in operand. return found; } @@ -137,7 +138,8 @@ std::tuple ShaderIR::TrackSampler(Node tracked, const NodeBl } if (const auto conditional = std::get_if(&*tracked)) { const auto& conditional_code = conditional->GetCode(); - return TrackSampler(tracked, conditional_code, static_cast(conditional_code.size())); + return TrackBindlessSampler(tracked, conditional_code, + static_cast(conditional_code.size())); } return {}; } From bb8eb15d392d69693f8cda0427669d011e23db97 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 24 Jan 2020 10:44:34 -0400 Subject: [PATCH 16/16] Shader_IR: Address feedback. --- src/video_core/guest_driver.h | 1 + .../renderer_opengl/gl_shader_decompiler.cpp | 6 ++-- .../renderer_vulkan/vk_shader_decompiler.cpp | 5 +-- src/video_core/shader/const_buffer_locker.h | 2 ++ src/video_core/shader/decode.cpp | 31 ++++++++++--------- src/video_core/shader/decode/texture.cpp | 3 +- src/video_core/shader/node.h | 14 ++++----- src/video_core/shader/shader_ir.cpp | 3 +- src/video_core/shader/shader_ir.h | 2 +- src/video_core/shader/track.cpp | 9 +++--- 10 files changed, 40 insertions(+), 36 deletions(-) diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h index 0a9a826b6..fc1917347 100644 --- a/src/video_core/guest_driver.h +++ b/src/video_core/guest_driver.h @@ -33,6 +33,7 @@ private: // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily // use 4 bytes instead. Thus, certain drivers may squish the size. static constexpr u32 default_texture_handler_size = 8; + u32 texture_handler_size = default_texture_handler_size; bool texture_handler_size_deduced = false; }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2f2bb07a4..cb1a5f35c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -505,11 +505,11 @@ private: } void DeclareCustomVariables() { - const u32 cv_num = ir.GetCustomVariablesAmount(); - for (u32 i = 0; i < cv_num; ++i) { + const u32 num_custom_variables = ir.GetNumCustomVariables(); + for (u32 i = 0; i < num_custom_variables; ++i) { code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); } - if (cv_num > 0) { + if (num_custom_variables > 0) { code.AddNewLine(); } } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 130060369..36d928fab 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -589,8 +589,8 @@ private: } void DeclareCustomVariables() { - const u32 cv_num = ir.GetCustomVariablesAmount(); - for (u32 i = 0; i < cv_num; ++i) { + const u32 num_custom_variables = ir.GetNumCustomVariables(); + for (u32 i = 0; i < num_custom_variables; ++i) { const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); Name(id, fmt::format("custom_var_{}", i)); custom_variables.emplace(i, AddGlobalVariable(id)); @@ -1363,6 +1363,7 @@ private: } else if (const auto cv = std::get_if(&*dest)) { target = {custom_variables.at(cv->GetIndex()), Type::Float}; + } else { UNIMPLEMENTED(); } diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index fd1bb476a..d3ea11087 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h @@ -77,10 +77,12 @@ public: return bindless_samplers; } + /// Gets bound buffer used on this shader u32 GetBoundBuffer() const { return bound_buffer; } + /// Obtains access to the guest driver's profile. VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { if (engine) { return &engine->AccessGuestDriverProfile(); diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index d4a10eee5..6b697ed5d 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -35,9 +35,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { } void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, - std::list& used_samplers) { + const std::list& used_samplers) { if (gpu_driver == nullptr) { - LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); + LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet"); return; } if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { @@ -57,9 +57,9 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, } } -std::optional TryDeduceSamplerSize(Sampler& sampler_to_deduce, +std::optional TryDeduceSamplerSize(const Sampler& sampler_to_deduce, VideoCore::GuestDriverProfile* gpu_driver, - std::list& used_samplers) { + const std::list& used_samplers) { if (gpu_driver == nullptr) { LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); return std::nullopt; @@ -367,17 +367,18 @@ void ShaderIR::PostDecode() { auto gpu_driver = locker.AccessGuestDriverProfile(); DeduceTextureHandlerSize(gpu_driver, used_samplers); // Deduce Indexed Samplers - if (uses_indexed_samplers) { - for (auto& sampler : used_samplers) { - if (sampler.IsIndexed()) { - auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers); - if (size) { - sampler.SetSize(*size); - } else { - LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); - sampler.SetSize(1); - } - } + if (!uses_indexed_samplers) { + return; + } + for (auto& sampler : used_samplers) { + if (!sampler.IsIndexed()) { + continue; + } + if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { + sampler.SetSize(*size); + } else { + LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); + sampler.SetSize(1); } } } diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 6da9668fe..d980535b1 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -201,7 +201,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { } for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element, index_var}; + MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, + {}, {}, {}, element, index_var}; values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); } diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index d75453458..53a551d27 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -291,7 +291,7 @@ public: return size; } - void SetSize(u32 new_size) { + constexpr void SetSize(u32 new_size) { size = new_size; } @@ -315,15 +315,15 @@ public: explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var) : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {} - u32 GetIndex() const { + constexpr u32 GetIndex() const { return index; } - u32 GetBaseOffset() const { + constexpr u32 GetBaseOffset() const { return base_offset; } - u32 GetIndexVar() const { + constexpr u32 GetIndexVar() const { return bindless_var; } @@ -338,11 +338,11 @@ class BindlessSamplerNode final { public: explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {} - u32 GetIndex() const { + constexpr u32 GetIndex() const { return index; } - u32 GetOffset() const { + constexpr u32 GetOffset() const { return offset; } @@ -557,7 +557,7 @@ class CustomVarNode final { public: explicit constexpr CustomVarNode(u32 index) : index{index} {} - u32 GetIndex() const { + constexpr u32 GetIndex() const { return index; } diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 94972d57f..3a5d280a9 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -458,8 +458,7 @@ std::size_t ShaderIR::DeclareAmend(Node new_amend) { } u32 ShaderIR::NewCustomVariable() { - const u32 id = num_custom_variables++; - return id; + return num_custom_variables++; } } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 43672b41c..b0851c3be 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -180,7 +180,7 @@ public: return amend_code[index]; } - u32 GetCustomVariablesAmount() const { + u32 GetNumCustomVariables() const { return num_custom_variables; } diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 4db721f69..ea39bca54 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -36,7 +36,6 @@ std::pair FindOperation(const NodeBlock& code, s64 cursor, } return {}; } -} // Anonymous namespace std::optional> DecoupleIndirectRead(const OperationNode& operation) { if (operation.GetCode() != OperationCode::UAdd) { @@ -44,9 +43,7 @@ std::optional> DecoupleIndirectRead(const OperationNode& o } Node gpr{}; Node offset{}; - if (operation.GetOperandsCount() != 2) { - return std::nullopt; - } + ASSERT(operation.GetOperandsCount() == 2); for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { Node operand = operation[i]; if (std::holds_alternative(*operand)) { @@ -56,7 +53,7 @@ std::optional> DecoupleIndirectRead(const OperationNode& o } } if (offset && gpr) { - return {std::make_pair(gpr, offset)}; + return std::make_pair(gpr, offset); } return std::nullopt; } @@ -72,6 +69,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) { return false; } +} // Anonymous namespace + std::tuple ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, s64 cursor) { if (const auto cbuf = std::get_if(&*tracked)) {