GPU: Implement guest driver profile and deduce texture handler sizes.
This commit is contained in:
parent
a104b985a8
commit
c921e496eb
|
@ -29,6 +29,8 @@ add_library(video_core STATIC
|
|||
gpu_synch.h
|
||||
gpu_thread.cpp
|
||||
gpu_thread.h
|
||||
guest_driver.cpp
|
||||
guest_driver.h
|
||||
macro_interpreter.cpp
|
||||
macro_interpreter.h
|
||||
memory_manager.cpp
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/guest_driver.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
@ -106,6 +107,8 @@ public:
|
|||
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
|
||||
u64 offset) const = 0;
|
||||
virtual u32 GetBoundBuffer() const = 0;
|
||||
|
||||
virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
|
|
@ -94,6 +94,10 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
|
|||
return result;
|
||||
}
|
||||
|
||||
VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
|
||||
return rasterizer.AccessGuestDriverProfile();
|
||||
}
|
||||
|
||||
void KeplerCompute::ProcessLaunch() {
|
||||
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
|
||||
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
|
||||
|
|
|
@ -218,6 +218,8 @@ public:
|
|||
return regs.tex_cb_index;
|
||||
}
|
||||
|
||||
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
|
||||
|
||||
private:
|
||||
Core::System& system;
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
|
|
|
@ -784,4 +784,8 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
|
|||
return result;
|
||||
}
|
||||
|
||||
VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
|
||||
return rasterizer.AccessGuestDriverProfile();
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
||||
|
|
|
@ -1306,6 +1306,8 @@ public:
|
|||
return regs.tex_cb_index;
|
||||
}
|
||||
|
||||
VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
|
||||
|
||||
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
|
||||
/// we've seen used.
|
||||
using MacroMemory = std::array<u32, 0x40000>;
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "video_core/guest_driver.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) {
|
||||
if (texture_handler_size_deduced) {
|
||||
return;
|
||||
}
|
||||
std::size_t size = bound_offsets.size();
|
||||
if (size < 2) {
|
||||
return;
|
||||
}
|
||||
std::sort(bound_offsets.begin(), bound_offsets.end(),
|
||||
[](const u32& a, const u32& b) { return a < b; });
|
||||
u32 min_val = 0xFFFFFFFF; // set to highest possible 32 bit integer;
|
||||
for (std::size_t i = 1; i < size; i++) {
|
||||
if (bound_offsets[i] == bound_offsets[i - 1]) {
|
||||
continue;
|
||||
}
|
||||
const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
|
||||
min_val = std::min(min_val, new_min);
|
||||
}
|
||||
if (min_val > 2) {
|
||||
return;
|
||||
}
|
||||
texture_handler_size_deduced = true;
|
||||
texture_handler_size = sizeof(u32) * min_val;
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
|
@ -0,0 +1,37 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
/**
|
||||
* The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect
|
||||
* information necessary for impossible to avoid HLE methods like shader tracks.
|
||||
*/
|
||||
class GuestDriverProfile {
|
||||
public:
|
||||
u32 GetTextureHandlerSize() const {
|
||||
return texture_handler_size;
|
||||
}
|
||||
|
||||
bool TextureHandlerSizeKnown() const {
|
||||
return texture_handler_size_deduced;
|
||||
}
|
||||
|
||||
void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
|
||||
|
||||
private:
|
||||
// This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily
|
||||
// use 4 bytes instead. Thus, certain drivers may squish the size.
|
||||
static constexpr u32 default_texture_handler_size = 8;
|
||||
u32 texture_handler_size{default_texture_handler_size};
|
||||
bool texture_handler_size_deduced{};
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
|
@ -9,6 +9,7 @@
|
|||
#include "common/common_types.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/guest_driver.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
|
@ -78,5 +79,12 @@ public:
|
|||
/// Initialize disk cached resources for the game being emulated
|
||||
virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
|
||||
const DiskResourceLoadCallback& callback = {}) {}
|
||||
|
||||
GuestDriverProfile& AccessGuestDriverProfile() {
|
||||
return guest_driver_profile;
|
||||
}
|
||||
|
||||
private:
|
||||
GuestDriverProfile guest_driver_profile{};
|
||||
};
|
||||
} // namespace VideoCore
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "common/hash.h"
|
||||
#include "video_core/engines/const_buffer_engine_interface.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/guest_driver.h"
|
||||
|
||||
namespace VideoCommon::Shader {
|
||||
|
||||
|
@ -71,6 +72,13 @@ public:
|
|||
return bindless_samplers;
|
||||
}
|
||||
|
||||
VideoCore::GuestDriverProfile* AccessGuestDriverProfile() {
|
||||
if (engine) {
|
||||
return &(engine->AccessGuestDriverProfile());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
private:
|
||||
const Tegra::Engines::ShaderType stage;
|
||||
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
|
||||
|
|
|
@ -315,4 +315,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
|
|||
return pc + 1;
|
||||
}
|
||||
|
||||
void ShaderIR::PostDecode() {
|
||||
// Deduce texture handler size if needed
|
||||
auto* gpu_driver = locker.AccessGuestDriverProfile();
|
||||
if (gpu_driver) {
|
||||
if (!gpu_driver->TextureHandlerSizeKnown() && used_samplers.size() > 1) {
|
||||
u32 count{};
|
||||
std::vector<u32> bound_offsets;
|
||||
for (const auto& sampler : used_samplers) {
|
||||
if (sampler.IsBindless()) {
|
||||
continue;
|
||||
}
|
||||
count++;
|
||||
bound_offsets.emplace_back(sampler.GetOffset());
|
||||
}
|
||||
if (count > 1) {
|
||||
gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCommon::Shader
|
||||
|
|
|
@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet
|
|||
ConstBufferLocker& locker)
|
||||
: program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
|
||||
Decode();
|
||||
PostDecode();
|
||||
}
|
||||
|
||||
ShaderIR::~ShaderIR() = default;
|
||||
|
|
|
@ -191,6 +191,7 @@ private:
|
|||
};
|
||||
|
||||
void Decode();
|
||||
void PostDecode();
|
||||
|
||||
NodeBlock DecodeRange(u32 begin, u32 end);
|
||||
void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
|
||||
|
|
Loading…
Reference in New Issue