Merge pull request #4807 from ReinUsesLisp/glasm-robust-ssbo
gl_arb_decompiler: Implement robust buffer operations
This commit is contained in:
commit
536c51912d
@ -376,9 +376,11 @@ private:
|
|||||||
std::string temporary = AllocTemporary();
|
std::string temporary = AllocTemporary();
|
||||||
std::string address;
|
std::string address;
|
||||||
std::string_view opname;
|
std::string_view opname;
|
||||||
|
bool robust = false;
|
||||||
if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
|
if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
|
||||||
address = GlobalMemoryPointer(*gmem);
|
address = GlobalMemoryPointer(*gmem);
|
||||||
opname = "ATOM";
|
opname = "ATOM";
|
||||||
|
robust = true;
|
||||||
} else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
|
} else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
|
||||||
address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
|
address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress()));
|
||||||
opname = "ATOMS";
|
opname = "ATOMS";
|
||||||
@ -386,7 +388,15 @@ private:
|
|||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
return "{0, 0, 0, 0}";
|
return "{0, 0, 0, 0}";
|
||||||
}
|
}
|
||||||
|
if (robust) {
|
||||||
|
AddLine("IF NE.x;");
|
||||||
|
}
|
||||||
AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
|
AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address);
|
||||||
|
if (robust) {
|
||||||
|
AddLine("ELSE;");
|
||||||
|
AddLine("MOV.S {}, 0;", temporary);
|
||||||
|
AddLine("ENDIF;");
|
||||||
|
}
|
||||||
return temporary;
|
return temporary;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -980,10 +990,9 @@ void ARBDecompiler::DeclareLocalMemory() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ARBDecompiler::DeclareGlobalMemory() {
|
void ARBDecompiler::DeclareGlobalMemory() {
|
||||||
const std::size_t num_entries = ir.GetGlobalMemory().size();
|
const size_t num_entries = ir.GetGlobalMemory().size();
|
||||||
if (num_entries > 0) {
|
if (num_entries > 0) {
|
||||||
const std::size_t num_vectors = Common::AlignUp(num_entries, 2) / 2;
|
AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_entries, num_entries - 1);
|
||||||
AddLine("PARAM c[{}] = {{ program.local[0..{}] }};", num_vectors, num_vectors - 1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1363,7 +1372,8 @@ std::string ARBDecompiler::Visit(const Node& node) {
|
|||||||
|
|
||||||
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
||||||
std::string temporary = AllocTemporary();
|
std::string temporary = AllocTemporary();
|
||||||
AddLine("LOAD.U32 {}, {};", temporary, GlobalMemoryPointer(*gmem));
|
AddLine("MOV {}, 0;", temporary);
|
||||||
|
AddLine("LOAD.U32 {} (NE.x), {};", temporary, GlobalMemoryPointer(*gmem));
|
||||||
return temporary;
|
return temporary;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1441,18 +1451,21 @@ std::string ARBDecompiler::BuildAoffi(Operation operation) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
|
std::string ARBDecompiler::GlobalMemoryPointer(const GmemNode& gmem) {
|
||||||
|
// Read a bindless SSBO, return its address and set CC accordingly
|
||||||
|
// address = c[binding].xy
|
||||||
|
// length = c[binding].z
|
||||||
const u32 binding = global_memory_names.at(gmem.GetDescriptor());
|
const u32 binding = global_memory_names.at(gmem.GetDescriptor());
|
||||||
const char result_swizzle = binding % 2 == 0 ? 'x' : 'y';
|
|
||||||
|
|
||||||
const std::string pointer = AllocLongVectorTemporary();
|
const std::string pointer = AllocLongVectorTemporary();
|
||||||
std::string temporary = AllocTemporary();
|
std::string temporary = AllocTemporary();
|
||||||
|
|
||||||
const u32 local_index = binding / 2;
|
AddLine("PK64.U {}, c[{}];", pointer, binding);
|
||||||
AddLine("PK64.U {}, c[{}];", pointer, local_index);
|
|
||||||
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
|
AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem.GetRealAddress()),
|
||||||
Visit(gmem.GetBaseAddress()));
|
Visit(gmem.GetBaseAddress()));
|
||||||
AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
|
AddLine("CVT.U64.U32 {}.z, {};", pointer, temporary);
|
||||||
AddLine("ADD.U64 {}.x, {}.{}, {}.z;", pointer, pointer, result_swizzle, pointer);
|
AddLine("ADD.U64 {}.x, {}.x, {}.z;", pointer, pointer, pointer);
|
||||||
|
// Compare offset to length and set CC
|
||||||
|
AddLine("SLT.U.CC RC.x, {}, c[{}].z;", temporary, binding);
|
||||||
return fmt::format("{}.x", pointer);
|
return fmt::format("{}.x", pointer);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1552,7 +1565,9 @@ std::string ARBDecompiler::Assign(Operation operation) {
|
|||||||
ResetTemporaries();
|
ResetTemporaries();
|
||||||
return {};
|
return {};
|
||||||
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
|
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
|
||||||
|
AddLine("IF NE.x;");
|
||||||
AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
|
AddLine("STORE.U32 {}, {};", Visit(src), GlobalMemoryPointer(*gmem));
|
||||||
|
AddLine("ENDIF;");
|
||||||
ResetTemporaries();
|
ResetTemporaries();
|
||||||
return {};
|
return {};
|
||||||
} else {
|
} else {
|
||||||
|
@ -139,16 +139,12 @@ void oglEnable(GLenum cap, bool state) {
|
|||||||
(state ? glEnable : glDisable)(cap);
|
(state ? glEnable : glDisable)(cap);
|
||||||
}
|
}
|
||||||
|
|
||||||
void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t num_entries) {
|
void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) {
|
||||||
if (num_entries == 0) {
|
if (num_ssbos == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (num_entries % 2 == 1) {
|
glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos),
|
||||||
pointers[num_entries] = 0;
|
reinterpret_cast<const GLuint*>(ssbos));
|
||||||
}
|
|
||||||
const GLsizei num_vectors = static_cast<GLsizei>((num_entries + 1) / 2);
|
|
||||||
glProgramLocalParametersI4uivNV(target, 0, num_vectors,
|
|
||||||
reinterpret_cast<const GLuint*>(pointers));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
@ -900,11 +896,11 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
|
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
|
||||||
static constexpr std::array PARAMETER_LUT = {
|
static constexpr std::array PARAMETER_LUT{
|
||||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||||
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
|
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
|
||||||
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
|
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
|
||||||
|
};
|
||||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||||
const auto& stages = maxwell3d.state.shader_stages;
|
const auto& stages = maxwell3d.state.shader_stages;
|
||||||
const auto& shader_stage = stages[stage_index];
|
const auto& shader_stage = stages[stage_index];
|
||||||
@ -1007,8 +1003,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
|
|||||||
const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
|
const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
|
||||||
const auto& entries{shader->GetEntries().global_memory_entries};
|
const auto& entries{shader->GetEntries().global_memory_entries};
|
||||||
|
|
||||||
std::array<GLuint64EXT, 32> pointers;
|
std::array<BindlessSSBO, 32> ssbos;
|
||||||
ASSERT(entries.size() < pointers.size());
|
ASSERT(entries.size() < ssbos.size());
|
||||||
|
|
||||||
const bool assembly_shaders = device.UseAssemblyShaders();
|
const bool assembly_shaders = device.UseAssemblyShaders();
|
||||||
u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
|
u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
|
||||||
@ -1016,11 +1012,11 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
|
|||||||
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
|
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
|
||||||
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
|
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
|
||||||
const u32 size{gpu_memory.Read<u32>(addr + 8)};
|
const u32 size{gpu_memory.Read<u32>(addr + 8)};
|
||||||
SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
|
SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
|
||||||
++binding;
|
++binding;
|
||||||
}
|
}
|
||||||
if (assembly_shaders) {
|
if (assembly_shaders) {
|
||||||
UpdateBindlessPointers(TARGET_LUT[stage_index], pointers.data(), entries.size());
|
UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1028,29 +1024,32 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
|
|||||||
const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
|
const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
|
||||||
const auto& entries{kernel->GetEntries().global_memory_entries};
|
const auto& entries{kernel->GetEntries().global_memory_entries};
|
||||||
|
|
||||||
std::array<GLuint64EXT, 32> pointers;
|
std::array<BindlessSSBO, 32> ssbos;
|
||||||
ASSERT(entries.size() < pointers.size());
|
ASSERT(entries.size() < ssbos.size());
|
||||||
|
|
||||||
u32 binding = 0;
|
u32 binding = 0;
|
||||||
for (const auto& entry : entries) {
|
for (const auto& entry : entries) {
|
||||||
const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
|
const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
|
||||||
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
|
const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
|
||||||
const u32 size{gpu_memory.Read<u32>(addr + 8)};
|
const u32 size{gpu_memory.Read<u32>(addr + 8)};
|
||||||
SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
|
SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]);
|
||||||
++binding;
|
++binding;
|
||||||
}
|
}
|
||||||
if (device.UseAssemblyShaders()) {
|
if (device.UseAssemblyShaders()) {
|
||||||
UpdateBindlessPointers(GL_COMPUTE_PROGRAM_NV, pointers.data(), entries.size());
|
UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
|
void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
|
||||||
GPUVAddr gpu_addr, std::size_t size,
|
GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) {
|
||||||
GLuint64EXT* pointer) {
|
const size_t alignment{device.GetShaderStorageBufferAlignment()};
|
||||||
const std::size_t alignment{device.GetShaderStorageBufferAlignment()};
|
|
||||||
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
|
const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written);
|
||||||
if (device.UseAssemblyShaders()) {
|
if (device.UseAssemblyShaders()) {
|
||||||
*pointer = info.address + info.offset;
|
*ssbo = BindlessSSBO{
|
||||||
|
.address = static_cast<GLuint64EXT>(info.address + info.offset),
|
||||||
|
.length = static_cast<GLsizei>(size),
|
||||||
|
.padding = 0,
|
||||||
|
};
|
||||||
} else {
|
} else {
|
||||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
|
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset,
|
||||||
static_cast<GLsizeiptr>(size));
|
static_cast<GLsizeiptr>(size));
|
||||||
|
@ -53,6 +53,13 @@ namespace OpenGL {
|
|||||||
struct ScreenInfo;
|
struct ScreenInfo;
|
||||||
struct DrawParameters;
|
struct DrawParameters;
|
||||||
|
|
||||||
|
struct BindlessSSBO {
|
||||||
|
GLuint64EXT address;
|
||||||
|
GLsizei length;
|
||||||
|
GLsizei padding;
|
||||||
|
};
|
||||||
|
static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128);
|
||||||
|
|
||||||
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
|
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
|
||||||
public:
|
public:
|
||||||
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
|
explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
|
||||||
@ -126,7 +133,7 @@ private:
|
|||||||
|
|
||||||
/// Configures a global memory buffer.
|
/// Configures a global memory buffer.
|
||||||
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
||||||
std::size_t size, GLuint64EXT* pointer);
|
size_t size, BindlessSSBO* ssbo);
|
||||||
|
|
||||||
/// Configures the current textures to use for the draw command.
|
/// Configures the current textures to use for the draw command.
|
||||||
void SetupDrawTextures(std::size_t stage_index, Shader* shader);
|
void SetupDrawTextures(std::size_t stage_index, Shader* shader);
|
||||||
|
Loading…
Reference in New Issue
Block a user