shader: Rework global memory tracking to use breadth-first search

This commit is contained in:
ReinUsesLisp 2021-04-03 05:18:12 -03:00 committed by ameerj
parent c4aab5c40e
commit 9a342f5605
1 changed file with 81 additions and 70 deletions

View File

@ -4,9 +4,9 @@
#include <algorithm> #include <algorithm>
#include <compare> #include <compare>
#include <map>
#include <optional> #include <optional>
#include <ranges> #include <ranges>
#include <queue>
#include <boost/container/flat_set.hpp> #include <boost/container/flat_set.hpp>
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
@ -40,15 +40,19 @@ struct Bias {
u32 offset_end; u32 offset_end;
}; };
using boost::container::flat_set;
using boost::container::small_vector;
using StorageBufferSet = using StorageBufferSet =
boost::container::flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
boost::container::small_vector<StorageBufferAddr, 16>>; using StorageInstVector = small_vector<StorageInst, 24>;
using StorageInstVector = boost::container::small_vector<StorageInst, 24>;
using VisitedBlocks = boost::container::flat_set<IR::Block*, std::less<IR::Block*>,
boost::container::small_vector<IR::Block*, 4>>;
using StorageWritesSet = using StorageWritesSet =
boost::container::flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
boost::container::small_vector<StorageBufferAddr, 16>>;
/// Results accumulated while collecting storage buffers from global memory instructions
struct StorageInfo {
/// Tracked storage buffer addresses; iterated to emit the storage buffer descriptors
StorageBufferSet set;
/// Global memory instructions paired with the storage buffer tracked for each of them
StorageInstVector to_replace;
/// Storage buffer addresses used by at least one global memory write instruction
StorageWritesSet writes;
};
/// Returns true when the instruction is a global memory instruction /// Returns true when the instruction is a global memory instruction
bool IsGlobalMemory(const IR::Inst& inst) { bool IsGlobalMemory(const IR::Inst& inst) {
@ -215,15 +219,11 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
}; };
} }
/// Recursively tries to track the storage buffer address used by a global memory instruction /// Tries to get the storage buffer out of a constant buffer read instruction
std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value, const Bias* bias, std::optional<StorageBufferAddr> TryGetStorageBuffer(const IR::Inst* inst, const Bias* bias) {
VisitedBlocks& visited) { if (inst->Opcode() != IR::Opcode::GetCbufU32) {
if (value.IsImmediate()) {
// Immediates can't be a storage buffer
return std::nullopt; return std::nullopt;
} }
const IR::Inst* const inst{value.InstRecursive()};
if (inst->Opcode() == IR::Opcode::GetCbufU32) {
const IR::Value index{inst->Arg(0)}; const IR::Value index{inst->Arg(0)};
const IR::Value offset{inst->Arg(1)}; const IR::Value offset{inst->Arg(1)};
if (!index.IsImmediate()) { if (!index.IsImmediate()) {
@ -243,32 +243,48 @@ std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value,
return std::nullopt; return std::nullopt;
} }
return storage_buffer; return storage_buffer;
}
/// Tries to track the storage buffer address used by a global memory instruction
std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
if (value.IsImmediate()) {
// Nothing to do with immediates
return std::nullopt;
} }
// Reversed loops are more likely to find the right result // Breadth-first search visiting the right most arguments first
// Small vector capacity has been determined from shaders in Super Smash Bros. Ultimate
small_vector<const IR::Inst*, 2> visited;
std::queue<const IR::Inst*> queue;
queue.push(value.InstRecursive());
while (!queue.empty()) {
// Pop one instruction from the queue
const IR::Inst* const inst{queue.front()};
queue.pop();
if (const std::optional<StorageBufferAddr> result = TryGetStorageBuffer(inst, bias)) {
// This is the instruction we were looking for
return result;
}
// Visit the rightmost arguments first
for (size_t arg = inst->NumArgs(); arg--;) { for (size_t arg = inst->NumArgs(); arg--;) {
IR::Block* inst_block{block}; const IR::Value arg_value{inst->Arg(arg)};
if (inst->Opcode() == IR::Opcode::Phi) { if (arg_value.IsImmediate()) {
// If we are going through a phi node, mark the current block as visited
visited.insert(block);
// and skip already visited blocks to avoid looping forever
IR::Block* const phi_block{inst->PhiBlock(arg)};
if (visited.contains(phi_block)) {
// Already visited, skip
continue; continue;
} }
inst_block = phi_block; // Queue instruction if it hasn't been visited
} const IR::Inst* const arg_inst{arg_value.InstRecursive()};
const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), bias, visited)}; if (std::ranges::find(visited, arg_inst) == visited.end()) {
if (storage_buffer) { visited.push_back(arg_inst);
return *storage_buffer; queue.push(arg_inst);
} }
} }
}
// SSA tree has been traversed and the origin hasn't been found
return std::nullopt; return std::nullopt;
} }
/// Collects the storage buffer used by a global memory instruction and the instruction itself /// Collects the storage buffer used by a global memory instruction and the instruction itself
void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& storage_buffer_set, void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) {
StorageInstVector& to_replace, StorageWritesSet& writes_set) {
// NVN puts storage buffers in a specific range, we have to bias towards these addresses to // NVN puts storage buffers in a specific range, we have to bias towards these addresses to
// avoid getting false positives // avoid getting false positives
static constexpr Bias nvn_bias{ static constexpr Bias nvn_bias{
@ -284,24 +300,23 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageBufferSet& s
} }
// First try to find storage buffers in the NVN address // First try to find storage buffers in the NVN address
const IR::U32 low_addr{low_addr_info->value}; const IR::U32 low_addr{low_addr_info->value};
VisitedBlocks visited_blocks; std::optional storage_buffer{Track(low_addr, &nvn_bias)};
std::optional storage_buffer{Track(&block, low_addr, &nvn_bias, visited_blocks)};
if (!storage_buffer) { if (!storage_buffer) {
// If it fails, track without a bias // If it fails, track without a bias
visited_blocks.clear(); storage_buffer = Track(low_addr, nullptr);
storage_buffer = Track(&block, low_addr, nullptr, visited_blocks);
if (!storage_buffer) { if (!storage_buffer) {
// If that also failed, drop the global memory usage // If that also failed, drop the global memory usage
// LOG_ERROR
DiscardGlobalMemory(block, inst); DiscardGlobalMemory(block, inst);
return; return;
} }
} }
// Collect storage buffer and the instruction // Collect storage buffer and the instruction
if (IsGlobalMemoryWrite(inst)) { if (IsGlobalMemoryWrite(inst)) {
writes_set.insert(*storage_buffer); info.writes.insert(*storage_buffer);
} }
storage_buffer_set.insert(*storage_buffer); info.set.insert(*storage_buffer);
to_replace.push_back(StorageInst{ info.to_replace.push_back(StorageInst{
.storage_buffer{*storage_buffer}, .storage_buffer{*storage_buffer},
.inst{&inst}, .inst{&inst},
.block{&block}, .block{&block},
@ -371,33 +386,29 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
} // Anonymous namespace } // Anonymous namespace
void GlobalMemoryToStorageBufferPass(IR::Program& program) { void GlobalMemoryToStorageBufferPass(IR::Program& program) {
StorageBufferSet storage_buffers; StorageInfo info;
StorageInstVector to_replace;
StorageWritesSet writes_set;
for (IR::Block* const block : program.post_order_blocks) { for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) { for (IR::Inst& inst : block->Instructions()) {
if (!IsGlobalMemory(inst)) { if (!IsGlobalMemory(inst)) {
continue; continue;
} }
CollectStorageBuffers(*block, inst, storage_buffers, to_replace, writes_set); CollectStorageBuffers(*block, inst, info);
} }
} }
Info& info{program.info};
u32 storage_index{}; u32 storage_index{};
for (const StorageBufferAddr& storage_buffer : storage_buffers) { for (const StorageBufferAddr& storage_buffer : info.set) {
info.storage_buffers_descriptors.push_back({ program.info.storage_buffers_descriptors.push_back({
.cbuf_index{storage_buffer.index}, .cbuf_index{storage_buffer.index},
.cbuf_offset{storage_buffer.offset}, .cbuf_offset{storage_buffer.offset},
.count{1}, .count{1},
.is_written{writes_set.contains(storage_buffer)}, .is_written{info.writes.contains(storage_buffer)},
}); });
++storage_index; ++storage_index;
} }
for (const StorageInst& storage_inst : to_replace) { for (const StorageInst& storage_inst : info.to_replace) {
const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};
const auto it{storage_buffers.find(storage_inst.storage_buffer)}; const auto it{info.set.find(storage_inst.storage_buffer)};
const IR::U32 index{IR::Value{static_cast<u32>(storage_buffers.index_of(it))}}; const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
IR::Block* const block{storage_inst.block}; IR::Block* const block{storage_inst.block};
IR::Inst* const inst{storage_inst.inst}; IR::Inst* const inst{storage_inst.inst};
const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};