MacroHLE: Add HLE replacement for base vertex and base instance.
This commit is contained in:
parent
93ac5a6a6d
commit
aad0cbf024
|
@ -339,6 +339,10 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
|
||||||
const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
|
const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
|
||||||
return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
|
return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
|
||||||
}
|
}
|
||||||
|
case IR::Attribute::BaseInstance:
|
||||||
|
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance));
|
||||||
|
case IR::Attribute::BaseVertex:
|
||||||
|
return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_vertex));
|
||||||
case IR::Attribute::FrontFace:
|
case IR::Attribute::FrontFace:
|
||||||
return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
|
return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
|
||||||
ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())),
|
ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())),
|
||||||
|
@ -380,6 +384,10 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
|
||||||
const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
|
const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)};
|
||||||
return ctx.OpISub(ctx.U32[1], index, base);
|
return ctx.OpISub(ctx.U32[1], index, base);
|
||||||
}
|
}
|
||||||
|
case IR::Attribute::BaseInstance:
|
||||||
|
return ctx.OpLoad(ctx.U32[1], ctx.base_instance);
|
||||||
|
case IR::Attribute::BaseVertex:
|
||||||
|
return ctx.OpLoad(ctx.U32[1], ctx.base_vertex);
|
||||||
default:
|
default:
|
||||||
throw NotImplementedException("Read U32 attribute {}", attr);
|
throw NotImplementedException("Read U32 attribute {}", attr);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1379,18 +1379,28 @@ void EmitContext::DefineInputs(const IR::Program& program) {
|
||||||
if (loads[IR::Attribute::InstanceId]) {
|
if (loads[IR::Attribute::InstanceId]) {
|
||||||
if (profile.support_vertex_instance_id) {
|
if (profile.support_vertex_instance_id) {
|
||||||
instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId);
|
instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId);
|
||||||
|
if (loads[IR::Attribute::BaseInstance]) {
|
||||||
|
base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);
|
instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex);
|
||||||
base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
|
base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
|
||||||
}
|
}
|
||||||
|
} else if (loads[IR::Attribute::BaseInstance]) {
|
||||||
|
base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance);
|
||||||
}
|
}
|
||||||
if (loads[IR::Attribute::VertexId]) {
|
if (loads[IR::Attribute::VertexId]) {
|
||||||
if (profile.support_vertex_instance_id) {
|
if (profile.support_vertex_instance_id) {
|
||||||
vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId);
|
vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId);
|
||||||
|
if (loads[IR::Attribute::BaseVertex]) {
|
||||||
|
base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex);
|
vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex);
|
||||||
base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
|
base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
|
||||||
}
|
}
|
||||||
|
} else if (loads[IR::Attribute::BaseVertex]) {
|
||||||
|
base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex);
|
||||||
}
|
}
|
||||||
if (loads[IR::Attribute::FrontFace]) {
|
if (loads[IR::Attribute::FrontFace]) {
|
||||||
front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing);
|
front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing);
|
||||||
|
|
|
@ -34,6 +34,11 @@ public:
|
||||||
|
|
||||||
[[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
|
[[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0;
|
||||||
|
|
||||||
|
[[nodiscard]] virtual bool HasHLEMacroState() const = 0;
|
||||||
|
|
||||||
|
[[nodiscard]] virtual std::optional<ReplaceConstant> GetReplaceConstBuffer(
|
||||||
|
u32 bank, u32 offset) = 0;
|
||||||
|
|
||||||
virtual void Dump(u64 hash) = 0;
|
virtual void Dump(u64 hash) = 0;
|
||||||
|
|
||||||
[[nodiscard]] const ProgramHeader& SPH() const noexcept {
|
[[nodiscard]] const ProgramHeader& SPH() const noexcept {
|
||||||
|
|
|
@ -446,6 +446,10 @@ std::string NameOf(Attribute attribute) {
|
||||||
return "ViewportMask";
|
return "ViewportMask";
|
||||||
case Attribute::FrontFace:
|
case Attribute::FrontFace:
|
||||||
return "FrontFace";
|
return "FrontFace";
|
||||||
|
case Attribute::BaseInstance:
|
||||||
|
return "BaseInstance";
|
||||||
|
case Attribute::BaseVertex:
|
||||||
|
return "BaseVertex";
|
||||||
}
|
}
|
||||||
return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
|
return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
|
||||||
}
|
}
|
||||||
|
|
|
@ -219,6 +219,10 @@ enum class Attribute : u64 {
|
||||||
FixedFncTexture9Q = 231,
|
FixedFncTexture9Q = 231,
|
||||||
ViewportMask = 232,
|
ViewportMask = 232,
|
||||||
FrontFace = 255,
|
FrontFace = 255,
|
||||||
|
|
||||||
|
// Implementation attributes
|
||||||
|
BaseInstance = 256,
|
||||||
|
BaseVertex = 257,
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr size_t NUM_GENERICS = 32;
|
constexpr size_t NUM_GENERICS = 32;
|
||||||
|
|
|
@ -294,6 +294,14 @@ F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) {
|
||||||
return Inst<F32>(Opcode::GetAttribute, attribute, vertex);
|
return Inst<F32>(Opcode::GetAttribute, attribute, vertex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
U32 IREmitter::GetAttributeU32(IR::Attribute attribute) {
|
||||||
|
return GetAttributeU32(attribute, Imm32(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, const U32& vertex) {
|
||||||
|
return Inst<U32>(Opcode::GetAttributeU32, attribute, vertex);
|
||||||
|
}
|
||||||
|
|
||||||
void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) {
|
void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) {
|
||||||
Inst(Opcode::SetAttribute, attribute, value, vertex);
|
Inst(Opcode::SetAttribute, attribute, value, vertex);
|
||||||
}
|
}
|
||||||
|
|
|
@ -74,6 +74,8 @@ public:
|
||||||
|
|
||||||
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
|
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
|
||||||
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex);
|
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex);
|
||||||
|
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute);
|
||||||
|
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, const U32& vertex);
|
||||||
void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex);
|
void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex);
|
||||||
|
|
||||||
[[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address);
|
[[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address);
|
||||||
|
|
|
@ -219,7 +219,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
||||||
}
|
}
|
||||||
Optimization::SsaRewritePass(program);
|
Optimization::SsaRewritePass(program);
|
||||||
|
|
||||||
Optimization::ConstantPropagationPass(program);
|
Optimization::ConstantPropagationPass(env, program);
|
||||||
|
|
||||||
Optimization::PositionPass(env, program);
|
Optimization::PositionPass(env, program);
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
#include "common/bit_cast.h"
|
#include "common/bit_cast.h"
|
||||||
|
#include "shader_recompiler/environment.h"
|
||||||
#include "shader_recompiler/exception.h"
|
#include "shader_recompiler/exception.h"
|
||||||
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||||
#include "shader_recompiler/frontend/ir/value.h"
|
#include "shader_recompiler/frontend/ir/value.h"
|
||||||
|
@ -515,6 +516,8 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
|
||||||
case IR::Attribute::PrimitiveId:
|
case IR::Attribute::PrimitiveId:
|
||||||
case IR::Attribute::InstanceId:
|
case IR::Attribute::InstanceId:
|
||||||
case IR::Attribute::VertexId:
|
case IR::Attribute::VertexId:
|
||||||
|
case IR::Attribute::BaseVertex:
|
||||||
|
case IR::Attribute::BaseInstance:
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
return;
|
return;
|
||||||
|
@ -644,7 +647,37 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
|
||||||
|
const IR::Value bank{inst.Arg(0)};
|
||||||
|
const IR::Value offset{inst.Arg(1)};
|
||||||
|
if (!bank.IsImmediate() || !offset.IsImmediate()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const auto bank_value = bank.U32();
|
||||||
|
const auto offset_value = offset.U32();
|
||||||
|
auto replacement = env.GetReplaceConstBuffer(bank_value, offset_value);
|
||||||
|
if (!replacement) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const auto new_attribute = [replacement]() {
|
||||||
|
switch (*replacement) {
|
||||||
|
case ReplaceConstant::BaseInstance:
|
||||||
|
return IR::Attribute::BaseInstance;
|
||||||
|
case ReplaceConstant::BaseVertex:
|
||||||
|
return IR::Attribute::BaseVertex;
|
||||||
|
default:
|
||||||
|
throw NotImplementedException("Not implemented replacement variable {}", *replacement);
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||||
|
if (inst.GetOpcode() == IR::Opcode::GetCbufU32) {
|
||||||
|
inst.ReplaceUsesWith(ir.GetAttributeU32(new_attribute));
|
||||||
|
} else {
|
||||||
|
inst.ReplaceUsesWith(ir.GetAttribute(new_attribute));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
|
||||||
switch (inst.GetOpcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::GetRegister:
|
case IR::Opcode::GetRegister:
|
||||||
return FoldGetRegister(inst);
|
return FoldGetRegister(inst);
|
||||||
|
@ -789,18 +822,24 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
||||||
IR::Opcode::CompositeInsertF16x4);
|
IR::Opcode::CompositeInsertF16x4);
|
||||||
case IR::Opcode::FSwizzleAdd:
|
case IR::Opcode::FSwizzleAdd:
|
||||||
return FoldFSwizzleAdd(block, inst);
|
return FoldFSwizzleAdd(block, inst);
|
||||||
|
case IR::Opcode::GetCbufF32:
|
||||||
|
case IR::Opcode::GetCbufU32:
|
||||||
|
if (env.HasHLEMacroState()) {
|
||||||
|
return FoldConstBuffer(env, block, inst);
|
||||||
|
}
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
void ConstantPropagationPass(IR::Program& program) {
|
void ConstantPropagationPass(Environment& env, IR::Program& program) {
|
||||||
const auto end{program.post_order_blocks.rend()};
|
const auto end{program.post_order_blocks.rend()};
|
||||||
for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
|
for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) {
|
||||||
IR::Block* const block{*it};
|
IR::Block* const block{*it};
|
||||||
for (IR::Inst& inst : block->Instructions()) {
|
for (IR::Inst& inst : block->Instructions()) {
|
||||||
ConstantPropagation(*block, inst);
|
ConstantPropagation(env, *block, inst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,7 +13,7 @@ struct HostTranslateInfo;
|
||||||
namespace Shader::Optimization {
|
namespace Shader::Optimization {
|
||||||
|
|
||||||
void CollectShaderInfoPass(Environment& env, IR::Program& program);
|
void CollectShaderInfoPass(Environment& env, IR::Program& program);
|
||||||
void ConstantPropagationPass(IR::Program& program);
|
void ConstantPropagationPass(Environment& env, IR::Program& program);
|
||||||
void DeadCodeEliminationPass(IR::Program& program);
|
void DeadCodeEliminationPass(IR::Program& program);
|
||||||
void GlobalMemoryToStorageBufferPass(IR::Program& program);
|
void GlobalMemoryToStorageBufferPass(IR::Program& program);
|
||||||
void IdentityRemovalPass(IR::Program& program);
|
void IdentityRemovalPass(IR::Program& program);
|
||||||
|
|
|
@ -16,6 +16,11 @@
|
||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
|
enum class ReplaceConstant : u32 {
|
||||||
|
BaseInstance,
|
||||||
|
BaseVertex,
|
||||||
|
};
|
||||||
|
|
||||||
enum class TextureType : u32 {
|
enum class TextureType : u32 {
|
||||||
Color1D,
|
Color1D,
|
||||||
ColorArray1D,
|
ColorArray1D,
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
struct VaryingState {
|
struct VaryingState {
|
||||||
std::bitset<256> mask{};
|
std::bitset<512> mask{};
|
||||||
|
|
||||||
void Set(IR::Attribute attribute, bool state = true) {
|
void Set(IR::Attribute attribute, bool state = true) {
|
||||||
mask[static_cast<size_t>(attribute)] = state;
|
mask[static_cast<size_t>(attribute)] = state;
|
||||||
|
|
|
@ -182,7 +182,13 @@ u32 Maxwell3D::GetMaxCurrentVertices() {
|
||||||
size_t Maxwell3D::EstimateIndexBufferSize() {
|
size_t Maxwell3D::EstimateIndexBufferSize() {
|
||||||
GPUVAddr start_address = regs.index_buffer.StartAddress();
|
GPUVAddr start_address = regs.index_buffer.StartAddress();
|
||||||
GPUVAddr end_address = regs.index_buffer.EndAddress();
|
GPUVAddr end_address = regs.index_buffer.EndAddress();
|
||||||
return std::min<size_t>(memory_manager.GetMemoryLayoutSize(start_address),
|
constexpr std::array<size_t, 4> max_sizes = {
|
||||||
|
std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(),
|
||||||
|
std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
|
||||||
|
const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
|
||||||
|
return std::min<size_t>(
|
||||||
|
memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) /
|
||||||
|
byte_size,
|
||||||
static_cast<size_t>(end_address - start_address));
|
static_cast<size_t>(end_address - start_address));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -572,4 +578,9 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
|
||||||
return regs.reg_array[method];
|
return regs.reg_array[method];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Maxwell3D::setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name) {
|
||||||
|
const u64 key = (static_cast<u64>(bank) << 32) | offset;
|
||||||
|
replace_table.emplace(key, name);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Tegra::Engines
|
} // namespace Tegra::Engines
|
||||||
|
|
|
@ -3020,6 +3020,23 @@ public:
|
||||||
/// Store temporary hw register values, used by some calls to restore state after a operation
|
/// Store temporary hw register values, used by some calls to restore state after a operation
|
||||||
Regs shadow_state;
|
Regs shadow_state;
|
||||||
|
|
||||||
|
// None Engine
|
||||||
|
enum class EngineHint : u32 {
|
||||||
|
None = 0x0,
|
||||||
|
OnHLEMacro = 0x1,
|
||||||
|
};
|
||||||
|
|
||||||
|
EngineHint engine_state{EngineHint::None};
|
||||||
|
|
||||||
|
enum class HLEReplaceName : u32 {
|
||||||
|
BaseVertex = 0x0,
|
||||||
|
BaseInstance = 0x1,
|
||||||
|
};
|
||||||
|
|
||||||
|
void setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name);
|
||||||
|
|
||||||
|
std::unordered_map<u64, HLEReplaceName> replace_table;
|
||||||
|
|
||||||
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
|
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
|
||||||
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
|
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
|
||||||
|
|
||||||
|
|
|
@ -14,26 +14,29 @@
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
|
|
||||||
|
using Maxwell = Engines::Maxwell3D;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
bool IsTopologySafe(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
|
bool IsTopologySafe(Maxwell::Regs::PrimitiveTopology topology) {
|
||||||
switch (topology) {
|
switch (topology) {
|
||||||
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
|
case Maxwell::Regs::PrimitiveTopology::Points:
|
||||||
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
|
case Maxwell::Regs::PrimitiveTopology::Lines:
|
||||||
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineLoop:
|
case Maxwell::Regs::PrimitiveTopology::LineLoop:
|
||||||
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
|
case Maxwell::Regs::PrimitiveTopology::LineStrip:
|
||||||
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
|
case Maxwell::Regs::PrimitiveTopology::Triangles:
|
||||||
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
|
case Maxwell::Regs::PrimitiveTopology::TriangleStrip:
|
||||||
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
|
case Maxwell::Regs::PrimitiveTopology::TriangleFan:
|
||||||
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
|
case Maxwell::Regs::PrimitiveTopology::LinesAdjacency:
|
||||||
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
|
case Maxwell::Regs::PrimitiveTopology::LineStripAdjacency:
|
||||||
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
|
case Maxwell::Regs::PrimitiveTopology::TrianglesAdjacency:
|
||||||
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
|
case Maxwell::Regs::PrimitiveTopology::TriangleStripAdjacency:
|
||||||
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Patches:
|
case Maxwell::Regs::PrimitiveTopology::Patches:
|
||||||
return true;
|
return true;
|
||||||
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Quads:
|
case Maxwell::Regs::PrimitiveTopology::Quads:
|
||||||
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::QuadStrip:
|
case Maxwell::Regs::PrimitiveTopology::QuadStrip:
|
||||||
case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Polygon:
|
case Maxwell::Regs::PrimitiveTopology::Polygon:
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -82,8 +85,7 @@ public:
|
||||||
: HLEMacroImpl(maxwell3d_), extended(extended_) {}
|
: HLEMacroImpl(maxwell3d_), extended(extended_) {}
|
||||||
|
|
||||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||||
auto topology =
|
auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);
|
||||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
|
|
||||||
if (!IsTopologySafe(topology)) {
|
if (!IsTopologySafe(topology)) {
|
||||||
Fallback(parameters);
|
Fallback(parameters);
|
||||||
return;
|
return;
|
||||||
|
@ -99,18 +101,16 @@ public:
|
||||||
params.stride = 0;
|
params.stride = 0;
|
||||||
|
|
||||||
if (extended) {
|
if (extended) {
|
||||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
|
||||||
maxwell3d.CallMethod(0x8e4, parameters[4], true);
|
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance);
|
||||||
}
|
}
|
||||||
|
|
||||||
maxwell3d.draw_manager->DrawArrayIndirect(topology);
|
maxwell3d.draw_manager->DrawArrayIndirect(topology);
|
||||||
|
|
||||||
if (extended) {
|
if (extended) {
|
||||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
maxwell3d.engine_state = Maxwell::EngineHint::None;
|
||||||
maxwell3d.CallMethod(0x8e4, 0, true);
|
maxwell3d.replace_table.clear();
|
||||||
}
|
}
|
||||||
maxwell3d.regs.vertex_buffer.first = 0;
|
|
||||||
maxwell3d.regs.vertex_buffer.count = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -134,13 +134,18 @@ private:
|
||||||
|
|
||||||
const u32 base_instance = parameters[4];
|
const u32 base_instance = parameters[4];
|
||||||
if (extended) {
|
if (extended) {
|
||||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
|
||||||
maxwell3d.CallMethod(0x8e4, base_instance, true);
|
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance);
|
||||||
}
|
}
|
||||||
|
|
||||||
maxwell3d.draw_manager->DrawArray(
|
maxwell3d.draw_manager->DrawArray(
|
||||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
|
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
|
||||||
vertex_first, vertex_count, base_instance, instance_count);
|
vertex_first, vertex_count, base_instance, instance_count);
|
||||||
|
|
||||||
|
if (extended) {
|
||||||
|
maxwell3d.engine_state = Maxwell::EngineHint::None;
|
||||||
|
maxwell3d.replace_table.clear();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool extended;
|
bool extended;
|
||||||
|
@ -151,8 +156,7 @@ public:
|
||||||
explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
|
||||||
|
|
||||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||||
auto topology =
|
auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);
|
||||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
|
|
||||||
if (!IsTopologySafe(topology)) {
|
if (!IsTopologySafe(topology)) {
|
||||||
Fallback(parameters);
|
Fallback(parameters);
|
||||||
return;
|
return;
|
||||||
|
@ -164,16 +168,12 @@ public:
|
||||||
minimum_limit = std::max(parameters[3], minimum_limit);
|
minimum_limit = std::max(parameters[3], minimum_limit);
|
||||||
}
|
}
|
||||||
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
|
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
|
||||||
const u32 base_size = std::max(minimum_limit, estimate);
|
const u32 base_size = std::max<u32>(minimum_limit, estimate);
|
||||||
const u32 element_base = parameters[4];
|
|
||||||
const u32 base_instance = parameters[5];
|
|
||||||
maxwell3d.regs.index_buffer.first = 0;
|
|
||||||
maxwell3d.regs.index_buffer.count = base_size; // Use a fixed size, just for mapping
|
|
||||||
maxwell3d.regs.draw.topology.Assign(topology);
|
maxwell3d.regs.draw.topology.Assign(topology);
|
||||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
|
||||||
maxwell3d.CallMethod(0x8e4, element_base, true);
|
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
|
||||||
maxwell3d.CallMethod(0x8e5, base_instance, true);
|
maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
|
||||||
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
||||||
params.is_indexed = true;
|
params.is_indexed = true;
|
||||||
params.include_count = false;
|
params.include_count = false;
|
||||||
|
@ -184,9 +184,8 @@ public:
|
||||||
params.stride = 0;
|
params.stride = 0;
|
||||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||||
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size);
|
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size);
|
||||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
maxwell3d.engine_state = Maxwell::EngineHint::None;
|
||||||
maxwell3d.CallMethod(0x8e4, 0x0, true);
|
maxwell3d.replace_table.clear();
|
||||||
maxwell3d.CallMethod(0x8e5, 0x0, true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -197,18 +196,17 @@ private:
|
||||||
const u32 base_instance = parameters[5];
|
const u32 base_instance = parameters[5];
|
||||||
maxwell3d.regs.vertex_id_base = element_base;
|
maxwell3d.regs.vertex_id_base = element_base;
|
||||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
|
||||||
maxwell3d.CallMethod(0x8e4, element_base, true);
|
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
|
||||||
maxwell3d.CallMethod(0x8e5, base_instance, true);
|
maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
|
||||||
|
|
||||||
maxwell3d.draw_manager->DrawIndex(
|
maxwell3d.draw_manager->DrawIndex(
|
||||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
|
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
|
||||||
parameters[3], parameters[1], element_base, base_instance, instance_count);
|
parameters[3], parameters[1], element_base, base_instance, instance_count);
|
||||||
|
|
||||||
maxwell3d.regs.vertex_id_base = 0x0;
|
maxwell3d.regs.vertex_id_base = 0x0;
|
||||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
maxwell3d.engine_state = Maxwell::EngineHint::None;
|
||||||
maxwell3d.CallMethod(0x8e4, 0x0, true);
|
maxwell3d.replace_table.clear();
|
||||||
maxwell3d.CallMethod(0x8e5, 0x0, true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 minimum_limit{1 << 18};
|
u32 minimum_limit{1 << 18};
|
||||||
|
@ -238,8 +236,7 @@ public:
|
||||||
: HLEMacroImpl(maxwell3d_) {}
|
: HLEMacroImpl(maxwell3d_) {}
|
||||||
|
|
||||||
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
|
||||||
const auto topology =
|
const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[2]);
|
||||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
|
|
||||||
if (!IsTopologySafe(topology)) {
|
if (!IsTopologySafe(topology)) {
|
||||||
Fallback(parameters);
|
Fallback(parameters);
|
||||||
return;
|
return;
|
||||||
|
@ -277,9 +274,6 @@ public:
|
||||||
}
|
}
|
||||||
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
|
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
|
||||||
const u32 base_size = std::max(minimum_limit, estimate);
|
const u32 base_size = std::max(minimum_limit, estimate);
|
||||||
|
|
||||||
maxwell3d.regs.index_buffer.first = 0;
|
|
||||||
maxwell3d.regs.index_buffer.count = std::max(highest_limit, base_size);
|
|
||||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||||
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
auto& params = maxwell3d.draw_manager->GetIndirectParams();
|
||||||
params.is_indexed = true;
|
params.is_indexed = true;
|
||||||
|
@ -290,7 +284,12 @@ public:
|
||||||
params.max_draw_counts = draw_count;
|
params.max_draw_counts = draw_count;
|
||||||
params.stride = stride;
|
params.stride = stride;
|
||||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||||
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit);
|
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
|
||||||
|
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
|
||||||
|
maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
|
||||||
|
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size);
|
||||||
|
maxwell3d.engine_state = Maxwell::EngineHint::None;
|
||||||
|
maxwell3d.replace_table.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -299,9 +298,8 @@ private:
|
||||||
// Clean everything.
|
// Clean everything.
|
||||||
// Clean everything.
|
// Clean everything.
|
||||||
maxwell3d.regs.vertex_id_base = 0x0;
|
maxwell3d.regs.vertex_id_base = 0x0;
|
||||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
maxwell3d.engine_state = Maxwell::EngineHint::None;
|
||||||
maxwell3d.CallMethod(0x8e4, 0x0, true);
|
maxwell3d.replace_table.clear();
|
||||||
maxwell3d.CallMethod(0x8e5, 0x0, true);
|
|
||||||
});
|
});
|
||||||
maxwell3d.RefreshParameters();
|
maxwell3d.RefreshParameters();
|
||||||
const u32 start_indirect = parameters[0];
|
const u32 start_indirect = parameters[0];
|
||||||
|
@ -310,8 +308,7 @@ private:
|
||||||
// Nothing to do.
|
// Nothing to do.
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const auto topology =
|
const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[2]);
|
||||||
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
|
|
||||||
maxwell3d.regs.draw.topology.Assign(topology);
|
maxwell3d.regs.draw.topology.Assign(topology);
|
||||||
const u32 padding = parameters[3];
|
const u32 padding = parameters[3];
|
||||||
const std::size_t max_draws = parameters[4];
|
const std::size_t max_draws = parameters[4];
|
||||||
|
@ -326,9 +323,9 @@ private:
|
||||||
const u32 base_vertex = parameters[base + 3];
|
const u32 base_vertex = parameters[base + 3];
|
||||||
const u32 base_instance = parameters[base + 4];
|
const u32 base_instance = parameters[base + 4];
|
||||||
maxwell3d.regs.vertex_id_base = base_vertex;
|
maxwell3d.regs.vertex_id_base = base_vertex;
|
||||||
maxwell3d.CallMethod(0x8e3, 0x640, true);
|
maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
|
||||||
maxwell3d.CallMethod(0x8e4, base_vertex, true);
|
maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
|
||||||
maxwell3d.CallMethod(0x8e5, base_instance, true);
|
maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
|
||||||
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
|
||||||
maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base],
|
maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base],
|
||||||
base_vertex, base_instance, parameters[base + 1]);
|
base_vertex, base_instance, parameters[base + 1]);
|
||||||
|
|
|
@ -577,7 +577,7 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const {
|
||||||
return range_so_far;
|
return range_so_far;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const {
|
size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const {
|
||||||
PTEKind base_kind = GetPageKind(gpu_addr);
|
PTEKind base_kind = GetPageKind(gpu_addr);
|
||||||
if (base_kind == PTEKind::INVALID) {
|
if (base_kind == PTEKind::INVALID) {
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -596,6 +596,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
range_so_far += copy_amount;
|
range_so_far += copy_amount;
|
||||||
|
if (range_so_far >= max_size) {
|
||||||
|
result = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
|
auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
|
||||||
|
@ -605,6 +609,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
range_so_far += copy_amount;
|
range_so_far += copy_amount;
|
||||||
|
if (range_so_far >= max_size) {
|
||||||
|
result = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
|
auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
|
||||||
|
|
|
@ -118,7 +118,8 @@ public:
|
||||||
|
|
||||||
PTEKind GetPageKind(GPUVAddr gpu_addr) const;
|
PTEKind GetPageKind(GPUVAddr gpu_addr) const;
|
||||||
|
|
||||||
size_t GetMemoryLayoutSize(GPUVAddr gpu_addr) const;
|
size_t GetMemoryLayoutSize(GPUVAddr gpu_addr,
|
||||||
|
size_t max_size = std::numeric_limits<size_t>::max()) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
|
template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
|
||||||
|
|
|
@ -97,6 +97,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
|
||||||
smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0);
|
smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0);
|
||||||
alpha_to_coverage_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_coverage != 0 ? 1 : 0);
|
alpha_to_coverage_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_coverage != 0 ? 1 : 0);
|
||||||
alpha_to_one_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_one != 0 ? 1 : 0);
|
alpha_to_one_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_one != 0 ? 1 : 0);
|
||||||
|
app_stage.Assign(maxwell3d.engine_state);
|
||||||
|
|
||||||
for (size_t i = 0; i < regs.rt.size(); ++i) {
|
for (size_t i = 0; i < regs.rt.size(); ++i) {
|
||||||
color_formats[i] = static_cast<u8>(regs.rt[i].format);
|
color_formats[i] = static_cast<u8>(regs.rt[i].format);
|
||||||
|
|
|
@ -197,6 +197,7 @@ struct FixedPipelineState {
|
||||||
BitField<14, 1, u32> smooth_lines;
|
BitField<14, 1, u32> smooth_lines;
|
||||||
BitField<15, 1, u32> alpha_to_coverage_enabled;
|
BitField<15, 1, u32> alpha_to_coverage_enabled;
|
||||||
BitField<16, 1, u32> alpha_to_one_enabled;
|
BitField<16, 1, u32> alpha_to_one_enabled;
|
||||||
|
BitField<17, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage;
|
||||||
};
|
};
|
||||||
std::array<u8, Maxwell::NumRenderTargets> color_formats;
|
std::array<u8, Maxwell::NumRenderTargets> color_formats;
|
||||||
|
|
||||||
|
|
|
@ -54,7 +54,7 @@ using VideoCommon::FileEnvironment;
|
||||||
using VideoCommon::GenericEnvironment;
|
using VideoCommon::GenericEnvironment;
|
||||||
using VideoCommon::GraphicsEnvironment;
|
using VideoCommon::GraphicsEnvironment;
|
||||||
|
|
||||||
constexpr u32 CACHE_VERSION = 8;
|
constexpr u32 CACHE_VERSION = 9;
|
||||||
|
|
||||||
template <typename Container>
|
template <typename Container>
|
||||||
auto MakeSpan(Container& container) {
|
auto MakeSpan(Container& container) {
|
||||||
|
|
|
@ -202,12 +202,15 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
|
||||||
const u64 num_texture_types{static_cast<u64>(texture_types.size())};
|
const u64 num_texture_types{static_cast<u64>(texture_types.size())};
|
||||||
const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())};
|
const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())};
|
||||||
const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
|
const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
|
||||||
|
const u64 num_cbuf_replacement_values{static_cast<u64>(cbuf_replacements.size())};
|
||||||
|
|
||||||
file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
|
file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
|
||||||
.write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
|
.write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
|
||||||
.write(reinterpret_cast<const char*>(&num_texture_pixel_formats),
|
.write(reinterpret_cast<const char*>(&num_texture_pixel_formats),
|
||||||
sizeof(num_texture_pixel_formats))
|
sizeof(num_texture_pixel_formats))
|
||||||
.write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
|
.write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
|
||||||
|
.write(reinterpret_cast<const char*>(&num_cbuf_replacement_values),
|
||||||
|
sizeof(num_cbuf_replacement_values))
|
||||||
.write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
|
.write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
|
||||||
.write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
|
.write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
|
||||||
.write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
|
.write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
|
||||||
|
@ -229,6 +232,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
|
||||||
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
|
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
|
||||||
.write(reinterpret_cast<const char*>(&type), sizeof(type));
|
.write(reinterpret_cast<const char*>(&type), sizeof(type));
|
||||||
}
|
}
|
||||||
|
for (const auto& [key, type] : cbuf_replacements) {
|
||||||
|
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
|
||||||
|
.write(reinterpret_cast<const char*>(&type), sizeof(type));
|
||||||
|
}
|
||||||
if (stage == Shader::Stage::Compute) {
|
if (stage == Shader::Stage::Compute) {
|
||||||
file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
|
file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
|
||||||
.write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size));
|
.write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size));
|
||||||
|
@ -318,6 +325,8 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||||
ASSERT(local_size <= std::numeric_limits<u32>::max());
|
ASSERT(local_size <= std::numeric_limits<u32>::max());
|
||||||
local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size;
|
local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size;
|
||||||
texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot;
|
texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot;
|
||||||
|
has_hle_engine_state =
|
||||||
|
maxwell3d->engine_state == Tegra::Engines::Maxwell3D::EngineHint::OnHLEMacro;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
|
u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
|
||||||
|
@ -331,6 +340,30 @@ u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<Shader::ReplaceConstant> GraphicsEnvironment::GetReplaceConstBuffer(u32 bank,
|
||||||
|
u32 offset) {
|
||||||
|
if (!has_hle_engine_state) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
const u64 key = (static_cast<u64>(bank) << 32) | static_cast<u64>(offset);
|
||||||
|
auto it = maxwell3d->replace_table.find(key);
|
||||||
|
if (it == maxwell3d->replace_table.end()) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplaceName name) {
|
||||||
|
switch (name) {
|
||||||
|
case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseVertex:
|
||||||
|
return Shader::ReplaceConstant::BaseVertex;
|
||||||
|
case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseInstance:
|
||||||
|
return Shader::ReplaceConstant::BaseInstance;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
}(it->second);
|
||||||
|
cbuf_replacements.emplace(key, converted_value);
|
||||||
|
return converted_value;
|
||||||
|
}
|
||||||
|
|
||||||
Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
|
Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
|
||||||
const auto& regs{maxwell3d->regs};
|
const auto& regs{maxwell3d->regs};
|
||||||
const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding};
|
const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding};
|
||||||
|
@ -409,11 +442,14 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
|
||||||
u64 num_texture_types{};
|
u64 num_texture_types{};
|
||||||
u64 num_texture_pixel_formats{};
|
u64 num_texture_pixel_formats{};
|
||||||
u64 num_cbuf_values{};
|
u64 num_cbuf_values{};
|
||||||
|
u64 num_cbuf_replacement_values{};
|
||||||
file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
|
file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
|
||||||
.read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
|
.read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
|
||||||
.read(reinterpret_cast<char*>(&num_texture_pixel_formats),
|
.read(reinterpret_cast<char*>(&num_texture_pixel_formats),
|
||||||
sizeof(num_texture_pixel_formats))
|
sizeof(num_texture_pixel_formats))
|
||||||
.read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
|
.read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
|
||||||
|
.read(reinterpret_cast<char*>(&num_cbuf_replacement_values),
|
||||||
|
sizeof(num_cbuf_replacement_values))
|
||||||
.read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
|
.read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
|
||||||
.read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
|
.read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
|
||||||
.read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
|
.read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
|
||||||
|
@ -444,6 +480,13 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
|
||||||
.read(reinterpret_cast<char*>(&value), sizeof(value));
|
.read(reinterpret_cast<char*>(&value), sizeof(value));
|
||||||
cbuf_values.emplace(key, value);
|
cbuf_values.emplace(key, value);
|
||||||
}
|
}
|
||||||
|
for (size_t i = 0; i < num_cbuf_replacement_values; ++i) {
|
||||||
|
u64 key;
|
||||||
|
Shader::ReplaceConstant value;
|
||||||
|
file.read(reinterpret_cast<char*>(&key), sizeof(key))
|
||||||
|
.read(reinterpret_cast<char*>(&value), sizeof(value));
|
||||||
|
cbuf_replacements.emplace(key, value);
|
||||||
|
}
|
||||||
if (stage == Shader::Stage::Compute) {
|
if (stage == Shader::Stage::Compute) {
|
||||||
file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
|
file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
|
||||||
.read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
|
.read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
|
||||||
|
@ -512,6 +555,16 @@ std::array<u32, 3> FileEnvironment::WorkgroupSize() const {
|
||||||
return workgroup_size;
|
return workgroup_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<Shader::ReplaceConstant> FileEnvironment::GetReplaceConstBuffer(u32 bank,
|
||||||
|
u32 offset) {
|
||||||
|
const u64 key = (static_cast<u64>(bank) << 32) | static_cast<u64>(offset);
|
||||||
|
auto it = cbuf_replacements.find(key);
|
||||||
|
if (it == cbuf_replacements.end()) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
return it->second;
|
||||||
|
}
|
||||||
|
|
||||||
void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
|
void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
|
||||||
const std::filesystem::path& filename, u32 cache_version) try {
|
const std::filesystem::path& filename, u32 cache_version) try {
|
||||||
std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app);
|
std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app);
|
||||||
|
|
|
@ -60,6 +60,10 @@ public:
|
||||||
|
|
||||||
void Serialize(std::ofstream& file) const;
|
void Serialize(std::ofstream& file) const;
|
||||||
|
|
||||||
|
bool HasHLEMacroState() const override {
|
||||||
|
return has_hle_engine_state;
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::optional<u64> TryFindSize();
|
std::optional<u64> TryFindSize();
|
||||||
|
|
||||||
|
@ -73,6 +77,7 @@ protected:
|
||||||
std::unordered_map<u32, Shader::TextureType> texture_types;
|
std::unordered_map<u32, Shader::TextureType> texture_types;
|
||||||
std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
|
std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
|
||||||
std::unordered_map<u64, u32> cbuf_values;
|
std::unordered_map<u64, u32> cbuf_values;
|
||||||
|
std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements;
|
||||||
|
|
||||||
u32 local_memory_size{};
|
u32 local_memory_size{};
|
||||||
u32 texture_bound{};
|
u32 texture_bound{};
|
||||||
|
@ -89,6 +94,7 @@ protected:
|
||||||
u32 viewport_transform_state = 1;
|
u32 viewport_transform_state = 1;
|
||||||
|
|
||||||
bool has_unbound_instructions = false;
|
bool has_unbound_instructions = false;
|
||||||
|
bool has_hle_engine_state = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
class GraphicsEnvironment final : public GenericEnvironment {
|
class GraphicsEnvironment final : public GenericEnvironment {
|
||||||
|
@ -109,6 +115,8 @@ public:
|
||||||
|
|
||||||
u32 ReadViewportTransformState() override;
|
u32 ReadViewportTransformState() override;
|
||||||
|
|
||||||
|
std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(u32 bank, u32 offset) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Tegra::Engines::Maxwell3D* maxwell3d{};
|
Tegra::Engines::Maxwell3D* maxwell3d{};
|
||||||
size_t stage_index{};
|
size_t stage_index{};
|
||||||
|
@ -131,6 +139,11 @@ public:
|
||||||
|
|
||||||
u32 ReadViewportTransformState() override;
|
u32 ReadViewportTransformState() override;
|
||||||
|
|
||||||
|
std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(
|
||||||
|
[[maybe_unused]] u32 bank, [[maybe_unused]] u32 offset) override {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Tegra::Engines::KeplerCompute* kepler_compute{};
|
Tegra::Engines::KeplerCompute* kepler_compute{};
|
||||||
};
|
};
|
||||||
|
@ -166,6 +179,13 @@ public:
|
||||||
|
|
||||||
[[nodiscard]] std::array<u32, 3> WorkgroupSize() const override;
|
[[nodiscard]] std::array<u32, 3> WorkgroupSize() const override;
|
||||||
|
|
||||||
|
[[nodiscard]] std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(u32 bank,
|
||||||
|
u32 offset) override;
|
||||||
|
|
||||||
|
[[nodiscard]] bool HasHLEMacroState() const override {
|
||||||
|
return cbuf_replacements.size() != 0;
|
||||||
|
}
|
||||||
|
|
||||||
void Dump(u64 hash) override;
|
void Dump(u64 hash) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -173,6 +193,7 @@ private:
|
||||||
std::unordered_map<u32, Shader::TextureType> texture_types;
|
std::unordered_map<u32, Shader::TextureType> texture_types;
|
||||||
std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
|
std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
|
||||||
std::unordered_map<u64, u32> cbuf_values;
|
std::unordered_map<u64, u32> cbuf_values;
|
||||||
|
std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements;
|
||||||
std::array<u32, 3> workgroup_size{};
|
std::array<u32, 3> workgroup_size{};
|
||||||
u32 local_memory_size{};
|
u32 local_memory_size{};
|
||||||
u32 shared_memory_size{};
|
u32 shared_memory_size{};
|
||||||
|
|
Loading…
Reference in New Issue