From 548dd27f4567f751d54073f1408d6f8949344fa9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 18 Apr 2020 04:03:29 -0300 Subject: [PATCH] fixed_pipeline_state: Pack rasterizer state Reduce FixedPipelineState's size to 600 bytes. --- .../renderer_vulkan/fixed_pipeline_state.cpp | 199 +++++++++--------- .../renderer_vulkan/fixed_pipeline_state.h | 105 +++++---- .../renderer_vulkan/vk_graphics_pipeline.cpp | 12 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 6 +- 4 files changed, 157 insertions(+), 165 deletions(-) diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 8734045e5..1a23de07f 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include @@ -12,6 +13,31 @@ namespace Vulkan { +namespace { + +constexpr std::size_t POINT = 0; +constexpr std::size_t LINE = 1; +constexpr std::size_t POLYGON = 2; +constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { + POINT, // Points + LINE, // Lines + LINE, // LineLoop + LINE, // LineStrip + POLYGON, // Triangles + POLYGON, // TriangleStrip + POLYGON, // TriangleFan + POLYGON, // Quads + POLYGON, // QuadStrip + POLYGON, // Polygon + LINE, // LinesAdjacency + LINE, // LineStripAdjacency + POLYGON, // TrianglesAdjacency + POLYGON, // TriangleStripAdjacency + POLYGON, // Patches +}; + +} // Anonymous namespace + void FixedPipelineState::DepthStencil::Fill(const Maxwell& regs) noexcept { raw = 0; front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail)); @@ -36,14 +62,41 @@ void FixedPipelineState::DepthStencil::Fill(const Maxwell& regs) noexcept { depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); } -namespace { +void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept { + const auto& clip = 
regs.view_volume_clip_control; + const std::array enabled_lut = {regs.polygon_offset_point_enable, + regs.polygon_offset_line_enable, + regs.polygon_offset_fill_enable}; + const u32 topology_index = static_cast(regs.draw.topology.Value()); -constexpr FixedPipelineState::InputAssembly GetInputAssemblyState(const Maxwell& regs) { - return FixedPipelineState::InputAssembly( - regs.draw.topology, regs.primitive_restart.enabled, - regs.draw.topology == Maxwell::PrimitiveTopology::Points ? regs.point_size : 0.0f); + u32 packed_front_face = PackFrontFace(regs.front_face); + if (regs.screen_y_control.triangle_rast_flip != 0 && + regs.viewport_transform[0].scale_y > 0.0f) { + // Flip front face + packed_front_face = 1 - packed_front_face; + } + + raw = 0; + topology.Assign(topology_index); + primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); + cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0); + depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); + depth_clamp_enable.Assign(clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1 ? 1 : 0); + ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0); + cull_face.Assign(PackCullFace(regs.cull_face)); + front_face.Assign(packed_front_face); + polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front)); + patch_control_points_minus_one.Assign(regs.patch_vertices - 1); + tessellation_primitive.Assign(static_cast(regs.tess_mode.prim.Value())); + tessellation_spacing.Assign(static_cast(regs.tess_mode.spacing.Value())); + tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); + logic_op_enable.Assign(regs.logic_op.enable != 0 ? 
1 : 0); + logic_op.Assign(PackLogicOp(regs.logic_op.operation)); + std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast } +namespace { + constexpr FixedPipelineState::BlendingAttachment GetBlendingAttachmentState( const Maxwell& regs, std::size_t render_target) { const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : render_target]; @@ -86,56 +139,6 @@ constexpr FixedPipelineState::ColorBlending GetColorBlendingState(const Maxwell& GetBlendingAttachmentState(regs, 6), GetBlendingAttachmentState(regs, 7)}); } -constexpr FixedPipelineState::Tessellation GetTessellationState(const Maxwell& regs) { - return FixedPipelineState::Tessellation(regs.patch_vertices, regs.tess_mode.prim, - regs.tess_mode.spacing, regs.tess_mode.cw != 0); -} - -constexpr std::size_t Point = 0; -constexpr std::size_t Line = 1; -constexpr std::size_t Polygon = 2; -constexpr std::array PolygonOffsetEnableLUT = { - Point, // Points - Line, // Lines - Line, // LineLoop - Line, // LineStrip - Polygon, // Triangles - Polygon, // TriangleStrip - Polygon, // TriangleFan - Polygon, // Quads - Polygon, // QuadStrip - Polygon, // Polygon - Line, // LinesAdjacency - Line, // LineStripAdjacency - Polygon, // TrianglesAdjacency - Polygon, // TriangleStripAdjacency - Polygon, // Patches -}; - -constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs) { - const std::array enabled_lut = {regs.polygon_offset_point_enable, - regs.polygon_offset_line_enable, - regs.polygon_offset_fill_enable}; - const auto topology = static_cast(regs.draw.topology.Value()); - const bool depth_bias_enabled = enabled_lut[PolygonOffsetEnableLUT[topology]]; - - const auto& clip = regs.view_volume_clip_control; - const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1; - - Maxwell::FrontFace front_face = regs.front_face; - if (regs.screen_y_control.triangle_rast_flip != 0 && - regs.viewport_transform[0].scale_y > 0.0f) { - if
(front_face == Maxwell::FrontFace::CounterClockWise) - front_face = Maxwell::FrontFace::ClockWise; - else if (front_face == Maxwell::FrontFace::ClockWise) - front_face = Maxwell::FrontFace::CounterClockWise; - } - - const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; - return FixedPipelineState::Rasterizer(regs.cull_test_enabled, depth_bias_enabled, - depth_clamp_enabled, gl_ndc, regs.cull_face, front_face); -} - } // Anonymous namespace std::size_t FixedPipelineState::BlendingAttachment::Hash() const noexcept { @@ -168,43 +171,14 @@ bool FixedPipelineState::VertexInput::operator==(const VertexInput& rhs) const n return std::memcmp(this, &rhs, sizeof *this) == 0; } -std::size_t FixedPipelineState::InputAssembly::Hash() const noexcept { - std::size_t point_size_int = 0; - std::memcpy(&point_size_int, &point_size, sizeof(point_size)); - return (static_cast(topology) << 24) ^ (point_size_int << 32) ^ - static_cast(primitive_restart_enable); -} - -bool FixedPipelineState::InputAssembly::operator==(const InputAssembly& rhs) const noexcept { - return std::tie(topology, primitive_restart_enable, point_size) == - std::tie(rhs.topology, rhs.primitive_restart_enable, rhs.point_size); -} - -std::size_t FixedPipelineState::Tessellation::Hash() const noexcept { - return static_cast(patch_control_points) ^ - (static_cast(primitive) << 6) ^ (static_cast(spacing) << 8) ^ - (static_cast(clockwise) << 10); -} - -bool FixedPipelineState::Tessellation::operator==(const Tessellation& rhs) const noexcept { - return std::tie(patch_control_points, primitive, spacing, clockwise) == - std::tie(rhs.patch_control_points, rhs.primitive, rhs.spacing, rhs.clockwise); -} - std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept { - return static_cast(cull_enable) ^ - (static_cast(depth_bias_enable) << 1) ^ - (static_cast(depth_clamp_enable) << 2) ^ - (static_cast(ndc_minus_one_to_one) << 3) ^ - (static_cast(cull_face) << 24) ^ - (static_cast(front_face) << 48); + 
u64 hash = static_cast(raw) << 32; + std::memcpy(&hash, &point_size, sizeof(u32)); + return static_cast(hash); } bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept { - return std::tie(cull_enable, depth_bias_enable, depth_clamp_enable, ndc_minus_one_to_one, - cull_face, front_face) == - std::tie(rhs.cull_enable, rhs.depth_bias_enable, rhs.depth_clamp_enable, - rhs.ndc_minus_one_to_one, rhs.cull_face, rhs.front_face); + return raw == rhs.raw && point_size == rhs.point_size; } std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept { @@ -231,8 +205,6 @@ bool FixedPipelineState::ColorBlending::operator==(const ColorBlending& rhs) con std::size_t FixedPipelineState::Hash() const noexcept { std::size_t hash = 0; boost::hash_combine(hash, vertex_input.Hash()); - boost::hash_combine(hash, input_assembly.Hash()); - boost::hash_combine(hash, tessellation.Hash()); boost::hash_combine(hash, rasterizer.Hash()); boost::hash_combine(hash, depth_stencil.Hash()); boost::hash_combine(hash, color_blending.Hash()); @@ -240,17 +212,13 @@ std::size_t FixedPipelineState::Hash() const noexcept { } bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept { - return std::tie(vertex_input, input_assembly, tessellation, rasterizer, depth_stencil, - color_blending) == std::tie(rhs.vertex_input, rhs.input_assembly, - rhs.tessellation, rhs.rasterizer, rhs.depth_stencil, - rhs.color_blending); + return std::tie(vertex_input, rasterizer, depth_stencil, color_blending) == + std::tie(rhs.vertex_input, rhs.rasterizer, rhs.depth_stencil, rhs.color_blending); } FixedPipelineState GetFixedPipelineState(const Maxwell& regs) { FixedPipelineState fixed_state; - fixed_state.input_assembly = GetInputAssemblyState(regs); - fixed_state.tessellation = GetTessellationState(regs); - fixed_state.rasterizer = GetRasterizerState(regs); + fixed_state.rasterizer.Fill(regs); fixed_state.depth_stencil.Fill(regs); fixed_state.color_blending = 
GetColorBlendingState(regs); return fixed_state; @@ -307,4 +275,41 @@ Maxwell::StencilOp FixedPipelineState::UnpackStencilOp(u32 packed) noexcept { return LUT[packed]; } +u32 FixedPipelineState::PackCullFace(Maxwell::CullFace cull) noexcept { + // FrontAndBack is 0x408, by subtracting 0x406 from it we get 2. + // Individual cull faces are in 0x404 and 0x405, subtracting 0x404 we get 0 and 1. + const u32 value = static_cast<u32>(cull); + return value - (value == 0x408 ? 0x406 : 0x404); +} + +Maxwell::CullFace FixedPipelineState::UnpackCullFace(u32 packed) noexcept { + static constexpr std::array LUT = {Maxwell::CullFace::Front, Maxwell::CullFace::Back, + Maxwell::CullFace::FrontAndBack}; + return LUT[packed]; +} + +u32 FixedPipelineState::PackFrontFace(Maxwell::FrontFace face) noexcept { + return static_cast<u32>(face) - 0x900; +} + +Maxwell::FrontFace FixedPipelineState::UnpackFrontFace(u32 packed) noexcept { + return static_cast<Maxwell::FrontFace>(packed + 0x900); +} + +u32 FixedPipelineState::PackPolygonMode(Maxwell::PolygonMode mode) noexcept { + return static_cast<u32>(mode) - 0x1B00; +} + +Maxwell::PolygonMode FixedPipelineState::UnpackPolygonMode(u32 packed) noexcept { + return static_cast<Maxwell::PolygonMode>(packed + 0x1B00); +} + +u32 FixedPipelineState::PackLogicOp(Maxwell::LogicOperation op) noexcept { + return static_cast<u32>(op) - 0x1500; +} + +Maxwell::LogicOperation FixedPipelineState::UnpackLogicOp(u32 packed) noexcept { + return static_cast<Maxwell::LogicOperation>(packed + 0x1500); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index e30877e77..75b093e90 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -30,6 +30,18 @@ struct FixedPipelineState { static u32 PackStencilOp(Maxwell::StencilOp op) noexcept; static Maxwell::StencilOp UnpackStencilOp(u32 packed) noexcept; + static u32 PackCullFace(Maxwell::CullFace cull) noexcept; + static
Maxwell::CullFace UnpackCullFace(u32 packed) noexcept; + + static u32 PackFrontFace(Maxwell::FrontFace face) noexcept; + static Maxwell::FrontFace UnpackFrontFace(u32 packed) noexcept; + + static u32 PackPolygonMode(Maxwell::PolygonMode mode) noexcept; + static Maxwell::PolygonMode UnpackPolygonMode(u32 packed) noexcept; + + static u32 PackLogicOp(Maxwell::LogicOperation op) noexcept; + static Maxwell::LogicOperation UnpackLogicOp(u32 packed) noexcept; + struct BlendingAttachment { constexpr BlendingAttachment(bool enable, Maxwell::Blend::Equation rgb_equation, Maxwell::Blend::Factor src_rgb_func, @@ -119,62 +131,30 @@ struct FixedPipelineState { }; static_assert(IsHashable); - struct InputAssembly { - constexpr InputAssembly(Maxwell::PrimitiveTopology topology, bool primitive_restart_enable, - float point_size) - : topology{topology}, primitive_restart_enable{primitive_restart_enable}, - point_size{point_size} {} - InputAssembly() = default; - - Maxwell::PrimitiveTopology topology; - bool primitive_restart_enable; - float point_size; - - std::size_t Hash() const noexcept; - - bool operator==(const InputAssembly& rhs) const noexcept; - - bool operator!=(const InputAssembly& rhs) const noexcept { - return !operator==(rhs); - } - }; - - struct Tessellation { - constexpr Tessellation(u32 patch_control_points, Maxwell::TessellationPrimitive primitive, - Maxwell::TessellationSpacing spacing, bool clockwise) - : patch_control_points{patch_control_points}, primitive{primitive}, spacing{spacing}, - clockwise{clockwise} {} - Tessellation() = default; - - u32 patch_control_points; - Maxwell::TessellationPrimitive primitive; - Maxwell::TessellationSpacing spacing; - bool clockwise; - - std::size_t Hash() const noexcept; - - bool operator==(const Tessellation& rhs) const noexcept; - - bool operator!=(const Tessellation& rhs) const noexcept { - return !operator==(rhs); - } - }; - struct Rasterizer { - constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool 
depth_clamp_enable, - bool ndc_minus_one_to_one, Maxwell::CullFace cull_face, - Maxwell::FrontFace front_face) - : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable}, - depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one}, - cull_face{cull_face}, front_face{front_face} {} - Rasterizer() = default; + union { + u32 raw; + BitField<0, 4, u32> topology; + BitField<4, 1, u32> primitive_restart_enable; + BitField<5, 1, u32> cull_enable; + BitField<6, 1, u32> depth_bias_enable; + BitField<7, 1, u32> depth_clamp_enable; + BitField<8, 1, u32> ndc_minus_one_to_one; + BitField<9, 2, u32> cull_face; + BitField<11, 1, u32> front_face; + BitField<12, 2, u32> polygon_mode; + BitField<14, 5, u32> patch_control_points_minus_one; + BitField<19, 2, u32> tessellation_primitive; + BitField<21, 2, u32> tessellation_spacing; + BitField<23, 1, u32> tessellation_clockwise; + BitField<24, 1, u32> logic_op_enable; + BitField<25, 4, u32> logic_op; + }; - bool cull_enable; - bool depth_bias_enable; - bool depth_clamp_enable; - bool ndc_minus_one_to_one; - Maxwell::CullFace cull_face; - Maxwell::FrontFace front_face; + // TODO(Rodrigo): Move this to push constants + u32 point_size; + + void Fill(const Maxwell& regs) noexcept; std::size_t Hash() const noexcept; @@ -183,7 +163,20 @@ struct FixedPipelineState { bool operator!=(const Rasterizer& rhs) const noexcept { return !operator==(rhs); } + + constexpr Maxwell::PrimitiveTopology Topology() const noexcept { + return static_cast(topology.Value()); + } + + Maxwell::CullFace CullFace() const noexcept { + return UnpackCullFace(cull_face.Value()); + } + + Maxwell::FrontFace FrontFace() const noexcept { + return UnpackFrontFace(front_face.Value()); + } }; + static_assert(IsHashable); struct DepthStencil { template @@ -257,8 +250,6 @@ struct FixedPipelineState { }; VertexInput vertex_input; - InputAssembly input_assembly; - Tessellation tessellation; Rasterizer rasterizer; DepthStencil depth_stencil; 
ColorBlending color_blending; @@ -273,8 +264,6 @@ struct FixedPipelineState { }; static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_copyable_v); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 0dd3ea5bc..e12c26076 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -158,10 +158,8 @@ std::vector VKGraphicsPipeline::CreateShaderModules( vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, const SPIRVProgram& program) const { const auto& vi = fixed_state.vertex_input; - const auto& ia = fixed_state.input_assembly; const auto& ds = fixed_state.depth_stencil; const auto& cd = fixed_state.color_blending; - const auto& ts = fixed_state.tessellation; const auto& rs = fixed_state.rasterizer; std::vector vertex_bindings; @@ -226,15 +224,15 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; input_assembly_ci.pNext = nullptr; input_assembly_ci.flags = 0; - input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, ia.topology); + input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, rs.Topology()); input_assembly_ci.primitiveRestartEnable = - ia.primitive_restart_enable && SupportsPrimitiveRestart(input_assembly_ci.topology); + rs.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology); VkPipelineTessellationStateCreateInfo tessellation_ci; tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; tessellation_ci.pNext = nullptr; 
tessellation_ci.flags = 0; - tessellation_ci.patchControlPoints = ts.patch_control_points; + tessellation_ci.patchControlPoints = rs.patch_control_points_minus_one.Value() + 1; VkPipelineViewportStateCreateInfo viewport_ci; viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; @@ -253,8 +251,8 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa rasterization_ci.rasterizerDiscardEnable = VK_FALSE; rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; rasterization_ci.cullMode = - rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : VK_CULL_MODE_NONE; - rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.front_face); + rs.cull_enable ? MaxwellToVK::CullFace(rs.CullFace()) : VK_CULL_MODE_NONE; + rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.FrontFace()); rasterization_ci.depthBiasEnable = rs.depth_bias_enable; rasterization_ci.depthBiasConstantFactor = 0.0f; rasterization_ci.depthBiasClamp = 0.0f; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 083da9999..8fdc6400d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -329,9 +329,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { const auto& gpu = system.GPU().Maxwell3D(); Specialization specialization; - if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) { - ASSERT(fixed_state.input_assembly.point_size != 0.0f); - specialization.point_size = fixed_state.input_assembly.point_size; + if (fixed_state.rasterizer.Topology() == Maxwell::PrimitiveTopology::Points) { + ASSERT(fixed_state.rasterizer.point_size != 0); + std::memcpy(&specialization.point_size, &fixed_state.rasterizer.point_size, sizeof(u32)); } for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type();