From b56ad93bbc9ac38820c1e1cb4b03256dd50aa17a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 5 Jan 2023 06:43:54 -0500 Subject: [PATCH] BufferBase: Don't ignore GPU pages. --- src/tests/video_core/buffer_base.cpp | 2 +- src/video_core/buffer_cache/buffer_base.h | 14 ++++++-------- src/video_core/engines/maxwell_dma.cpp | 4 ++-- src/video_core/invalidation_accumulator.h | 13 +++++++------ src/video_core/memory_manager.cpp | 2 +- src/video_core/memory_manager.h | 2 +- src/video_core/rasterizer_interface.h | 2 +- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 6 +++--- 8 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp index f7236afab..5cd0628f2 100644 --- a/src/tests/video_core/buffer_base.cpp +++ b/src/tests/video_core/buffer_base.cpp @@ -538,7 +538,7 @@ TEST_CASE("BufferBase: Cached write downloads") { int num = 0; buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; }); buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); - REQUIRE(num == 0); + REQUIRE(num == 1); REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); buffer.FlushCachedWrites(); diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index 92d77eef2..c47b7d866 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -430,7 +430,7 @@ private: if (query_begin >= SizeBytes() || size < 0) { return; } - u64* const untracked_words = Array(); + [[maybe_unused]] u64* const untracked_words = Array(); u64* const state_words = Array(); const u64 query_end = query_begin + std::min(static_cast(size), SizeBytes()); u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; @@ -483,7 +483,7 @@ private: NotifyRasterizer(word_index, current_bits, ~u64{0}); } // Exclude CPU modified pages when visiting GPU pages - const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0); + const u64 word = current_word; u64 page = page_begin; page_begin = 0; @@ -531,7 +531,7 @@ private: [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { static_assert(type != Type::Untracked); - const u64* const untracked_words = Array(); + [[maybe_unused]] const u64* const untracked_words = Array(); const u64* const state_words = Array(); const u64 num_query_words = size / BYTES_PER_WORD + 1; const u64 word_begin = offset / BYTES_PER_WORD; @@ -539,8 +539,7 @@ private: const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { - const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; - const u64 word = state_words[word_index] & ~off_word; + const u64 word = state_words[word_index]; if (word == 0) { continue; } @@ -564,7 +563,7 @@ private: [[nodiscard]] std::pair ModifiedRegion(u64 offset, u64 size) const noexcept { static_assert(type != Type::Untracked); - const u64* const untracked_words = Array(); + [[maybe_unused]] const u64* const untracked_words = Array(); const u64* const state_words = Array(); const u64 num_query_words = size / BYTES_PER_WORD + 1; const u64 word_begin = offset / BYTES_PER_WORD; @@ -574,8 +573,7 @@ private: u64 begin = std::numeric_limits::max(); u64 end = 0; for (u64 word_index = word_begin; word_index < word_end; ++word_index) { - const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; - const u64 word = state_words[word_index] & ~off_word; + const u64 word = state_words[word_index]; if (word == 0) { continue; } diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 7bf08e3e0..7762c7d96 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -123,7 +123,7 @@ void MaxwellDMA::Launch() { convert_linear_2_blocklinear_addr(regs.offset_in + offset), tmp_buffer.data(), tmp_buffer.size()); memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(), - tmp_buffer.size()); + tmp_buffer.size()); } } else if (is_src_pitch && !is_dst_pitch) { UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); @@ -143,7 +143,7 @@ void MaxwellDMA::Launch() { memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in); memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(), - regs.line_length_in); + regs.line_length_in); } } } diff --git a/src/video_core/invalidation_accumulator.h b/src/video_core/invalidation_accumulator.h index 42420e31c..2c2aaf7bb 100644 --- a/src/video_core/invalidation_accumulator.h +++ b/src/video_core/invalidation_accumulator.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include "common/common_types.h" @@ -26,8 +27,8 @@ public: if (address >= start_address && address + size <= last_collection) [[likely]] { return; } - size = (address + size + atomicy_side_mask) & atomicy_mask - address; - address = address & atomicy_mask; + size = ((address + size + atomicity_size_mask) & atomicity_mask) - address; + address = address & atomicity_mask; if (!has_collected) [[unlikely]] { reset_values(); has_collected = true; @@ -64,10 +65,10 @@ public: } private: - static constexpr size_t atomicy_bits = 5; - static constexpr size_t atomicy_size = 1ULL << atomicy_bits; - static constexpr size_t atomicy_side_mask = atomicy_size - 1; - static constexpr size_t atomicy_mask = ~atomicy_side_mask; + static constexpr size_t atomicity_bits = 5; + static constexpr size_t atomicity_size = 1ULL << atomicity_bits; + static constexpr size_t atomicity_size_mask = atomicity_size - 1; + static constexpr size_t atomicity_mask = ~atomicity_size_mask; GPUVAddr start_address{}; GPUVAddr last_collection{}; size_t accumulated_size{}; diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 0a6390054..3bcae3503 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -46,7 +46,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_table_cpu.resize(big_page_table_size); big_page_continous.resize(big_page_table_size / continous_bits, 0); entries.resize(page_table_size / 32, 0); - if (!Settings::IsGPULevelExtreme()) { + if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) { fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer(); } else { fastmem_arena = nullptr; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index ff9e3c0b3..2936364f0 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -107,7 +107,7 @@ public: * will be returned; */ std::vector> GetSubmappedRange(GPUVAddr gpu_addr, - std::size_t size) const; + std::size_t size) const; GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 6b66ad7b6..1735b6164 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -97,7 +97,7 @@ public: VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; virtual void InnerInvalidation(std::span> sequences) { - for (const auto [cpu_addr, size] : sequences) { + for (const auto& [cpu_addr, size] : sequences) { InvalidateRegion(cpu_addr, size); } } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 6c4d74564..ed4a72166 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -486,18 +486,18 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache void RasterizerVulkan::InnerInvalidation(std::span> sequences) { { std::scoped_lock lock{texture_cache.mutex}; - for (const auto [addr, size] : sequences) { + for (const auto& [addr, size] : sequences) { texture_cache.WriteMemory(addr, size); } } { std::scoped_lock lock{buffer_cache.mutex}; - for (const auto [addr, size] : sequences) { + for (const auto& [addr, size] : sequences) { buffer_cache.WriteMemory(addr, size); } } { - for (const auto [addr, size] : sequences) { + for (const auto& [addr, size] : sequences) { query_cache.InvalidateRegion(addr, size); pipeline_cache.InvalidateRegion(addr, size); }