From d540d284b5711f044678191bbab858de626103a9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 15 Aug 2021 15:35:53 +0200 Subject: [PATCH 1/4] VideoCore: Rework Garbage Collection. --- src/common/lru_cache.h | 141 ++++++++++++++++++ src/video_core/buffer_cache/buffer_base.h | 13 +- src/video_core/buffer_cache/buffer_cache.h | 61 ++++---- src/video_core/texture_cache/image_base.h | 2 +- src/video_core/texture_cache/texture_cache.h | 89 ++++------- .../texture_cache/texture_cache_base.h | 8 +- 6 files changed, 213 insertions(+), 101 deletions(-) create mode 100644 src/common/lru_cache.h diff --git a/src/common/lru_cache.h b/src/common/lru_cache.h new file mode 100644 index 000000000..048e9c3da --- /dev/null +++ b/src/common/lru_cache.h @@ -0,0 +1,141 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2+ or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "common/common_types.h" + +namespace Common { + +template +class LeastRecentlyUsedCache { + using ObjectType = typename Traits::ObjectType; + using TickType = typename Traits::TickType; + + struct Item { + ObjectType obj; + TickType tick; + Item* next{}; + Item* prev{}; + }; + +public: + LeastRecentlyUsedCache() : first_item{}, last_item{} {} + ~LeastRecentlyUsedCache() = default; + + size_t Insert(ObjectType obj, TickType tick) { + const auto new_id = build(); + auto& item = item_pool[new_id]; + item.obj = obj; + item.tick = tick; + attach(item); + return new_id; + } + + void Touch(size_t id, TickType tick) { + auto& item = item_pool[id]; + if (item.tick >= tick) { + return; + } + item.tick = tick; + if (&item == last_item) { + return; + } + detach(item); + attach(item); + } + + void Free(size_t id) { + auto& item = item_pool[id]; + detach(item); + item.prev = nullptr; + item.next = nullptr; + free_items.push_back(id); + } + + template + void ForEachItemBelow(TickType tick, Func&& func) { + static constexpr bool RETURNS_BOOL = + std::is_same_v, bool>; + Item* iterator = first_item; + while (iterator) { + if (static_cast(tick) - static_cast(iterator->tick) < 0) { + return; + } + Item* next = iterator->next; + if constexpr (RETURNS_BOOL) { + if (func(iterator->obj)) { + return; + } + } else { + func(iterator->obj); + } + iterator = next; + } + } + +private: + size_t build() { + if (free_items.empty()) { + const size_t item_id = item_pool.size(); + item_pool.emplace_back(); + auto& item = item_pool[item_id]; + item.next = nullptr; + item.prev = nullptr; + return item_id; + } + const size_t item_id = free_items.front(); + free_items.pop_front(); + auto& item = item_pool[item_id]; + item.next = nullptr; + item.prev = nullptr; + return item_id; + } + + void attach(Item& item) { + if (!first_item) { + first_item = &item; + } + if (!last_item) { + last_item = &item; + } else { + item.prev = last_item; + last_item->next = &item; + item.next = nullptr; + last_item = &item; + } + } + + void detach(Item& item) { + if (item.prev) { + item.prev->next = item.next; + } + if (item.next) { + item.next->prev = item.prev; + } + if (&item == first_item) { + first_item = item.next; + if (first_item) { + first_item->prev = nullptr; + } + } + if (&item == last_item) { + last_item = item.prev; + if (last_item) { + last_item->next = nullptr; + } + } + } + + std::deque item_pool; + std::deque free_items; + Item* first_item; + Item* last_item; +}; + +} // namespace Common diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index c3318095c..4b696a60f 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -261,16 +261,6 @@ public: stream_score += score; } - /// Sets the new frame tick - void SetFrameTick(u64 new_frame_tick) noexcept { - frame_tick = new_frame_tick; - } - - /// Returns the new frame tick - [[nodiscard]] u64 FrameTick() const noexcept { - return frame_tick; - } - /// Returns the likeliness of this being a stream buffer [[nodiscard]] int StreamScore() const noexcept { return stream_score; @@ -307,6 +297,8 @@ public: return words.size_bytes; } + size_t lru_id; + private: template u64* Array() noexcept { @@ -603,7 +595,6 @@ private: RasterizerInterface* rasterizer = nullptr; VAddr cpu_addr = 0; Words words; - u64 frame_tick = 0; BufferFlagBits flags{}; int stream_score = 0; }; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 3b43554f9..a0217908a 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -20,6 +20,7 @@ #include "common/common_types.h" #include "common/div_ceil.h" #include "common/literals.h" +#include "common/lru_cache.h" #include "common/microprofile.h" #include "common/scope_exit.h" #include "common/settings.h" @@ -77,7 +78,7 @@ class BufferCache { static constexpr BufferId NULL_BUFFER_ID{0}; - static constexpr u64 EXPECTED_MEMORY = 512_MiB; + static constexpr u64 EXPECTED_MEMORY = 256_MiB; static constexpr u64 CRITICAL_MEMORY = 1_GiB; using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -330,7 +331,7 @@ private: template void ChangeRegister(BufferId buffer_id); - void TouchBuffer(Buffer& buffer) const noexcept; + void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); @@ -428,7 +429,11 @@ private: size_t immediate_buffer_capacity = 0; std::unique_ptr immediate_buffer_alloc; - typename SlotVector::Iterator deletion_iterator; + struct LRUItemParams { + using ObjectType = BufferId; + using TickType = u64; + }; + Common::LeastRecentlyUsedCache lru_cache; u64 frame_tick = 0; u64 total_used_memory = 0; @@ -445,7 +450,6 @@ BufferCache

::BufferCache(VideoCore::RasterizerInterface& rasterizer_, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); - deletion_iterator = slot_buffers.end(); common_ranges.clear(); } @@ -454,20 +458,17 @@ void BufferCache

::RunGarbageCollector() { const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; int num_iterations = aggressive_gc ? 64 : 32; - for (; num_iterations > 0; --num_iterations) { - if (deletion_iterator == slot_buffers.end()) { - deletion_iterator = slot_buffers.begin(); + const auto clean_up = [this, &num_iterations](BufferId buffer_id) { + if (num_iterations == 0) { + return true; } - ++deletion_iterator; - if (deletion_iterator == slot_buffers.end()) { - break; - } - const auto [buffer_id, buffer] = *deletion_iterator; - if (buffer->FrameTick() + ticks_to_destroy < frame_tick) { - DownloadBufferMemory(*buffer); - DeleteBuffer(buffer_id); - } - } + --num_iterations; + auto& buffer = slot_buffers[buffer_id]; + DownloadBufferMemory(buffer); + DeleteBuffer(buffer_id); + return false; + }; + lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); } template @@ -954,7 +955,7 @@ bool BufferCache

::IsRegionCpuModified(VAddr addr, size_t size) { template void BufferCache

::BindHostIndexBuffer() { Buffer& buffer = slot_buffers[index_buffer.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, index_buffer.buffer_id); const u32 offset = buffer.Offset(index_buffer.cpu_addr); const u32 size = index_buffer.size; SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); @@ -975,7 +976,7 @@ void BufferCache

::BindHostVertexBuffers() { for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { const Binding& binding = vertex_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); if (!flags[Dirty::VertexBuffer0 + index]) { continue; @@ -1011,7 +1012,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const VAddr cpu_addr = binding.cpu_addr; const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && size <= uniform_buffer_skip_cache_size && !buffer.IsRegionGpuModified(cpu_addr, size); @@ -1083,7 +1084,7 @@ void BufferCache

::BindHostGraphicsStorageBuffers(size_t stage) { ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { const Binding& binding = storage_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1128,7 +1129,7 @@ void BufferCache

::BindHostTransformFeedbackBuffers() { for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { const Binding& binding = transform_feedback_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1148,7 +1149,7 @@ void BufferCache

::BindHostComputeUniformBuffers() { ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { const Binding& binding = compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1168,7 +1169,7 @@ void BufferCache

::BindHostComputeStorageBuffers() { ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { const Binding& binding = compute_storage_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1513,11 +1514,11 @@ BufferId BufferCache

::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); const u32 size = static_cast(overlap.end - overlap.begin); const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); - TouchBuffer(slot_buffers[new_buffer_id]); for (const BufferId overlap_id : overlap.ids) { JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); } Register(new_buffer_id); + TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id); return new_buffer_id; } @@ -1534,12 +1535,14 @@ void BufferCache

::Unregister(BufferId buffer_id) { template template void BufferCache

::ChangeRegister(BufferId buffer_id) { - const Buffer& buffer = slot_buffers[buffer_id]; + Buffer& buffer = slot_buffers[buffer_id]; const auto size = buffer.SizeBytes(); if (insert) { total_used_memory += Common::AlignUp(size, 1024); + buffer.lru_id = lru_cache.Insert(buffer_id, frame_tick); } else { total_used_memory -= Common::AlignUp(size, 1024); + lru_cache.Free(buffer.lru_id); } const VAddr cpu_addr_begin = buffer.CpuAddr(); const VAddr cpu_addr_end = cpu_addr_begin + size; @@ -1555,8 +1558,10 @@ void BufferCache

::ChangeRegister(BufferId buffer_id) { } template -void BufferCache

::TouchBuffer(Buffer& buffer) const noexcept { - buffer.SetFrameTick(frame_tick); +void BufferCache

::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { + if (buffer_id != NULL_BUFFER_ID) { + lru_cache.Touch(buffer.lru_id, frame_tick); + } } template diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index ff1feda9b..662089e3d 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -80,7 +80,7 @@ struct ImageBase { VAddr cpu_addr_end = 0; u64 modification_tick = 0; - u64 frame_tick = 0; + size_t lru_index = ~0; std::array mip_level_offsets{}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a087498ff..c16cc0838 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -43,8 +43,6 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& void(slot_image_views.insert(runtime, NullImageParams{})); void(slot_samplers.insert(runtime, sampler_descriptor)); - deletion_iterator = slot_images.begin(); - if constexpr (HAS_DEVICE_MEMORY_INFO) { const auto device_memory = runtime.GetDeviceLocalMemory(); const u64 possible_expected_memory = (device_memory * 3) / 10; @@ -64,65 +62,33 @@ template void TextureCache

::RunGarbageCollector() { const bool high_priority_mode = total_used_memory >= expected_memory; const bool aggressive_mode = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; - int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); - for (; num_iterations > 0; --num_iterations) { - if (deletion_iterator == slot_images.end()) { - deletion_iterator = slot_images.begin(); - if (deletion_iterator == slot_images.end()) { - break; - } + const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 50ULL : 100ULL; + size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 50 : 5); + const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { + if (num_iterations == 0) { + return true; } - auto [image_id, image_tmp] = *deletion_iterator; - Image* image = image_tmp; // fix clang error. - const bool is_alias = True(image->flags & ImageFlagBits::Alias); - const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); - const bool must_download = image->IsSafeDownload(); - bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); - const u64 ticks_needed = - is_bad_overlap - ? ticks_to_destroy >> 4 - : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); - should_care |= aggressive_mode; - if (should_care && image->frame_tick + ticks_needed < frame_tick) { - if (is_bad_overlap) { - const bool overlap_check = std::ranges::all_of( - image->overlapping_images, [&, image](const ImageId& overlap_id) { - auto& overlap = slot_images[overlap_id]; - return overlap.frame_tick >= image->frame_tick; - }); - if (!overlap_check) { - ++deletion_iterator; - continue; - } - } - if (!is_bad_overlap && must_download) { - const bool alias_check = std::ranges::none_of( - image->aliased_images, [&, image](const AliasedImage& alias) { - auto& alias_image = slot_images[alias.id]; - return (alias_image.frame_tick < image->frame_tick) || - (alias_image.modification_tick < image->modification_tick); - }); - - if (alias_check) { - auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image->info); - image->DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); - } - } - if (True(image->flags & ImageFlagBits::Tracked)) { - UntrackImage(*image, image_id); - } - UnregisterImage(image_id); - DeleteImage(image_id); - if (is_bad_overlap) { - ++num_iterations; - } + --num_iterations; + auto& image = slot_images[image_id]; + const bool must_download = image.IsSafeDownload(); + if (!high_priority_mode && must_download) { + return false; } - ++deletion_iterator; - } + if (must_download) { + auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + } + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, image_id); + } + UnregisterImage(image_id); + DeleteImage(image_id); + return false; + }; + lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); } template @@ -1078,6 +1044,8 @@ void TextureCache

::RegisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); + image.lru_index = lru_cache.Insert(image_id, frame_tick); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); if (False(image.flags & ImageFlagBits::Sparse)) { @@ -1115,6 +1083,7 @@ void TextureCache

::UnregisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory -= Common::AlignUp(tentative_size, 1024); + lru_cache.Free(image.lru_index); const auto& clear_page_table = [this, image_id]( u64 page, @@ -1384,7 +1353,7 @@ void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool if (is_modification) { MarkModification(image); } - image.frame_tick = frame_tick; + lru_cache.Touch(image.lru_index, frame_tick); } template diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index e4ae351cb..d7528ed24 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -14,6 +14,7 @@ #include "common/common_types.h" #include "common/literals.h" +#include "common/lru_cache.h" #include "video_core/compatible_formats.h" #include "video_core/delayed_destruction_ring.h" #include "video_core/engines/fermi_2d.h" @@ -370,6 +371,12 @@ private: std::vector uncommitted_downloads; std::queue> committed_downloads; + struct LRUItemParams { + using ObjectType = ImageId; + using TickType = u64; + }; + Common::LeastRecentlyUsedCache lru_cache; + static constexpr size_t TICKS_TO_DESTROY = 6; DelayedDestructionRing sentenced_images; DelayedDestructionRing sentenced_image_view; @@ -379,7 +386,6 @@ private: u64 modification_tick = 0; u64 frame_tick = 0; - typename SlotVector::Iterator deletion_iterator; }; } // namespace VideoCommon From ba82bb359bac918bf0d048fcd21d13c426bcd336 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 8 Aug 2021 12:32:09 +0200 Subject: [PATCH 2/4] Garbage Collection: enable as default, eliminate option. --- src/common/settings.cpp | 2 -- src/common/settings.h | 1 - src/video_core/buffer_cache/buffer_cache.h | 2 +- src/video_core/texture_cache/texture_cache.h | 3 +-- src/yuzu/configuration/config.cpp | 2 -- src/yuzu/configuration/configure_graphics_advanced.cpp | 6 ------ src/yuzu/configuration/configure_graphics_advanced.h | 1 - src/yuzu/configuration/configure_graphics_advanced.ui | 10 ---------- src/yuzu_cmd/config.cpp | 1 - 9 files changed, 2 insertions(+), 26 deletions(-) diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 996315999..fd3b639cd 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -59,7 +59,6 @@ void LogSettings() { log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); - log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue()); log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); log_setting("Audio_OutputEngine", values.sink_id.GetValue()); log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue()); @@ -143,7 +142,6 @@ void RestoreGlobalState(bool is_powered_on) { values.shader_backend.SetGlobal(true); values.use_asynchronous_shaders.SetGlobal(true); values.use_fast_gpu_time.SetGlobal(true); - values.use_caches_gc.SetGlobal(true); values.bg_red.SetGlobal(true); values.bg_green.SetGlobal(true); values.bg_blue.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index 20769d310..ec4d381e8 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -475,7 +475,6 @@ struct Values { ShaderBackend::SPIRV, "shader_backend"}; Setting use_asynchronous_shaders{false, "use_asynchronous_shaders"}; Setting use_fast_gpu_time{true, "use_fast_gpu_time"}; - Setting use_caches_gc{false, "use_caches_gc"}; Setting bg_red{0, "bg_red"}; Setting bg_green{0, "bg_green"}; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index a0217908a..e40aa7fe6 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -486,7 +486,7 @@ void BufferCache

::TickFrame() { const bool skip_preferred = hits * 256 < shots * 251; uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; - if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) { + if (total_used_memory >= EXPECTED_MEMORY) { RunGarbageCollector(); } ++frame_tick; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c16cc0838..caf1949f2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -5,7 +5,6 @@ #pragma once #include "common/alignment.h" -#include "common/settings.h" #include "video_core/dirty_flags.h" #include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/texture_cache_base.h" @@ -93,7 +92,7 @@ void TextureCache

::RunGarbageCollector() { template void TextureCache

::TickFrame() { - if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { + if (total_used_memory > minimum_memory) { RunGarbageCollector(); } sentenced_images.Tick(); diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 377795326..85d292bcc 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -818,7 +818,6 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.shader_backend); ReadGlobalSetting(Settings::values.use_asynchronous_shaders); ReadGlobalSetting(Settings::values.use_fast_gpu_time); - ReadGlobalSetting(Settings::values.use_caches_gc); ReadGlobalSetting(Settings::values.bg_red); ReadGlobalSetting(Settings::values.bg_green); ReadGlobalSetting(Settings::values.bg_blue); @@ -1359,7 +1358,6 @@ void Config::SaveRendererValues() { Settings::values.shader_backend.UsingGlobal()); WriteGlobalSetting(Settings::values.use_asynchronous_shaders); WriteGlobalSetting(Settings::values.use_fast_gpu_time); - WriteGlobalSetting(Settings::values.use_caches_gc); WriteGlobalSetting(Settings::values.bg_red); WriteGlobalSetting(Settings::values.bg_green); WriteGlobalSetting(Settings::values.bg_blue); diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index a31b8e192..bfd464061 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -28,7 +28,6 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); - ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue()); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); if (Settings::IsConfiguringGlobal()) { @@ -55,8 +54,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, ui->use_asynchronous_shaders, use_asynchronous_shaders); - ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_caches_gc, ui->use_caches_gc, - use_caches_gc); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time, ui->use_fast_gpu_time, use_fast_gpu_time); } @@ -81,7 +78,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { ui->use_asynchronous_shaders->setEnabled( Settings::values.use_asynchronous_shaders.UsingGlobal()); ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); - ui->use_caches_gc->setEnabled(Settings::values.use_caches_gc.UsingGlobal()); ui->anisotropic_filtering_combobox->setEnabled( Settings::values.max_anisotropy.UsingGlobal()); @@ -94,8 +90,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { use_asynchronous_shaders); ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time, Settings::values.use_fast_gpu_time, use_fast_gpu_time); - ConfigurationShared::SetColoredTristate(ui->use_caches_gc, Settings::values.use_caches_gc, - use_caches_gc); ConfigurationShared::SetColoredComboBox( ui->gpu_accuracy, ui->label_gpu_accuracy, static_cast(Settings::values.gpu_accuracy.GetValue(true))); diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 7356e6916..13ba4ff6b 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -37,5 +37,4 @@ private: ConfigurationShared::CheckState use_vsync; ConfigurationShared::CheckState use_asynchronous_shaders; ConfigurationShared::CheckState use_fast_gpu_time; - ConfigurationShared::CheckState use_caches_gc; }; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 4fe6b86ae..5891f8299 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -96,16 +96,6 @@ - - - - Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games. - - - Enable GPU cache garbage collection (experimental) - - - diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 4f14be524..757dd1ea0 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -468,7 +468,6 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.use_nvdec_emulation); ReadSetting("Renderer", Settings::values.accelerate_astc); ReadSetting("Renderer", Settings::values.use_fast_gpu_time); - ReadSetting("Renderer", Settings::values.use_caches_gc); ReadSetting("Renderer", Settings::values.bg_red); ReadSetting("Renderer", Settings::values.bg_green); From ff48f06fb92e5fe2105fd6b4c5d4f57bbb2714c7 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 29 Aug 2021 18:19:53 +0200 Subject: [PATCH 3/4] Garbage Collection: Adress Feedback. --- src/common/lru_cache.h | 23 +++++++++++----------- src/video_core/buffer_cache/buffer_base.h | 9 ++++++++- src/video_core/buffer_cache/buffer_cache.h | 6 +++--- src/video_core/texture_cache/image_base.h | 2 +- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/common/lru_cache.h b/src/common/lru_cache.h index 048e9c3da..365488ba5 100644 --- a/src/common/lru_cache.h +++ b/src/common/lru_cache.h @@ -29,11 +29,11 @@ public: ~LeastRecentlyUsedCache() = default; size_t Insert(ObjectType obj, TickType tick) { - const auto new_id = build(); + const auto new_id = Build(); auto& item = item_pool[new_id]; item.obj = obj; item.tick = tick; - attach(item); + Attach(item); return new_id; } @@ -46,13 +46,13 @@ public: if (&item == last_item) { return; } - detach(item); - attach(item); + Detach(item); + Attach(item); } void Free(size_t id) { auto& item = item_pool[id]; - detach(item); + Detach(item); item.prev = nullptr; item.next = nullptr; free_items.push_back(id); @@ -80,11 +80,10 @@ public: } private: - size_t build() { + size_t Build() { if (free_items.empty()) { const size_t item_id = item_pool.size(); - item_pool.emplace_back(); - auto& item = item_pool[item_id]; + auto& item = item_pool.emplace_back(); item.next = nullptr; item.prev = nullptr; return item_id; @@ -97,7 +96,7 @@ private: return item_id; } - void attach(Item& item) { + void Attach(Item& item) { if (!first_item) { first_item = &item; } @@ -111,7 +110,7 @@ private: } } - void detach(Item& item) { + void Detach(Item& item) { if (item.prev) { item.prev->next = item.next; } @@ -134,8 +133,8 @@ private: std::deque item_pool; std::deque free_items; - Item* first_item; - Item* last_item; + Item* first_item{}; + Item* last_item{}; }; } // namespace Common diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index 4b696a60f..18a3390c0 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -297,7 +297,13 @@ public: return words.size_bytes; } - size_t lru_id; + size_t getLRUID() const noexcept { + return lru_id; + } + + void setLRUID(size_t lru_id_) { + lru_id = lru_id_; + } private: template @@ -597,6 +603,7 @@ private: Words words; BufferFlagBits flags{}; int stream_score = 0; + size_t lru_id = SIZE_MAX; }; } // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index e40aa7fe6..617435968 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1539,10 +1539,10 @@ void BufferCache

::ChangeRegister(BufferId buffer_id) { const auto size = buffer.SizeBytes(); if (insert) { total_used_memory += Common::AlignUp(size, 1024); - buffer.lru_id = lru_cache.Insert(buffer_id, frame_tick); + buffer.setLRUID(lru_cache.Insert(buffer_id, frame_tick)); } else { total_used_memory -= Common::AlignUp(size, 1024); - lru_cache.Free(buffer.lru_id); + lru_cache.Free(buffer.getLRUID()); } const VAddr cpu_addr_begin = buffer.CpuAddr(); const VAddr cpu_addr_end = cpu_addr_begin + size; @@ -1560,7 +1560,7 @@ void BufferCache

::ChangeRegister(BufferId buffer_id) { template void BufferCache

::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { if (buffer_id != NULL_BUFFER_ID) { - lru_cache.Touch(buffer.lru_id, frame_tick); + lru_cache.Touch(buffer.getLRUID(), frame_tick); } } diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 662089e3d..0c17a791b 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -80,7 +80,7 @@ struct ImageBase { VAddr cpu_addr_end = 0; u64 modification_tick = 0; - size_t lru_index = ~0; + size_t lru_index = SIZE_MAX; std::array mip_level_offsets{}; From fe0acec53924fd05829901e43b4783324a733b13 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 29 Aug 2021 18:24:19 +0200 Subject: [PATCH 4/4] Garbage Collection: Make it more agressive on high priority mode. --- src/video_core/buffer_cache/buffer_base.h | 4 ++-- src/video_core/buffer_cache/buffer_cache.h | 2 +- src/video_core/texture_cache/texture_cache.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index 18a3390c0..be2113f5a 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -298,11 +298,11 @@ public: } size_t getLRUID() const noexcept { - return lru_id; + return lru_id; } void setLRUID(size_t lru_id_) { - lru_id = lru_id_; + lru_id = lru_id_; } private: diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 617435968..7bfd57369 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -78,7 +78,7 @@ class BufferCache { static constexpr BufferId NULL_BUFFER_ID{0}; - static constexpr u64 EXPECTED_MEMORY = 256_MiB; + static constexpr u64 EXPECTED_MEMORY = 512_MiB; static constexpr u64 CRITICAL_MEMORY = 1_GiB; using Maxwell = Tegra::Engines::Maxwell3D::Regs; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index caf1949f2..24b809242 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -61,8 +61,8 @@ template void TextureCache

::RunGarbageCollector() { const bool high_priority_mode = total_used_memory >= expected_memory; const bool aggressive_mode = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 50ULL : 100ULL; - size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 50 : 5); + const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; + size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5); const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { if (num_iterations == 0) { return true;