diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 46da81aaa..1ba544943 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -2,124 +2,145 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include #include #include -#include "common/alignment.h" #include "common/assert.h" -#include "core/memory.h" -#include "video_core/memory_manager.h" +#include "common/bit_util.h" +#include "core/core.h" #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" namespace Vulkan { -CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, - std::size_t alignment, u8* host_ptr) - : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, - alignment{alignment} {} +namespace { -VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, - Memory::Memory& cpu_memory_, - VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, - VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size) - : RasterizerCache{rasterizer}, tegra_memory_manager{tegra_memory_manager}, cpu_memory{ - cpu_memory_} { - const auto usage = vk::BufferUsageFlagBits::eVertexBuffer | - vk::BufferUsageFlagBits::eIndexBuffer | - vk::BufferUsageFlagBits::eUniformBuffer; - const auto access = vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eIndexRead | - vk::AccessFlagBits::eUniformRead; - stream_buffer = - std::make_unique(device, memory_manager, scheduler, size, usage, access, - vk::PipelineStageFlagBits::eAllCommands); - buffer_handle = stream_buffer->GetBuffer(); +const auto BufferUsage = + 
vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer | + vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer; + +const auto UploadPipelineStage = + vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput | + vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | + vk::PipelineStageFlagBits::eComputeShader; + +const auto UploadAccessBarriers = + vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead | + vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead | + vk::AccessFlagBits::eIndexRead; + +auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { + return std::make_unique(device, scheduler, BufferUsage); } +} // Anonymous namespace + +CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, + CacheAddr cache_addr, std::size_t size) + : VideoCommon::BufferBlock{cache_addr, size} { + const vk::BufferCreateInfo buffer_ci({}, static_cast(size), + BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | + vk::BufferUsageFlagBits::eTransferDst, + vk::SharingMode::eExclusive, 0, nullptr); + + const auto& dld{device.GetDispatchLoader()}; + const auto dev{device.GetLogical()}; + buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld); + buffer.commit = memory_manager.Commit(*buffer.handle, false); +} + +CachedBufferBlock::~CachedBufferBlock() = default; + +VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, VKStagingBufferPool& staging_pool) + : VideoCommon::BufferCache{rasterizer, system, + CreateStreamBuffer(device, + scheduler)}, + device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ + staging_pool} {} + VKBufferCache::~VKBufferCache() = default; -u64 VKBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t 
size, u64 alignment, bool cache) { - const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)}; - ASSERT_MSG(cpu_addr, "Invalid GPU address"); - - // Cache management is a big overhead, so only cache entries with a given size. - // TODO: Figure out which size is the best for given games. - cache &= size >= 2048; - - u8* const host_ptr{cpu_memory.GetPointer(*cpu_addr)}; - if (cache) { - const auto entry = TryGet(host_ptr); - if (entry) { - if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { - return entry->GetOffset(); - } - Unregister(entry); - } - } - - AlignBuffer(alignment); - const u64 uploaded_offset = buffer_offset; - - if (host_ptr == nullptr) { - return uploaded_offset; - } - - std::memcpy(buffer_ptr, host_ptr, size); - buffer_ptr += size; - buffer_offset += size; - - if (cache) { - auto entry = std::make_shared(*cpu_addr, size, uploaded_offset, - alignment, host_ptr); - Register(entry); - } - - return uploaded_offset; +Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { + return std::make_shared(device, memory_manager, cache_addr, size); } -u64 VKBufferCache::UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment) { - AlignBuffer(alignment); - std::memcpy(buffer_ptr, raw_pointer, size); - const u64 uploaded_offset = buffer_offset; - - buffer_ptr += size; - buffer_offset += size; - return uploaded_offset; +const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) { + return buffer->GetHandle(); } -std::tuple VKBufferCache::ReserveMemory(std::size_t size, u64 alignment) { - AlignBuffer(alignment); - u8* const uploaded_ptr = buffer_ptr; - const u64 uploaded_offset = buffer_offset; - - buffer_ptr += size; - buffer_offset += size; - return {uploaded_ptr, uploaded_offset}; +const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) { + size = std::max(size, std::size_t(4)); + const auto& empty = staging_pool.GetUnusedBuffer(size, false); + 
scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) { + cmdbuf.fillBuffer(buffer, 0, size, 0, dld); + }); + return &*empty.handle; } -void VKBufferCache::Reserve(std::size_t max_size) { - bool invalidate; - std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer->Reserve(max_size); - buffer_offset = buffer_offset_base; +void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + const u8* data) { + const auto& staging = staging_pool.GetUnusedBuffer(size, true); + std::memcpy(staging.commit->Map(size), data, size); - if (invalidate) { - InvalidateAll(); - } + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, + size](auto cmdbuf, auto& dld) { + cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld); + cmdbuf.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, + {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, + offset, size)}, + {}, dld); + }); } -void VKBufferCache::Send() { - stream_buffer->Send(buffer_offset - buffer_offset_base); +void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + u8* data) { + const auto& staging = staging_pool.GetUnusedBuffer(size, true); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, + size](auto cmdbuf, auto& dld) { + cmdbuf.pipelineBarrier( + vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | + vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eTransfer, {}, {}, + {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite, + vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED, + 
VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)}, + {}, dld); + cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld); + }); + scheduler.Finish(); + + std::memcpy(data, staging.commit->Map(size), size); } -void VKBufferCache::AlignBuffer(std::size_t alignment) { - // Align the offset, not the mapped pointer - const u64 offset_aligned = Common::AlignUp(buffer_offset, alignment); - buffer_ptr += offset_aligned - buffer_offset; - buffer_offset = offset_aligned; +void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, + std::size_t dst_offset, std::size_t size) { + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset, + dst_offset, size](auto cmdbuf, auto& dld) { + cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld); + cmdbuf.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, + {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead, + vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size), + vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer, + dst_offset, size)}, + {}, dld); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index daa8ccf66..3f38eed0c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -5,105 +5,74 @@ #pragma once #include -#include +#include +#include #include "common/common_types.h" -#include "video_core/gpu.h" +#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/rasterizer_cache.h" #include "video_core/renderer_vulkan/declarations.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" +#include 
"video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" -namespace Memory { -class Memory; -} - -namespace Tegra { -class MemoryManager; +namespace Core { +class System; } namespace Vulkan { class VKDevice; -class VKFence; class VKMemoryManager; -class VKStreamBuffer; +class VKScheduler; -class CachedBufferEntry final : public RasterizerCacheObject { +class CachedBufferBlock final : public VideoCommon::BufferBlock { public: - explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment, - u8* host_ptr); + explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, + CacheAddr cache_addr, std::size_t size); + ~CachedBufferBlock(); - VAddr GetCpuAddr() const override { - return cpu_addr; - } - - std::size_t GetSizeInBytes() const override { - return size; - } - - std::size_t GetSize() const { - return size; - } - - u64 GetOffset() const { - return offset; - } - - std::size_t GetAlignment() const { - return alignment; + const vk::Buffer* GetHandle() const { + return &*buffer.handle; } private: - VAddr cpu_addr{}; - std::size_t size{}; - u64 offset{}; - std::size_t alignment{}; + VKBuffer buffer; }; -class VKBufferCache final : public RasterizerCache> { +using Buffer = std::shared_ptr; + +class VKBufferCache final : public VideoCommon::BufferCache { public: - explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, Memory::Memory& cpu_memory_, - VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, - VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size); + explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, VKStagingBufferPool& staging_pool); ~VKBufferCache(); - /// Uploads data from 
a guest GPU address. Returns host's buffer offset where it's been - /// allocated. - u64 UploadMemory(GPUVAddr gpu_addr, std::size_t size, u64 alignment = 4, bool cache = true); - - /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. - u64 UploadHostMemory(const u8* raw_pointer, std::size_t size, u64 alignment = 4); - - /// Reserves memory to be used by host's CPU. Returns mapped address and offset. - std::tuple ReserveMemory(std::size_t size, u64 alignment = 4); - - /// Reserves a region of memory to be used in subsequent upload/reserve operations. - void Reserve(std::size_t max_size); - - /// Ensures that the set data is sent to the device. - void Send(); - - /// Returns the buffer cache handle. - vk::Buffer GetBuffer() const { - return buffer_handle; - } + const vk::Buffer* GetEmptyBuffer(std::size_t size) override; protected: - // We do not have to flush this cache as things in it are never modified by us. - void FlushObjectInner(const std::shared_ptr& object) override {} + void WriteBarrier() override {} + + Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; + + const vk::Buffer* ToHandle(const Buffer& buffer) override; + + void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + const u8* data) override; + + void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + u8* data) override; + + void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, + std::size_t dst_offset, std::size_t size) override; private: - void AlignBuffer(std::size_t alignment); - - Tegra::MemoryManager& tegra_memory_manager; - Memory::Memory& cpu_memory; - - std::unique_ptr stream_buffer; - vk::Buffer buffer_handle; - - u8* buffer_ptr = nullptr; - u64 buffer_offset = 0; - u64 buffer_offset_base = 0; + const VKDevice& device; + VKMemoryManager& memory_manager; + VKScheduler& scheduler; + VKStagingBufferPool& staging_pool; }; } // namespace Vulkan diff --git 
a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index 0451babbf..9cc9979d0 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp @@ -6,6 +6,7 @@ #include #include #include + #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" @@ -16,34 +17,32 @@ namespace Vulkan { -// TODO(Rodrigo): Fine tune this number -constexpr u64 ALLOC_CHUNK_SIZE = 64 * 1024 * 1024; +namespace { + +u64 GetAllocationChunkSize(u64 required_size) { + static constexpr u64 sizes[] = {16ULL << 20, 32ULL << 20, 64ULL << 20, 128ULL << 20}; + auto it = std::lower_bound(std::begin(sizes), std::end(sizes), required_size); + return it != std::end(sizes) ? *it : Common::AlignUp(required_size, 256ULL << 20); +} + +} // Anonymous namespace class VKMemoryAllocation final { public: explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory, - vk::MemoryPropertyFlags properties, u64 alloc_size, u32 type) - : device{device}, memory{memory}, properties{properties}, alloc_size{alloc_size}, - shifted_type{ShiftType(type)}, is_mappable{properties & - vk::MemoryPropertyFlagBits::eHostVisible} { - if (is_mappable) { - const auto dev = device.GetLogical(); - const auto& dld = device.GetDispatchLoader(); - base_address = static_cast(dev.mapMemory(memory, 0, alloc_size, {}, dld)); - } - } + vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type) + : device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size}, + shifted_type{ShiftType(type)} {} ~VKMemoryAllocation() { const auto dev = device.GetLogical(); const auto& dld = device.GetDispatchLoader(); - if (is_mappable) - dev.unmapMemory(memory, dld); dev.free(memory, nullptr, dld); } VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) { - auto found = TryFindFreeSection(free_iterator, alloc_size, static_cast(commit_size), - 
static_cast(alignment)); + auto found = TryFindFreeSection(free_iterator, allocation_size, + static_cast(commit_size), static_cast(alignment)); if (!found) { found = TryFindFreeSection(0, free_iterator, static_cast(commit_size), static_cast(alignment)); @@ -52,8 +51,7 @@ public: return nullptr; } } - u8* address = is_mappable ? base_address + *found : nullptr; - auto commit = std::make_unique(this, memory, address, *found, + auto commit = std::make_unique(device, this, memory, *found, *found + commit_size); commits.push_back(commit.get()); @@ -65,12 +63,10 @@ public: void Free(const VKMemoryCommitImpl* commit) { ASSERT(commit); - const auto it = - std::find_if(commits.begin(), commits.end(), - [&](const auto& stored_commit) { return stored_commit == commit; }); + + const auto it = std::find(std::begin(commits), std::end(commits), commit); if (it == commits.end()) { - LOG_CRITICAL(Render_Vulkan, "Freeing unallocated commit!"); - UNREACHABLE(); + UNREACHABLE_MSG("Freeing unallocated commit!"); return; } commits.erase(it); @@ -88,11 +84,11 @@ private: } /// A memory allocator, it may return a free region between "start" and "end" with the solicited - /// requeriments. + /// requirements. std::optional TryFindFreeSection(u64 start, u64 end, u64 size, u64 alignment) const { - u64 iterator = start; - while (iterator + size < end) { - const u64 try_left = Common::AlignUp(iterator, alignment); + u64 iterator = Common::AlignUp(start, alignment); + while (iterator + size <= end) { + const u64 try_left = iterator; const u64 try_right = try_left + size; bool overlap = false; @@ -100,7 +96,7 @@ private: const auto [commit_left, commit_right] = commit->interval; if (try_left < commit_right && commit_left < try_right) { // There's an overlap, continue the search where the overlapping commit ends. 
- iterator = commit_right; + iterator = Common::AlignUp(commit_right, alignment); overlap = true; break; } @@ -110,6 +106,7 @@ private: return try_left; } } + // No free regions where found, return an empty optional. return std::nullopt; } @@ -117,12 +114,8 @@ private: const VKDevice& device; ///< Vulkan device. const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. const vk::MemoryPropertyFlags properties; ///< Vulkan properties. - const u64 alloc_size; ///< Size of this allocation. + const u64 allocation_size; ///< Size of this allocation. const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted. - const bool is_mappable; ///< Whether the allocation is mappable. - - /// Base address of the mapped pointer. - u8* base_address{}; /// Hints where the next free region is likely going to be. u64 free_iterator{}; @@ -132,13 +125,15 @@ private: }; VKMemoryManager::VKMemoryManager(const VKDevice& device) - : device{device}, props{device.GetPhysical().getMemoryProperties(device.GetDispatchLoader())}, - is_memory_unified{GetMemoryUnified(props)} {} + : device{device}, properties{device.GetPhysical().getMemoryProperties( + device.GetDispatchLoader())}, + is_memory_unified{GetMemoryUnified(properties)} {} VKMemoryManager::~VKMemoryManager() = default; -VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool host_visible) { - ASSERT(reqs.size < ALLOC_CHUNK_SIZE); +VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements, + bool host_visible) { + const u64 chunk_size = GetAllocationChunkSize(requirements.size); // When a host visible commit is asked, search for host visible and coherent, otherwise search // for a fast device local type. @@ -147,32 +142,21 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool ? 
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent : vk::MemoryPropertyFlagBits::eDeviceLocal; - const auto TryCommit = [&]() -> VKMemoryCommit { - for (auto& alloc : allocs) { - if (!alloc->IsCompatible(wanted_properties, reqs.memoryTypeBits)) - continue; - - if (auto commit = alloc->Commit(reqs.size, reqs.alignment); commit) { - return commit; - } - } - return {}; - }; - - if (auto commit = TryCommit(); commit) { + if (auto commit = TryAllocCommit(requirements, wanted_properties)) { return commit; } // Commit has failed, allocate more memory. - if (!AllocMemory(wanted_properties, reqs.memoryTypeBits, ALLOC_CHUNK_SIZE)) { - // TODO(Rodrigo): Try to use host memory. - LOG_CRITICAL(Render_Vulkan, "Ran out of memory!"); - UNREACHABLE(); + if (!AllocMemory(wanted_properties, requirements.memoryTypeBits, chunk_size)) { + // TODO(Rodrigo): Handle these situations in some way like flushing to guest memory. + // Allocation has failed, panic. + UNREACHABLE_MSG("Ran out of VRAM!"); + return {}; } // Commit again, this time it won't fail since there's a fresh allocation above. If it does, // there's a bug. 
- auto commit = TryCommit(); + auto commit = TryAllocCommit(requirements, wanted_properties); ASSERT(commit); return commit; } @@ -180,8 +164,7 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& reqs, bool VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) { const auto dev = device.GetLogical(); const auto& dld = device.GetDispatchLoader(); - const auto requeriments = dev.getBufferMemoryRequirements(buffer, dld); - auto commit = Commit(requeriments, host_visible); + auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible); dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld); return commit; } @@ -189,25 +172,23 @@ VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) { VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) { const auto dev = device.GetLogical(); const auto& dld = device.GetDispatchLoader(); - const auto requeriments = dev.getImageMemoryRequirements(image, dld); - auto commit = Commit(requeriments, host_visible); + auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible); dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld); return commit; } bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size) { - const u32 type = [&]() { - for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { - const auto flags = props.memoryTypes[type_index].propertyFlags; + const u32 type = [&] { + for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) { + const auto flags = properties.memoryTypes[type_index].propertyFlags; if ((type_mask & (1U << type_index)) && (flags & wanted_properties)) { // The type matches in type and in the wanted properties. 
return type_index; } } - LOG_CRITICAL(Render_Vulkan, "Couldn't find a compatible memory type!"); - UNREACHABLE(); - return 0u; + UNREACHABLE_MSG("Couldn't find a compatible memory type!"); + return 0U; }(); const auto dev = device.GetLogical(); @@ -216,19 +197,33 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 // Try to allocate found type. const vk::MemoryAllocateInfo memory_ai(size, type); vk::DeviceMemory memory; - if (const vk::Result res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld); + if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld); res != vk::Result::eSuccess) { LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res)); return false; } - allocs.push_back( + allocations.push_back( std::make_unique(device, memory, wanted_properties, size, type)); return true; } -/*static*/ bool VKMemoryManager::GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props) { - for (u32 heap_index = 0; heap_index < props.memoryHeapCount; ++heap_index) { - if (!(props.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) { +VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements, + vk::MemoryPropertyFlags wanted_properties) { + for (auto& allocation : allocations) { + if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) { + continue; + } + if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) { + return commit; + } + } + return {}; +} + +/*static*/ bool VKMemoryManager::GetMemoryUnified( + const vk::PhysicalDeviceMemoryProperties& properties) { + for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) { + if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) { // Memory is considered unified when heaps are device local only. 
return false; } @@ -236,17 +231,28 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 return true; } -VKMemoryCommitImpl::VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, - u8* data, u64 begin, u64 end) - : interval(std::make_pair(begin, end)), memory{memory}, allocation{allocation}, data{data} {} +VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, + vk::DeviceMemory memory, u64 begin, u64 end) + : device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {} VKMemoryCommitImpl::~VKMemoryCommitImpl() { allocation->Free(this); } -u8* VKMemoryCommitImpl::GetData() const { - ASSERT_MSG(data != nullptr, "Trying to access an unmapped commit."); - return data; +MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { + const auto dev = device.GetLogical(); + const auto address = reinterpret_cast( + dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader())); + return MemoryMap{this, address}; +} + +void VKMemoryCommitImpl::Unmap() const { + const auto dev = device.GetLogical(); + dev.unmapMemory(memory, device.GetDispatchLoader()); +} + +MemoryMap VKMemoryCommitImpl::Map() const { + return Map(interval.second - interval.first); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index 073597b35..cd00bb91b 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h @@ -12,6 +12,7 @@ namespace Vulkan { +class MemoryMap; class VKDevice; class VKMemoryAllocation; class VKMemoryCommitImpl; @@ -21,13 +22,14 @@ using VKMemoryCommit = std::unique_ptr; class VKMemoryManager final { public: explicit VKMemoryManager(const VKDevice& device); + VKMemoryManager(const VKMemoryManager&) = delete; ~VKMemoryManager(); /** * Commits a memory with the specified requeriments. 
- * @param reqs Requeriments returned from a Vulkan call. + * @param requirements Requirements returned from a Vulkan call. * @param host_visible Signals the allocator that it *must* use host visible and coherent - * memory. When passing false, it will try to allocate device local memory. + * memory. When passing false, it will try to allocate device local memory. * @returns A memory commit. */ VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible); @@ -47,25 +49,35 @@ private: /// Allocates a chunk of memory. bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size); - /// Returns true if the device uses an unified memory model. - static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& props); + /// Tries to allocate a memory commit. + VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements, + vk::MemoryPropertyFlags wanted_properties); - const VKDevice& device; ///< Device handler. - const vk::PhysicalDeviceMemoryProperties props; ///< Physical device properties. - const bool is_memory_unified; ///< True if memory model is unified. - std::vector> allocs; ///< Current allocations. + /// Returns true if the device uses an unified memory model. + static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties); + + const VKDevice& device; ///< Device handler. + const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties. + const bool is_memory_unified; ///< True if memory model is unified. + std::vector> allocations; ///< Current allocations. }; class VKMemoryCommitImpl final { friend VKMemoryAllocation; + friend MemoryMap; public: - explicit VKMemoryCommitImpl(VKMemoryAllocation* allocation, vk::DeviceMemory memory, u8* data, - u64 begin, u64 end); + explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, + vk::DeviceMemory memory, u64 begin, u64 end); ~VKMemoryCommitImpl(); - /// Returns the writeable memory map. 
The commit has to be mappable. - u8* GetData() const; + /// Maps a memory region and returns a pointer to it. + /// It's illegal to have more than one memory map at the same time. + MemoryMap Map(u64 size, u64 offset = 0) const; + + /// Maps the whole commit and returns a pointer to it. + /// It's illegal to have more than one memory map at the same time. + MemoryMap Map() const; /// Returns the Vulkan memory handler. vk::DeviceMemory GetMemory() const { @@ -78,10 +90,46 @@ public: } private: + /// Unmaps memory. + void Unmap() const; + + const VKDevice& device; ///< Vulkan device. std::pair interval{}; ///< Interval where the commit exists. vk::DeviceMemory memory; ///< Vulkan device memory handler. VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. - u8* data{}; ///< Pointer to the host mapped memory, it has the commit offset included. +}; + +/// Holds ownership of a memory map. +class MemoryMap final { +public: + explicit MemoryMap(const VKMemoryCommitImpl* commit, u8* address) + : commit{commit}, address{address} {} + + ~MemoryMap() { + if (commit) { + commit->Unmap(); + } + } + + /// Prematurely releases the memory map. + void Release() { + commit->Unmap(); + commit = nullptr; + } + + /// Returns the address of the memory map. + u8* GetAddress() const { + return address; + } + + /// Implicitly converts to the address of the memory map. + operator u8*() const { + return address; + } + +private: + const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit. + u8* address{}; ///< Address to the mapped memory. }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 62f1427f5..d48d3b44c 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -3,86 +3,144 @@ // Refer to the license.txt file included.
#include -#include #include +#include #include +#include "common/alignment.h" #include "common/assert.h" #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" namespace Vulkan { +namespace { + constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; -VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, - vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage) - : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{ - pipeline_stage} { - CreateBuffers(memory_manager, usage); - ReserveWatches(WATCHES_INITIAL_RESERVE); +constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024; + +std::optional FindMemoryType(const VKDevice& device, u32 filter, + vk::MemoryPropertyFlags wanted) { + const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader()); + for (u32 i = 0; i < properties.memoryTypeCount; i++) { + if (!(filter & (1 << i))) { + continue; + } + if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) { + return i; + } + } + return {}; +} + +} // Anonymous namespace + +VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, + vk::BufferUsageFlags usage) + : device{device}, scheduler{scheduler} { + CreateBuffers(usage); + ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); + ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); } VKStreamBuffer::~VKStreamBuffer() = default; -std::tuple VKStreamBuffer::Reserve(u64 size) { - ASSERT(size <= buffer_size); +std::tuple VKStreamBuffer::Map(u64 size, u64 alignment) { + ASSERT(size <= STREAM_BUFFER_SIZE); 
mapped_size = size; - if (offset + size > buffer_size) { - // The buffer would overflow, save the amount of used buffers, signal an invalidation and - // reset the state. - invalidation_mark = used_watches; - used_watches = 0; + if (alignment > 0) { + offset = Common::AlignUp(offset, alignment); + } + + WaitPendingOperations(offset); + + bool invalidated = false; + if (offset + size > STREAM_BUFFER_SIZE) { + // The buffer would overflow, save the amount of used watches and reset the state. + invalidation_mark = current_watch_cursor; + current_watch_cursor = 0; offset = 0; - } - return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; -} + // Swap watches and reset waiting cursors. + std::swap(previous_watches, current_watches); + wait_cursor = 0; + wait_bound = 0; -void VKStreamBuffer::Send(u64 size) { - ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); - - if (invalidation_mark) { - // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. + // Ensure that we don't wait for uncommitted fences. scheduler.Flush(); - std::for_each(watches.begin(), watches.begin() + *invalidation_mark, - [&](auto& resource) { resource->Wait(); }); - invalidation_mark = std::nullopt; + + invalidated = true; } - if (used_watches + 1 >= watches.size()) { - // Ensure that there are enough watches. - ReserveWatches(WATCHES_RESERVE_CHUNK); - } - // Add a watch for this allocation. 
- watches[used_watches++]->Watch(scheduler.GetFence()); - - offset += size; -} - -void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { - const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0, - nullptr); - const auto dev = device.GetLogical(); const auto& dld = device.GetDispatchLoader(); - buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); - commit = memory_manager.Commit(*buffer, true); - mapped_pointer = commit->GetData(); + const auto pointer = reinterpret_cast(dev.mapMemory(*memory, offset, size, {}, dld)); + return {pointer, offset, invalidated}; } -void VKStreamBuffer::ReserveWatches(std::size_t grow_size) { - const std::size_t previous_size = watches.size(); - watches.resize(previous_size + grow_size); - std::generate(watches.begin() + previous_size, watches.end(), - []() { return std::make_unique(); }); +void VKStreamBuffer::Unmap(u64 size) { + ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); + + const auto dev = device.GetLogical(); + dev.unmapMemory(*memory, device.GetDispatchLoader()); + + offset += size; + + if (current_watch_cursor + 1 >= current_watches.size()) { + // Ensure that there are enough watches. + ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK); + } + auto& watch = current_watches[current_watch_cursor++]; + watch.upper_bound = offset; + watch.fence.Watch(scheduler.GetFence()); +} + +void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) { + const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive, + 0, nullptr); + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); + + const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld); + // Prefer device local host visible allocations (this should hit AMD's pinned memory). 
+ auto type = FindMemoryType(device, requirements.memoryTypeBits, + vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent | + vk::MemoryPropertyFlagBits::eDeviceLocal); + if (!type) { + // Otherwise search for a host visible allocation. + type = FindMemoryType(device, requirements.memoryTypeBits, + vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent); + ASSERT_MSG(type, "No host visible and coherent memory type found"); + } + const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type); + memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld); + + dev.bindBufferMemory(*buffer, *memory, 0, dld); +} + +void VKStreamBuffer::ReserveWatches(std::vector& watches, std::size_t grow_size) { + watches.resize(watches.size() + grow_size); +} + +void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) { + if (!invalidation_mark) { + return; + } + while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) { + auto& watch = previous_watches[wait_cursor]; + wait_bound = watch.upper_bound; + watch.fence.Wait(); + ++wait_cursor; + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 842e54162..187c0c612 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -4,28 +4,24 @@ #pragma once -#include #include #include #include #include "common/common_types.h" #include "video_core/renderer_vulkan/declarations.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" namespace Vulkan { class VKDevice; class VKFence; class VKFenceWatch; -class VKResourceManager; class VKScheduler; -class VKStreamBuffer { +class VKStreamBuffer final { public: - explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, - vk::AccessFlags access, 
vk::PipelineStageFlags pipeline_stage); + explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, + vk::BufferUsageFlags usage); ~VKStreamBuffer(); /** @@ -34,39 +30,47 @@ public: * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer * offset and a boolean that's true when buffer has been invalidated. */ - std::tuple Reserve(u64 size); + std::tuple Map(u64 size, u64 alignment); /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. - void Send(u64 size); + void Unmap(u64 size); - vk::Buffer GetBuffer() const { + vk::Buffer GetHandle() const { return *buffer; } private: + struct Watch final { + VKFenceWatch fence; + u64 upper_bound{}; + }; + /// Creates Vulkan buffer handles committing the required the required memory. - void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage); + void CreateBuffers(vk::BufferUsageFlags usage); /// Increases the amount of watches available. - void ReserveWatches(std::size_t grow_size); + void ReserveWatches(std::vector& watches, std::size_t grow_size); + + void WaitPendingOperations(u64 requested_upper_bound); const VKDevice& device; ///< Vulkan device manager. VKScheduler& scheduler; ///< Command scheduler. - const u64 buffer_size; ///< Total size of the stream buffer. const vk::AccessFlags access; ///< Access usage of this stream buffer. const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer. - UniqueBuffer buffer; ///< Mapped buffer. - VKMemoryCommit commit; ///< Memory commit. - u8* mapped_pointer{}; ///< Pointer to the host visible commit + UniqueBuffer buffer; ///< Mapped buffer. + UniqueDeviceMemory memory; ///< Memory allocation. u64 offset{}; ///< Buffer iterator. u64 mapped_size{}; ///< Size reserved for the current copy. - std::vector> watches; ///< Total watches - std::size_t used_watches{}; ///< Count of watches, reset on invalidation. 
- std::optional - invalidation_mark{}; ///< Number of watches used in the current invalidation. + std::vector current_watches; ///< Watches recorded in the current iteration. + std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation. + std::optional invalidation_mark; ///< Number of watches used in the previous cycle. + + std::vector previous_watches; ///< Watches used in the previous iteration. + std::size_t wait_cursor{}; ///< Last watch being waited for completion. + u64 wait_bound{}; ///< Highest offset being watched for completion. }; } // namespace Vulkan