vulkan_memory_allocator: Add "download" memory usage hint

Allow users of the allocator to hint memory usage for downloads. This
replaces the non-descriptive boolean that selected between host visible
and non-host-visible memory commits with an enum that hints device
local, upload, and download usages.
This commit is contained in:
ReinUsesLisp 2021-01-03 18:38:15 -03:00
parent fade63b58e
commit 72541af3bc
9 changed files with 86 additions and 45 deletions

View File

@ -657,7 +657,7 @@ void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuff
}; };
buffer = device.GetLogical().CreateBuffer(ci); buffer = device.GetLogical().CreateBuffer(ci);
buffer_commit = memory_allocator.Commit(buffer, true); buffer_commit = memory_allocator.Commit(buffer, MemoryUsage::Upload);
} }
void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
@ -688,7 +688,7 @@ void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer)
.pQueueFamilyIndices = nullptr, .pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
}); });
raw_buffer_commits[i] = memory_allocator.Commit(raw_images[i], false); raw_buffer_commits[i] = memory_allocator.Commit(raw_images[i], MemoryUsage::DeviceLocal);
raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr, .pNext = nullptr,

View File

@ -50,13 +50,13 @@ Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKSched
.queueFamilyIndexCount = 0, .queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr, .pQueueFamilyIndices = nullptr,
}); });
commit = memory_allocator.Commit(buffer, false); commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
} }
Buffer::~Buffer() = default; Buffer::~Buffer() = default;
void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
const auto& staging = staging_pool.Request(data_size, true); const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload);
std::memcpy(staging.mapped_span.data(), data, data_size); std::memcpy(staging.mapped_span.data(), data, data_size);
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
@ -98,7 +98,7 @@ void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
} }
void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
auto staging = staging_pool.Request(data_size, true); auto staging = staging_pool.Request(data_size, MemoryUsage::Download);
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
const VkBuffer handle = Handle(); const VkBuffer handle = Handle();
@ -179,7 +179,7 @@ std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t s
VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) {
size = std::max(size, std::size_t(4)); size = std::max(size, std::size_t(4));
const auto& empty = staging_pool.Request(size, false); const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal);
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) { scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) {
cmdbuf.FillBuffer(buffer, 0, size, 0); cmdbuf.FillBuffer(buffer, 0, size, 0);

View File

@ -177,7 +177,7 @@ QuadArrayPass::~QuadArrayPass() = default;
std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
const u32 num_triangle_vertices = (num_vertices / 4) * 6; const u32 num_triangle_vertices = (num_vertices / 4) * 6;
const std::size_t staging_size = num_triangle_vertices * sizeof(u32); const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
const auto staging_ref = staging_buffer_pool.Request(staging_size, false); const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
update_descriptor_queue.Acquire(); update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size);
@ -224,7 +224,7 @@ Uint8Pass::~Uint8Pass() = default;
std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
u64 src_offset) { u64 src_offset) {
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
const auto staging_ref = staging_buffer_pool.Request(staging_size, false); const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
update_descriptor_queue.Acquire(); update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
@ -286,7 +286,7 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
const u32 num_tri_vertices = (num_vertices / 4) * 6; const u32 num_tri_vertices = (num_vertices / 4) * 6;
const std::size_t staging_size = num_tri_vertices * sizeof(u32); const std::size_t staging_size = num_tri_vertices * sizeof(u32);
const auto staging_ref = staging_buffer_pool.Request(staging_size, false); const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
update_descriptor_queue.Acquire(); update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);

View File

@ -1445,7 +1445,7 @@ VkBuffer RasterizerVulkan::DefaultBuffer() {
.queueFamilyIndexCount = 0, .queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr, .pQueueFamilyIndices = nullptr,
}); });
default_buffer_commit = memory_allocator.Commit(default_buffer, false); default_buffer_commit = memory_allocator.Commit(default_buffer, MemoryUsage::DeviceLocal);
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) { scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) {

View File

@ -8,6 +8,7 @@
#include <fmt/format.h> #include <fmt/format.h>
#include "common/assert.h"
#include "common/bit_util.h" #include "common/bit_util.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
@ -23,23 +24,24 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
StagingBufferPool::~StagingBufferPool() = default; StagingBufferPool::~StagingBufferPool() = default;
StagingBufferRef StagingBufferPool::Request(size_t size, bool host_visible) { StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) {
if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, host_visible)) { if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) {
return *ref; return *ref;
} }
return CreateStagingBuffer(size, host_visible); return CreateStagingBuffer(size, usage);
} }
void StagingBufferPool::TickFrame() { void StagingBufferPool::TickFrame() {
current_delete_level = (current_delete_level + 1) % NUM_LEVELS; current_delete_level = (current_delete_level + 1) % NUM_LEVELS;
ReleaseCache(true); ReleaseCache(MemoryUsage::DeviceLocal);
ReleaseCache(false); ReleaseCache(MemoryUsage::Upload);
ReleaseCache(MemoryUsage::Download);
} }
std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size, std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size,
bool host_visible) { MemoryUsage usage) {
StagingBuffers& cache_level = GetCache(host_visible)[Common::Log2Ceil64(size)]; StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)];
const auto is_free = [this](const StagingBuffer& entry) { const auto is_free = [this](const StagingBuffer& entry) {
return scheduler.IsFree(entry.tick); return scheduler.IsFree(entry.tick);
@ -58,7 +60,7 @@ std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t s
return it->Ref(); return it->Ref();
} }
StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, bool host_visible) { StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage) {
const u32 log2 = Common::Log2Ceil64(size); const u32 log2 = Common::Log2Ceil64(size);
vk::Buffer buffer = device.GetLogical().CreateBuffer({ vk::Buffer buffer = device.GetLogical().CreateBuffer({
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
@ -76,10 +78,10 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, bool host_v
++buffer_index; ++buffer_index;
buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str()); buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str());
} }
MemoryCommit commit = memory_allocator.Commit(buffer, host_visible); MemoryCommit commit = memory_allocator.Commit(buffer, usage);
const std::span<u8> mapped_span = host_visible ? commit.Map() : std::span<u8>{}; const std::span<u8> mapped_span = IsHostVisible(usage) ? commit.Map() : std::span<u8>{};
StagingBuffer& entry = GetCache(host_visible)[log2].entries.emplace_back(StagingBuffer{ StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{
.buffer = std::move(buffer), .buffer = std::move(buffer),
.commit = std::move(commit), .commit = std::move(commit),
.mapped_span = mapped_span, .mapped_span = mapped_span,
@ -88,12 +90,22 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, bool host_v
return entry.Ref(); return entry.Ref();
} }
StagingBufferPool::StagingBuffersCache& StagingBufferPool::GetCache(bool host_visible) { StagingBufferPool::StagingBuffersCache& StagingBufferPool::GetCache(MemoryUsage usage) {
return host_visible ? host_staging_buffers : device_staging_buffers; switch (usage) {
case MemoryUsage::DeviceLocal:
return device_local_cache;
case MemoryUsage::Upload:
return upload_cache;
case MemoryUsage::Download:
return download_cache;
default:
UNREACHABLE_MSG("Invalid memory usage={}", usage);
return upload_cache;
}
} }
void StagingBufferPool::ReleaseCache(bool host_visible) { void StagingBufferPool::ReleaseCache(MemoryUsage usage) {
ReleaseLevel(GetCache(host_visible), current_delete_level); ReleaseLevel(GetCache(usage), current_delete_level);
} }
void StagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, size_t log2) { void StagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, size_t log2) {

View File

@ -28,7 +28,7 @@ public:
VKScheduler& scheduler); VKScheduler& scheduler);
~StagingBufferPool(); ~StagingBufferPool();
StagingBufferRef Request(size_t size, bool host_visible); StagingBufferRef Request(size_t size, MemoryUsage usage);
void TickFrame(); void TickFrame();
@ -56,13 +56,13 @@ private:
static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT; static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT;
using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>; using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>;
std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, bool host_visible); std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage);
StagingBufferRef CreateStagingBuffer(size_t size, bool host_visible); StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage);
StagingBuffersCache& GetCache(bool host_visible); StagingBuffersCache& GetCache(MemoryUsage usage);
void ReleaseCache(bool host_visible); void ReleaseCache(MemoryUsage usage);
void ReleaseLevel(StagingBuffersCache& cache, size_t log2); void ReleaseLevel(StagingBuffersCache& cache, size_t log2);
@ -70,8 +70,9 @@ private:
MemoryAllocator& memory_allocator; MemoryAllocator& memory_allocator;
VKScheduler& scheduler; VKScheduler& scheduler;
StagingBuffersCache host_staging_buffers; StagingBuffersCache device_local_cache;
StagingBuffersCache device_staging_buffers; StagingBuffersCache upload_cache;
StagingBuffersCache download_cache;
size_t current_delete_level = 0; size_t current_delete_level = 0;
u64 buffer_index = 0; u64 buffer_index = 0;

View File

@ -554,7 +554,7 @@ void TextureCacheRuntime::Finish() {
} }
ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) {
const auto staging_ref = staging_buffer_pool.Request(size, true); const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload);
return ImageBufferMap{ return ImageBufferMap{
.handle = staging_ref.buffer, .handle = staging_ref.buffer,
.span = staging_ref.mapped_span, .span = staging_ref.mapped_span,
@ -788,9 +788,9 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)), image(MakeImage(runtime.device, info)), buffer(MakeBuffer(runtime.device, info)),
aspect_mask(ImageAspectMask(info.format)) { aspect_mask(ImageAspectMask(info.format)) {
if (image) { if (image) {
commit = runtime.memory_allocator.Commit(image, false); commit = runtime.memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
} else { } else {
commit = runtime.memory_allocator.Commit(buffer, false); commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
} }
if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) { if (IsPixelFormatASTC(info.format) && !runtime.device.IsOptimalAstcSupported()) {
flags |= VideoCommon::ImageFlagBits::Converted; flags |= VideoCommon::ImageFlagBits::Converted;

View File

@ -156,11 +156,13 @@ MemoryAllocator::MemoryAllocator(const Device& device_)
MemoryAllocator::~MemoryAllocator() = default; MemoryAllocator::~MemoryAllocator() = default;
MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, bool host_visible) { MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) {
const u64 chunk_size = GetAllocationChunkSize(requirements.size); const u64 chunk_size = GetAllocationChunkSize(requirements.size);
// When a host visible commit is asked, search for host visible and coherent, otherwise search // When a host visible commit is asked, search for host visible and coherent, otherwise search
// for a fast device local type. // for a fast device local type.
// TODO: Deduce memory types from usage in a better way
const bool host_visible = IsHostVisible(usage);
const VkMemoryPropertyFlags wanted_properties = const VkMemoryPropertyFlags wanted_properties =
host_visible ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT host_visible ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
: VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
@ -176,14 +178,14 @@ MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, b
return TryAllocCommit(requirements, wanted_properties).value(); return TryAllocCommit(requirements, wanted_properties).value();
} }
MemoryCommit MemoryAllocator::Commit(const vk::Buffer& buffer, bool host_visible) { MemoryCommit MemoryAllocator::Commit(const vk::Buffer& buffer, MemoryUsage usage) {
auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), host_visible); auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), usage);
buffer.BindMemory(commit.Memory(), commit.Offset()); buffer.BindMemory(commit.Memory(), commit.Offset());
return commit; return commit;
} }
MemoryCommit MemoryAllocator::Commit(const vk::Image& image, bool host_visible) { MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage) {
auto commit = Commit(device.GetLogical().GetImageMemoryRequirements(*image), host_visible); auto commit = Commit(device.GetLogical().GetImageMemoryRequirements(*image), usage);
image.BindMemory(commit.Memory(), commit.Offset()); image.BindMemory(commit.Memory(), commit.Offset());
return commit; return commit;
} }
@ -224,4 +226,16 @@ std::optional<MemoryCommit> MemoryAllocator::TryAllocCommit(
return std::nullopt; return std::nullopt;
} }
bool IsHostVisible(MemoryUsage usage) noexcept {
    // Upload and Download are the usages that require a host visible memory type;
    // DeviceLocal is the only one that does not.
    switch (usage) {
    case MemoryUsage::Upload:
    case MemoryUsage::Download:
        return true;
    case MemoryUsage::DeviceLocal:
        return false;
    }
    // Reached only when usage holds a value that matches none of the cases above.
    // Report it and fall back to "not host visible".
    UNREACHABLE_MSG("Invalid memory usage={}", usage);
    return false;
}
} // namespace Vulkan } // namespace Vulkan

View File

@ -17,7 +17,16 @@ class Device;
class MemoryMap; class MemoryMap;
class MemoryAllocation; class MemoryAllocation;
class MemoryCommit final { /// Hints and requirements for the backing memory type of a commit
enum class MemoryUsage {
    /// Hints device local usages, fastest memory type to read and write from the GPU.
    /// This is the only usage IsHostVisible() reports as not host visible.
    DeviceLocal,
    /// Requires a host visible memory type optimized for CPU to GPU uploads.
    Upload,
    /// Requires a host visible memory type optimized for GPU to CPU readbacks.
    /// NOTE(review): as of this change MemoryAllocator::Commit derives its wanted property
    /// flags only from IsHostVisible(), so Upload and Download request the same flags there.
    Download,
};
/// Ownership handle of a memory commitment.
/// Points to a subregion of a memory allocation.
class MemoryCommit {
public: public:
explicit MemoryCommit() noexcept = default; explicit MemoryCommit() noexcept = default;
explicit MemoryCommit(const Device& device_, MemoryAllocation* allocation_, explicit MemoryCommit(const Device& device_, MemoryAllocation* allocation_,
@ -54,7 +63,9 @@ private:
std::span<u8> span; ///< Host visible memory span. Empty if not queried before. std::span<u8> span; ///< Host visible memory span. Empty if not queried before.
}; };
class MemoryAllocator final { /// Memory allocator container.
/// Allocates and releases memory allocations on demand.
class MemoryAllocator {
public: public:
explicit MemoryAllocator(const Device& device_); explicit MemoryAllocator(const Device& device_);
~MemoryAllocator(); ~MemoryAllocator();
@ -71,13 +82,13 @@ public:
* *
* @returns A memory commit. * @returns A memory commit.
*/ */
MemoryCommit Commit(const VkMemoryRequirements& requirements, bool host_visible); MemoryCommit Commit(const VkMemoryRequirements& requirements, MemoryUsage usage);
/// Commits memory required by the buffer and binds it. /// Commits memory required by the buffer and binds it.
MemoryCommit Commit(const vk::Buffer& buffer, bool host_visible); MemoryCommit Commit(const vk::Buffer& buffer, MemoryUsage usage);
/// Commits memory required by the image and binds it. /// Commits memory required by the image and binds it.
MemoryCommit Commit(const vk::Image& image, bool host_visible); MemoryCommit Commit(const vk::Image& image, MemoryUsage usage);
private: private:
/// Allocates a chunk of memory. /// Allocates a chunk of memory.
@ -92,4 +103,7 @@ private:
std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations. std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
}; };
/// Returns true when a memory usage is guaranteed to be host visible.
/// Upload and Download usages are host visible; DeviceLocal is not.
/// An unrecognized usage value is reported as unreachable and yields false.
bool IsHostVisible(MemoryUsage usage) noexcept;
} // namespace Vulkan } // namespace Vulkan