texture_cache: Handle out-of-bounds texture blits

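Some games issue texture blits whose source region extends past the width of the source texture, expecting the out-of-bounds part to continue on the next row. This commit splits such blits so that the out-of-bounds part is copied from the next row instead of being read out of bounds from the src texture.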
This commit is contained in:
ameerj 2021-05-07 22:14:21 -04:00
parent 260b841dc3
commit 3671fd0a97
8 changed files with 99 additions and 61 deletions

View File

@ -543,8 +543,7 @@ void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src,
} }
void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
const std::array<Offset2D, 2>& dst_region, const Region2D& dst_region, const Region2D& src_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation) { Tegra::Engines::Fermi2D::Operation operation) {
state_tracker.NotifyScissor0(); state_tracker.NotifyScissor0();
@ -560,9 +559,9 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
const GLbitfield buffer_bits = dst->BufferBits(); const GLbitfield buffer_bits = dst->BufferBits();
const bool has_depth = (buffer_bits & ~GL_COLOR_BUFFER_BIT) != 0; const bool has_depth = (buffer_bits & ~GL_COLOR_BUFFER_BIT) != 0;
const bool is_linear = !has_depth && filter == Tegra::Engines::Fermi2D::Filter::Bilinear; const bool is_linear = !has_depth && filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region[0].x, src_region[0].y, glBlitNamedFramebuffer(src->Handle(), dst->Handle(), src_region.start.x, src_region.start.y,
src_region[1].x, src_region[1].y, dst_region[0].x, dst_region[0].y, src_region.end.x, src_region.end.y, dst_region.start.x,
dst_region[1].x, dst_region[1].y, buffer_bits, dst_region.start.y, dst_region.end.x, dst_region.end.y, buffer_bits,
is_linear ? GL_LINEAR : GL_NEAREST); is_linear ? GL_LINEAR : GL_NEAREST);
} }

View File

@ -28,7 +28,7 @@ using VideoCommon::ImageId;
using VideoCommon::ImageViewId; using VideoCommon::ImageViewId;
using VideoCommon::ImageViewType; using VideoCommon::ImageViewType;
using VideoCommon::NUM_RT; using VideoCommon::NUM_RT;
using VideoCommon::Offset2D; using VideoCommon::Region2D;
using VideoCommon::RenderTargets; using VideoCommon::RenderTargets;
struct ImageBufferMap { struct ImageBufferMap {
@ -73,10 +73,8 @@ public:
void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
void BlitFramebuffer(Framebuffer* dst, Framebuffer* src, void BlitFramebuffer(Framebuffer* dst, Framebuffer* src, const Region2D& dst_region,
const std::array<Offset2D, 2>& dst_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation); Tegra::Engines::Fermi2D::Operation operation);
void AccelerateImageUpload(Image& image, const ImageBufferMap& map, void AccelerateImageUpload(Image& image, const ImageBufferMap& map,

View File

@ -289,16 +289,15 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri
device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr); device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr);
} }
void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region,
const std::array<Offset2D, 2>& dst_region, const Region2D& src_region) {
const std::array<Offset2D, 2>& src_region) {
const VkOffset2D offset{ const VkOffset2D offset{
.x = std::min(dst_region[0].x, dst_region[1].x), .x = std::min(dst_region.start.x, dst_region.end.x),
.y = std::min(dst_region[0].y, dst_region[1].y), .y = std::min(dst_region.start.y, dst_region.end.y),
}; };
const VkExtent2D extent{ const VkExtent2D extent{
.width = static_cast<u32>(std::abs(dst_region[1].x - dst_region[0].x)), .width = static_cast<u32>(std::abs(dst_region.end.x - dst_region.start.x)),
.height = static_cast<u32>(std::abs(dst_region[1].y - dst_region[0].y)), .height = static_cast<u32>(std::abs(dst_region.end.y - dst_region.start.y)),
}; };
const VkViewport viewport{ const VkViewport viewport{
.x = static_cast<float>(offset.x), .x = static_cast<float>(offset.x),
@ -313,11 +312,12 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout,
.offset = offset, .offset = offset,
.extent = extent, .extent = extent,
}; };
const float scale_x = static_cast<float>(src_region[1].x - src_region[0].x); const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x);
const float scale_y = static_cast<float>(src_region[1].y - src_region[0].y); const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y);
const PushConstants push_constants{ const PushConstants push_constants{
.tex_scale = {scale_x, scale_y}, .tex_scale = {scale_x, scale_y},
.tex_offset = {static_cast<float>(src_region[0].x), static_cast<float>(src_region[0].y)}, .tex_offset = {static_cast<float>(src_region.start.x),
static_cast<float>(src_region.start.y)},
}; };
cmdbuf.SetViewport(0, viewport); cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor); cmdbuf.SetScissor(0, scissor);
@ -353,8 +353,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
BlitImageHelper::~BlitImageHelper() = default; BlitImageHelper::~BlitImageHelper() = default;
void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
const std::array<Offset2D, 2>& dst_region, const Region2D& dst_region, const Region2D& src_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation) { Tegra::Engines::Fermi2D::Operation operation) {
const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear; const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
@ -383,8 +382,7 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageV
void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
VkImageView src_depth_view, VkImageView src_stencil_view, VkImageView src_depth_view, VkImageView src_stencil_view,
const std::array<Offset2D, 2>& dst_region, const Region2D& dst_region, const Region2D& src_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation) { Tegra::Engines::Fermi2D::Operation operation) {
ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point); ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);

View File

@ -13,7 +13,7 @@
namespace Vulkan { namespace Vulkan {
using VideoCommon::Offset2D; using VideoCommon::Region2D;
class Device; class Device;
class Framebuffer; class Framebuffer;
@ -35,15 +35,13 @@ public:
~BlitImageHelper(); ~BlitImageHelper();
void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view, void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
const std::array<Offset2D, 2>& dst_region, const Region2D& dst_region, const Region2D& src_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation); Tegra::Engines::Fermi2D::Operation operation);
void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view, void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
VkImageView src_stencil_view, const std::array<Offset2D, 2>& dst_region, VkImageView src_stencil_view, const Region2D& dst_region,
const std::array<Offset2D, 2>& src_region, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation); Tegra::Engines::Fermi2D::Operation operation);
void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);

View File

@ -490,8 +490,7 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im
write_barrier); write_barrier);
} }
[[nodiscard]] VkImageBlit MakeImageBlit(const std::array<Offset2D, 2>& dst_region, [[nodiscard]] VkImageBlit MakeImageBlit(const Region2D& dst_region, const Region2D& src_region,
const std::array<Offset2D, 2>& src_region,
const VkImageSubresourceLayers& dst_layers, const VkImageSubresourceLayers& dst_layers,
const VkImageSubresourceLayers& src_layers) { const VkImageSubresourceLayers& src_layers) {
return VkImageBlit{ return VkImageBlit{
@ -499,13 +498,13 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im
.srcOffsets = .srcOffsets =
{ {
{ {
.x = src_region[0].x, .x = src_region.start.x,
.y = src_region[0].y, .y = src_region.start.y,
.z = 0, .z = 0,
}, },
{ {
.x = src_region[1].x, .x = src_region.end.x,
.y = src_region[1].y, .y = src_region.end.y,
.z = 1, .z = 1,
}, },
}, },
@ -513,42 +512,42 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im
.dstOffsets = .dstOffsets =
{ {
{ {
.x = dst_region[0].x, .x = dst_region.start.x,
.y = dst_region[0].y, .y = dst_region.start.y,
.z = 0, .z = 0,
}, },
{ {
.x = dst_region[1].x, .x = dst_region.end.x,
.y = dst_region[1].y, .y = dst_region.end.y,
.z = 1, .z = 1,
}, },
}, },
}; };
} }
[[nodiscard]] VkImageResolve MakeImageResolve(const std::array<Offset2D, 2>& dst_region, [[nodiscard]] VkImageResolve MakeImageResolve(const Region2D& dst_region,
const std::array<Offset2D, 2>& src_region, const Region2D& src_region,
const VkImageSubresourceLayers& dst_layers, const VkImageSubresourceLayers& dst_layers,
const VkImageSubresourceLayers& src_layers) { const VkImageSubresourceLayers& src_layers) {
return VkImageResolve{ return VkImageResolve{
.srcSubresource = src_layers, .srcSubresource = src_layers,
.srcOffset = .srcOffset =
{ {
.x = src_region[0].x, .x = src_region.start.x,
.y = src_region[0].y, .y = src_region.start.y,
.z = 0, .z = 0,
}, },
.dstSubresource = dst_layers, .dstSubresource = dst_layers,
.dstOffset = .dstOffset =
{ {
.x = dst_region[0].x, .x = dst_region.start.x,
.y = dst_region[0].y, .y = dst_region.start.y,
.z = 0, .z = 0,
}, },
.extent = .extent =
{ {
.width = static_cast<u32>(dst_region[1].x - dst_region[0].x), .width = static_cast<u32>(dst_region.end.x - dst_region.start.x),
.height = static_cast<u32>(dst_region[1].y - dst_region[0].y), .height = static_cast<u32>(dst_region.end.y - dst_region.start.y),
.depth = 1, .depth = 1,
}, },
}; };
@ -602,8 +601,7 @@ StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
} }
void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const std::array<Offset2D, 2>& dst_region, const Region2D& dst_region, const Region2D& src_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation) { Tegra::Engines::Fermi2D::Operation operation) {
const VkImageAspectFlags aspect_mask = ImageAspectMask(src.format); const VkImageAspectFlags aspect_mask = ImageAspectMask(src.format);

View File

@ -16,7 +16,7 @@ namespace Vulkan {
using VideoCommon::ImageId; using VideoCommon::ImageId;
using VideoCommon::NUM_RT; using VideoCommon::NUM_RT;
using VideoCommon::Offset2D; using VideoCommon::Region2D;
using VideoCommon::RenderTargets; using VideoCommon::RenderTargets;
using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormat;
@ -71,8 +71,7 @@ struct TextureCacheRuntime {
[[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size);
void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src,
const std::array<Offset2D, 2>& dst_region, const Region2D& dst_region, const Region2D& src_region,
const std::array<Offset2D, 2>& src_region,
Tegra::Engines::Fermi2D::Filter filter, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation); Tegra::Engines::Fermi2D::Operation operation);

View File

@ -148,7 +148,9 @@ public:
/// Blit an image with the given parameters /// Blit an image with the given parameters
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Config& copy); const Tegra::Engines::Fermi2D::Config& copy,
std::optional<Region2D> src_region_override = {},
std::optional<Region2D> dst_region_override = {});
/// Invalidate the contents of the color buffer index /// Invalidate the contents of the color buffer index
/// These contents become unspecified, the cache can assume aggressive optimizations. /// These contents become unspecified, the cache can assume aggressive optimizations.
@ -615,7 +617,9 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
template <class P> template <class P>
void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Config& copy) { const Tegra::Engines::Fermi2D::Config& copy,
std::optional<Region2D> src_override,
std::optional<Region2D> dst_override) {
const BlitImages images = GetBlitImages(dst, src); const BlitImages images = GetBlitImages(dst, src);
const ImageId dst_id = images.dst_id; const ImageId dst_id = images.dst_id;
const ImageId src_id = images.src_id; const ImageId src_id = images.src_id;
@ -631,20 +635,42 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
const std::array src_region{
Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, // out of bounds texture blit checking
Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, const bool use_override = src_override.has_value();
const s32 src_x0 = copy.src_x0 >> src_samples_x;
s32 src_x1 = use_override ? src_override->end.x : copy.src_x1 >> src_samples_x;
const s32 src_y0 = copy.src_y0 >> src_samples_y;
const s32 src_y1 = copy.src_y1 >> src_samples_y;
const auto src_width = static_cast<s32>(src_image.info.size.width);
const bool width_oob = src_x1 > src_width;
const auto width_diff = width_oob ? src_x1 - src_width : 0;
if (width_oob) {
src_x1 = src_width;
}
const Region2D src_dimensions{
Offset2D{.x = src_x0, .y = src_y0},
Offset2D{.x = src_x1, .y = src_y1},
}; };
const auto src_region = use_override ? *src_override : src_dimensions;
const std::optional src_base = src_image.TryFindBase(src.Address()); const std::optional src_base = src_image.TryFindBase(src.Address());
const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
const std::array dst_region{
Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, const s32 dst_x0 = copy.dst_x0 >> dst_samples_x;
Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, const s32 dst_x1 = copy.dst_x1 >> dst_samples_x;
const s32 dst_y0 = copy.dst_y0 >> dst_samples_y;
const s32 dst_y1 = copy.dst_y1 >> dst_samples_y;
const Region2D dst_dimensions{
Offset2D{.x = dst_x0, .y = dst_y0},
Offset2D{.x = dst_x1 - width_diff, .y = dst_y1},
}; };
const auto dst_region = use_override ? *dst_override : dst_dimensions;
// Always call this after src_framebuffer_id was queried, as the address might be invalidated. // Always call this after src_framebuffer_id was queried, as the address might be invalidated.
Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
@ -661,6 +687,21 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
copy.operation); copy.operation);
} }
if (width_oob) {
// Continue copy of the oob region of the texture on the next row
auto oob_src = src;
oob_src.height++;
const Region2D src_region_override{
Offset2D{.x = 0, .y = src_y0 + 1},
Offset2D{.x = width_diff, .y = src_y1 + 1},
};
const Region2D dst_region_override{
Offset2D{.x = dst_x1 - width_diff, .y = dst_y0},
Offset2D{.x = dst_x1, .y = dst_y1},
};
BlitImage(dst, oob_src, copy, src_region_override, dst_region_override);
}
} }
template <class P> template <class P>

View File

@ -64,6 +64,13 @@ struct Offset3D {
s32 z; s32 z;
}; };
/// 2D region described by a pair of offsets, used in this change as the
/// source/destination rectangle argument of the texture cache blit functions.
struct Region2D {
/// Defaulted three-way comparison: compares `start` first, then `end`.
constexpr auto operator<=>(const Region2D&) const noexcept = default;
/// First corner of the region (passed as the x0/y0 blit coordinates).
Offset2D start;
/// Second corner of the region (passed as the x1/y1 blit coordinates).
/// NOTE(review): callers take min()/abs() over start/end, so `end` is
/// presumably not guaranteed to be >= `start` — confirm.
Offset2D end;
};
struct Extent2D { struct Extent2D {
constexpr auto operator<=>(const Extent2D&) const noexcept = default; constexpr auto operator<=>(const Extent2D&) const noexcept = default;