From 1ea8073783b3e12c10812192e34a5d557727fd34 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 19 Sep 2021 20:35:07 -0400 Subject: [PATCH 1/2] buffer_cache: Minor fixes Loop through the tmp_intervals by reference, rather than by copy, and fix gl clear buffer size calculation. --- src/video_core/buffer_cache/buffer_cache.h | 7 +++---- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 7bfd57369..d350c9b36 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -570,13 +570,12 @@ bool BufferCache

::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am ForEachWrittenRange(*cpu_src_address, amount, mirror); // This subtraction in this order is important for overlapping copies. common_ranges.subtract(subtract_interval); - bool atleast_1_download = tmp_intervals.size() != 0; - for (const IntervalType add_interval : tmp_intervals) { + const bool has_new_downloads = tmp_intervals.size() != 0; + for (const IntervalType& add_interval : tmp_intervals) { common_ranges.add(add_interval); } - runtime.CopyBuffer(dest_buffer, src_buffer, copies); - if (atleast_1_download) { + if (has_new_downloads) { dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); } std::vector tmp_buffer(amount); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 07a995f7d..187a28e4d 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -147,8 +147,7 @@ void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) { glClearNamedBufferSubData(dest_buffer.Handle(), GL_R32UI, static_cast(offset), - static_cast(size / sizeof(u32)), GL_RED, GL_UNSIGNED_INT, - &value); + static_cast(size), GL_RED, GL_UNSIGNED_INT, &value); } void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { From 24049591f6a0737d663904b633cf720fa8264774 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 19 Sep 2021 20:36:41 -0400 Subject: [PATCH 2/2] maxwell_dma: Minor refactoring --- src/video_core/engines/maxwell_dma.cpp | 64 +++++++++++++------------- src/video_core/engines/maxwell_dma.h | 2 +- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index c7ec1eac9..67388d980 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -82,41 +82,41 @@ void MaxwellDMA::Launch() { } void MaxwellDMA::CopyPitchToPitch() { - // When `multi_line_enable` bit is disabled the copy is performed as if we were copying a 1D - // buffer of length `line_length_in`. - // Otherwise we copy a 2D image of dimensions (line_length_in, line_count). - auto& accelerate = rasterizer->AccessAccelerateDMA(); - if (!regs.launch_dma.multi_line_enable) { - const bool is_buffer_clear = regs.launch_dma.remap_enable != 0 && - regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A; - // TODO: allow multisized components. - if (is_buffer_clear) { - ASSERT(regs.remap_const.component_size_minus_one == 3); - accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); - std::vector tmp_buffer(regs.line_length_in, regs.remap_consta_value); - memory_manager.WriteBlockUnsafe(regs.offset_out, - reinterpret_cast(tmp_buffer.data()), - regs.line_length_in * sizeof(u32)); - return; - } - UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); - if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { - std::vector tmp_buffer(regs.line_length_in); - memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in); - memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), regs.line_length_in); + // When `multi_line_enable` bit is enabled we copy a 2D image of dimensions + // (line_length_in, line_count). + // Otherwise the copy is performed as if we were copying a 1D buffer of length line_length_in. + const bool remap_enabled = regs.launch_dma.remap_enable != 0; + if (regs.launch_dma.multi_line_enable) { + UNIMPLEMENTED_IF(remap_enabled); + + // Perform a line-by-line copy. + // We're going to take a subrect of size (line_length_in, line_count) from the source + // rectangle. There is no need to manually flush/invalidate the regions because CopyBlock + // does that for us. + for (u32 line = 0; line < regs.line_count; ++line) { + const GPUVAddr source_line = regs.offset_in + static_cast(line) * regs.pitch_in; + const GPUVAddr dest_line = regs.offset_out + static_cast(line) * regs.pitch_out; + memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in); } return; } - - UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); - - // Perform a line-by-line copy. - // We're going to take a subrect of size (line_length_in, line_count) from the source rectangle. - // There is no need to manually flush/invalidate the regions because CopyBlock does that for us. - for (u32 line = 0; line < regs.line_count; ++line) { - const GPUVAddr source_line = regs.offset_in + static_cast(line) * regs.pitch_in; - const GPUVAddr dest_line = regs.offset_out + static_cast(line) * regs.pitch_out; - memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in); + // TODO: allow multisized components. + auto& accelerate = rasterizer->AccessAccelerateDMA(); + const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A; + const bool is_buffer_clear = remap_enabled && is_const_a_dst; + if (is_buffer_clear) { + ASSERT(regs.remap_const.component_size_minus_one == 3); + accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); + std::vector tmp_buffer(regs.line_length_in, regs.remap_consta_value); + memory_manager.WriteBlockUnsafe(regs.offset_out, reinterpret_cast(tmp_buffer.data()), + regs.line_length_in * sizeof(u32)); + return; + } + UNIMPLEMENTED_IF(remap_enabled); + if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { + std::vector tmp_buffer(regs.line_length_in); + memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in); + memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), regs.line_length_in); } } diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 9e457ae16..a04514425 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -175,7 +175,7 @@ public: static_assert(sizeof(LaunchDMA) == 4); struct RemapConst { - enum Swizzle : u32 { + enum class Swizzle : u32 { SRC_X = 0, SRC_Y = 1, SRC_Z = 2,