dynarmic: Inline exclusive memory accesses

Inlines implementation of exclusive instructions into JITted code,
improving performance of applications relying heavily on these
instructions.

We also fastmem these instructions for additional speed, with
support for appropriate recompilation on fastmem failure.

An unsafe optimization to disable the intercore global_monitor is also
provided, should one wish to rely solely on cmpxchg semantics for
safety.

See also: merryhime/dynarmic#664
This commit is contained in:
merry 2022-02-27 19:40:05 +00:00
parent 96d90be59f
commit 16784e5bb3
17 changed files with 114 additions and 8 deletions

2
externals/dynarmic vendored

@ -1 +1 @@
Subproject commit 19a423034e1abcaf1a61fa61ceffffebf45a0240
Subproject commit f9696760db4f63a413093dedd185875da64dff58

View File

@ -176,6 +176,7 @@ void RestoreGlobalState(bool is_powered_on) {
values.cpuopt_unsafe_ignore_standard_fpcr.SetGlobal(true);
values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true);
values.cpuopt_unsafe_fastmem_check.SetGlobal(true);
values.cpuopt_unsafe_ignore_global_monitor.SetGlobal(true);
// Renderer
values.renderer_backend.SetGlobal(true);

View File

@ -484,12 +484,15 @@ struct Values {
BasicSetting<bool> cpuopt_misc_ir{true, "cpuopt_misc_ir"};
BasicSetting<bool> cpuopt_reduce_misalign_checks{true, "cpuopt_reduce_misalign_checks"};
BasicSetting<bool> cpuopt_fastmem{true, "cpuopt_fastmem"};
BasicSetting<bool> cpuopt_fastmem_exclusives{true, "cpuopt_fastmem_exclusives"};
BasicSetting<bool> cpuopt_recompile_exclusives{true, "cpuopt_recompile_exclusives"};
Setting<bool> cpuopt_unsafe_unfuse_fma{true, "cpuopt_unsafe_unfuse_fma"};
Setting<bool> cpuopt_unsafe_reduce_fp_error{true, "cpuopt_unsafe_reduce_fp_error"};
Setting<bool> cpuopt_unsafe_ignore_standard_fpcr{true, "cpuopt_unsafe_ignore_standard_fpcr"};
Setting<bool> cpuopt_unsafe_inaccurate_nan{true, "cpuopt_unsafe_inaccurate_nan"};
Setting<bool> cpuopt_unsafe_fastmem_check{true, "cpuopt_unsafe_fastmem_check"};
Setting<bool> cpuopt_unsafe_ignore_global_monitor{true, "cpuopt_unsafe_ignore_global_monitor"};
// Renderer
RangedSetting<RendererBackend> renderer_backend{

View File

@ -137,6 +137,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS;
config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
config.only_detect_misalignment_via_page_table_on_page_boundary = true;
config.fastmem_exclusive_access = true;
config.recompile_on_exclusive_fastmem_failure = true;
// Multi-process state
config.processor_id = core_index;
@ -178,6 +180,12 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
if (!Settings::values.cpuopt_fastmem) {
config.fastmem_pointer = nullptr;
}
if (!Settings::values.cpuopt_fastmem_exclusives) {
config.fastmem_exclusive_access = false;
}
if (!Settings::values.cpuopt_recompile_exclusives) {
config.recompile_on_exclusive_fastmem_failure = false;
}
}
// Unsafe optimizations
@ -195,6 +203,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
}
if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
}
}
// Curated optimizations
@ -203,6 +214,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
}
return std::make_unique<Dynarmic::A32::Jit>(config);

View File

@ -185,6 +185,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
config.fastmem_pointer = page_table->fastmem_arena;
config.fastmem_address_space_bits = address_space_bits;
config.silently_mirror_fastmem = false;
config.fastmem_exclusive_access = true;
config.recompile_on_exclusive_fastmem_failure = true;
}
// Multi-process state
@ -237,6 +240,12 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
if (!Settings::values.cpuopt_fastmem) {
config.fastmem_pointer = nullptr;
}
if (!Settings::values.cpuopt_fastmem_exclusives) {
config.fastmem_exclusive_access = false;
}
if (!Settings::values.cpuopt_recompile_exclusives) {
config.recompile_on_exclusive_fastmem_failure = false;
}
}
// Unsafe optimizations
@ -254,6 +263,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
if (Settings::values.cpuopt_unsafe_fastmem_check) {
config.fastmem_address_space_bits = 64;
}
if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
}
}
// Curated optimizations
@ -262,6 +274,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
config.fastmem_address_space_bits = 64;
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
}
return std::make_shared<Dynarmic::A64::Jit>(config);

View File

@ -37,8 +37,8 @@ u128 DynarmicExclusiveMonitor::ExclusiveRead128(std::size_t core_index, VAddr ad
});
}
void DynarmicExclusiveMonitor::ClearExclusive() {
monitor.Clear();
void DynarmicExclusiveMonitor::ClearExclusive(std::size_t core_index) {
monitor.ClearProcessor(core_index);
}
bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {

View File

@ -29,7 +29,7 @@ public:
u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override;
u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override;
u128 ExclusiveRead128(std::size_t core_index, VAddr addr) override;
void ClearExclusive() override;
void ClearExclusive(std::size_t core_index) override;
bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override;
bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override;

View File

@ -23,7 +23,7 @@ public:
virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0;
virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0;
virtual u128 ExclusiveRead128(std::size_t core_index, VAddr addr) = 0;
virtual void ClearExclusive() = 0;
virtual void ClearExclusive(std::size_t core_index) = 0;
virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0;
virtual bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) = 0;

View File

@ -49,7 +49,7 @@ bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 valu
}
} else {
// Otherwise, clear our exclusive hold and finish
monitor.ClearExclusive();
monitor.ClearExclusive(current_core);
}
// We're done.
@ -78,7 +78,7 @@ bool UpdateIfEqual(Core::System& system, s32* out, VAddr address, s32 value, s32
}
} else {
// Otherwise, clear our exclusive hold and finish.
monitor.ClearExclusive();
monitor.ClearExclusive(current_core);
}
// We're done.

View File

@ -609,6 +609,7 @@ void Config::ReadCpuValues() {
ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
ReadGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan);
ReadGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check);
ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor);
if (global) {
ReadBasicSetting(Settings::values.cpu_debug_mode);
@ -621,6 +622,8 @@ void Config::ReadCpuValues() {
ReadBasicSetting(Settings::values.cpuopt_misc_ir);
ReadBasicSetting(Settings::values.cpuopt_reduce_misalign_checks);
ReadBasicSetting(Settings::values.cpuopt_fastmem);
ReadBasicSetting(Settings::values.cpuopt_fastmem_exclusives);
ReadBasicSetting(Settings::values.cpuopt_recompile_exclusives);
}
qt_config->endGroup();
@ -1139,6 +1142,7 @@ void Config::SaveCpuValues() {
WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
WriteGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan);
WriteGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check);
WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor);
if (global) {
WriteBasicSetting(Settings::values.cpu_debug_mode);

View File

@ -36,6 +36,7 @@ void ConfigureCpu::SetConfiguration() {
ui->cpuopt_unsafe_ignore_standard_fpcr->setEnabled(runtime_lock);
ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock);
ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock);
ui->cpuopt_unsafe_ignore_global_monitor->setEnabled(runtime_lock);
ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue());
ui->cpuopt_unsafe_reduce_fp_error->setChecked(
@ -46,6 +47,8 @@ void ConfigureCpu::SetConfiguration() {
Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue());
ui->cpuopt_unsafe_fastmem_check->setChecked(
Settings::values.cpuopt_unsafe_fastmem_check.GetValue());
ui->cpuopt_unsafe_ignore_global_monitor->setChecked(
Settings::values.cpuopt_unsafe_ignore_global_monitor.GetValue());
if (Settings::IsConfiguringGlobal()) {
ui->accuracy->setCurrentIndex(static_cast<int>(Settings::values.cpu_accuracy.GetValue()));
@ -82,6 +85,9 @@ void ConfigureCpu::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_fastmem_check,
ui->cpuopt_unsafe_fastmem_check,
cpuopt_unsafe_fastmem_check);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_ignore_global_monitor,
ui->cpuopt_unsafe_ignore_global_monitor,
cpuopt_unsafe_ignore_global_monitor);
}
void ConfigureCpu::changeEvent(QEvent* event) {
@ -120,4 +126,7 @@ void ConfigureCpu::SetupPerGameUI() {
ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_fastmem_check,
Settings::values.cpuopt_unsafe_fastmem_check,
cpuopt_unsafe_fastmem_check);
ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_ignore_global_monitor,
Settings::values.cpuopt_unsafe_ignore_global_monitor,
cpuopt_unsafe_ignore_global_monitor);
}

View File

@ -45,6 +45,7 @@ private:
ConfigurationShared::CheckState cpuopt_unsafe_ignore_standard_fpcr;
ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan;
ConfigurationShared::CheckState cpuopt_unsafe_fastmem_check;
ConfigurationShared::CheckState cpuopt_unsafe_ignore_global_monitor;
const Core::System& system;
};

View File

@ -150,6 +150,18 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_unsafe_ignore_global_monitor">
<property name="toolTip">
<string>
&lt;div&gt;This option improves speed by relying only on the semantics of cmpxchg to ensure safety of exclusive access instructions. Please note this may result in deadlocks and other race conditions.&lt;/div&gt;
</string>
</property>
<property name="text">
<string>Ignore global monitor</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>

View File

@ -44,6 +44,12 @@ void ConfigureCpuDebug::SetConfiguration() {
Settings::values.cpuopt_reduce_misalign_checks.GetValue());
ui->cpuopt_fastmem->setEnabled(runtime_lock);
ui->cpuopt_fastmem->setChecked(Settings::values.cpuopt_fastmem.GetValue());
ui->cpuopt_fastmem_exclusives->setEnabled(runtime_lock);
ui->cpuopt_fastmem_exclusives->setChecked(
Settings::values.cpuopt_fastmem_exclusives.GetValue());
ui->cpuopt_recompile_exclusives->setEnabled(runtime_lock);
ui->cpuopt_recompile_exclusives->setChecked(
Settings::values.cpuopt_recompile_exclusives.GetValue());
}
void ConfigureCpuDebug::ApplyConfiguration() {
@ -56,6 +62,8 @@ void ConfigureCpuDebug::ApplyConfiguration() {
Settings::values.cpuopt_misc_ir = ui->cpuopt_misc_ir->isChecked();
Settings::values.cpuopt_reduce_misalign_checks = ui->cpuopt_reduce_misalign_checks->isChecked();
Settings::values.cpuopt_fastmem = ui->cpuopt_fastmem->isChecked();
Settings::values.cpuopt_fastmem_exclusives = ui->cpuopt_fastmem_exclusives->isChecked();
Settings::values.cpuopt_recompile_exclusives = ui->cpuopt_recompile_exclusives->isChecked();
}
void ConfigureCpuDebug::changeEvent(QEvent* event) {

View File

@ -144,7 +144,34 @@
</string>
</property>
<property name="text">
<string>Enable Host MMU Emulation</string>
<string>Enable Host MMU Emulation (general memory instructions)</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_fastmem_exclusives">
<property name="toolTip">
<string>
&lt;div style=&quot;white-space: nowrap&quot;&gt;This optimization speeds up exclusive memory accesses by the guest program.&lt;/div&gt;
&lt;div style=&quot;white-space: nowrap&quot;&gt;Enabling it causes guest exclusive memory reads/writes to be done directly into memory and make use of Host's MMU.&lt;/div&gt;
&lt;div style=&quot;white-space: nowrap&quot;&gt;Disabling this forces all exclusive memory accesses to use Software MMU Emulation.&lt;/div&gt;
</string>
</property>
<property name="text">
<string>Enable Host MMU Emulation (exclusive memory instructions)</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="cpuopt_recompile_exclusives">
<property name="toolTip">
<string>
&lt;div style=&quot;white-space: nowrap&quot;&gt;This optimization speeds up exclusive memory accesses by the guest program.&lt;/div&gt;
&lt;div style=&quot;white-space: nowrap&quot;&gt;Enabling it reduces the overhead of fastmem failure of exclusive memory accesses.&lt;/div&gt;
</string>
</property>
<property name="text">
<string>Enable recompilation of exclusive memory instructions</string>
</property>
</widget>
</item>

View File

@ -280,11 +280,14 @@ void Config::ReadValues() {
ReadSetting("Cpu", Settings::values.cpuopt_misc_ir);
ReadSetting("Cpu", Settings::values.cpuopt_reduce_misalign_checks);
ReadSetting("Cpu", Settings::values.cpuopt_fastmem);
ReadSetting("Cpu", Settings::values.cpuopt_fastmem_exclusives);
ReadSetting("Cpu", Settings::values.cpuopt_recompile_exclusives);
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_unfuse_fma);
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_reduce_fp_error);
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_inaccurate_nan);
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_fastmem_check);
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_global_monitor);
// Renderer
ReadSetting("Renderer", Settings::values.renderer_backend);

View File

@ -174,6 +174,14 @@ cpuopt_reduce_misalign_checks =
# 0: Disabled, 1 (default): Enabled
cpuopt_fastmem =
# Enable Host MMU Emulation for exclusive memory instructions (faster guest memory access)
# 0: Disabled, 1 (default): Enabled
cpuopt_fastmem_exclusives =
# Enable fallback on failure of fastmem of exclusive memory instructions (faster guest memory access)
# 0: Disabled, 1 (default): Enabled
cpuopt_recompile_exclusives =
# Enable unfuse FMA (improve performance on CPUs without FMA)
# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select.
# 0: Disabled, 1 (default): Enabled
@ -199,6 +207,11 @@ cpuopt_unsafe_inaccurate_nan =
# 0: Disabled, 1 (default): Enabled
cpuopt_unsafe_fastmem_check =
# Enable faster exclusive instructions
# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select.
# 0: Disabled, 1 (default): Enabled
cpuopt_unsafe_ignore_global_monitor =
[Renderer]
# Which backend API to use.
# 0 (default): OpenGL, 1: Vulkan