dynarmic: Inline exclusive memory accesses
Inlines the implementation of exclusive instructions into JITted code, improving the performance of applications that rely heavily on these instructions. These instructions also use fastmem for additional speed, with support for appropriate recompilation on fastmem failure. An unsafe optimization to disable the intercore global_monitor is also provided, should one wish to rely solely on cmpxchg semantics for safety. See also: merryhime/dynarmic#664
This commit is contained in:
parent
96d90be59f
commit
16784e5bb3
|
@ -1 +1 @@
|
|||
Subproject commit 19a423034e1abcaf1a61fa61ceffffebf45a0240
|
||||
Subproject commit f9696760db4f63a413093dedd185875da64dff58
|
|
@ -176,6 +176,7 @@ void RestoreGlobalState(bool is_powered_on) {
|
|||
values.cpuopt_unsafe_ignore_standard_fpcr.SetGlobal(true);
|
||||
values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true);
|
||||
values.cpuopt_unsafe_fastmem_check.SetGlobal(true);
|
||||
values.cpuopt_unsafe_ignore_global_monitor.SetGlobal(true);
|
||||
|
||||
// Renderer
|
||||
values.renderer_backend.SetGlobal(true);
|
||||
|
|
|
@ -484,12 +484,15 @@ struct Values {
|
|||
BasicSetting<bool> cpuopt_misc_ir{true, "cpuopt_misc_ir"};
|
||||
BasicSetting<bool> cpuopt_reduce_misalign_checks{true, "cpuopt_reduce_misalign_checks"};
|
||||
BasicSetting<bool> cpuopt_fastmem{true, "cpuopt_fastmem"};
|
||||
BasicSetting<bool> cpuopt_fastmem_exclusives{true, "cpuopt_fastmem_exclusives"};
|
||||
BasicSetting<bool> cpuopt_recompile_exclusives{true, "cpuopt_recompile_exclusives"};
|
||||
|
||||
Setting<bool> cpuopt_unsafe_unfuse_fma{true, "cpuopt_unsafe_unfuse_fma"};
|
||||
Setting<bool> cpuopt_unsafe_reduce_fp_error{true, "cpuopt_unsafe_reduce_fp_error"};
|
||||
Setting<bool> cpuopt_unsafe_ignore_standard_fpcr{true, "cpuopt_unsafe_ignore_standard_fpcr"};
|
||||
Setting<bool> cpuopt_unsafe_inaccurate_nan{true, "cpuopt_unsafe_inaccurate_nan"};
|
||||
Setting<bool> cpuopt_unsafe_fastmem_check{true, "cpuopt_unsafe_fastmem_check"};
|
||||
Setting<bool> cpuopt_unsafe_ignore_global_monitor{true, "cpuopt_unsafe_ignore_global_monitor"};
|
||||
|
||||
// Renderer
|
||||
RangedSetting<RendererBackend> renderer_backend{
|
||||
|
|
|
@ -137,6 +137,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
|
|||
config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS;
|
||||
config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
|
||||
config.only_detect_misalignment_via_page_table_on_page_boundary = true;
|
||||
config.fastmem_exclusive_access = true;
|
||||
config.recompile_on_exclusive_fastmem_failure = true;
|
||||
|
||||
// Multi-process state
|
||||
config.processor_id = core_index;
|
||||
|
@ -178,6 +180,12 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
|
|||
if (!Settings::values.cpuopt_fastmem) {
|
||||
config.fastmem_pointer = nullptr;
|
||||
}
|
||||
if (!Settings::values.cpuopt_fastmem_exclusives) {
|
||||
config.fastmem_exclusive_access = false;
|
||||
}
|
||||
if (!Settings::values.cpuopt_recompile_exclusives) {
|
||||
config.recompile_on_exclusive_fastmem_failure = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Unsafe optimizations
|
||||
|
@ -195,6 +203,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
|
|||
if (Settings::values.cpuopt_unsafe_inaccurate_nan) {
|
||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
|
||||
}
|
||||
if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
|
||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
|
||||
}
|
||||
}
|
||||
|
||||
// Curated optimizations
|
||||
|
@ -203,6 +214,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
|
|||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
|
||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreStandardFPCRValue;
|
||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
|
||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
|
||||
}
|
||||
|
||||
return std::make_unique<Dynarmic::A32::Jit>(config);
|
||||
|
|
|
@ -185,6 +185,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
|
|||
config.fastmem_pointer = page_table->fastmem_arena;
|
||||
config.fastmem_address_space_bits = address_space_bits;
|
||||
config.silently_mirror_fastmem = false;
|
||||
|
||||
config.fastmem_exclusive_access = true;
|
||||
config.recompile_on_exclusive_fastmem_failure = true;
|
||||
}
|
||||
|
||||
// Multi-process state
|
||||
|
@ -237,6 +240,12 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
|
|||
if (!Settings::values.cpuopt_fastmem) {
|
||||
config.fastmem_pointer = nullptr;
|
||||
}
|
||||
if (!Settings::values.cpuopt_fastmem_exclusives) {
|
||||
config.fastmem_exclusive_access = false;
|
||||
}
|
||||
if (!Settings::values.cpuopt_recompile_exclusives) {
|
||||
config.recompile_on_exclusive_fastmem_failure = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Unsafe optimizations
|
||||
|
@ -254,6 +263,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
|
|||
if (Settings::values.cpuopt_unsafe_fastmem_check) {
|
||||
config.fastmem_address_space_bits = 64;
|
||||
}
|
||||
if (Settings::values.cpuopt_unsafe_ignore_global_monitor) {
|
||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
|
||||
}
|
||||
}
|
||||
|
||||
// Curated optimizations
|
||||
|
@ -262,6 +274,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
|
|||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_UnfuseFMA;
|
||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
|
||||
config.fastmem_address_space_bits = 64;
|
||||
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_IgnoreGlobalMonitor;
|
||||
}
|
||||
|
||||
return std::make_shared<Dynarmic::A64::Jit>(config);
|
||||
|
|
|
@ -37,8 +37,8 @@ u128 DynarmicExclusiveMonitor::ExclusiveRead128(std::size_t core_index, VAddr ad
|
|||
});
|
||||
}
|
||||
|
||||
void DynarmicExclusiveMonitor::ClearExclusive() {
|
||||
monitor.Clear();
|
||||
void DynarmicExclusiveMonitor::ClearExclusive(std::size_t core_index) {
|
||||
monitor.ClearProcessor(core_index);
|
||||
}
|
||||
|
||||
bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
|
||||
|
|
|
@ -29,7 +29,7 @@ public:
|
|||
u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override;
|
||||
u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override;
|
||||
u128 ExclusiveRead128(std::size_t core_index, VAddr addr) override;
|
||||
void ClearExclusive() override;
|
||||
void ClearExclusive(std::size_t core_index) override;
|
||||
|
||||
bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override;
|
||||
bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override;
|
||||
|
|
|
@ -23,7 +23,7 @@ public:
|
|||
virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0;
|
||||
virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0;
|
||||
virtual u128 ExclusiveRead128(std::size_t core_index, VAddr addr) = 0;
|
||||
virtual void ClearExclusive() = 0;
|
||||
virtual void ClearExclusive(std::size_t core_index) = 0;
|
||||
|
||||
virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0;
|
||||
virtual bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) = 0;
|
||||
|
|
|
@ -49,7 +49,7 @@ bool DecrementIfLessThan(Core::System& system, s32* out, VAddr address, s32 valu
|
|||
}
|
||||
} else {
|
||||
// Otherwise, clear our exclusive hold and finish
|
||||
monitor.ClearExclusive();
|
||||
monitor.ClearExclusive(current_core);
|
||||
}
|
||||
|
||||
// We're done.
|
||||
|
@ -78,7 +78,7 @@ bool UpdateIfEqual(Core::System& system, s32* out, VAddr address, s32 value, s32
|
|||
}
|
||||
} else {
|
||||
// Otherwise, clear our exclusive hold and finish.
|
||||
monitor.ClearExclusive();
|
||||
monitor.ClearExclusive(current_core);
|
||||
}
|
||||
|
||||
// We're done.
|
||||
|
|
|
@ -609,6 +609,7 @@ void Config::ReadCpuValues() {
|
|||
ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
|
||||
ReadGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan);
|
||||
ReadGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check);
|
||||
ReadGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor);
|
||||
|
||||
if (global) {
|
||||
ReadBasicSetting(Settings::values.cpu_debug_mode);
|
||||
|
@ -621,6 +622,8 @@ void Config::ReadCpuValues() {
|
|||
ReadBasicSetting(Settings::values.cpuopt_misc_ir);
|
||||
ReadBasicSetting(Settings::values.cpuopt_reduce_misalign_checks);
|
||||
ReadBasicSetting(Settings::values.cpuopt_fastmem);
|
||||
ReadBasicSetting(Settings::values.cpuopt_fastmem_exclusives);
|
||||
ReadBasicSetting(Settings::values.cpuopt_recompile_exclusives);
|
||||
}
|
||||
|
||||
qt_config->endGroup();
|
||||
|
@ -1139,6 +1142,7 @@ void Config::SaveCpuValues() {
|
|||
WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
|
||||
WriteGlobalSetting(Settings::values.cpuopt_unsafe_inaccurate_nan);
|
||||
WriteGlobalSetting(Settings::values.cpuopt_unsafe_fastmem_check);
|
||||
WriteGlobalSetting(Settings::values.cpuopt_unsafe_ignore_global_monitor);
|
||||
|
||||
if (global) {
|
||||
WriteBasicSetting(Settings::values.cpu_debug_mode);
|
||||
|
|
|
@ -36,6 +36,7 @@ void ConfigureCpu::SetConfiguration() {
|
|||
ui->cpuopt_unsafe_ignore_standard_fpcr->setEnabled(runtime_lock);
|
||||
ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock);
|
||||
ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock);
|
||||
ui->cpuopt_unsafe_ignore_global_monitor->setEnabled(runtime_lock);
|
||||
|
||||
ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue());
|
||||
ui->cpuopt_unsafe_reduce_fp_error->setChecked(
|
||||
|
@ -46,6 +47,8 @@ void ConfigureCpu::SetConfiguration() {
|
|||
Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue());
|
||||
ui->cpuopt_unsafe_fastmem_check->setChecked(
|
||||
Settings::values.cpuopt_unsafe_fastmem_check.GetValue());
|
||||
ui->cpuopt_unsafe_ignore_global_monitor->setChecked(
|
||||
Settings::values.cpuopt_unsafe_ignore_global_monitor.GetValue());
|
||||
|
||||
if (Settings::IsConfiguringGlobal()) {
|
||||
ui->accuracy->setCurrentIndex(static_cast<int>(Settings::values.cpu_accuracy.GetValue()));
|
||||
|
@ -82,6 +85,9 @@ void ConfigureCpu::ApplyConfiguration() {
|
|||
ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_fastmem_check,
|
||||
ui->cpuopt_unsafe_fastmem_check,
|
||||
cpuopt_unsafe_fastmem_check);
|
||||
ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_ignore_global_monitor,
|
||||
ui->cpuopt_unsafe_ignore_global_monitor,
|
||||
cpuopt_unsafe_ignore_global_monitor);
|
||||
}
|
||||
|
||||
void ConfigureCpu::changeEvent(QEvent* event) {
|
||||
|
@ -120,4 +126,7 @@ void ConfigureCpu::SetupPerGameUI() {
|
|||
ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_fastmem_check,
|
||||
Settings::values.cpuopt_unsafe_fastmem_check,
|
||||
cpuopt_unsafe_fastmem_check);
|
||||
ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_ignore_global_monitor,
|
||||
Settings::values.cpuopt_unsafe_ignore_global_monitor,
|
||||
cpuopt_unsafe_ignore_global_monitor);
|
||||
}
|
||||
|
|
|
@ -45,6 +45,7 @@ private:
|
|||
ConfigurationShared::CheckState cpuopt_unsafe_ignore_standard_fpcr;
|
||||
ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan;
|
||||
ConfigurationShared::CheckState cpuopt_unsafe_fastmem_check;
|
||||
ConfigurationShared::CheckState cpuopt_unsafe_ignore_global_monitor;
|
||||
|
||||
const Core::System& system;
|
||||
};
|
||||
|
|
|
@ -150,6 +150,18 @@
|
|||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="cpuopt_unsafe_ignore_global_monitor">
|
||||
<property name="toolTip">
|
||||
<string>
|
||||
<div>This option improves speed by relying only on the semantics of cmpxchg to ensure safety of exclusive access instructions. Please note this may result in deadlocks and other race conditions.</div>
|
||||
</string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Ignore global monitor</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
|
|
|
@ -44,6 +44,12 @@ void ConfigureCpuDebug::SetConfiguration() {
|
|||
Settings::values.cpuopt_reduce_misalign_checks.GetValue());
|
||||
ui->cpuopt_fastmem->setEnabled(runtime_lock);
|
||||
ui->cpuopt_fastmem->setChecked(Settings::values.cpuopt_fastmem.GetValue());
|
||||
ui->cpuopt_fastmem_exclusives->setEnabled(runtime_lock);
|
||||
ui->cpuopt_fastmem_exclusives->setChecked(
|
||||
Settings::values.cpuopt_fastmem_exclusives.GetValue());
|
||||
ui->cpuopt_recompile_exclusives->setEnabled(runtime_lock);
|
||||
ui->cpuopt_recompile_exclusives->setChecked(
|
||||
Settings::values.cpuopt_recompile_exclusives.GetValue());
|
||||
}
|
||||
|
||||
void ConfigureCpuDebug::ApplyConfiguration() {
|
||||
|
@ -56,6 +62,8 @@ void ConfigureCpuDebug::ApplyConfiguration() {
|
|||
Settings::values.cpuopt_misc_ir = ui->cpuopt_misc_ir->isChecked();
|
||||
Settings::values.cpuopt_reduce_misalign_checks = ui->cpuopt_reduce_misalign_checks->isChecked();
|
||||
Settings::values.cpuopt_fastmem = ui->cpuopt_fastmem->isChecked();
|
||||
Settings::values.cpuopt_fastmem_exclusives = ui->cpuopt_fastmem_exclusives->isChecked();
|
||||
Settings::values.cpuopt_recompile_exclusives = ui->cpuopt_recompile_exclusives->isChecked();
|
||||
}
|
||||
|
||||
void ConfigureCpuDebug::changeEvent(QEvent* event) {
|
||||
|
|
|
@ -144,7 +144,34 @@
|
|||
</string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Enable Host MMU Emulation</string>
|
||||
<string>Enable Host MMU Emulation (general memory instructions)</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="cpuopt_fastmem_exclusives">
|
||||
<property name="toolTip">
|
||||
<string>
|
||||
<div style="white-space: nowrap">This optimization speeds up exclusive memory accesses by the guest program.</div>
|
||||
<div style="white-space: nowrap">Enabling it causes guest exclusive memory reads/writes to be done directly into memory and make use of Host's MMU.</div>
|
||||
<div style="white-space: nowrap">Disabling this forces all exclusive memory accesses to use Software MMU Emulation.</div>
|
||||
</string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Enable Host MMU Emulation (exclusive memory instructions)</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QCheckBox" name="cpuopt_recompile_exclusives">
|
||||
<property name="toolTip">
|
||||
<string>
|
||||
<div style="white-space: nowrap">This optimization speeds up exclusive memory accesses by the guest program.</div>
|
||||
<div style="white-space: nowrap">Enabling it reduces the overhead of fastmem failure of exclusive memory accesses.</div>
|
||||
</string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Enable recompilation of exclusive memory instructions</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
|
|
|
@ -280,11 +280,14 @@ void Config::ReadValues() {
|
|||
ReadSetting("Cpu", Settings::values.cpuopt_misc_ir);
|
||||
ReadSetting("Cpu", Settings::values.cpuopt_reduce_misalign_checks);
|
||||
ReadSetting("Cpu", Settings::values.cpuopt_fastmem);
|
||||
ReadSetting("Cpu", Settings::values.cpuopt_fastmem_exclusives);
|
||||
ReadSetting("Cpu", Settings::values.cpuopt_recompile_exclusives);
|
||||
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_unfuse_fma);
|
||||
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_reduce_fp_error);
|
||||
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_standard_fpcr);
|
||||
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_inaccurate_nan);
|
||||
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_fastmem_check);
|
||||
ReadSetting("Cpu", Settings::values.cpuopt_unsafe_ignore_global_monitor);
|
||||
|
||||
// Renderer
|
||||
ReadSetting("Renderer", Settings::values.renderer_backend);
|
||||
|
|
|
@ -174,6 +174,14 @@ cpuopt_reduce_misalign_checks =
|
|||
# 0: Disabled, 1 (default): Enabled
|
||||
cpuopt_fastmem =
|
||||
|
||||
# Enable Host MMU Emulation for exclusive memory instructions (faster guest memory access)
|
||||
# 0: Disabled, 1 (default): Enabled
|
||||
cpuopt_fastmem_exclusives =
|
||||
|
||||
# Enable recompilation of exclusive memory instructions when their fastmem access fails (faster guest memory access)
|
||||
# 0: Disabled, 1 (default): Enabled
|
||||
cpuopt_recompile_exclusives =
|
||||
|
||||
# Enable unfuse FMA (improve performance on CPUs without FMA)
|
||||
# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select.
|
||||
# 0: Disabled, 1 (default): Enabled
|
||||
|
@ -199,6 +207,11 @@ cpuopt_unsafe_inaccurate_nan =
|
|||
# 0: Disabled, 1 (default): Enabled
|
||||
cpuopt_unsafe_fastmem_check =
|
||||
|
||||
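# Enable faster exclusive instructions by ignoring the global monitor (relies only on cmpxchg semantics; may cause deadlocks and race conditions)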
|
||||
# Only enabled if cpu_accuracy is set to Unsafe. Automatically chosen with cpu_accuracy = Auto-select.
|
||||
# 0: Disabled, 1 (default): Enabled
|
||||
cpuopt_unsafe_ignore_global_monitor =
|
||||
|
||||
[Renderer]
|
||||
# Which backend API to use.
|
||||
# 0 (default): OpenGL, 1: Vulkan
|
||||
|
|
Loading…
Reference in New Issue