From b4746529e15daabd24ce4a8e07cf033f2802f345 Mon Sep 17 00:00:00 2001
From: Merry
Date: Mon, 28 Mar 2022 23:05:54 +0100
Subject: [PATCH 1/4] atomic_ops: Implement AtomicLoad128

---
Review notes (text between "---" and the diff is not part of the commit message):
- AtomicLoad128 reads the 16 bytes at `pointer` with a 128-bit compare-exchange whose
  comparand and replacement are both zero, so memory holds the same bits afterwards
  whether or not the comparison succeeds; the value observed is what gets returned.
- NOTE(review): the From: headers in this series carry no e-mail address; presumably
  it was lost on export -- restore it before feeding the series to `git am`.

 src/common/atomic_ops.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/common/atomic_ops.h b/src/common/atomic_ops.h
index b94d73c7a..b963e7b99 100644
--- a/src/common/atomic_ops.h
+++ b/src/common/atomic_ops.h
@@ -46,6 +46,13 @@ namespace Common {
                                           reinterpret_cast<__int64*>(expected.data())) != 0;
 }
 
+[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
+    u128 result{};
+    _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), result[1],
+                                   result[0], reinterpret_cast<__int64*>(result.data()));
+    return result;
+}
+
 #else
 
 [[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
@@ -72,6 +79,16 @@ namespace Common {
     return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
 }
 
+[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
+    unsigned __int128 zeros_a = 0;
+    unsigned __int128 result_a =
+        __sync_val_compare_and_swap((unsigned __int128*)pointer, zeros_a, zeros_a);
+
+    u128 result;
+    std::memcpy(result.data(), &result_a, sizeof(u128));
+    return result;
+}
+
 #endif
 
 } // namespace Common

From c562c1d6be01e27b0725650fcce743b3da5a1828 Mon Sep 17 00:00:00 2001
From: Merry
Date: Mon, 28 Mar 2022 23:06:04 +0100
Subject: [PATCH 2/4] native_clock: Use AtomicLoad128

---
Review notes (text between "---" and the diff is not part of the commit message):
- GetRTSC() and Pause() now take their snapshot of `time_point.pack` through
  Common::AtomicLoad128 instead of a plain assignment.

 src/common/x64/native_clock.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index 347e41efc..2a2664e5d 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -56,7 +56,7 @@ u64 NativeClock::GetRTSC() {
     TimePoint new_time_point{};
     TimePoint current_time_point{};
     do {
-        current_time_point.pack = time_point.pack;
+        current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
         _mm_mfence();
         const u64 current_measure = __rdtsc();
         u64 diff = current_measure - current_time_point.inner.last_measure;
@@ -76,7 +76,7 @@ void NativeClock::Pause(bool is_paused) {
         TimePoint current_time_point{};
         TimePoint new_time_point{};
         do {
-            current_time_point.pack = time_point.pack;
+            current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
             new_time_point.pack = current_time_point.pack;
             _mm_mfence();
             new_time_point.inner.last_measure = __rdtsc();

From 084bd225dc1d41870ac2cdf2485c06141bb01ef1 Mon Sep 17 00:00:00 2001
From: merry
Date: Sat, 2 Apr 2022 21:05:31 +0100
Subject: [PATCH 3/4] atomic_ops: Implement AtomicCompareAndSwap with writeback

---
Review notes (text between "---" and the diff is not part of the commit message):
- Each new overload adds a fourth parameter, `actual`, which receives the value found
  at `pointer`; the return value still reports whether the exchange took place.
- In the MSVC 128-bit overload the intrinsic writes the observed value into the
  by-value `expected` copy, which is then assigned to `actual`.

 src/common/atomic_ops.h | 73 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/src/common/atomic_ops.h b/src/common/atomic_ops.h
index b963e7b99..69fde8421 100644
--- a/src/common/atomic_ops.h
+++ b/src/common/atomic_ops.h
@@ -46,6 +46,43 @@ namespace Common {
                                           reinterpret_cast<__int64*>(expected.data())) != 0;
 }
 
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected,
+                                               u8& actual) {
+    actual =
+        _InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected,
+                                               u16& actual) {
+    actual =
+        _InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected,
+                                               u32& actual) {
+    actual =
+        _InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected,
+                                               u64& actual) {
+    actual = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer), value,
+                                           expected);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected,
+                                               u128& actual) {
+    const bool result =
+        _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1],
+                                       value[0], reinterpret_cast<__int64*>(expected.data())) != 0;
+    actual = expected;
+    return result;
+}
+
 [[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
     u128 result{};
     _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), result[1],
@@ -79,6 +116,42 @@ namespace Common {
     return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
 }
 
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected,
+                                               u8& actual) {
+    actual = __sync_val_compare_and_swap(pointer, expected, value);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected,
+                                               u16& actual) {
+    actual = __sync_val_compare_and_swap(pointer, expected, value);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected,
+                                               u32& actual) {
+    actual = __sync_val_compare_and_swap(pointer, expected, value);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected,
+                                               u64& actual) {
+    actual = __sync_val_compare_and_swap(pointer, expected, value);
+    return actual == expected;
+}
+
+[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected,
+                                               u128& actual) {
+    unsigned __int128 value_a;
+    unsigned __int128 expected_a;
+    unsigned __int128 actual_a;
+    std::memcpy(&value_a, value.data(), sizeof(u128));
+    std::memcpy(&expected_a, expected.data(), sizeof(u128));
+    actual_a = __sync_val_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
+    std::memcpy(actual.data(), &actual_a, sizeof(u128));
+    return actual_a == expected_a;
+}
+
 [[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
     unsigned __int128 zeros_a = 0;
     unsigned __int128 result_a =

From 979e53b87b5288c582392beff618da978ca4152c Mon Sep 17 00:00:00 2001
From: merry
Date: Sat, 2 Apr 2022 21:05:49 +0100
Subject: [PATCH 4/4] native_clock: Use writeback from CAS to avoid double-loading

---
Review notes (text between "---" and the diff is not part of the commit message):
- The AtomicLoad128 call moves in front of each retry loop, so it runs once per call
  instead of once per iteration.
- `current_time_point.pack` is passed both as `expected` (by value) and as `actual`
  (by reference), so a failed compare-and-swap leaves the value it observed in
  `current_time_point.pack` for the next iteration.

 src/common/x64/native_clock.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp
index 2a2664e5d..7a3f21dcf 100644
--- a/src/common/x64/native_clock.cpp
+++ b/src/common/x64/native_clock.cpp
@@ -55,8 +55,9 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
 u64 NativeClock::GetRTSC() {
     TimePoint new_time_point{};
     TimePoint current_time_point{};
+
+    current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
     do {
-        current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
         _mm_mfence();
         const u64 current_measure = __rdtsc();
         u64 diff = current_measure - current_time_point.inner.last_measure;
@@ -66,7 +67,7 @@ u64 NativeClock::GetRTSC() {
                                                 : current_time_point.inner.last_measure;
         new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff;
     } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
-                                           current_time_point.pack));
+                                           current_time_point.pack, current_time_point.pack));
     /// The clock cannot be more precise than the guest timer, remove the lower bits
     return new_time_point.inner.accumulated_ticks & inaccuracy_mask;
 }
@@ -75,13 +76,14 @@ void NativeClock::Pause(bool is_paused) {
     if (!is_paused) {
         TimePoint current_time_point{};
         TimePoint new_time_point{};
+
+        current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
         do {
-            current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
             new_time_point.pack = current_time_point.pack;
             _mm_mfence();
             new_time_point.inner.last_measure = __rdtsc();
         } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
-                                               current_time_point.pack));
+                                               current_time_point.pack, current_time_point.pack));
     }
 }
 