kernel/process: Decouple TLS handling from threads

Extracts all of the thread-local storage management out of the thread
instances themselves and makes the owning process handle the management
of the memory. This brings the memory management slightly more in line
with how the kernel handles these allocations.

Furthermore, this also makes the TLS page management a little more
readable compared to the lingering implementation that was carried over
from Citra.
This commit is contained in:
Lioncash 2019-06-05 14:32:33 -04:00
parent 55481df50f
commit abdce723eb
4 changed files with 97 additions and 66 deletions

View File

@ -3,6 +3,7 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <algorithm> #include <algorithm>
#include <bitset>
#include <memory> #include <memory>
#include <random> #include <random>
#include "common/alignment.h" #include "common/alignment.h"
@ -48,8 +49,58 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) {
} }
} // Anonymous namespace } // Anonymous namespace
SharedPtr<Process> Process::Create(Core::System& system, std::string name, // Represents a page used for thread-local storage.
Process::ProcessType type) { //
// Each TLS page contains slots that may be used by processes and threads.
// Every process and thread is created with a slot in some arbitrary page
// (whichever page happens to have an available slot).
class TLSPage {
public:
static constexpr std::size_t num_slot_entries = Memory::PAGE_SIZE / Memory::TLS_ENTRY_SIZE;
explicit TLSPage(VAddr address) : base_address{address} {}
bool HasAvailableSlots() const {
return !is_slot_used.all();
}
VAddr GetBaseAddress() const {
return base_address;
}
std::optional<VAddr> ReserveSlot() {
for (std::size_t i = 0; i < is_slot_used.size(); i++) {
if (is_slot_used[i]) {
continue;
}
is_slot_used[i] = true;
return base_address + (i * Memory::TLS_ENTRY_SIZE);
}
return std::nullopt;
}
void ReleaseSlot(VAddr address) {
// Ensure that all given addresses are consistent with how TLS pages
// are intended to be used when releasing slots.
ASSERT(IsWithinPage(address));
ASSERT((address % Memory::TLS_ENTRY_SIZE) == 0);
const std::size_t index = (address - base_address) / Memory::TLS_ENTRY_SIZE;
is_slot_used[index] = false;
}
private:
bool IsWithinPage(VAddr address) const {
return base_address <= address && address < base_address + Memory::PAGE_SIZE;
}
VAddr base_address;
std::bitset<num_slot_entries> is_slot_used;
};
SharedPtr<Process> Process::Create(Core::System& system, std::string name, ProcessType type) {
auto& kernel = system.Kernel(); auto& kernel = system.Kernel();
SharedPtr<Process> process(new Process(system)); SharedPtr<Process> process(new Process(system));
@ -181,61 +232,55 @@ void Process::PrepareForTermination() {
} }
/** /**
* Finds a free location for the TLS section of a thread. * Attempts to find a TLS page that contains a free slot for
* @param tls_slots The TLS page array of the thread's owner process. * use by a thread.
* Returns a tuple of (page, slot, alloc_needed) where: *
* page: The index of the first allocated TLS page that has free slots. * @returns If a page with an available slot is found, then an iterator
* slot: The index of the first free slot in the indicated page. * pointing to the page is returned. Otherwise the end iterator
* alloc_needed: Whether there's a need to allocate a new TLS page (All pages are full). * is returned instead.
*/ */
static std::tuple<std::size_t, std::size_t, bool> FindFreeThreadLocalSlot( static auto FindTLSPageWithAvailableSlots(std::vector<TLSPage>& tls_pages) {
const std::vector<std::bitset<8>>& tls_slots) { return std::find_if(tls_pages.begin(), tls_pages.end(),
// Iterate over all the allocated pages, and try to find one where not all slots are used. [](const auto& page) { return page.HasAvailableSlots(); });
for (std::size_t page = 0; page < tls_slots.size(); ++page) {
const auto& page_tls_slots = tls_slots[page];
if (!page_tls_slots.all()) {
// We found a page with at least one free slot, find which slot it is
for (std::size_t slot = 0; slot < page_tls_slots.size(); ++slot) {
if (!page_tls_slots.test(slot)) {
return std::make_tuple(page, slot, false);
}
}
}
}
return std::make_tuple(0, 0, true);
} }
VAddr Process::MarkNextAvailableTLSSlotAsUsed(Thread& thread) { VAddr Process::CreateTLSRegion() {
auto [available_page, available_slot, needs_allocation] = FindFreeThreadLocalSlot(tls_slots); auto tls_page_iter = FindTLSPageWithAvailableSlots(tls_pages);
const VAddr tls_begin = vm_manager.GetTLSIORegionBaseAddress();
if (needs_allocation) { if (tls_page_iter == tls_pages.cend()) {
tls_slots.emplace_back(0); // The page is completely available at the start const auto region_address =
available_page = tls_slots.size() - 1; vm_manager.FindFreeRegion(vm_manager.GetTLSIORegionBaseAddress(),
available_slot = 0; // Use the first slot in the new page vm_manager.GetTLSIORegionEndAddress(), Memory::PAGE_SIZE);
ASSERT(region_address.Succeeded());
// Allocate some memory from the end of the linear heap for this region. const auto map_result = vm_manager.MapMemoryBlock(
auto& tls_memory = thread.GetTLSMemory(); *region_address, std::make_shared<std::vector<u8>>(Memory::PAGE_SIZE), 0,
tls_memory->insert(tls_memory->end(), Memory::PAGE_SIZE, 0); Memory::PAGE_SIZE, MemoryState::ThreadLocal);
ASSERT(map_result.Succeeded());
vm_manager.RefreshMemoryBlockMappings(tls_memory.get()); tls_pages.emplace_back(*region_address);
vm_manager.MapMemoryBlock(tls_begin + available_page * Memory::PAGE_SIZE, tls_memory, 0, const auto reserve_result = tls_pages.back().ReserveSlot();
Memory::PAGE_SIZE, MemoryState::ThreadLocal); ASSERT(reserve_result.has_value());
return *reserve_result;
} }
tls_slots[available_page].set(available_slot); return *tls_page_iter->ReserveSlot();
return tls_begin + available_page * Memory::PAGE_SIZE + available_slot * Memory::TLS_ENTRY_SIZE;
} }
void Process::FreeTLSSlot(VAddr tls_address) { void Process::FreeTLSRegion(VAddr tls_address) {
const VAddr tls_base = tls_address - vm_manager.GetTLSIORegionBaseAddress(); const VAddr aligned_address = Common::AlignDown(tls_address, Memory::PAGE_SIZE);
const VAddr tls_page = tls_base / Memory::PAGE_SIZE; auto iter =
const VAddr tls_slot = (tls_base % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE; std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) {
return page.GetBaseAddress() == aligned_address;
});
tls_slots[tls_page].reset(tls_slot); // Something has gone very wrong if we're freeing a region
// with no actual page available.
ASSERT(iter != tls_pages.cend());
iter->ReleaseSlot(tls_address);
} }
void Process::LoadModule(CodeSet module_, VAddr base_addr) { void Process::LoadModule(CodeSet module_, VAddr base_addr) {

View File

@ -5,7 +5,6 @@
#pragma once #pragma once
#include <array> #include <array>
#include <bitset>
#include <cstddef> #include <cstddef>
#include <list> #include <list>
#include <string> #include <string>
@ -32,6 +31,7 @@ namespace Kernel {
class KernelCore; class KernelCore;
class ResourceLimit; class ResourceLimit;
class Thread; class Thread;
class TLSPage;
struct CodeSet; struct CodeSet;
@ -260,10 +260,10 @@ public:
// Thread-local storage management // Thread-local storage management
// Marks the next available region as used and returns the address of the slot. // Marks the next available region as used and returns the address of the slot.
VAddr MarkNextAvailableTLSSlotAsUsed(Thread& thread); [[nodiscard]] VAddr CreateTLSRegion();
// Frees a used TLS slot identified by the given address // Frees a used TLS slot identified by the given address
void FreeTLSSlot(VAddr tls_address); void FreeTLSRegion(VAddr tls_address);
private: private:
explicit Process(Core::System& system); explicit Process(Core::System& system);
@ -310,7 +310,7 @@ private:
/// holds the TLS for a specific thread. This vector contains which parts are in use for each /// holds the TLS for a specific thread. This vector contains which parts are in use for each
/// page as a bitmask. /// page as a bitmask.
/// This vector will grow as more pages are allocated for new threads. /// This vector will grow as more pages are allocated for new threads.
std::vector<std::bitset<8>> tls_slots; std::vector<TLSPage> tls_pages;
/// Contains the parsed process capability descriptors. /// Contains the parsed process capability descriptors.
ProcessCapabilities capabilities; ProcessCapabilities capabilities;

View File

@ -65,7 +65,7 @@ void Thread::Stop() {
owner_process->UnregisterThread(this); owner_process->UnregisterThread(this);
// Mark the TLS slot in the thread's page as free. // Mark the TLS slot in the thread's page as free.
owner_process->FreeTLSSlot(tls_address); owner_process->FreeTLSRegion(tls_address);
} }
void Thread::WakeAfterDelay(s64 nanoseconds) { void Thread::WakeAfterDelay(s64 nanoseconds) {
@ -205,9 +205,9 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
thread->name = std::move(name); thread->name = std::move(name);
thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap(); thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
thread->owner_process = &owner_process; thread->owner_process = &owner_process;
thread->tls_address = thread->owner_process->CreateTLSRegion();
thread->scheduler = &system.Scheduler(processor_id); thread->scheduler = &system.Scheduler(processor_id);
thread->scheduler->AddThread(thread); thread->scheduler->AddThread(thread);
thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread);
thread->owner_process->RegisterThread(thread.get()); thread->owner_process->RegisterThread(thread.get());

View File

@ -5,7 +5,6 @@
#pragma once #pragma once
#include <functional> #include <functional>
#include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
@ -78,9 +77,6 @@ enum class ThreadActivity : u32 {
class Thread final : public WaitObject { class Thread final : public WaitObject {
public: public:
using TLSMemory = std::vector<u8>;
using TLSMemoryPtr = std::shared_ptr<TLSMemory>;
using MutexWaitingThreads = std::vector<SharedPtr<Thread>>; using MutexWaitingThreads = std::vector<SharedPtr<Thread>>;
using ThreadContext = Core::ARM_Interface::ThreadContext; using ThreadContext = Core::ARM_Interface::ThreadContext;
@ -169,14 +165,6 @@ public:
return thread_id; return thread_id;
} }
TLSMemoryPtr& GetTLSMemory() {
return tls_memory;
}
const TLSMemoryPtr& GetTLSMemory() const {
return tls_memory;
}
/// Resumes a thread from waiting /// Resumes a thread from waiting
void ResumeFromWait(); void ResumeFromWait();
@ -463,11 +451,9 @@ private:
u32 ideal_core{0xFFFFFFFF}; u32 ideal_core{0xFFFFFFFF};
u64 affinity_mask{0x1}; u64 affinity_mask{0x1};
TLSMemoryPtr tls_memory = std::make_shared<TLSMemory>(); ThreadActivity activity = ThreadActivity::Normal;
std::string name; std::string name;
ThreadActivity activity = ThreadActivity::Normal;
}; };
/** /**