/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#ifndef incl_HPHP_THREAD_LOCAL_H_
#define incl_HPHP_THREAD_LOCAL_H_
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
#include <errno.h>
#include <stdexcept>
#include <type_traits>
#include <utility>
#include "portability.h"
namespace HPHP {
// return the location of the current thread's tdata section
std::pair<void*,size_t> getCppTdata();
inline uintptr_t tlsBase() {
uintptr_t retval;
#if defined(__x86_64__)
asm ("movq %%fs:0, %0" : "=r" (retval));
#elif defined(__AARCH64EL__)
// mrs == "move register <-- system"
// tpidr_el0 == "thread process id register for exception level 0"
asm ("mrs %0, tpidr_el0" : "=r" (retval));
#elif defined (__powerpc64__)
asm ("xor %0,%0,%0\n\t"
"or %0,%0,13\n\t"
: "=r" (retval));
#elif defined(_M_X64)
retval = (uintptr_t)_readfsbase_u64();
retval = *(uintptr_t*)(retval + 88);
#else
# error How do you access thread-local storage on this machine?
#endif
return retval;
}
///////////////////////////////////////////////////////////////////////////////
// gcc >= 4.3.0 supports the '__thread' keyword for thread locals
//
// Clang seems to have added this feature, or at the very least it is ignoring
// __thread keyword and compiling anyway
//
// On OSX, gcc does emulate TLS but in a manner that invalidates assumptions
// we have made about __thread and makes accessing thread-local variables in a
// JIT-friendly fashion difficult (as the compiler is doing a lot of magic that
// is not contractual or documented that we would need to duplicate in emitted
// code) so for now we're not going to use it. One possibility if we really
// want to do this is to generate functions that access variables of interest
// in ThreadLocal* (all of them are NoCheck right now) and use the bytes of
// gcc's compiled functions to find the values we would need to pass to
// __emutls_get_address.
//
// icc 13.0.0 appears to support it as well but we end up with
// assembler warnings of unknown importance about incorrect section
// types
//
// __thread on cygwin and mingw uses pthreads emulation not native tls so
// the emulation for thread local must be used as well
//
// So we use __thread on gcc, icc and clang, unless we are on OSX. On OSX, we
// use our own emulation. Use the DECLARE_THREAD_LOCAL() and
// IMPLEMENT_THREAD_LOCAL() macros to access either __thread or the emulation
// as appropriate.
#if !defined(NO_TLS) && \
!defined(__CYGWIN__) && !defined(__MINGW__) && \
((__llvm__ && __clang__) || \
__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || \
__INTEL_COMPILER || defined(_MSC_VER))
#define USE_GCC_FAST_TLS
#endif
///////////////////////////////////////////////////////////////////////////////
// helper
inline void ThreadLocalCheckReturn(int ret, const char *funcName) {
if (ret != 0) {
// This is used from global constructors so the safest thing to do is just
// print to stderr and exit().
fprintf(stderr, "%s returned %d", funcName, ret);
exit(1);
}
}
inline void ThreadLocalCreateKey(pthread_key_t *key, void (*del)(void*)) {
int ret = pthread_key_create(key, del);
ThreadLocalCheckReturn(ret, "pthread_key_create");
}
inline void ThreadLocalSetValue(pthread_key_t key, const void* value) {
int ret = pthread_setspecific(key, value);
ThreadLocalCheckReturn(ret, "pthread_setspecific");
}
#ifdef __APPLE__
typedef struct __darwin_pthread_handler_rec darwin_pthread_handler;
#endif
///////////////////////////////////////////////////////////////////////////////
/**
* A thread-local object is a "global" object within a thread. This is useful
* for writing apartment-threaded code, where nothing is actually shared
* between different threads (hence no locking) but those variables are not
* on stack in local scope. To use it, just do something like this,
*
* IMPLEMENT_THREAD_LOCAL(MyClass, static_object);
* static_object->data_ = ...;
* static_object->doSomething();
*
* IMPLEMENT_THREAD_LOCAL(int, static_number);
* int value = *static_number;
*
* So, syntax-wise it's similar to pointers. The type parameter can be a
* primitive types. If it's a class, there has to be a default constructor.
*/
///////////////////////////////////////////////////////////////////////////////
#if defined(USE_GCC_FAST_TLS)
/**
* We keep a linked list of destructors in ThreadLocalManager to be called on
* thread exit. ThreadLocalNode is a node in this list.
*/
template <typename T>
struct ThreadLocalNode {
T * m_p;
void (*m_on_thread_exit_fn)(void * p);
void * m_next;
size_t m_size;
};
struct ThreadLocalManager {
template<class T>
static void PushTop(ThreadLocalNode<T>& node) {
PushTop(&node, sizeof(T));
}
template<class F> void scan(F& mark) const;
private:
static void PushTop(void* node, size_t size);
struct ThreadLocalList {
void* head{nullptr};
#ifdef __APPLE__
ThreadLocalList();
darwin_pthread_handler handler;
#endif
};
static ThreadLocalList* getList(void* p) {
return static_cast<ThreadLocalList*>(p);
}
ThreadLocalManager() : m_key(0) {
#ifdef __APPLE__
ThreadLocalCreateKey(&m_key, nullptr);
#else
ThreadLocalCreateKey(&m_key, ThreadLocalManager::OnThreadExit);
#endif
};
static void OnThreadExit(void *p);
pthread_key_t m_key;
static ThreadLocalManager& GetManager();
};
///////////////////////////////////////////////////////////////////////////////
// ThreadLocal allocates by calling new without parameters and frees by calling
// delete
template<typename T>
void ThreadLocalOnThreadExit(void * p) {
ThreadLocalNode<T> * pNode = (ThreadLocalNode<T>*)p;
delete pNode->m_p;
pNode->m_p = nullptr;
}
/**
* The USE_GCC_FAST_TLS implementation of ThreadLocal is just a lazy-initialized
* pointer wrapper. In this case, we have one ThreadLocal object per thread.
*/
template<typename T>
struct ThreadLocal {
T *get() const {
if (m_node.m_p == nullptr) {
const_cast<ThreadLocal<T>*>(this)->create();
}
return m_node.m_p;
}
NEVER_INLINE void create();
bool isNull() const { return m_node.m_p == nullptr; }
void destroy() {
delete m_node.m_p;
m_node.m_p = nullptr;
}
void nullOut() {
m_node.m_p = nullptr;
}
T *operator->() const {
return get();
}
T &operator*() const {
return *get();
}
ThreadLocalNode<T> m_node;
};
template<typename T>
void ThreadLocal<T>::create() {
if (m_node.m_on_thread_exit_fn == nullptr) {
m_node.m_on_thread_exit_fn = ThreadLocalOnThreadExit<T>;
ThreadLocalManager::PushTop(m_node);
}
assert(m_node.m_p == nullptr);
m_node.m_p = new T();
}
/**
* ThreadLocalNoCheck is a pointer wrapper like ThreadLocal, except that it is
* explicitly initialized with getCheck(), rather than being initialized when
* it is first dereferenced.
*/
template<typename T>
struct ThreadLocalNoCheck {
NEVER_INLINE T *getCheck() const;
T* getNoCheck() const {
assert(m_node.m_p);
return m_node.m_p;
}
NEVER_INLINE void create();
bool isNull() const { return m_node.m_p == nullptr; }
void destroy() {
delete m_node.m_p;
m_node.m_p = nullptr;
}
T *operator->() const {
return getNoCheck();
}
T &operator*() const {
return *getNoCheck();
}
ThreadLocalNode<T> m_node;
private:
void setNull() { m_node.m_p = nullptr; }
};
template<typename T>
void ThreadLocalNoCheck<T>::create() {
if (m_node.m_on_thread_exit_fn == nullptr) {
m_node.m_on_thread_exit_fn = ThreadLocalOnThreadExit<T>;
ThreadLocalManager::PushTop(m_node);
}
assert(m_node.m_p == nullptr);
m_node.m_p = new T();
}
template<typename T>
T *ThreadLocalNoCheck<T>::getCheck() const {
if (m_node.m_p == nullptr) {
const_cast<ThreadLocalNoCheck<T>*>(this)->create();
}
return m_node.m_p;
}
///////////////////////////////////////////////////////////////////////////////
// Singleton thread-local storage for T
template<typename T>
void ThreadLocalSingletonOnThreadExit(void *obj) {
T::OnThreadExit((T*)obj);
}
// ThreadLocalSingleton has NoCheck property
template <typename T>
class ThreadLocalSingleton {
public:
ThreadLocalSingleton() { s_inited = true; }
NEVER_INLINE static T *getCheck();
static T* getNoCheck() {
assert(s_inited);
assert(s_singleton == (T*)&s_storage);
return (T*)&s_storage;
}
static bool isNull() { return s_singleton == nullptr; }
static void destroy() {
assert(!s_singleton || s_singleton == (T*)&s_storage);
T* p = s_singleton;
if (p) {
T::Delete(p);
s_singleton = nullptr;
}
}
T *operator->() const {
return getNoCheck();
}
T &operator*() const {
return *getNoCheck();
}
private:
static __thread T *s_singleton;
typedef typename std::aligned_storage<sizeof(T), sizeof(void*)>::type
StorageType;
static __thread StorageType s_storage;
static bool s_inited; // no-fast-TLS requires construction so be consistent
};
template<typename T>
bool ThreadLocalSingleton<T>::s_inited = false;
template<typename T>
T *ThreadLocalSingleton<T>::getCheck() {
assert(s_inited);
if (!s_singleton) {
T* p = (T*) &s_storage;
T::Create(p);
s_singleton = p;
}
return s_singleton;
}
template<typename T> __thread T *ThreadLocalSingleton<T>::s_singleton;
template<typename T> __thread typename ThreadLocalSingleton<T>::StorageType
ThreadLocalSingleton<T>::s_storage;
///////////////////////////////////////////////////////////////////////////////
// some classes don't need new/delete at all
template<typename T, bool throwOnNull = true>
struct ThreadLocalProxy {
T *get() const {
if (m_p == nullptr && throwOnNull) {
throw std::runtime_error("ThreadLocalProxy::get() called before set()");
}
return m_p;
}
void set(T* obj) {
m_p = obj;
}
bool isNull() const { return m_p == nullptr; }
void destroy() {
m_p = nullptr;
}
T *operator->() const {
return get();
}
T &operator*() const {
return *get();
}
T * m_p;
};
/*
* How to use the thread-local macros:
*
* Use DECLARE_THREAD_LOCAL to declare a *static* class field as thread local:
* class SomeClass {
* static DECLARE_THREAD_LOCAL(SomeFieldType, f);
* }
*
* Use IMPLEMENT_THREAD_LOCAL in the cpp file to implement the field:
* IMPLEMENT_THREAD_LOCAL(SomeFieldType, SomeClass::f);
*
* Remember: *Never* write IMPLEMENT_THREAD_LOCAL in a header file.
*/
#define DECLARE_THREAD_LOCAL(T, f) \
__thread HPHP::ThreadLocal<T> f
#define IMPLEMENT_THREAD_LOCAL(T, f) \
__thread HPHP::ThreadLocal<T> f
#define DECLARE_THREAD_LOCAL_NO_CHECK(T, f) \
__thread HPHP::ThreadLocalNoCheck<T> f
#define IMPLEMENT_THREAD_LOCAL_NO_CHECK(T, f) \
__thread HPHP::ThreadLocalNoCheck<T> f
#define DECLARE_THREAD_LOCAL_PROXY(T, N, f) \
__thread HPHP::ThreadLocalProxy<T, N> f
#define IMPLEMENT_THREAD_LOCAL_PROXY(T, N, f) \
__thread HPHP::ThreadLocalProxy<T, N> f
#else /* USE_GCC_FAST_TLS */
///////////////////////////////////////////////////////////////////////////////
// ThreadLocal allocates by calling new() without parameters
template<typename T>
void ThreadLocalOnThreadExit(void *p) {
delete (T*)p;
}
#ifdef __APPLE__
// The __thread variables in class T will be freed when pthread calls
// the destructor function on Mac. We can register a handler in
// pthread_t->__cleanup_stack similar to pthread_cleanup_push(). The handler
// will be called earlier so the __thread variables will still exist in the
// handler when the thread exits.
//
// See the details at:
// https://github.com/facebook/hhvm/issues/4444#issuecomment-92497582
typedef struct __darwin_pthread_handler_rec darwin_pthread_handler;
template<typename T>
void ThreadLocalOnThreadCleanup(void *key) {
void *obj = pthread_getspecific((pthread_key_t)key);
if (obj) {
ThreadLocalOnThreadExit<T>(obj);
}
}
inline void ThreadLocalSetCleanupHandler(pthread_key_t cleanup_key,
pthread_key_t key,
void (*del)(void*)) {
// Prevent from adding the handler for multiple times.
darwin_pthread_handler *handler =
(darwin_pthread_handler*)pthread_getspecific(cleanup_key);
if (handler)
return;
pthread_t self = pthread_self();
handler = new darwin_pthread_handler();
handler->__routine = del;
handler->__arg = (void*)key;
handler->__next = self->__cleanup_stack;
self->__cleanup_stack = handler;
ThreadLocalSetValue(cleanup_key, handler);
}
#endif
/**
* This is the emulation version of ThreadLocal. In this case, the ThreadLocal
* object is a true global, and the get() method returns a thread-dependent
* pointer from pthread's thread-specific data management.
*/
template<typename T>
class ThreadLocal {
public:
/**
* Constructor that has to be called from a thread-neutral place.
*/
ThreadLocal() : m_key(0) {
#ifdef __APPLE__
ThreadLocalCreateKey(&m_key, nullptr);
ThreadLocalCreateKey(&m_cleanup_key,
ThreadLocalOnThreadExit<darwin_pthread_handler>);
#else
ThreadLocalCreateKey(&m_key, ThreadLocalOnThreadExit<T>);
#endif
}
T *get() const {
T *obj = (T*)pthread_getspecific(m_key);
if (obj == nullptr) {
obj = new T();
ThreadLocalSetValue(m_key, obj);
#ifdef __APPLE__
ThreadLocalSetCleanupHandler(m_cleanup_key, m_key,
ThreadLocalOnThreadCleanup<T>);
#endif
}
return obj;
}
bool isNull() const { return pthread_getspecific(m_key) == nullptr; }
void destroy() {
delete (T*)pthread_getspecific(m_key);
ThreadLocalSetValue(m_key, nullptr);
}
void nullOut() {
ThreadLocalSetValue(m_key, nullptr);
}
/**
* Access object's member or method through this operator overload.
*/
T *operator->() const {
return get();
}
T &operator*() const {
return *get();
}
private:
pthread_key_t m_key;
#ifdef __APPLE__
pthread_key_t m_cleanup_key;
#endif
};
template<typename T>
class ThreadLocalNoCheck {
public:
/**
* Constructor that has to be called from a thread-neutral place.
*/
ThreadLocalNoCheck() : m_key(0) {
#ifdef __APPLE__
ThreadLocalCreateKey(&m_key, nullptr);
ThreadLocalCreateKey(&m_cleanup_key,
ThreadLocalOnThreadExit<darwin_pthread_handler>);
#else
ThreadLocalCreateKey(&m_key, ThreadLocalOnThreadExit<T>);
#endif
}
NEVER_INLINE T *getCheck() const;
T* getNoCheck() const {
T *obj = (T*)pthread_getspecific(m_key);
assert(obj);
return obj;
}
bool isNull() const { return pthread_getspecific(m_key) == nullptr; }
void destroy() {
delete (T*)pthread_getspecific(m_key);
ThreadLocalSetValue(m_key, nullptr);
}
/**
* Access object's member or method through this operator overload.
*/
T *operator->() const {
return getNoCheck();
}
T &operator*() const {
return *getNoCheck();
}
public:
void setNull() { ThreadLocalSetValue(m_key, nullptr); }
pthread_key_t m_key;
#ifdef __APPLE__
pthread_key_t m_cleanup_key;
#endif
};
template<typename T>
T *ThreadLocalNoCheck<T>::getCheck() const {
T *obj = (T*)pthread_getspecific(m_key);
if (obj == nullptr) {
obj = new T();
ThreadLocalSetValue(m_key, obj);
#ifdef __APPLE__
ThreadLocalSetCleanupHandler(m_cleanup_key, m_key,
ThreadLocalOnThreadCleanup<T>);
#endif
}
return obj;
}
///////////////////////////////////////////////////////////////////////////////
// Singleton thread-local storage for T
template<typename T>
void ThreadLocalSingletonOnThreadExit(void *obj) {
T::OnThreadExit((T*)obj);
free(obj);
}
#ifdef __APPLE__
template<typename T>
void ThreadLocalSingletonOnThreadCleanup(void *key) {
void *obj = pthread_getspecific((pthread_key_t)key);
if (obj) {
ThreadLocalSingletonOnThreadExit<T>(obj);
}
}
#endif
// ThreadLocalSingleton has NoCheck property
template<typename T>
class ThreadLocalSingleton {
public:
ThreadLocalSingleton() { getKey(); }
NEVER_INLINE static T *getCheck();
static T* getNoCheck() {
assert(s_inited);
T *obj = (T*)pthread_getspecific(s_key);
assert(obj);
return obj;
}
static bool isNull() {
return !s_inited || pthread_getspecific(s_key) == nullptr;
}
static void destroy() {
void* p = pthread_getspecific(s_key);
T::Delete((T*)p);
free(p);
ThreadLocalSetValue(s_key, nullptr);
}
T *operator->() const {
return getNoCheck();
}
T &operator*() const {
return *getNoCheck();
}
private:
static pthread_key_t s_key;
static bool s_inited; // pthread_key_t has no portable valid sentinel
#ifdef __APPLE__
static pthread_key_t s_cleanup_key;
#endif
static pthread_key_t getKey() {
if (!s_inited) {
s_inited = true;
#ifdef __APPLE__
ThreadLocalCreateKey(&s_key, nullptr);
ThreadLocalCreateKey(&s_cleanup_key,
ThreadLocalOnThreadExit<darwin_pthread_handler>);
#else
ThreadLocalCreateKey(&s_key, ThreadLocalSingletonOnThreadExit<T>);
#endif
}
return s_key;
}
};
template<typename T>
T *ThreadLocalSingleton<T>::getCheck() {
assert(s_inited);
T *obj = (T*)pthread_getspecific(s_key);
if (obj == nullptr) {
obj = (T*)malloc(sizeof(T));
T::Create(obj);
ThreadLocalSetValue(s_key, obj);
#ifdef __APPLE__
ThreadLocalSetCleanupHandler(s_cleanup_key, s_key,
ThreadLocalSingletonOnThreadCleanup<T>);
#endif
}
return obj;
}
template<typename T>
pthread_key_t ThreadLocalSingleton<T>::s_key;
template<typename T>
bool ThreadLocalSingleton<T>::s_inited = false;
#ifdef __APPLE__
template<typename T>
pthread_key_t ThreadLocalSingleton<T>::s_cleanup_key;
#endif
///////////////////////////////////////////////////////////////////////////////
// some classes don't need new/delete at all
template<typename T, bool throwOnNull = true>
class ThreadLocalProxy {
public:
/**
* Constructor that has to be called from a thread-neutral place.
*/
ThreadLocalProxy() : m_key(0) {
ThreadLocalCreateKey(&m_key, nullptr);
}
T *get() const {
T *obj = (T*)pthread_getspecific(m_key);
if (obj == nullptr && throwOnNull) {
throw std::runtime_error("ThreadLocalProxy::get() called before set()");
}
return obj;
}
void set(T* obj) {
ThreadLocalSetValue(m_key, obj);
}
bool isNull() const { return pthread_getspecific(m_key) == nullptr; }
void destroy() {
ThreadLocalSetValue(m_key, nullptr);
}
/**
* Access object's member or method through this operator overload.
*/
T *operator->() const {
return get();
}
T &operator*() const {
return *get();
}
public:
pthread_key_t m_key;
};
/**
* The emulation version of the thread-local macros
*/
#define DECLARE_THREAD_LOCAL(T, f) HPHP::ThreadLocal<T> f
#define IMPLEMENT_THREAD_LOCAL(T, f) HPHP::ThreadLocal<T> f
#define DECLARE_THREAD_LOCAL_NO_CHECK(T, f) HPHP::ThreadLocalNoCheck<T> f
#define IMPLEMENT_THREAD_LOCAL_NO_CHECK(T, f) HPHP::ThreadLocalNoCheck<T> f
#define DECLARE_THREAD_LOCAL_PROXY(T, N, f) HPHP::ThreadLocalProxy<T, N> f
#define IMPLEMENT_THREAD_LOCAL_PROXY(T, N, f) HPHP::ThreadLocalProxy<T, N> f
#endif /* USE_GCC_FAST_TLS */
///////////////////////////////////////////////////////////////////////////////
}
#endif // incl_HPHP_THREAD_LOCAL_H_