[libc] Take 2: Add linux implementations of thrd_create and thrd_join functions.
authorSiva Chandra Reddy <sivachandra@google.com>
Fri, 6 Mar 2020 23:05:50 +0000 (15:05 -0800)
committerSiva Chandra Reddy <sivachandra@google.com>
Tue, 10 Mar 2020 04:28:11 +0000 (21:28 -0700)
The following are the differences from the first version:

1. The kernel does not copy the stack for the new thread (it cannot).
The previous version missed this fact. In this new version, the new
thread's start args are copied on to the new stack in a known location
so that the new thread can sniff them out.
2. A start args sniffer for x86_64 has been added.
3. Default stack size has been increased to 64KB.

Reviewers: abrachet, phosek

Differential Revision: https://reviews.llvm.org/D75818

19 files changed:
libc/cmake/modules/LLVMLibCRules.cmake
libc/config/linux/api.td
libc/config/linux/threads.h.in [new file with mode: 0644]
libc/include/CMakeLists.txt
libc/include/threads.h.def
libc/lib/CMakeLists.txt
libc/src/CMakeLists.txt
libc/src/threads/CMakeLists.txt [new file with mode: 0644]
libc/src/threads/linux/CMakeLists.txt [new file with mode: 0644]
libc/src/threads/linux/thrd_create.cpp [new file with mode: 0644]
libc/src/threads/linux/thrd_join.cpp [new file with mode: 0644]
libc/src/threads/linux/thread_start_args.h.def [new file with mode: 0644]
libc/src/threads/linux/thread_utils.h [new file with mode: 0644]
libc/src/threads/linux/x86_64/thread_start_args.h.in [new file with mode: 0644]
libc/src/threads/thrd_create.h [new file with mode: 0644]
libc/src/threads/thrd_join.h [new file with mode: 0644]
libc/test/src/CMakeLists.txt
libc/test/src/threads/CMakeLists.txt [new file with mode: 0644]
libc/test/src/threads/thrd_test.cpp [new file with mode: 0644]

index 3575c7b..2fdaa63 100644 (file)
@@ -105,13 +105,14 @@ set(ENTRYPOINT_OBJ_TARGET_TYPE "ENTRYPOINT_OBJ")
 #       SRCS <list of .cpp files>
 #       HDRS <list of .h files>
 #       DEPENDS <list of dependencies>
+#       COMPILE_OPTIONS <list of special compile options for this target>
 #     )
 function(add_entrypoint_object target_name)
   cmake_parse_arguments(
     "ADD_ENTRYPOINT_OBJ"
     "REDIRECTED" # Optional argument
     "NAME" # Single value arguments
-    "SRCS;HDRS;DEPENDS"  # Multi value arguments
+    "SRCS;HDRS;DEPENDS;COMPILE_OPTIONS"  # Multi value arguments
     ${ARGN}
   )
   if(NOT ADD_ENTRYPOINT_OBJ_SRCS)
@@ -155,6 +156,12 @@ function(add_entrypoint_object target_name)
       ${ADD_ENTRYPOINT_OBJ_DEPENDS}
     )
   endif()
+  if(ADD_ENTRYPOINT_OBJ_COMPILE_OPTIONS)
+    target_compile_options(
+      ${target_name}_objects
+      PRIVATE ${ADD_ENTRYPOINT_OBJ_COMPILE_OPTIONS}
+    )
+  endif()
 
   set(object_file_raw "${CMAKE_CURRENT_BINARY_DIR}/${target_name}_raw.o")
   set(object_file "${CMAKE_CURRENT_BINARY_DIR}/${target_name}.o")
index 673521f..ea2e908 100644 (file)
@@ -153,7 +153,15 @@ def SignalAPI : PublicAPI<"signal.h"> {
   ];
 }
 
+def ThreadStartT : TypeDecl<"thrd_start_t"> {
+  let Decl = "typedef int (*thrd_start_t)(void *);";
+}
+
 def ThreadsAPI : PublicAPI<"threads.h"> {
+  let TypeDeclarations = [
+    ThreadStartT,
+  ];
+
   let Enumerations = [
     "mtx_plain",
     "mtx_recursive",
@@ -164,4 +172,9 @@ def ThreadsAPI : PublicAPI<"threads.h"> {
     "thrd_error",
     "thrd_nomem",
   ];
+
+  let Functions = [
+    "thrd_create",
+    "thrd_join",
+  ];
 }
diff --git a/libc/config/linux/threads.h.in b/libc/config/linux/threads.h.in
new file mode 100644 (file)
index 0000000..58f58a3
--- /dev/null
@@ -0,0 +1,17 @@
+//===--------- Linux specific definitions of types from threads.h ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+%%begin()
+
+// Linux representation of a thread handle. The fields are filled in by
+// thrd_create and consumed by thrd_join.
+typedef struct {
+  // 4-byte futex word. thrd_create passes its address to the clone syscall as
+  // the clear-tid address and thrd_join waits on it until it becomes zero.
+  unsigned char __clear_tid[4];
+  // Location handed to the clone syscall for the new thread's ID
+  // (CLONE_PARENT_SETTID).
+  int __tid;
+  // Base address of the mmap'd region used as the thread's stack.
+  void *__stack;
+  // Size in bytes of the region at __stack; used when unmapping it.
+  int __stack_size;
+  // Value returned by the thread's start function; initialized to -1 by
+  // thrd_create.
+  int __retval;
+} thrd_t;
index db5f371..bbc41b0 100644 (file)
@@ -39,8 +39,12 @@ add_gen_header(
   threads_h
   DEF_FILE threads.h.def
   GEN_HDR threads.h
+  PARAMS
+    platform_threads=../config/${LIBC_TARGET_OS}/threads.h.in
   DEPENDS
     llvm_libc_common_h
+  DATA_FILES
+    ../config/${LIBC_TARGET_OS}/threads.h.in
 )
 
 add_gen_header(
index 276f78b..e99fcfc 100644 (file)
@@ -11,6 +11,8 @@
 
 #include <__llvm-libc-common.h>
 
+%%include_file(${platform_threads})
+
 %%public_api()
 
 #endif // LLVM_LIBC_THREADS_H
index b67f8e4..83b19fd 100644 (file)
@@ -22,6 +22,10 @@ add_entrypoint_library(
     # stdlib.h entrypoints
     _Exit
     abort
+
+    # threads.h entrypoints
+    thrd_create
+    thrd_join
 )
 
 add_entrypoint_library(
index 7f8ef92..4e661c8 100644 (file)
@@ -5,5 +5,6 @@ add_subdirectory(stdlib)
 add_subdirectory(string)
 # TODO: Add this target conditional to the target OS.
 add_subdirectory(sys)
+add_subdirectory(threads)
 
 add_subdirectory(__support)
diff --git a/libc/src/threads/CMakeLists.txt b/libc/src/threads/CMakeLists.txt
new file mode 100644 (file)
index 0000000..b4bbe81
--- /dev/null
@@ -0,0 +1,3 @@
+if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
+  add_subdirectory(${LIBC_TARGET_OS})
+endif()
diff --git a/libc/src/threads/linux/CMakeLists.txt b/libc/src/threads/linux/CMakeLists.txt
new file mode 100644 (file)
index 0000000..526ad4e
--- /dev/null
@@ -0,0 +1,52 @@
+add_gen_header(
+  thread_start_args_h
+  DEF_FILE thread_start_args.h.def
+  GEN_HDR thread_start_args.h
+  PARAMS
+    thread_start_args=${LIBC_TARGET_MACHINE}/thread_start_args.h.in
+  DATA_FILES
+    ${LIBC_TARGET_MACHINE}/thread_start_args.h.in
+)
+
+add_header_library(
+  threads_utils
+  HDRS
+    thread_utils.h
+  DEPENDS
+    thread_start_args_h
+)
+
+add_entrypoint_object(
+  thrd_create
+  SRCS
+    thrd_create.cpp
+  HDRS
+    ../thrd_create.h
+  DEPENDS
+    errno_h
+    linux_syscall_h
+    mmap
+    support_common_h
+    sys_syscall_h
+    threads_h
+    threads_utils
+    __errno_location
+  COMPILE_OPTIONS
+    -fno-omit-frame-pointer # This allows us to sniff out the thread args from
+                            # the new thread's stack reliably.
+)
+
+add_entrypoint_object(
+  thrd_join
+  SRCS
+    thrd_join.cpp
+  HDRS
+    ../thrd_join.h
+  DEPENDS
+    linux_syscall_h
+    munmap
+    support_common_h
+    sys_syscall_h
+    threads_h
+    threads_utils
+)
diff --git a/libc/src/threads/linux/thrd_create.cpp b/libc/src/threads/linux/thrd_create.cpp
new file mode 100644 (file)
index 0000000..b7d2960
--- /dev/null
@@ -0,0 +1,98 @@
+//===---------- Linux implementation of the thrd_create function ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "config/linux/syscall.h" // For syscall function.
+#include "include/errno.h"        // For E* error values.
+#include "include/sys/mman.h"     // For PROT_* and MAP_* definitions.
+#include "include/sys/syscall.h"  // For syscall numbers.
+#include "include/threads.h"      // For thrd_* type definitions.
+#include "src/__support/common.h"
+#include "src/errno/llvmlibc_errno.h"
+#include "src/sys/mman/mmap.h"
+#include "src/sys/mman/munmap.h"
+#include "src/threads/linux/thread_utils.h"
+
+#include <linux/futex.h> // For futex operations.
+#include <linux/sched.h> // For CLONE_* flags.
+#include <stdint.h>
+
+namespace __llvm_libc {
+
+// Bundle of values the new thread needs in order to start running. An instance
+// is written at the very top of the new thread's stack by thrd_create and read
+// back by start_thread.
+struct StartArgs {
+  thrd_t *thread;    // Handle of the thread; receives the return value.
+  thrd_start_t func; // The user supplied thread function.
+  void *arg;         // The argument to pass to |func|.
+};
+
+// Entry point of the newly cloned thread. It locates the StartArgs packed on
+// the new stack via get_start_args_addr, runs the user's thread function,
+// records its return value in the thread handle and then exits the thread
+// with that value through the exit syscall.
+//
+// NOTE: get_start_args_addr computes the address relative to this function's
+// frame address; presumably this is why the function is marked noinline, so
+// that it always has a call frame of its own on the new stack.
+static __attribute__((noinline)) void start_thread() {
+  StartArgs *start_args = reinterpret_cast<StartArgs *>(get_start_args_addr());
+  // The assignment expression both stores the return value for thrd_join and
+  // yields it as the exit status argument.
+  __llvm_libc::syscall(SYS_exit, start_args->thread->__retval =
+                                     start_args->func(start_args->arg));
+}
+
+// Creates a new thread which executes |func| with |arg| as its argument.
+//
+// A stack of ThreadParams::DefaultStackSize bytes is mmap'd for the new thread
+// and recorded in |thread| so that thrd_join can release it later.
+//
+// Returns thrd_success on success, thrd_nomem if the stack allocation or the
+// clone syscall failed with ENOMEM, and thrd_error on any other failure. On
+// failure no stack mapping is left behind.
+int LLVM_LIBC_ENTRYPOINT(thrd_create)(thrd_t *thread, thrd_start_t func,
+                                      void *arg) {
+  unsigned clone_flags =
+      CLONE_VM        // Share the memory space with the parent.
+      | CLONE_FS      // Share the file system with the parent.
+      | CLONE_FILES   // Share the files with the parent.
+      | CLONE_SIGHAND // Share the signal handlers with the parent.
+      | CLONE_THREAD  // Same thread group as the parent.
+      | CLONE_SYSVSEM // Share a single list of System V semaphore adjustment
+                      // values
+      | CLONE_PARENT_SETTID   // Set child thread ID in |ptid| of the parent.
+      | CLONE_CHILD_CLEARTID; // Let the kernel clear the tid address and futex
+                              // wake the joining thread.
+  // TODO: Add the CLONE_SETTLS flag and setup the TLS area correctly when
+  // making the clone syscall.
+
+  void *stack = __llvm_libc::mmap(nullptr, ThreadParams::DefaultStackSize,
+                                  PROT_READ | PROT_WRITE,
+                                  MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+  if (stack == MAP_FAILED)
+    return llvmlibc_errno == ENOMEM ? thrd_nomem : thrd_error;
+
+  thread->__stack = stack;
+  thread->__stack_size = ThreadParams::DefaultStackSize;
+  thread->__retval = -1;
+  // Seed the futex word with a known non-zero value; the kernel zeroes it
+  // when the thread exits, which is what thrd_join waits for.
+  FutexData *clear_tid_address =
+      reinterpret_cast<FutexData *>(thread->__clear_tid);
+  *clear_tid_address = ThreadParams::ClearTIDValue;
+
+  // When the new thread is spawned by the kernel, the new thread gets the
+  // stack we pass to the clone syscall. However, this stack is empty and does
+  // not have any local vars present in this function. Hence, one cannot
+  // pass arguments to the thread start function, or use any local vars from
+  // here. So, we pack them into the new stack from where the thread can sniff
+  // them out.
+  uintptr_t adjusted_stack = reinterpret_cast<uintptr_t>(stack) +
+                             ThreadParams::DefaultStackSize - sizeof(StartArgs);
+  StartArgs *start_args = reinterpret_cast<StartArgs *>(adjusted_stack);
+  start_args->thread = thread;
+  start_args->func = func;
+  start_args->arg = arg;
+
+  // TODO: The arguments to the clone syscall below are correct for x86_64
+  // but they might differ for other architectures. So, make this call
+  // architecture independent. May be implement a glibc like wrapper for clone
+  // and use it here.
+  long clone_result =
+      __llvm_libc::syscall(SYS_clone, clone_flags, adjusted_stack,
+                           &thread->__tid, clear_tid_address, 0);
+
+  if (clone_result == 0) {
+    // We are in the new thread, running on the new stack.
+    start_thread();
+  } else if (clone_result < 0) {
+    // No thread was created, so nothing will ever be joined and the stack
+    // would never be released by thrd_join. Unmap it here to avoid leaking
+    // the mapping. The result of munmap is deliberately ignored as we are
+    // already reporting the clone failure.
+    __llvm_libc::munmap(stack, ThreadParams::DefaultStackSize);
+    int error_val = -clone_result;
+    return error_val == ENOMEM ? thrd_nomem : thrd_error;
+  }
+
+  return thrd_success;
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/threads/linux/thrd_join.cpp b/libc/src/threads/linux/thrd_join.cpp
new file mode 100644 (file)
index 0000000..c56ed3b
--- /dev/null
@@ -0,0 +1,43 @@
+//===----------- Linux implementation of the thrd_join function -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "config/linux/syscall.h" // For syscall function.
+#include "include/sys/syscall.h"  // For syscall numbers.
+#include "include/threads.h"      // For thrd_* type definitions.
+#include "src/__support/common.h"
+#include "src/sys/mman/munmap.h"
+#include "src/threads/linux/thread_utils.h"
+
+#include <linux/futex.h> // For futex operations.
+#include <stdatomic.h>   // For atomic_load.
+
+namespace __llvm_libc {
+
+// Waits for the thread identified by |thread| to finish and releases its
+// stack.
+//
+// If |retval| is not null, the value returned by the thread's start function
+// is stored in the integer it points to. The C standard allows callers to
+// pass a null pointer when they are not interested in the result.
+//
+// Returns thrd_success on success and thrd_error if the thread's stack could
+// not be unmapped.
+int LLVM_LIBC_ENTRYPOINT(thrd_join)(thrd_t *thread, int *retval) {
+  FutexData *clear_tid_address =
+      reinterpret_cast<FutexData *>(thread->__clear_tid);
+
+  // The kernel should set the value at the clear tid address to zero.
+  // If not, it is a spurious wake and we should continue to wait on
+  // the futex.
+  while (atomic_load(clear_tid_address) != 0) {
+    // We cannot do a FUTEX_WAIT_PRIVATE here as the kernel does a
+    // FUTEX_WAKE and not a FUTEX_WAKE_PRIVATE.
+    __llvm_libc::syscall(SYS_futex, clear_tid_address, FUTEX_WAIT,
+                         ThreadParams::ClearTIDValue, nullptr);
+  }
+
+  // Only report the result code if the caller asked for it.
+  if (retval != nullptr)
+    *retval = thread->__retval;
+
+  if (__llvm_libc::munmap(thread->__stack, thread->__stack_size) == -1)
+    return thrd_error;
+
+  return thrd_success;
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/threads/linux/thread_start_args.h.def b/libc/src/threads/linux/thread_start_args.h.def
new file mode 100644 (file)
index 0000000..781a1de
--- /dev/null
@@ -0,0 +1,11 @@
+//===---- Implementation of the get_start_args_addr function -----*- C++ -*===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+%%include_file(${thread_start_args})
diff --git a/libc/src/threads/linux/thread_utils.h b/libc/src/threads/linux/thread_utils.h
new file mode 100644 (file)
index 0000000..626c069
--- /dev/null
@@ -0,0 +1,33 @@
+//===-- Linux specific definitions for threads implementations. --*- C++ -*===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_THREADS_LINUX_THREAD_UTILS_H
+#define LLVM_LIBC_SRC_THREADS_LINUX_THREAD_UTILS_H
+
+#include "thread_start_args.h"
+
+#include <stdatomic.h>
+#include <stdint.h>
+
+// The futex data has to be exactly 4 bytes long. However, we use a uint type
+// here as we do not want to use `_Atomic uint32_t`: the `_Atomic` keyword is
+// C only. The header stdatomic.h does not define an atomic type
+// corresponding to `uint32_t` or to something which is exactly 4 bytes wide.
+using FutexData = atomic_uint;
+
+static_assert(sizeof(atomic_uint) == 4,
+              "Size of the `atomic_uint` type is not 4 bytes on your platform. "
+              "The implementation of the standard threads library for linux "
+              "requires that size of `atomic_uint` be 4 bytes.");
+
+// Constants shared by the linux implementations of thrd_create and thrd_join.
+struct ThreadParams {
+  // Size in bytes of the stack mapped for every new thread.
+  static constexpr uintptr_t DefaultStackSize = 1 << 16; // 64 KB
+  // Non-zero value stored in the clear-tid futex word when a thread is
+  // created; thrd_join passes it to the futex wait as the expected value.
+  static constexpr uint32_t ClearTIDValue = 0xABCD1234;
+};
+
+#endif // LLVM_LIBC_SRC_THREADS_LINUX_THREAD_UTILS_H
diff --git a/libc/src/threads/linux/x86_64/thread_start_args.h.in b/libc/src/threads/linux/x86_64/thread_start_args.h.in
new file mode 100644 (file)
index 0000000..deb0b3b
--- /dev/null
@@ -0,0 +1,21 @@
+//===- x86_64 implementation of the get_start_args_addr function -*- C++ -*===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+%%begin()
+
+// Returns the address at which the thread start args are expected to be
+// found, computed from the frame address of the function this gets inlined
+// into.
+__attribute__((always_inline)) inline uintptr_t get_start_args_addr() {
+  // NOTE: For __builtin_frame_address to work reliably across compilers,
+  // architectures and various optimization levels, the TU including this file
+  // should be compiled with -fno-omit-frame-pointer.
+  return reinterpret_cast<uintptr_t>(__builtin_frame_address(0)) +
+         // The x86_64 call instruction pushes the resume address on to the
+         // stack. Next, the function prologue pushes the frame pointer on to
+         // the stack. Hence, we look past these two items to get to the start
+         // args.
+         sizeof(uintptr_t) * 2;
+}
+
diff --git a/libc/src/threads/thrd_create.h b/libc/src/threads/thrd_create.h
new file mode 100644 (file)
index 0000000..3119290
--- /dev/null
@@ -0,0 +1,20 @@
+//===------- Implementation header for thrd_create function ------ *-C++-* ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_THREADS_LINUX_THRD_CREATE_H
+#define LLVM_LIBC_SRC_THREADS_LINUX_THRD_CREATE_H
+
+#include "include/threads.h"
+
+namespace __llvm_libc {
+
+int thrd_create(thrd_t *thread, thrd_start_t func, void *arg);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_THREADS_LINUX_THRD_CREATE_H
diff --git a/libc/src/threads/thrd_join.h b/libc/src/threads/thrd_join.h
new file mode 100644 (file)
index 0000000..495b049
--- /dev/null
@@ -0,0 +1,20 @@
+//===-------- Implementation header for thrd_join function ------- *-C++-* ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_THREADS_LINUX_THRD_JOIN_H
+#define LLVM_LIBC_SRC_THREADS_LINUX_THRD_JOIN_H
+
+#include "include/threads.h"
+
+namespace __llvm_libc {
+
+int thrd_join(thrd_t *thread, int *retval);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_THREADS_LINUX_THRD_JOIN_H
index bf56591..2ff4ca4 100644 (file)
@@ -3,3 +3,4 @@ add_subdirectory(signal)
 add_subdirectory(stdlib)
 add_subdirectory(string)
 add_subdirectory(sys)
+add_subdirectory(threads)
diff --git a/libc/test/src/threads/CMakeLists.txt b/libc/test/src/threads/CMakeLists.txt
new file mode 100644 (file)
index 0000000..9607a25
--- /dev/null
@@ -0,0 +1,16 @@
+add_libc_testsuite(libc_threads_unittests)
+
+add_libc_unittest(
+  thrd_test
+  SUITE
+    libc_threads_unittests
+  SRCS
+    thrd_test.cpp
+  DEPENDS
+    __errno_location
+    mmap
+    munmap
+    threads_h
+    thrd_create
+    thrd_join
+)
diff --git a/libc/test/src/threads/thrd_test.cpp b/libc/test/src/threads/thrd_test.cpp
new file mode 100644 (file)
index 0000000..be9e410
--- /dev/null
@@ -0,0 +1,52 @@
+//===---------------------- Unittests for thrd_t --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "include/threads.h"
+#include "src/threads/thrd_create.h"
+#include "src/threads/thrd_join.h"
+#include "utils/UnitTest/Test.h"
+
+// Number of threads used by the tests in this file.
+static constexpr int thread_count = 1000;
+// Incremented once by every thread running thread_func.
+static int counter = 0;
+// Thread function which bumps the shared counter and returns 0.
+static int thread_func(void *) {
+  ++counter;
+  return 0;
+}
+
+// Creates and joins threads one at a time, checking after every join that the
+// thread ran exactly once and returned the expected value.
+TEST(ThreadTest, CreateAndJoin) {
+  // The loop has no increment expression: |counter| is advanced by the spawned
+  // thread itself, so the loop only makes progress if the threads really run.
+  for (counter = 0; counter <= thread_count;) {
+    thrd_t thread;
+    int old_counter_val = counter;
+    ASSERT_EQ(__llvm_libc::thrd_create(&thread, thread_func, nullptr),
+              (int)thrd_success);
+    int retval = thread_count + 1; // Start with a retval we dont expect.
+    ASSERT_EQ(__llvm_libc::thrd_join(&thread, &retval), (int)thrd_success);
+    ASSERT_EQ(retval, 0);
+    ASSERT_EQ(counter, old_counter_val + 1);
+  }
+}
+
+// Thread function which returns the int pointed to by |arg|.
+static int return_arg(void *arg) { return *reinterpret_cast<int *>(arg); }
+
+// Spawns all the threads first and joins them only afterwards, checking that
+// every thread returns the argument that was passed to it.
+TEST(ThreadTest, SpawnAndJoin) {
+  thrd_t thread_list[thread_count];
+  // Each thread gets a pointer to its own slot, which holds its index.
+  int args[thread_count];
+
+  for (int i = 0; i < thread_count; ++i) {
+    args[i] = i;
+    ASSERT_EQ(__llvm_libc::thrd_create(thread_list + i, return_arg, args + i),
+              (int)thrd_success);
+  }
+
+  for (int i = 0; i < thread_count; ++i) {
+    int retval = thread_count + 1; // Start with a retval we dont expect.
+    ASSERT_EQ(__llvm_libc::thrd_join(&thread_list[i], &retval),
+              (int)thrd_success);
+    ASSERT_EQ(retval, i);
+  }
+}