[libc] Add a simple x86_64 linux loader.
author Siva Chandra Reddy <sivachandra@google.com>
Wed, 18 Mar 2020 19:46:33 +0000 (12:46 -0700)
committer Siva Chandra Reddy <sivachandra@google.com>
Wed, 25 Mar 2020 17:12:35 +0000 (10:12 -0700)
This adds a very simple loader. This will be extended to a full loader
in future patches. A utility rule to add unittests has been added to
serve us while we are building out the full loader.

Reviewers: abrachet, phosek

Differential Revision: https://reviews.llvm.org/D76412

libc/CMakeLists.txt
libc/loader/CMakeLists.txt [new file with mode: 0644]
libc/loader/linux/CMakeLists.txt [new file with mode: 0644]
libc/loader/linux/x86_64/CMakeLists.txt [new file with mode: 0644]
libc/loader/linux/x86_64/start.cpp [new file with mode: 0644]
libc/test/CMakeLists.txt
libc/test/loader/CMakeLists.txt [new file with mode: 0644]
libc/test/loader/linux/CMakeLists.txt [new file with mode: 0644]
libc/test/loader/linux/args_test.cpp [new file with mode: 0644]
libc/test/loader/linux/main_without_args.cpp [new file with mode: 0644]
libc/test/loader/linux/main_without_envp.cpp [new file with mode: 0644]

index 40c7f70..bcac7cb 100644 (file)
@@ -28,6 +28,10 @@ add_subdirectory(config)
 add_subdirectory(include)
 add_subdirectory(utils)
 
+# The loader can potentially depend on the library components so add it
+# after the library implementation directories.
+add_subdirectory(loader)
+
 # The lib and test directories are added at the very end as tests
 # and libraries potentially draw from the components present in all
 # of the other directories.
diff --git a/libc/loader/CMakeLists.txt b/libc/loader/CMakeLists.txt
new file mode 100644 (file)
index 0000000..b4bbe81
--- /dev/null
@@ -0,0 +1,3 @@
+# Descend into the OS specific loader directory only when this source tree
+# contains a directory named after ${LIBC_TARGET_OS}.
+if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
+  add_subdirectory(${LIBC_TARGET_OS})
+endif()
diff --git a/libc/loader/linux/CMakeLists.txt b/libc/loader/linux/CMakeLists.txt
new file mode 100644 (file)
index 0000000..4ef4b88
--- /dev/null
@@ -0,0 +1,34 @@
+# A rule to build a loader object file.
+#
+# Usage:
+#   add_loader_object(
+#     <name>
+#     SRC <source file>
+#     DEPENDS <list of dependency targets>
+#     COMPILE_OPTIONS <list of extra compile options>
+#   )
+#
+# The source is compiled through add_object as the target <name>_object, and
+# the resulting object file is copied to ${LIBC_BUILD_DIR}/lib/<name>.o. The
+# custom target <name> carries the "TARGET_TYPE" and "OBJECT_FILE" properties
+# so that other rules can look up the location of the copied object file.
+function(add_loader_object name)
+  cmake_parse_arguments(
+    "ADD_LOADER_OBJECT"
+    ""    # No option arguments
+    "SRC" # Single value arguments
+    "DEPENDS;COMPILE_OPTIONS" # Multi value arguments
+    ${ARGN}
+  )
+  add_object(
+    ${name}_object
+    SRC ${ADD_LOADER_OBJECT_SRC}
+    DEPENDS ${ADD_LOADER_OBJECT_DEPENDS}
+    COMPILE_OPTIONS ${ADD_LOADER_OBJECT_COMPILE_OPTIONS}
+  )
+
+  set(objfile ${LIBC_BUILD_DIR}/lib/${name}.o)
+  add_custom_command(
+    OUTPUT ${objfile}
+    # Use CMake's own copy command instead of invoking `cp` directly, so the
+    # rule does not rely on a particular tool being available on the PATH.
+    COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_OBJECTS:${name}_object> ${objfile}
+    DEPENDS $<TARGET_OBJECTS:${name}_object>
+  )
+  add_custom_target(
+    ${name}
+    DEPENDS ${objfile}
+  )
+  set_target_properties(
+    ${name}
+    PROPERTIES
+      "TARGET_TYPE" "LOADER_OBJECT"
+      "OBJECT_FILE" ${objfile}
+  )
+endfunction()
+
+# The loader sources live in a directory named after the target machine.
+add_subdirectory(${LIBC_TARGET_MACHINE})
diff --git a/libc/loader/linux/x86_64/CMakeLists.txt b/libc/loader/linux/x86_64/CMakeLists.txt
new file mode 100644 (file)
index 0000000..e7d7ee5
--- /dev/null
@@ -0,0 +1,11 @@
+# crt1: the loader object built from start.cpp, which defines _start.
+add_loader_object(
+  crt1
+  SRC
+    start.cpp
+  DEPENDS
+    linux_syscall_h
+    sys_syscall_h
+  COMPILE_OPTIONS
+    -fno-omit-frame-pointer # start.cpp locates the args relative to the frame address.
+    -ffreestanding # To avoid compiler warnings about calling the main function.
+)
diff --git a/libc/loader/linux/x86_64/start.cpp b/libc/loader/linux/x86_64/start.cpp
new file mode 100644 (file)
index 0000000..57af0b1
--- /dev/null
@@ -0,0 +1,65 @@
+//===------------------ Implementation of crt for x86_64 ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "config/linux/syscall.h"
+#include "include/sys/syscall.h"
+
+#include <linux/auxvec.h>
+#include <stdint.h>
+
+extern "C" int main(int, char **, char **);
+
+// View of the memory which _start finds just above its saved base pointer: the
+// argument count immediately followed by the argument vector.
+struct Args {
+  // At the language level, argc is an int. But we use uint64_t as the x86_64
+  // ABI specifies it as an 8-byte value.
+  uint64_t argc;
+
+  // At the language level, argv is a char** value. However, we use uint64_t as
+  // the x86_64 ABI specifies the argv vector to be an |argc| long array of
+  // 8-byte values. Even though a flexible length array would be more suitable
+  // here, we set the array length to 1 to avoid a compiler warning about it
+  // being a C99 extension. Length of 1 is not really wrong as |argc| is
+  // guaranteed to be at least 1, and there is an 8-byte null entry at the end
+  // of the argv array.
+  uint64_t argv[1];
+};
+
+// One entry of the aux-vector: a type tag and its associated value, both
+// 8 bytes wide. _start walks these entries until it finds one of type AT_NULL.
+// TODO: Would be nice to use the aux entry structure from elf.h when available.
+struct AuxEntry {
+  uint64_t type;
+  uint64_t value;
+};
+
+// Process entry point. Locates argc/argv and the env array relative to this
+// function's frame address, walks over the aux-vector entries, calls main and
+// passes main's return value to the SYS_exit syscall.
+extern "C" void _start() {
+  uintptr_t *frame_ptr =
+      reinterpret_cast<uintptr_t *>(__builtin_frame_address(0));
+
+  // This TU is compiled with -fno-omit-frame-pointer. Hence, the previous value
+  // of the base pointer is pushed on to the stack. So, we step over it (the
+  // "+ 1" below) to get to the args.
+  Args *args = reinterpret_cast<Args *>(frame_ptr + 1);
+
+  // After the argv array, there is an 8-byte long NULL value before the array
+  // of env values. We step over it (the "+ 1" below) to get to the env values.
+  // The end of the env values is marked by another 8-byte long NULL value,
+  // which the loop below searches for.
+  uint64_t *env_ptr = args->argv + args->argc + 1;
+  uint64_t *env_end_marker = env_ptr;
+  while (*env_end_marker)
+    ++env_end_marker;
+
+  // The aux-vector starts right after the NULL value which ends the env array
+  // (hence the "+ 1" below). The end of the aux-vector is denoted by an AT_NULL
+  // entry.
+  for (AuxEntry *aux_entry = reinterpret_cast<AuxEntry *>(env_end_marker + 1);
+       aux_entry->type != AT_NULL; ++aux_entry) {
+    // TODO: Read the aux vector and store necessary information in a libc wide
+    // data structure.
+  }
+
+  // main is declared to take argc as an int, so the 8-byte args->argc value is
+  // implicitly converted to int in the call below.
+  __llvm_libc::syscall(SYS_exit,
+                       main(args->argc, reinterpret_cast<char **>(args->argv),
+                            reinterpret_cast<char **>(env_ptr)));
+}
index 8f5f0bb..aa77dcd 100644 (file)
@@ -1,4 +1,5 @@
 add_custom_target(check-libc)
 
 add_subdirectory(config)
+add_subdirectory(loader)
 add_subdirectory(src)
diff --git a/libc/test/loader/CMakeLists.txt b/libc/test/loader/CMakeLists.txt
new file mode 100644 (file)
index 0000000..3a2eb58
--- /dev/null
@@ -0,0 +1,74 @@
+add_custom_target(libc_loader_tests)
+
+# A rule to add loader tests. When we have a complete loader, we should
+# be able to use the add_libc_unittest rule or an extension of it. But,
+# while the loader is getting built, we need to use a special rule like
+# this.
+#
+# Usage:
+#   add_loader_test(
+#     <target name>
+#     SRC <source file of the test executable>
+#     DEPENDS <targets which carry an "OBJECT_FILE" property>
+#     ARGS <command line arguments passed to the test executable>
+#     ENV <VAR=value entries placed in front of the test command>
+#   )
+#
+# The object files of the dependencies are linked into the executable, which
+# is linked with -nostdlib. The executable is run as a POST_BUILD step, so
+# building the target also runs the test.
+function(add_loader_test target_name)
+  if(NOT CMAKE_HOST_UNIX)
+    message(
+        WARNING
+        "Loader tests currently assume a POSIX/Unix like environment and "
+        "may not work on your platform.")
+  endif()
+
+  cmake_parse_arguments(
+    "ADD_LOADER_TEST"
+    ""    # No option arguments
+    "SRC" # Single value arguments
+    "DEPENDS;ARGS;ENV" # Multivalue arguments.
+    ${ARGN}
+  )
+
+  add_executable(
+    ${target_name}
+    EXCLUDE_FROM_ALL
+    ${ADD_LOADER_TEST_SRC}
+  )
+
+  set_target_properties(${target_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+
+  # Collect the object file of every dependency; a dependency which does not
+  # carry the "OBJECT_FILE" property cannot be linked in and is rejected.
+  set(dep_objects "")
+  if(ADD_LOADER_TEST_DEPENDS)
+    add_dependencies(${target_name} ${ADD_LOADER_TEST_DEPENDS})
+    foreach(dep IN LISTS ADD_LOADER_TEST_DEPENDS)
+      get_target_property(objfile ${dep} "OBJECT_FILE")
+      if(NOT objfile)
+        message(
+            FATAL_ERROR
+            "Unexpected dependency of an `add_loader_test` target. A dependency "
+            "should be a target of type `add_entrypoint_object`, `add_object`, or "
+            "`add_loader_object`.")
+      endif()
+      list(APPEND dep_objects ${objfile})
+    endforeach(dep)
+  endif()
+
+  target_include_directories(
+    ${target_name}
+    PRIVATE
+      ${LIBC_SOURCE_DIR}
+      ${LIBC_BUILD_DIR}
+      ${LIBC_BUILD_DIR}/include
+  )
+
+  target_link_libraries(${target_name} ${dep_objects})
+
+  target_link_options(
+    ${target_name}
+    BEFORE PRIVATE
+    -nostdlib
+  )
+
+  # Run the freshly built executable with the requested environment entries
+  # and command line arguments.
+  add_custom_command(
+    TARGET ${target_name}
+    POST_BUILD
+    COMMAND ${ADD_LOADER_TEST_ENV} $<TARGET_FILE:${target_name}> ${ADD_LOADER_TEST_ARGS}
+  )
+
+  add_dependencies(libc_loader_tests ${target_name})
+endfunction(add_loader_test)
+
+# Add the OS specific loader tests only when this source tree contains a
+# directory named after ${LIBC_TARGET_OS}.
+if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS})
+  add_subdirectory(${LIBC_TARGET_OS})
+endif()
diff --git a/libc/test/loader/linux/CMakeLists.txt b/libc/test/loader/linux/CMakeLists.txt
new file mode 100644 (file)
index 0000000..004d167
--- /dev/null
@@ -0,0 +1,32 @@
+# Runs the test executable with three command line arguments and two
+# environment entries.
+add_loader_test(
+  loader_args_test
+  SRC
+    args_test.cpp
+  DEPENDS
+    __assert_fail
+    _Exit
+    abort
+    crt1
+    raise
+  ARGS
+    1 2 3
+  ENV
+    FRANCE=Paris
+    GERMANY=Berlin
+)
+
+# Test built from main_without_envp.cpp; only crt1 is linked in.
+add_loader_test(
+  loader_no_envp_test
+  SRC
+    main_without_envp.cpp
+  DEPENDS
+    crt1
+)
+
+# Test built from main_without_args.cpp; only crt1 is linked in.
+add_loader_test(
+  loader_no_args_test
+  SRC
+    main_without_args.cpp
+  DEPENDS
+    crt1
+)
diff --git a/libc/test/loader/linux/args_test.cpp b/libc/test/loader/linux/args_test.cpp
new file mode 100644 (file)
index 0000000..a3c5852
--- /dev/null
@@ -0,0 +1,40 @@
+//===----------------- Loader test to check args to main ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#undef NDEBUG
+#include "src/assert/assert.h"
+
+// Returns true if the two null-terminated strings |lhs| and |rhs| hold the
+// same sequence of characters, and false otherwise.
+static bool my_streq(const char *lhs, const char *rhs) {
+  for (unsigned long i = 0;; ++i) {
+    // A mismatch also covers the case of one string ending before the other.
+    if (lhs[i] != rhs[i])
+      return false;
+    // The characters are equal here, so both strings end at this position.
+    if (lhs[i] == '\0')
+      return true;
+  }
+}
+
+int main(int argc, char **argv, char **envp) {
+  assert(argc == 4 && "Unexpected argc.");
+  assert(my_streq(argv[1], "1") && "Unexpected argv[1].");
+  assert(my_streq(argv[2], "2") && "Unexpected argv[2].");
+  assert(my_streq(argv[3], "3") && "Unexpected argv[3].");
+
+  bool found_france = false;
+  bool found_germany = false;
+  for (; *envp != nullptr; ++envp) {
+    if (my_streq(*envp, "FRANCE=Paris"))
+      found_france = true;
+    if (my_streq(*envp, "GERMANY=Berlin"))
+      found_germany = true;
+  }
+
+  assert(found_france && found_germany &&
+         "Did not find whats expected in envp.");
+
+  return 0;
+}
diff --git a/libc/test/loader/linux/main_without_args.cpp b/libc/test/loader/linux/main_without_args.cpp
new file mode 100644 (file)
index 0000000..d08ee3b
--- /dev/null
@@ -0,0 +1,9 @@
+//===----------------- Loader test for main without args ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// A main which declares no parameters and returns 0.
+// NOTE(review): presumably checks that the loader can start a program whose
+// main ignores the arguments passed to it -- confirm.
+int main() { return 0; }
diff --git a/libc/test/loader/linux/main_without_envp.cpp b/libc/test/loader/linux/main_without_envp.cpp
new file mode 100644 (file)
index 0000000..ec46a64
--- /dev/null
@@ -0,0 +1,9 @@
+//===----------------- Loader test for main without envp ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// A main which declares argc and argv but no envp parameter, and returns 0.
+// NOTE(review): presumably checks that the loader can start a program whose
+// main does not take envp -- confirm.
+int main(int argc, char **argv) { return 0; }