-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[win][arm64ec] Fixes to unblock building LLVM and Clang as Arm64EC #150068
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-support @llvm/pr-subscribers-platform-windows Author: Daniel Paoliello (dpaoliello) ChangesThese changes allow LLVM and Clang to be built with Clang targeting Arm64EC using the MSVC linker. Built with these options:
Full diff: https://github.com/llvm/llvm-project/pull/150068.diff 4 Files Affected:
diff --git a/clang/tools/clang-repl/CMakeLists.txt b/clang/tools/clang-repl/CMakeLists.txt
index 68d86dd98cac3..c3d14ceb79792 100644
--- a/clang/tools/clang-repl/CMakeLists.txt
+++ b/clang/tools/clang-repl/CMakeLists.txt
@@ -19,14 +19,14 @@ if(MSVC)
set_target_properties(clang-repl PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS 1)
# RTTI/C++ symbols
- set(clang_repl_exports ${clang_repl_exports} ??_7type_info@@6B@
- ?__type_info_root_node@@3U__type_info_node@@A
- ?nothrow@std@@3Unothrow_t@1@B
+ set(clang_repl_exports ${clang_repl_exports} ??_7type_info@@6B@,DATA
+ ?__type_info_root_node@@3U__type_info_node@@A,DATA
+ ?nothrow@std@@3Unothrow_t@1@B,DATA
)
# Compiler added symbols for static variables. NOT for VStudio < 2015
- set(clang_repl_exports ${clang_repl_exports} _Init_thread_abort _Init_thread_epoch
- _Init_thread_footer _Init_thread_header _tls_index
+ set(clang_repl_exports ${clang_repl_exports} _Init_thread_abort _Init_thread_epoch,DATA
+ _Init_thread_footer _Init_thread_header _tls_index,DATA
)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
@@ -50,7 +50,10 @@ if(MSVC)
endif()
# List to '/EXPORT:sym0 /EXPORT:sym1 /EXPORT:sym2 ...'
- list(TRANSFORM clang_repl_exports PREPEND "LINKER:/EXPORT:")
+ # The 'SHELL' prefix tells CMake to use a space instead of comma as the
+ # separator between the driver and linker options, which we need since MSVC's
+ # linker uses `,DATA` as a suffix to indicate that data is being exported.
+ list(TRANSFORM clang_repl_exports PREPEND "LINKER:SHELL:/EXPORT:")
set_property(TARGET clang-repl APPEND PROPERTY LINK_OPTIONS ${clang_repl_exports})
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 78bd5b4b5bd25..6d089df03cf77 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -587,8 +587,8 @@ StringRef sys::detail::getHostCPUNameForBPF() {
#endif
}
-#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
- defined(_M_X64)
+#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
+ defined(_M_X64)) && !defined(_M_ARM64EC)
/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments. If we can't run cpuid on the host, return true.
@@ -1853,8 +1853,8 @@ VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
} // namespace llvm
#endif
-#if defined(__i386__) || defined(_M_IX86) || \
- defined(__x86_64__) || defined(_M_X64)
+#if (defined(__i386__) || defined(_M_IX86) || \
+ defined(__x86_64__) || defined(_M_X64)) && !defined(_M_ARM64EC)
StringMap<bool> sys::getHostCPUFeatures() {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
unsigned MaxLevel;
@@ -2067,7 +2067,7 @@ StringMap<bool> sys::getHostCPUFeatures() {
return Features;
}
-#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
+#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__) || defined(__arm64ec__))
StringMap<bool> sys::getHostCPUFeatures() {
StringMap<bool> Features;
std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
index 1659cfb31f117..f6022b6bdd428 100644
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -30,11 +30,11 @@
#include <memory>
#include <string>
#include <vector>
-#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
+#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && !defined(_M_ARM64EC)
#include <immintrin.h>
#include <intrin.h>
#endif
-#if defined(_MSC_VER) && defined(_M_X64)
+#if defined(_MSC_VER) && defined(_M_X64) && !defined(_M_ARM64EC)
#include <float.h> // For _clearfp in ~X86SavedState().
#endif
@@ -654,7 +654,7 @@ namespace {
class X86SavedState : public ExegesisTarget::SavedState {
public:
X86SavedState() {
-#if defined(_MSC_VER) && defined(_M_X64)
+#if defined(_MSC_VER) && defined(_M_X64) && !defined(_M_ARM64EC)
_fxsave64(FPState);
Eflags = __readeflags();
#elif defined(__GNUC__) && defined(__x86_64__)
@@ -668,7 +668,7 @@ class X86SavedState : public ExegesisTarget::SavedState {
~X86SavedState() {
// Restoring the X87 state does not flush pending exceptions, make sure
// these exceptions are flushed now.
-#if defined(_MSC_VER) && defined(_M_X64)
+#if defined(_MSC_VER) && defined(_M_X64) && !defined(_M_ARM64EC)
_clearfp();
_fxrstor64(FPState);
__writeeflags(Eflags);
@@ -682,7 +682,7 @@ class X86SavedState : public ExegesisTarget::SavedState {
}
private:
-#if defined(__x86_64__) || defined(_M_X64)
+#if defined(__x86_64__) || defined(_M_X64) && !defined(_M_ARM64EC)
alignas(16) char FPState[512];
uint64_t Eflags;
#endif
@@ -824,8 +824,8 @@ class ExegesisX86Target : public ExegesisTarget {
// For now, only do the check if we see an Intel machine because
// the counter uses some intel-specific magic and it could
// be confuse and think an AMD machine actually has LBR support.
-#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
- defined(_M_X64)
+#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
+ defined(_M_X64)) && !defined(_M_ARM64EC)
using namespace sys::detail::x86;
if (getVendorSignature() == VendorSignatures::GENUINE_INTEL)
diff --git a/third-party/benchmark/src/cycleclock.h b/third-party/benchmark/src/cycleclock.h
index d4f1330b671db..c0dffcf4b35f6 100644
--- a/third-party/benchmark/src/cycleclock.h
+++ b/third-party/benchmark/src/cycleclock.h
@@ -79,7 +79,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
int64_t ret;
__asm__ volatile("rdtsc" : "=A"(ret));
return ret;
-#elif defined(__x86_64__) || defined(__amd64__)
+#elif (defined(__x86_64__) || defined(__amd64__)) && !defined(__arm64ec__)
uint64_t low, high;
__asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
return (high << 32) | low;
@@ -139,7 +139,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
struct timespec ts = {0, 0};
clock_gettime(CLOCK_MONOTONIC, &ts);
return static_cast<int64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
-#elif defined(__aarch64__)
+#elif defined(__aarch64__) || defined(__arm64ec__)
// System timer of ARMv8 runs at a different frequency than the CPU's.
// The frequency is fixed, typically in the range 1-50MHz. It can be
// read at CNTFRQ special register. We assume the OS has set up
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
llvm/lib/TargetParser/Host.cpp
Outdated
@@ -2067,7 +2069,8 @@ StringMap<bool> sys::getHostCPUFeatures() { | |||
|
|||
return Features; | |||
} | |||
#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) | |||
#elif defined(__linux__) && \ | |||
(defined(__arm__) || defined(__aarch64__) || defined(__arm64ec__)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
__linux__
and __arm64ec__
can't be defined at the same time.
The code you want starts around line 2150.
@@ -139,7 +139,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { | |||
struct timespec ts = {0, 0}; | |||
clock_gettime(CLOCK_MONOTONIC, &ts); | |||
return static_cast<int64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec; | |||
#elif defined(__aarch64__) | |||
#elif defined(__aarch64__) || defined(__arm64ec__) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is third-party code; can we submit this upstream?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Submitted google/benchmark#2012
Does clang not pick the right MACHINE automatically? I thought we had code for that.
Probably should address this at some point, if only so people don't keep stumbling over it. |
Trying it out now, it looks like it is working fine without this. May have been from a previous attempt when I was using
Yep, found the issue and pushed a fix. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure if we want to wait on the upstream review in google/benchmark? Otherwise LGTM.
These changes allow LLVM and Clang to be built with Clang targeting Arm64EC using the MSVC linker.
Built with these options: