-
Notifications
You must be signed in to change notification settings - Fork 12.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Add soft-float ABI #74460
[AArch64] Add soft-float ABI #74460
Conversation
@llvm/pr-subscribers-clang-driver @llvm/pr-subscribers-clang-codegen Author: None (ostannard) ChangesThis adds support for the AArch64 soft-float ABI. The specification for this ABI is currently in review at ARM-software/abi-aa#232, and I won't commit this until that PR is merged. Because all existing AArch64 hardware has floating-point hardware, we expect this to be a niche option, only used for embedded systems on R-profile systems. We are going to document that SysV-like systems should only ever use the base (hard-float) PCS variant: ARM-software/abi-aa#233. For that reason, I've not added an option to select the ABI independently of the FPU hardware, instead the new ABI is enabled iff the target architecture does not have an FPU. For testing, I have run this through an ABI fuzzer, but since this is the first implementation it can only test for internal consistency (callers and callees agree on the PCS), not for conformance to the ABI spec. Patch is 22.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/74460.diff 6 Files Affected:
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index c31f2e0bee543..23090dad83ad7 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -649,7 +649,8 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
return llvm::StringSwitch<bool>(Feature)
.Cases("aarch64", "arm64", "arm", true)
.Case("fmv", HasFMV)
- .Cases("neon", "fp", "simd", FPU & NeonMode)
+ .Case("fp", FPU & FPUMode)
+ .Cases("neon", "simd", FPU & NeonMode)
.Case("jscvt", HasJSCVT)
.Case("fcma", HasFCMA)
.Case("rng", HasRandGen)
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index dea58a7ff4146..2e730fdb0b83f 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -143,6 +143,8 @@ createTargetCodeGenInfo(CodeGenModule &CGM) {
Kind = AArch64ABIKind::DarwinPCS;
else if (Triple.isOSWindows())
return createWindowsAArch64TargetCodeGenInfo(CGM, AArch64ABIKind::Win64);
+ else if (!Target.hasFeature("fp"))
+ Kind = AArch64ABIKind::AAPCSSoft;
return createAArch64TargetCodeGenInfo(CGM, Kind);
}
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 0c0781a2d5ab9..0b69d92b70cee 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -416,6 +416,7 @@ enum class AArch64ABIKind {
AAPCS = 0,
DarwinPCS,
Win64,
+ AAPCSSoft,
};
std::unique_ptr<TargetCodeGenInfo>
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index be5145daa00b7..a4089bb6c70f1 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -53,8 +53,8 @@ class AArch64ABIInfo : public ABIInfo {
Address EmitDarwinVAArg(Address VAListAddr, QualType Ty,
CodeGenFunction &CGF) const;
- Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const;
+ Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
+ AArch64ABIKind Kind) const;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override {
@@ -65,7 +65,7 @@ class AArch64ABIInfo : public ABIInfo {
return Kind == AArch64ABIKind::Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty)
: isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
- : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
+ : EmitAAPCSVAArg(VAListAddr, Ty, CGF, Kind);
}
Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
@@ -478,6 +478,11 @@ bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize,
}
bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+ // For the soft-float ABI variant, no types are considered to be homogeneous
+ // aggregates.
+ if (Kind == AArch64ABIKind::AAPCSSoft)
+ return false;
+
// Homogeneous aggregates for AAPCS64 must have base types of a floating
// point type or a short-vector type. This is the same as the 32-bit ABI,
// but with the difference that any floating-point type is allowed,
@@ -509,7 +514,8 @@ bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
}
Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
- CodeGenFunction &CGF) const {
+ CodeGenFunction &CGF,
+ AArch64ABIKind Kind) const {
ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true,
CGF.CurFnInfo->getCallingConvention());
// Empty records are ignored for parameter passing purposes.
@@ -534,7 +540,8 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
BaseTy = ArrTy->getElementType();
NumRegs = ArrTy->getNumElements();
}
- bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy();
+ bool IsFPR = Kind == AArch64ABIKind::AAPCS &&
+ (BaseTy->isFloatingPointTy() || BaseTy->isVectorTy());
// The AArch64 va_list type and handling is specified in the Procedure Call
// Standard, section B.4:
diff --git a/clang/test/CodeGen/aarch64-soft-float-abi.c b/clang/test/CodeGen/aarch64-soft-float-abi.c
new file mode 100644
index 0000000000000..687647d650e1d
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-soft-float-abi.c
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -triple aarch64 -target-feature +fp-armv8 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,HARD
+// RUN: %clang_cc1 -triple aarch64 -target-feature -fp-armv8 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,SOFT
+
+// The va_list type does not change between the ABIs
+// CHECK: %struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+
+// Floats are passed in integer registers, this will be handled by the backend.
+// CHECK: define dso_local half @test0(half noundef %a)
+// CHECK: define dso_local bfloat @test1(bfloat noundef %a)
+// CHECK: define dso_local float @test2(float noundef %a)
+// CHECK: define dso_local double @test3(double noundef %a)
+// CHECK: define dso_local fp128 @test4(fp128 noundef %a)
+__fp16 test0(__fp16 a) { return a; }
+__bf16 test1(__bf16 a) { return a; }
+float test2(float a) { return a; }
+double test3(double a) { return a; }
+long double test4(long double a) { return a; }
+
+// No types are considered to be HFAs or HVAs by the soft-float PCS, so these
+// are converted to integer types.
+struct A {
+ float x;
+};
+// SOFT: define dso_local i32 @test10(i64 %a.coerce)
+// HARD: define dso_local %struct.A @test10([1 x float] alignstack(8) %a.coerce)
+struct A test10(struct A a) { return a; }
+
+struct B {
+ double x;
+ double y;
+};
+// SOFT: define dso_local [2 x i64] @test11([2 x i64] %a.coerce)
+// HARD: define dso_local %struct.B @test11([2 x double] alignstack(8) %a.coerce)
+struct B test11(struct B a) { return a; }
+
+#include <stdarg.h>
+
+// For variadic arguments, va_arg will always retreive
+// CHECK-LABEL: define dso_local double @test20(i32 noundef %a, ...)
+// CHECK: %vl = alloca %struct.__va_list, align 8
+// SOFT: %gr_offs_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 3
+// SOFT: %reg_top_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 1
+// HARD: %vr_offs_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 4
+// HARD: %reg_top_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 2
+double test20(int a, ...) {
+ va_list vl;
+ va_start(vl, a);
+ return va_arg(vl, double);
+}
+
+// Vector types are only available for targets with the correct hardware, and
+// their calling-convention is left undefined by the soft-float ABI, so they
+// aren't tested here.
diff --git a/clang/test/CodeGen/attr-target-clones-aarch64.c b/clang/test/CodeGen/attr-target-clones-aarch64.c
index 3f2f2fdd24e8a..4404dd0da8e5e 100644
--- a/clang/test/CodeGen/attr-target-clones-aarch64.c
+++ b/clang/test/CodeGen/attr-target-clones-aarch64.c
@@ -1,6 +1,6 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --include-generated-funcs
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fmv -S -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-NOFMV
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fp-armv8 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fp-armv8 -target-feature -fmv -S -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-NOFMV
int __attribute__((target_clones("lse+aes", "sve2"))) ftc(void) { return 0; }
int __attribute__((target_clones("sha2", "sha2+memtag2", " default "))) ftc_def(void) { return 1; }
@@ -22,6 +22,8 @@ int __attribute__((target_clones("default"))) main() {
inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))) ftc_inline2(void) { return 2; };
+
+//.
// CHECK: @__aarch64_cpu_features = external dso_local global { i64 }
// CHECK: @ftc.ifunc = weak_odr ifunc i32 (), ptr @ftc.resolver
// CHECK: @ftc_def.ifunc = weak_odr ifunc i32 (), ptr @ftc_def.resolver
@@ -30,19 +32,25 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK: @ftc_inline1.ifunc = weak_odr ifunc i32 (), ptr @ftc_inline1.resolver
// CHECK: @ftc_inline2.ifunc = weak_odr ifunc i32 (), ptr @ftc_inline2.resolver
// CHECK: @ftc_inline3.ifunc = weak_odr ifunc i32 (), ptr @ftc_inline3.resolver
-
+//.
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc._MlseMaes(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 0
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc._Msve2(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 0
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 0
+//
+//
// CHECK-LABEL: @ftc.resolver(
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
@@ -63,18 +71,26 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: ret ptr @ftc._Msve2
// CHECK: resolver_else2:
// CHECK-NEXT: ret ptr @ftc
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_def._Msha2(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 1
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_def._Msha2Mmemtag2(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 1
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_def(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 1
+//
+//
// CHECK-LABEL: @ftc_def.resolver(
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
@@ -95,14 +111,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: ret ptr @ftc_def._Msha2
// CHECK: resolver_else2:
// CHECK-NEXT: ret ptr @ftc_def
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_dup1._Msha2(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 2
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_dup1(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 2
+//
+//
// CHECK-LABEL: @ftc_dup1.resolver(
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
@@ -115,18 +137,26 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: ret ptr @ftc_dup1._Msha2
// CHECK: resolver_else:
// CHECK-NEXT: ret ptr @ftc_dup1
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_dup2._Mfp(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 3
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_dup2._MdotprodMcrc(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 3
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_dup2(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 3
+//
+//
// CHECK-LABEL: @ftc_dup2.resolver(
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
@@ -147,6 +177,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: ret ptr @ftc_dup2._Mfp
// CHECK: resolver_else2:
// CHECK-NEXT: ret ptr @ftc_dup2
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @foo(
// CHECK-NEXT: entry:
@@ -158,10 +190,14 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: [[CALL4:%.*]] = call i32 @ftc_dup2.ifunc()
// CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]]
// CHECK-NEXT: ret i32 [[ADD5]]
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_direct(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 4
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @main(
// CHECK-NEXT: entry:
@@ -175,6 +211,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: [[CALL4:%.*]] = call i32 @ftc_direct()
// CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]]
// CHECK-NEXT: ret i32 [[ADD5]]
+//
+//
// CHECK-LABEL: @ftc_inline1.resolver(
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
@@ -203,6 +241,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: ret ptr @ftc_inline1._MrngMsimd
// CHECK: resolver_else4:
// CHECK-NEXT: ret ptr @ftc_inline1
+//
+//
// CHECK-LABEL: @ftc_inline2.resolver(
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
@@ -223,6 +263,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: ret ptr @ftc_inline2._Mfp16
// CHECK: resolver_else2:
// CHECK-NEXT: ret ptr @ftc_inline2
+//
+//
// CHECK-LABEL: @ftc_inline3.resolver(
// CHECK-NEXT: resolver_entry:
// CHECK-NEXT: call void @__init_cpu_features_resolver()
@@ -243,62 +285,92 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NEXT: ret ptr @ftc_inline3._Mbti
// CHECK: resolver_else2:
// CHECK-NEXT: ret ptr @ftc_inline3
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_inline1._MrngMsimd(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 1
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_inline1._MrcpcMpredres(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 1
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_inline1._Msve2-aesMwfxt(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 1
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_inline1(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 1
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_inline2._Mfp16(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 2
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_inline2._MfcmaMsve2-bitperm(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 2
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_inline2(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 2
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_inline3._Mbti(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 3
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_inline3._MsveMsb(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 3
+//
+//
// CHECK: Function Attrs: noinline nounwind optnone
// CHECK-LABEL: @ftc_inline3(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret i32 3
+//
+//
// CHECK-NOFMV: Function Attrs: noinline nounwind optnone
// CHECK-NOFMV-LABEL: @ftc(
// CHECK-NOFMV-NEXT: entry:
// CHECK-NOFMV-NEXT: ret i32 0
+//
+//
// CHECK-NOFMV: Function Attrs: noinline nounwind optnone
// CHECK-NOFMV-LABEL: @ftc_def(
// CHECK-NOFMV-NEXT: entry:
// CHECK-NOFMV-NEXT: ret i32 1
+//
+//
// CHECK-NOFMV: Function Attrs: noinline nounwind optnone
// CHECK-NOFMV-LABEL: @ftc_dup1(
// CHECK-NOFMV-NEXT: entry:
// CHECK-NOFMV-NEXT: ret i32 2
+//
+//
// CHECK-NOFMV: Function Attrs: noinline nounwind optnone
// CHECK-NOFMV-LABEL: @ftc_dup2(
// CHECK-NOFMV-NEXT: entry:
// CHECK-NOFMV-NEXT: ret i32 3
+//
+//
// CHECK-NOFMV: Function Attrs: noinline nounwind optnone
// CHECK-NOFMV-LABEL: @foo(
// CHECK-NOFMV-NEXT: entry:
@@ -310,10 +382,14 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NOFMV-NEXT: [[CALL4:%.*]] = call i32 @ftc_dup2()
// CHECK-NOFMV-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]]
// CHECK-NOFMV-NEXT: ret i32 [[ADD5]]
+//
+//
// CHECK-NOFMV: Function Attrs: noinline nounwind optnone
// CHECK-NOFMV-LABEL: @ftc_direct(
// CHECK-NOFMV-NEXT: entry:
// CHECK-NOFMV-NEXT: ret i32 4
+//
+//
// CHECK-NOFMV: Function Attrs: noinline nounwind optnone
// CHECK-NOFMV-LABEL: @main(
// CHECK-NOFMV-NEXT: entry:
@@ -327,21 +403,29 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
// CHECK-NOFMV-NEXT: [[CALL4:%.*]] = call i32 @ftc_direct()
// CHECK-NOFMV-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]]
// CHECK-NOFMV-NEXT: ret i32 [[ADD5]]
-
-// CHECK: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+lse,+neon" }
-// CHECK: attributes #1 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2" }
-// CHECK: attributes #2 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// CHECK: attributes #3 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+sha2" }
-// CHECK: attributes #4 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+mte,+neon,+sha2" }
-// CHECK: attributes #5 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon" }
-// CHECK: attributes #6 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+dotprod,+fp-armv8,+neon" }
-// CHECK: attributes #7 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+rand" }
-// CHECK: attributes #8 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+predres,+rcpc" }
-// CHECK: attributes #9 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-aes,+wfxt" }
-// CHECK: attributes #10 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon" }
-// CHECK: attributes #11 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+complxnum,+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-bitperm" }
-// CHECK: attributes #12 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti" }
-// CHECK: attributes #13 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sb,+sve" }
-
-// CHECK-NOFMV: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" }
-// CHECK-NOFMV: attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" }
+//
+//.
+// CHECK: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse,+neon,-fp-armv8" }
+// CHECK: attributes #[[ATTR1:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+neon,+sve,+sve2,-fp-armv8" }
+// CHECK: attributes #[[ATTR2:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fp-armv8" }
+// CHECK: attributes #[[ATTR3:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+neon,+sha2,-fp-armv8" }
+// CHECK: attributes #[[ATTR4:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+mte,+neon,+sha2,-fp-armv8" }
+// CHECK: attributes #[[ATTR5:[0-9]+]] = { noinline nounwind op...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
(maybe this is for a later patch but...) Does this patch include the clang side options for the ABI or does |
This doesn't add any new options, and I'd like to avoid that if possible, instead it is turned on by |
That makes sense. Didn't realise we had an |
I'm a little surprised we don't need any LLVM backend changes, but I guess in soft-float mode we basically treat floats as ints anyway, so maybe things just work. I'd like to see appropriate regression tests, though (if they don't already exist). |
@@ -534,7 +540,8 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty, | |||
BaseTy = ArrTy->getElementType(); | |||
NumRegs = ArrTy->getNumElements(); | |||
} | |||
bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy(); | |||
bool IsFPR = Kind == AArch64ABIKind::AAPCS && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could this be Kind != AArch64ABIKind::AAPCS` in case other ABIKinds make their way here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
Yes, the backend already treated floats as if they were an integer of the same width, so no changes are needed there. I've added a test for the backend. |
Changing the ABI through the architecture flags i.e. +nofp has potential to greatly confuse users, who may come from other architectures that have soft-float ABIs already (including 32-bit arm)
|
I originally implemented it this way to avoid making two ABIs valid for one target, but I agree that it would be confusing to have the ABI change automatically. How about we use the
|
AArch64TargetInfo defaults to having the FP feature enabled, but this function was ignoring that and checking for SIMD instructions instead. This won't affect most users, because the driver explicitly enables or disables fp-armv8, which gets handled by AArch64TargetInfo::handleTargetFeatures to turn FP and SIMD on or off. However, it will make testing future patches easier, and allow testing for the presense of FP registers/instructions in CC1 tests. Change-Id: I2d2b3569dca5fa1dc40c5c6d1dabf7741b8c480e
This patch just adds the ABI enum and tests existing behaviour, it doesn't make any functional changes yet. I've not added an option to turn this on, instead it is automatically selected if the target does not have FP registers. This is because the vast majority of existing AArch64 hardware does have an FPU, so we expect this to only be used when abbsoultely necessary, unlike AArch32 where both ABIs were commonly used. Change-Id: I6f3a4562ca60c31b49a926cdce1881418677c604
If we can't pass them in FP registers, then homogeneous floating-point and vector aggregates should be treated like any other composite type, and passed either in registers or on the stack. Change-Id: Icd56e122ad586462d6059069f923ffca4b32a8d2
The AArch64 back-end already avoids saving the FP registers to the va_list when the FP registers aren't present, but clang also needs to know not to load them from the FP register save area when generating code for va_arg. The layout of va_list remains the same, but the vr_top and vr_offs fields are unused. Change-Id: I5d3dee1ac4a29f189432957910662939b79d9329
If any other AArch64ABIKinds reach here in future they will use floating-point registers, which is a better default.
* The default is unchanged ("aapcs", except for targets where it already defaults to "darwinpcs"), independent of whether the target has an FPU * If -mabi=aapcs-soft is used and the target has an FPU, the driver reports an error, to prevent having two incompatible ABIs for one target. * If a hard-float ABI is requested for a target without an FPU, this is accepted by the driver, but any use of floating-point types is an error. This matches GCC's behaviour with -mgeneral-regs-only, and allows for writing code which is compatible with both ABIs.
5ee37ae
to
3017b8a
Compare
Ping |
1 similar comment
Ping |
Ping, the ABI specification change (ARM-software/abi-aa#232) has now been merged. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems like a sensible and unobtrusive solution to me. Also the change is backed by a change in the Arm ABI, and it looks like the other review comments have been addressed. I've also built this change locally and ran the lit tests, and everything passes.
One test caused a buildbot failure when the AArch64 backend is not built, fixed by 5b8e7ed |
For what it's worth, this breaks building the Linux kernel for
I suspect this may be related to the kernel's use of |
This commit breaks building Fuchsia's Kernel as well. Sample builder log: https://logs.chromium.org/logs/fuchsia/buildbucket/cr-buildbucket/8755980051536646593/+/u/build/ninja/stdout
|
@ostannard -- Can you please fix or submit a revert? Thank you. |
This reverts commit 9cc98e3. Issue: ClangBuiltLinux/linux#1997
This reverts commit 9cc98e3.
This reverts commit 9cc98e3. Issue: ClangBuiltLinux/linux#1997
This is re-working of #74460, which adds a soft-float ABI for AArch64. That was reverted because it causes errors when building the linux and fuchsia kernels. The problem is that GCC's implementation of the ABI compatibility checks when using the hard-float ABI on a target without FP registers does it's checks after optimisation. The previous version of this patch reported errors for all uses of floating-point types, which is stricter than what GCC does in practice. This changes two things compared to the first version: * Only check the types of function arguments and returns, not the types of other values. This is more relaxed than GCC, while still guaranteeing ABI compatibility. * Move the check from Sema to CodeGen, so that inline functions are only checked if they are actually used. There are some cases in the linux kernel which depend on this behaviour of GCC.
This is re-working of llvm#74460, which adds a soft-float ABI for AArch64. That was reverted because it causes errors when building the linux and fuchsia kernels. The problem is that GCC's implementation of the ABI compatibility checks when using the hard-float ABI on a target without FP registers does it's checks after optimisation. The previous version of this patch reported errors for all uses of floating-point types, which is stricter than what GCC does in practice. This changes two things compared to the first version: * Only check the types of function arguments and returns, not the types of other values. This is more relaxed than GCC, while still guaranteeing ABI compatibility. * Move the check from Sema to CodeGen, so that inline functions are only checked if they are actually used. There are some cases in the linux kernel which depend on this behaviour of GCC.
This adds support for the AArch64 soft-float ABI. The specification for this ABI is currently in review at ARM-software/abi-aa#232, and I won't commit this until that PR is merged.
Because all existing AArch64 hardware has floating-point hardware, we expect this to be a niche option, only used for embedded systems on R-profile systems. We are going to document that SysV-like systems should only ever use the base (hard-float) PCS variant: ARM-software/abi-aa#233. For that reason, I've not added an option to select the ABI independently of the FPU hardware, instead the new ABI is enabled iff the target architecture does not have an FPU.
For testing, I have run this through an ABI fuzzer, but since this is the first implementation it can only test for internal consistency (callers and callees agree on the PCS), not for conformance to the ABI spec.