diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 951372adcfa93e..42828b4f416800 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -89,6 +89,8 @@ STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
 STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
 STATISTIC(NumInternalFunc, "Number of internal functions");
 STATISTIC(NumColdCC, "Number of functions marked coldcc");
+STATISTIC(NumIFuncsResolved, "Number of statically resolved IFuncs");
+STATISTIC(NumIFuncsDeleted, "Number of IFuncs removed");
 
 static cl::opt<bool>
     EnableColdCCStressTest("enable-coldcc-stress-test",
@@ -2404,6 +2406,66 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
   return Changed;
 }
 
+/// Return the function that \p IF statically resolves to, or nullptr if that
+/// cannot be established. This requires a non-interposable ifunc whose
+/// non-interposable resolver is a single basic block, free of instructions
+/// that may have side effects, ending in a `ret` of a Function.
+static Function *hasSideeffectFreeStaticResolution(GlobalIFunc &IF) {
+  if (IF.isInterposable())
+    return nullptr;
+
+  Function *Resolver = IF.getResolverFunction();
+  if (!Resolver || Resolver->isInterposable())
+    return nullptr;
+
+  // Only handle functions that have been optimized into a single basic block.
+  // Comparing the block count (rather than incrementing begin()) also rejects
+  // a resolver without a body before getEntryBlock() is called on it.
+  if (Resolver->size() != 1)
+    return nullptr;
+
+  BasicBlock &BB = Resolver->getEntryBlock();
+
+  if (any_of(BB, [](Instruction &I) { return I.mayHaveSideEffects(); }))
+    return nullptr;
+
+  auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
+  if (!Ret)
+    return nullptr;
+
+  // A returned value that is not a Function yields nullptr.
+  return dyn_cast<Function>(Ret->getReturnValue());
+}
+
+/// Find IFuncs that have resolvers that always point at the same statically
+/// known callee, and replace their callers with a direct call.
+static bool OptimizeStaticIFuncs(Module &M) {
+  bool Changed = false;
+  for (GlobalIFunc &IF : M.ifuncs())
+    if (Function *Callee = hasSideeffectFreeStaticResolution(IF))
+      if (!IF.use_empty()) {
+        IF.replaceAllUsesWith(Callee);
+        ++NumIFuncsResolved;
+        Changed = true;
+      }
+  return Changed;
+}
+
+/// Run deleteIfDead on every IFunc in \p M and report whether any was removed.
+static bool
+DeleteDeadIFuncs(Module &M,
+                 SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
+  bool Changed = false;
+  // Iterate with make_early_inc_range so the loop stays valid if deleteIfDead
+  // removes IF from the module (presumably what a true result means).
+  for (GlobalIFunc &IF : make_early_inc_range(M.ifuncs()))
+    if (deleteIfDead(IF, NotDiscardableComdats)) {
+      ++NumIFuncsDeleted;
+      Changed = true;
+    }
+  return Changed;
+}
+
 static bool
 optimizeGlobalsInModule(Module &M, const DataLayout &DL,
                         function_ref<TargetLibraryInfo &(Function &)> GetTLI,
@@ -2464,6 +2526,12 @@ optimizeGlobalsInModule(Module &M, const DataLayout &DL,
     if (CXAAtExitFn)
       LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
 
+    // Optimize IFuncs whose callees are statically known.
+    LocalChange |= OptimizeStaticIFuncs(M);
+
+    // Remove any IFuncs that are now dead.
+    LocalChange |= DeleteDeadIFuncs(M, NotDiscardableComdats);
+
     Changed |= LocalChange;
   }
 
diff --git a/llvm/test/Transforms/GlobalOpt/resolve-static-ifunc.ll b/llvm/test/Transforms/GlobalOpt/resolve-static-ifunc.ll
new file mode 100644
index 00000000000000..2a1717304fb4cd
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/resolve-static-ifunc.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function caller --check-globals all --version 4
+; RUN: opt --passes=globalopt -o - -S < %s | FileCheck %s --implicit-check-not=trivial\.ifunc --implicit-check-not=dead_ifunc
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+@trivial.ifunc = internal ifunc void (), ptr @trivial.resolver
+;.
+; CHECK: @unknown_condition = external local_unnamed_addr global i1
+; CHECK: @external_ifunc.ifunc = dso_local ifunc void (), ptr @external_ifunc.resolver
+; CHECK: @complex.ifunc = internal ifunc void (), ptr @complex.resolver
+; CHECK: @sideeffects.ifunc = internal ifunc void (), ptr @sideeffects.resolver
+; CHECK: @interposable_ifunc.ifunc = internal ifunc void (), ptr @interposable_ifunc.resolver
+; CHECK: @interposable_resolver.ifunc = weak ifunc void (), ptr @interposable_resolver.resolver
+;.
+define ptr @trivial.resolver() { ; single block, no side effects, returns a function: the resolvable case
+  ret ptr @trivial._Msimd
+}
+define void @trivial._Msimd() {
+  ret void
+}
+define void @trivial.default() {
+  ret void
+}
+
+
+@dead_ifunc.ifunc = internal ifunc void (), ptr @trivial.resolver ; never referenced; the run line requires it to be absent from the output
+
+@external_ifunc.ifunc = dso_local ifunc void (), ptr @external_ifunc.resolver ; non-internal: its call is resolved but the ifunc itself is expected to remain
+define ptr @external_ifunc.resolver() {
+  ret ptr @external_ifunc._Msimd
+}
+define void @external_ifunc._Msimd() {
+  ret void
+}
+define void @external_ifunc.default() {
+  ret void
+}
+
+@unknown_condition = external global i1
+@complex.ifunc = internal ifunc void (), ptr @complex.resolver
+define ptr @complex.resolver() { ; three basic blocks: rejected by the single-block requirement
+entry:
+  %v = load i1, ptr @unknown_condition
+  br i1 %v, label %fast, label %slow
+fast:
+  ret ptr @complex._Msimd
+slow:
+  ret ptr @complex._Msimd ; same callee on both paths, yet the call is expected to stay unresolved
+}
+define void @complex._Msimd() {
+  ret void
+}
+define void @complex.default() {
+  ret void
+}
+
+@sideeffects.ifunc = internal ifunc void (), ptr @sideeffects.resolver
+define ptr @sideeffects.resolver() {
+  store i1 0, ptr @unknown_condition ; store to a global: the side effect that must block resolution
+  ret ptr @sideeffects.default
+}
+define void @sideeffects._Msimd() {
+  ret void
+}
+define void @sideeffects.default() {
+  ret void
+}
+
+@interposable_ifunc.ifunc = internal ifunc void (), ptr @interposable_ifunc.resolver
+define weak ptr @interposable_ifunc.resolver() { ; weak resolver (the ifunc itself is internal)
+  ret ptr @interposable_ifunc.resolver ; NOTE(review): returns the resolver itself rather than an implementation - confirm intended
+}
+define void @interposable_ifunc._Msimd() {
+  ret void
+}
+define void @interposable_ifunc.default() {
+  ret void
+}
+
+@interposable_resolver.ifunc = weak ifunc void (), ptr @interposable_resolver.resolver ; weak ifunc (the resolver has default linkage)
+define ptr @interposable_resolver.resolver() {
+  ret ptr @interposable_resolver.resolver ; NOTE(review): returns the resolver itself rather than an implementation - confirm intended
+}
+define void @interposable_resolver._Msimd() {
+  ret void
+}
+define void @interposable_resolver.default() {
+  ret void
+}
+
+define void @caller() {
+; CHECK-LABEL: define void @caller() local_unnamed_addr {
+; CHECK-NEXT: call void @trivial._Msimd()
+; CHECK-NEXT: call void @external_ifunc._Msimd()
+; CHECK-NEXT: call void @complex.ifunc()
+; CHECK-NEXT: call void @sideeffects.ifunc()
+; CHECK-NEXT: call void @interposable_ifunc.ifunc()
+; CHECK-NEXT: call void @interposable_resolver.ifunc()
+; CHECK-NEXT: ret void
+;
+  call void @trivial.ifunc() ; expected to become a direct call to @trivial._Msimd
+  call void @external_ifunc.ifunc() ; expected to become a direct call to @external_ifunc._Msimd
+  call void @complex.ifunc() ; expected to stay: multi-block resolver
+  call void @sideeffects.ifunc() ; expected to stay: resolver has a store
+  call void @interposable_ifunc.ifunc() ; expected to stay: weak resolver
+  call void @interposable_resolver.ifunc() ; expected to stay: weak ifunc
+  ret void
+}