From 13b1b19a46296b87427ad9a2484661c23c84a14b Mon Sep 17 00:00:00 2001 From: xla authors Date: Sun, 19 Jan 2025 12:53:09 -0800 Subject: [PATCH] Integrate LLVM at llvm/llvm-project@13c761789753 Updates LLVM usage to match [13c761789753](https://github.com/llvm/llvm-project/commit/13c761789753) PiperOrigin-RevId: 717293402 --- third_party/llvm/generated.patch | 1337 ++++++++++++-- third_party/llvm/workspace.bzl | 4 +- third_party/shardy/temporary.patch | 1590 ++++++++++++++--- third_party/shardy/workspace.bzl | 4 +- third_party/stablehlo/temporary.patch | 12 + .../triton/llvm_integration/cl717293402.patch | 127 ++ .../triton/llvm_integration/series.bzl | 1 + .../tsl/third_party/llvm/generated.patch | 1337 ++++++++++++-- .../tsl/third_party/llvm/workspace.bzl | 4 +- .../transforms/vectorize_loads_stores.cc | 3 +- 10 files changed, 3800 insertions(+), 619 deletions(-) create mode 100644 third_party/triton/llvm_integration/cl717293402.patch diff --git a/third_party/llvm/generated.patch b/third_party/llvm/generated.patch index 3d2a2525c37a9..8b54ffba772b7 100644 --- a/third_party/llvm/generated.patch +++ b/third_party/llvm/generated.patch @@ -1,207 +1,1156 @@ Auto generated patch. Do not edit or delete it, even if empty. -diff -ruN --strip-trailing-cr a/mlir/include/mlir/IR/TypeRange.h b/mlir/include/mlir/IR/TypeRange.h ---- a/mlir/include/mlir/IR/TypeRange.h -+++ b/mlir/include/mlir/IR/TypeRange.h -@@ -29,12 +29,11 @@ - /// a SmallVector/std::vector. This class should be used in places that are not - /// suitable for a more derived type (e.g. ArrayRef) or a template range - /// parameter. --class TypeRange -- : public llvm::detail::indexed_accessor_range_base< -- TypeRange, -- llvm::PointerUnion, -- Type, Type, Type> { -+class TypeRange : public llvm::detail::indexed_accessor_range_base< -+ TypeRange, -+ llvm::PointerUnion, -+ Type, Type, Type> { - public: - using RangeBaseT::RangeBaseT; - TypeRange(ArrayRef types = std::nullopt); -@@ -45,11 +44,8 @@ - TypeRange(ValueTypeRange values) - : TypeRange(ValueRange(ValueRangeT(values.begin().getCurrent(), - values.end().getCurrent()))) {} -- -- TypeRange(Type type) : TypeRange(type, /*count=*/1) {} -- template , Arg> && -- !std::is_constructible_v>> -+ template , Arg>::value>> - TypeRange(Arg &&arg) : TypeRange(ArrayRef(std::forward(arg))) {} - TypeRange(std::initializer_list types) - : TypeRange(ArrayRef(types)) {} -@@ -60,9 +56,8 @@ - /// * A pointer to the first element of an array of types. - /// * A pointer to the first element of an array of operands. - /// * A pointer to the first element of an array of results. -- /// * A single 'Type' instance. - using OwnerT = llvm::PointerUnion; -+ detail::OpResultImpl *>; - - /// See `llvm::detail::indexed_accessor_range_base` for details. - static OwnerT offset_base(OwnerT object, ptrdiff_t index); -diff -ruN --strip-trailing-cr a/mlir/include/mlir/IR/ValueRange.h b/mlir/include/mlir/IR/ValueRange.h ---- a/mlir/include/mlir/IR/ValueRange.h -+++ b/mlir/include/mlir/IR/ValueRange.h -@@ -374,16 +374,16 @@ - /// SmallVector/std::vector. This class should be used in places that are not - /// suitable for a more derived type (e.g. ArrayRef) or a template range - /// parameter. --class ValueRange final : public llvm::detail::indexed_accessor_range_base< -- ValueRange, -- PointerUnion, -- Value, Value, Value> { -+class ValueRange final -+ : public llvm::detail::indexed_accessor_range_base< -+ ValueRange, -+ PointerUnion, -+ Value, Value, Value> { - public: - /// The type representing the owner of a ValueRange. 
This is either a list of -- /// values, operands, or results or a single value. -+ /// values, operands, or results. - using OwnerT = -- PointerUnion; -+ PointerUnion; - - using RangeBaseT::RangeBaseT; - -@@ -392,7 +392,7 @@ - std::is_constructible, Arg>::value && - !std::is_convertible::value>> - ValueRange(Arg &&arg) : ValueRange(ArrayRef(std::forward(arg))) {} -- ValueRange(Value value) : ValueRange(value, /*count=*/1) {} -+ ValueRange(const Value &value) : ValueRange(&value, /*count=*/1) {} - ValueRange(const std::initializer_list &values) - : ValueRange(ArrayRef(values)) {} - ValueRange(iterator_range values) -diff -ruN --strip-trailing-cr a/mlir/lib/IR/OperationSupport.cpp b/mlir/lib/IR/OperationSupport.cpp ---- a/mlir/lib/IR/OperationSupport.cpp -+++ b/mlir/lib/IR/OperationSupport.cpp -@@ -653,15 +653,6 @@ - /// See `llvm::detail::indexed_accessor_range_base` for details. - ValueRange::OwnerT ValueRange::offset_base(const OwnerT &owner, - ptrdiff_t index) { -- if (llvm::isa_and_nonnull(owner)) { -- // Prevent out-of-bounds indexing for single values. -- // Note that we do allow an index of 1 as is required by 'slice'ing that -- // returns an empty range. This also matches the usual rules of C++ of being -- // allowed to index past the last element of an array. -- assert(index <= 1 && "out-of-bound offset into single-value 'ValueRange'"); -- // Return nullptr to quickly cause segmentation faults on misuse. -- return index == 0 ? owner : nullptr; -- } - if (const auto *value = llvm::dyn_cast_if_present(owner)) - return {value + index}; - if (auto *operand = llvm::dyn_cast_if_present(owner)) -@@ -670,10 +661,6 @@ +diff -ruN --strip-trailing-cr a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h +--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h ++++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h +@@ -513,12 +513,6 @@ + Entity loadElementAt(mlir::Location loc, fir::FirOpBuilder &builder, + Entity entity, mlir::ValueRange oneBasedIndices); + +-/// Return a vector of extents for the given entity. +-/// The function creates new operations, but tries to clean-up +-/// after itself. +-llvm::SmallVector +-genExtentsVector(mlir::Location loc, fir::FirOpBuilder &builder, Entity entity); +- + } // namespace hlfir + + #endif // FORTRAN_OPTIMIZER_BUILDER_HLFIRTOOLS_H +diff -ruN --strip-trailing-cr a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp +--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp ++++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp +@@ -1421,15 +1421,3 @@ + return loadTrivialScalar(loc, builder, + getElementAt(loc, builder, entity, oneBasedIndices)); } - /// See `llvm::detail::indexed_accessor_range_base` for details. 
- Value ValueRange::dereference_iterator(const OwnerT &owner, ptrdiff_t index) { -- if (auto value = llvm::dyn_cast_if_present(owner)) { -- assert(index == 0 && "cannot offset into single-value 'ValueRange'"); -- return value; +- +-llvm::SmallVector +-hlfir::genExtentsVector(mlir::Location loc, fir::FirOpBuilder &builder, +- hlfir::Entity entity) { +- entity = hlfir::derefPointersAndAllocatables(loc, builder, entity); +- mlir::Value shape = hlfir::genShape(loc, builder, entity); +- llvm::SmallVector extents = +- hlfir::getExplicitExtentsFromShape(shape, builder); +- if (shape.getUses().empty()) +- shape.getDefiningOp()->erase(); +- return extents; +-} +diff -ruN --strip-trailing-cr a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp ++++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +@@ -37,79 +37,6 @@ + + namespace { + +-// Helper class to generate operations related to computing +-// product of values. +-class ProductFactory { +-public: +- ProductFactory(mlir::Location loc, fir::FirOpBuilder &builder) +- : loc(loc), builder(builder) {} +- +- // Generate an update of the inner product value: +- // acc += v1 * v2, OR +- // acc += CONJ(v1) * v2, OR +- // acc ||= v1 && v2 +- // +- // CONJ parameter specifies whether the first complex product argument +- // needs to be conjugated. +- template +- mlir::Value genAccumulateProduct(mlir::Value acc, mlir::Value v1, +- mlir::Value v2) { +- mlir::Type resultType = acc.getType(); +- acc = castToProductType(acc, resultType); +- v1 = castToProductType(v1, resultType); +- v2 = castToProductType(v2, resultType); +- mlir::Value result; +- if (mlir::isa(resultType)) { +- result = builder.create( +- loc, acc, builder.create(loc, v1, v2)); +- } else if (mlir::isa(resultType)) { +- if constexpr (CONJ) +- result = fir::IntrinsicLibrary{builder, loc}.genConjg(resultType, v1); +- else +- result = v1; +- +- result = builder.create( +- loc, acc, builder.create(loc, result, v2)); +- } else if (mlir::isa(resultType)) { +- result = builder.create( +- loc, acc, builder.create(loc, v1, v2)); +- } else if (mlir::isa(resultType)) { +- result = builder.create( +- loc, acc, builder.create(loc, v1, v2)); +- } else { +- llvm_unreachable("unsupported type"); +- } +- +- return builder.createConvert(loc, resultType, result); - } - if (const auto *value = llvm::dyn_cast_if_present(owner)) - return value[index]; - if (auto *operand = llvm::dyn_cast_if_present(owner)) -diff -ruN --strip-trailing-cr a/mlir/lib/IR/TypeRange.cpp b/mlir/lib/IR/TypeRange.cpp ---- a/mlir/lib/IR/TypeRange.cpp -+++ b/mlir/lib/IR/TypeRange.cpp -@@ -31,23 +31,12 @@ - this->base = result; - else if (auto *operand = llvm::dyn_cast_if_present(owner)) - this->base = operand; -- else if (auto value = llvm::dyn_cast_if_present(owner)) -- this->base = value.getType(); - else - this->base = cast(owner); - } +- +-private: +- mlir::Location loc; +- fir::FirOpBuilder &builder; +- +- mlir::Value castToProductType(mlir::Value value, mlir::Type type) { +- if (mlir::isa(type)) +- return builder.createConvert(loc, builder.getIntegerType(1), value); +- +- // TODO: the multiplications/additions by/of zero resulting from +- // complex * real are optimized by LLVM under -fno-signed-zeros +- // -fno-honor-nans. 
+- // We can make them disappear by default if we: +- // * either expand the complex multiplication into real +- // operations, OR +- // * set nnan nsz fast-math flags to the complex operations. +- if (fir::isa_complex(type) && !fir::isa_complex(value.getType())) { +- mlir::Value zeroCmplx = fir::factory::createZeroValue(builder, loc, type); +- fir::factory::Complex helper(builder, loc); +- mlir::Type partType = helper.getComplexPartType(type); +- return helper.insertComplexPart(zeroCmplx, +- castToProductType(value, partType), +- /*isImagPart=*/false); +- } +- return builder.createConvert(loc, type, value); +- } +-}; +- + class TransposeAsElementalConversion + : public mlir::OpRewritePattern { + public: +@@ -163,8 +90,11 @@ + static mlir::Value genResultShape(mlir::Location loc, + fir::FirOpBuilder &builder, + hlfir::Entity array) { +- llvm::SmallVector inExtents = +- hlfir::genExtentsVector(loc, builder, array); ++ mlir::Value inShape = hlfir::genShape(loc, builder, array); ++ llvm::SmallVector inExtents = ++ hlfir::getExplicitExtentsFromShape(inShape, builder); ++ if (inShape.getUses().empty()) ++ inShape.getDefiningOp()->erase(); + + // transpose indices + assert(inExtents.size() == 2 && "checked in TransposeOp::validate"); +@@ -207,7 +137,7 @@ + mlir::Value resultShape, dimExtent; + llvm::SmallVector arrayExtents; + if (isTotalReduction) +- arrayExtents = hlfir::genExtentsVector(loc, builder, array); ++ arrayExtents = genArrayExtents(loc, builder, array); + else + std::tie(resultShape, dimExtent) = + genResultShapeForPartialReduction(loc, builder, array, dimVal); +@@ -233,8 +163,7 @@ + // If DIM is not present, do total reduction. + + // Initial value for the reduction. +- mlir::Value reductionInitValue = +- fir::factory::createZeroValue(builder, loc, elementType); ++ mlir::Value reductionInitValue = genInitValue(loc, builder, elementType); + + // The reduction loop may be unordered if FastMathFlags::reassoc + // transformations are allowed. The integer reduction is always +@@ -335,6 +264,17 @@ + } + + private: ++ static llvm::SmallVector ++ genArrayExtents(mlir::Location loc, fir::FirOpBuilder &builder, ++ hlfir::Entity array) { ++ mlir::Value inShape = hlfir::genShape(loc, builder, array); ++ llvm::SmallVector inExtents = ++ hlfir::getExplicitExtentsFromShape(inShape, builder); ++ if (inShape.getUses().empty()) ++ inShape.getDefiningOp()->erase(); ++ return inExtents; ++ } ++ + // Return fir.shape specifying the shape of the result + // of a SUM reduction with DIM=dimVal. The second return value + // is the extent of the DIM dimension. +@@ -343,7 +283,7 @@ + fir::FirOpBuilder &builder, + hlfir::Entity array, int64_t dimVal) { + llvm::SmallVector inExtents = +- hlfir::genExtentsVector(loc, builder, array); ++ genArrayExtents(loc, builder, array); + assert(dimVal > 0 && dimVal <= static_cast(inExtents.size()) && + "DIM must be present and a positive constant not exceeding " + "the array's rank"); +@@ -353,6 +293,26 @@ + return {builder.create(loc, inExtents), dimExtent}; + } + ++ // Generate the initial value for a SUM reduction with the given ++ // data type. 
++ static mlir::Value genInitValue(mlir::Location loc, ++ fir::FirOpBuilder &builder, ++ mlir::Type elementType) { ++ if (auto ty = mlir::dyn_cast(elementType)) { ++ const llvm::fltSemantics &sem = ty.getFloatSemantics(); ++ return builder.createRealConstant(loc, elementType, ++ llvm::APFloat::getZero(sem)); ++ } else if (auto ty = mlir::dyn_cast(elementType)) { ++ mlir::Value initValue = genInitValue(loc, builder, ty.getElementType()); ++ return fir::factory::Complex{builder, loc}.createComplex(ty, initValue, ++ initValue); ++ } else if (mlir::isa(elementType)) { ++ return builder.createIntegerConstant(loc, elementType, 0); ++ } ++ ++ llvm_unreachable("unsupported SUM reduction type"); ++ } ++ + // Generate scalar addition of the two values (of the same data type). + static mlir::Value genScalarAdd(mlir::Location loc, + fir::FirOpBuilder &builder, +@@ -610,10 +570,16 @@ + static std::tuple + genResultShape(mlir::Location loc, fir::FirOpBuilder &builder, + hlfir::Entity input1, hlfir::Entity input2) { +- llvm::SmallVector input1Extents = +- hlfir::genExtentsVector(loc, builder, input1); +- llvm::SmallVector input2Extents = +- hlfir::genExtentsVector(loc, builder, input2); ++ mlir::Value input1Shape = hlfir::genShape(loc, builder, input1); ++ llvm::SmallVector input1Extents = ++ hlfir::getExplicitExtentsFromShape(input1Shape, builder); ++ if (input1Shape.getUses().empty()) ++ input1Shape.getDefiningOp()->erase(); ++ mlir::Value input2Shape = hlfir::genShape(loc, builder, input2); ++ llvm::SmallVector input2Extents = ++ hlfir::getExplicitExtentsFromShape(input2Shape, builder); ++ if (input2Shape.getUses().empty()) ++ input2Shape.getDefiningOp()->erase(); - /// See `llvm::detail::indexed_accessor_range_base` for details. - TypeRange::OwnerT TypeRange::offset_base(OwnerT object, ptrdiff_t index) { -- if (llvm::isa_and_nonnull(object)) { -- // Prevent out-of-bounds indexing for single values. -- // Note that we do allow an index of 1 as is required by 'slice'ing that -- // returns an empty range. This also matches the usual rules of C++ of being -- // allowed to index past the last element of an array. -- assert(index <= 1 && "out-of-bound offset into single-value 'ValueRange'"); -- // Return nullptr to quickly cause segmentation faults on misuse. -- return index == 0 ? object : nullptr; + llvm::SmallVector newExtents; + mlir::Value innerProduct1Extent, innerProduct2Extent; +@@ -661,6 +627,60 @@ + innerProductExtent[0]}; + } + ++ static mlir::Value castToProductType(mlir::Location loc, ++ fir::FirOpBuilder &builder, ++ mlir::Value value, mlir::Type type) { ++ if (mlir::isa(type)) ++ return builder.createConvert(loc, builder.getIntegerType(1), value); ++ ++ // TODO: the multiplications/additions by/of zero resulting from ++ // complex * real are optimized by LLVM under -fno-signed-zeros ++ // -fno-honor-nans. ++ // We can make them disappear by default if we: ++ // * either expand the complex multiplication into real ++ // operations, OR ++ // * set nnan nsz fast-math flags to the complex operations. 
++ if (fir::isa_complex(type) && !fir::isa_complex(value.getType())) { ++ mlir::Value zeroCmplx = fir::factory::createZeroValue(builder, loc, type); ++ fir::factory::Complex helper(builder, loc); ++ mlir::Type partType = helper.getComplexPartType(type); ++ return helper.insertComplexPart( ++ zeroCmplx, castToProductType(loc, builder, value, partType), ++ /*isImagPart=*/false); ++ } ++ return builder.createConvert(loc, type, value); ++ } ++ ++ // Generate an update of the inner product value: ++ // acc += v1 * v2, OR ++ // acc ||= v1 && v2 ++ static mlir::Value genAccumulateProduct(mlir::Location loc, ++ fir::FirOpBuilder &builder, ++ mlir::Type resultType, ++ mlir::Value acc, mlir::Value v1, ++ mlir::Value v2) { ++ acc = castToProductType(loc, builder, acc, resultType); ++ v1 = castToProductType(loc, builder, v1, resultType); ++ v2 = castToProductType(loc, builder, v2, resultType); ++ mlir::Value result; ++ if (mlir::isa(resultType)) ++ result = builder.create( ++ loc, acc, builder.create(loc, v1, v2)); ++ else if (mlir::isa(resultType)) ++ result = builder.create( ++ loc, acc, builder.create(loc, v1, v2)); ++ else if (mlir::isa(resultType)) ++ result = builder.create( ++ loc, acc, builder.create(loc, v1, v2)); ++ else if (mlir::isa(resultType)) ++ result = builder.create( ++ loc, acc, builder.create(loc, v1, v2)); ++ else ++ llvm_unreachable("unsupported type"); ++ ++ return builder.createConvert(loc, resultType, result); ++ } ++ + static mlir::LogicalResult + genContiguousMatmul(mlir::Location loc, fir::FirOpBuilder &builder, + hlfir::Entity result, mlir::Value resultShape, +@@ -728,9 +748,9 @@ + hlfir::loadElementAt(loc, builder, lhs, {I, K}); + hlfir::Entity rhsElementValue = + hlfir::loadElementAt(loc, builder, rhs, {K, J}); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- resultElementValue, lhsElementValue, rhsElementValue); ++ mlir::Value productValue = genAccumulateProduct( ++ loc, builder, resultElementType, resultElementValue, ++ lhsElementValue, rhsElementValue); + builder.create(loc, productValue, resultElement); + return {}; + }; +@@ -765,9 +785,9 @@ + hlfir::loadElementAt(loc, builder, lhs, {J, K}); + hlfir::Entity rhsElementValue = + hlfir::loadElementAt(loc, builder, rhs, {K}); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- resultElementValue, lhsElementValue, rhsElementValue); ++ mlir::Value productValue = genAccumulateProduct( ++ loc, builder, resultElementType, resultElementValue, ++ lhsElementValue, rhsElementValue); + builder.create(loc, productValue, resultElement); + return {}; + }; +@@ -797,9 +817,9 @@ + hlfir::loadElementAt(loc, builder, lhs, {K}); + hlfir::Entity rhsElementValue = + hlfir::loadElementAt(loc, builder, rhs, {K, J}); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- resultElementValue, lhsElementValue, rhsElementValue); ++ mlir::Value productValue = genAccumulateProduct( ++ loc, builder, resultElementType, resultElementValue, ++ lhsElementValue, rhsElementValue); + builder.create(loc, productValue, resultElement); + return {}; + }; +@@ -865,9 +885,9 @@ + hlfir::loadElementAt(loc, builder, lhs, lhsIndices); + hlfir::Entity rhsElementValue = + hlfir::loadElementAt(loc, builder, rhs, rhsIndices); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- reductionArgs[0], lhsElementValue, rhsElementValue); ++ mlir::Value productValue = genAccumulateProduct( ++ loc, builder, resultElementType, reductionArgs[0], 
lhsElementValue, ++ rhsElementValue); + return {productValue}; + }; + llvm::SmallVector innerProductValue = +@@ -884,73 +904,6 @@ + } + }; + +-class DotProductConversion +- : public mlir::OpRewritePattern { +-public: +- using mlir::OpRewritePattern::OpRewritePattern; +- +- llvm::LogicalResult +- matchAndRewrite(hlfir::DotProductOp product, +- mlir::PatternRewriter &rewriter) const override { +- hlfir::Entity op = hlfir::Entity{product}; +- if (!op.isScalar()) +- return rewriter.notifyMatchFailure(product, "produces non-scalar result"); +- +- mlir::Location loc = product.getLoc(); +- fir::FirOpBuilder builder{rewriter, product.getOperation()}; +- hlfir::Entity lhs = hlfir::Entity{product.getLhs()}; +- hlfir::Entity rhs = hlfir::Entity{product.getRhs()}; +- mlir::Type resultElementType = product.getType(); +- bool isUnordered = mlir::isa(resultElementType) || +- mlir::isa(resultElementType) || +- static_cast(builder.getFastMathFlags() & +- mlir::arith::FastMathFlags::reassoc); +- +- mlir::Value extent = genProductExtent(loc, builder, lhs, rhs); +- +- auto genBody = [&](mlir::Location loc, fir::FirOpBuilder &builder, +- mlir::ValueRange oneBasedIndices, +- mlir::ValueRange reductionArgs) +- -> llvm::SmallVector { +- hlfir::Entity lhsElementValue = +- hlfir::loadElementAt(loc, builder, lhs, oneBasedIndices); +- hlfir::Entity rhsElementValue = +- hlfir::loadElementAt(loc, builder, rhs, oneBasedIndices); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- reductionArgs[0], lhsElementValue, rhsElementValue); +- return {productValue}; +- }; +- +- mlir::Value initValue = +- fir::factory::createZeroValue(builder, loc, resultElementType); +- +- llvm::SmallVector result = hlfir::genLoopNestWithReductions( +- loc, builder, {extent}, +- /*reductionInits=*/{initValue}, genBody, isUnordered); +- +- rewriter.replaceOp(product, result[0]); +- return mlir::success(); - } - if (const auto *value = llvm::dyn_cast_if_present(object)) - return {value + index}; - if (auto *operand = llvm::dyn_cast_if_present(object)) -@@ -59,10 +48,6 @@ - - /// See `llvm::detail::indexed_accessor_range_base` for details. 
- Type TypeRange::dereference_iterator(OwnerT object, ptrdiff_t index) { -- if (auto type = llvm::dyn_cast_if_present(object)) { -- assert(index == 0 && "cannot offset into single-value 'TypeRange'"); -- return type; +- +-private: +- static mlir::Value genProductExtent(mlir::Location loc, +- fir::FirOpBuilder &builder, +- hlfir::Entity input1, +- hlfir::Entity input2) { +- llvm::SmallVector input1Extents = +- hlfir::genExtentsVector(loc, builder, input1); +- llvm::SmallVector input2Extents = +- hlfir::genExtentsVector(loc, builder, input2); +- +- assert(input1Extents.size() == 1 && input2Extents.size() == 1 && +- "hlfir.dot_product arguments must be vectors"); +- llvm::SmallVector extent = +- fir::factory::deduceOptimalExtents(input1Extents, input2Extents); +- return extent[0]; - } - if (const auto *value = llvm::dyn_cast_if_present(object)) - return (value + index)->getType(); - if (auto *operand = llvm::dyn_cast_if_present(object)) -diff -ruN --strip-trailing-cr a/mlir/unittests/IR/OperationSupportTest.cpp b/mlir/unittests/IR/OperationSupportTest.cpp ---- a/mlir/unittests/IR/OperationSupportTest.cpp -+++ b/mlir/unittests/IR/OperationSupportTest.cpp -@@ -313,21 +313,4 @@ - op2->destroy(); +-}; +- + class SimplifyHLFIRIntrinsics + : public hlfir::impl::SimplifyHLFIRIntrinsicsBase { + public: +@@ -986,8 +939,6 @@ + if (forceMatmulAsElemental || this->allowNewSideEffects) + patterns.insert>(context); + +- patterns.insert(context); +- + if (mlir::failed(mlir::applyPatternsGreedily( + getOperation(), std::move(patterns), config))) { + mlir::emitError(getOperation()->getLoc(), +diff -ruN --strip-trailing-cr a/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir +--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir ++++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir +@@ -1,144 +0,0 @@ +-// Test hlfir.dot_product simplification to a reduction loop: +-// RUN: fir-opt --simplify-hlfir-intrinsics %s | FileCheck %s +- +-func.func @dot_product_integer(%arg0: !hlfir.expr, %arg1: !hlfir.expr) -> i32 { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr) -> i32 +- return %res : i32 +-} +-// CHECK-LABEL: func.func @dot_product_integer( +-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr) -> i32 { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant 0 : i32 +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_8:.*]] = %[[VAL_3]]) -> (i32) { +-// CHECK: %[[VAL_9:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]] : (!hlfir.expr, index) -> i16 +-// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_7]] : (!hlfir.expr, index) -> i32 +-// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_9]] : (i16) -> i32 +-// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_11]], %[[VAL_10]] : i32 +-// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_8]], %[[VAL_12]] : i32 +-// CHECK: fir.result %[[VAL_13]] : i32 +-// CHECK: } +-// CHECK: return %[[VAL_6]] : i32 +-// CHECK: } +- +-func.func @dot_product_real(%arg0: !hlfir.expr, %arg1: !hlfir.expr) -> f32 { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr) -> f32 +- return %res : f32 +-} +-// CHECK-LABEL: func.func @dot_product_real( +-// 
CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr) -> f32 { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_8:.*]] = %[[VAL_3]]) -> (f32) { +-// CHECK: %[[VAL_9:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]] : (!hlfir.expr, index) -> f32 +-// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_7]] : (!hlfir.expr, index) -> f16 +-// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (f16) -> f32 +-// CHECK: %[[VAL_12:.*]] = arith.mulf %[[VAL_9]], %[[VAL_11]] : f32 +-// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_12]] : f32 +-// CHECK: fir.result %[[VAL_13]] : f32 +-// CHECK: } +-// CHECK: return %[[VAL_6]] : f32 +-// CHECK: } +- +-func.func @dot_product_complex(%arg0: !hlfir.expr>, %arg1: !hlfir.expr>) -> complex { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr>, !hlfir.expr>) -> complex +- return %res : complex +-} +-// CHECK-LABEL: func.func @dot_product_complex( +-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr>, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> complex { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr>) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.undefined complex +-// CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_6]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_8:.*]] = fir.insert_value %[[VAL_7]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (complex) { +-// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex +-// CHECK: %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex +-// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (complex) -> complex +-// CHECK: %[[VAL_15:.*]] = fir.extract_value %[[VAL_12]], [1 : index] : (complex) -> f32 +-// CHECK: %[[VAL_16:.*]] = arith.negf %[[VAL_15]] : f32 +-// CHECK: %[[VAL_17:.*]] = fir.insert_value %[[VAL_12]], %[[VAL_16]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_18:.*]] = fir.mulc %[[VAL_17]], %[[VAL_14]] : complex +-// CHECK: %[[VAL_19:.*]] = fir.addc %[[VAL_11]], %[[VAL_18]] : complex +-// CHECK: fir.result %[[VAL_19]] : complex +-// CHECK: } +-// CHECK: return %[[VAL_9]] : complex +-// CHECK: } +- +-func.func @dot_product_real_complex(%arg0: !hlfir.expr, %arg1: !hlfir.expr>) -> complex { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr>) -> complex +- return %res : complex +-} +-// CHECK-LABEL: func.func @dot_product_real_complex( +-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> complex { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : 
index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.undefined complex +-// CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_6]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_8:.*]] = fir.insert_value %[[VAL_7]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (complex) { +-// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]] : (!hlfir.expr, index) -> f32 +-// CHECK: %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex +-// CHECK: %[[VAL_14:.*]] = fir.undefined complex +-// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_17:.*]] = fir.insert_value %[[VAL_16]], %[[VAL_12]], [0 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_13]] : (complex) -> complex +-// CHECK: %[[VAL_19:.*]] = fir.extract_value %[[VAL_17]], [1 : index] : (complex) -> f32 +-// CHECK: %[[VAL_20:.*]] = arith.negf %[[VAL_19]] : f32 +-// CHECK: %[[VAL_21:.*]] = fir.insert_value %[[VAL_17]], %[[VAL_20]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_22:.*]] = fir.mulc %[[VAL_21]], %[[VAL_18]] : complex +-// CHECK: %[[VAL_23:.*]] = fir.addc %[[VAL_11]], %[[VAL_22]] : complex +-// CHECK: fir.result %[[VAL_23]] : complex +-// CHECK: } +-// CHECK: return %[[VAL_9]] : complex +-// CHECK: } +- +-func.func @dot_product_logical(%arg0: !hlfir.expr>, %arg1: !hlfir.expr>) -> !fir.logical<4> { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr>, !hlfir.expr>) -> !fir.logical<4> +- return %res : !fir.logical<4> +-} +-// CHECK-LABEL: func.func @dot_product_logical( +-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr>, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> !fir.logical<4> { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant false +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr>) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4> +-// CHECK: %[[VAL_7:.*]] = fir.do_loop %[[VAL_8:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_9:.*]] = %[[VAL_6]]) -> (!fir.logical<4>) { +-// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_8]] : (!hlfir.expr>, index) -> !fir.logical<1> +-// CHECK: %[[VAL_11:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_8]] : (!hlfir.expr>, index) -> !fir.logical<4> +-// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_9]] : (!fir.logical<4>) -> i1 +-// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_10]] : (!fir.logical<1>) -> i1 +-// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1 +-// CHECK: %[[VAL_15:.*]] = arith.andi %[[VAL_13]], %[[VAL_14]] : i1 +-// CHECK: %[[VAL_16:.*]] = arith.ori %[[VAL_12]], %[[VAL_15]] : i1 +-// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4> +-// CHECK: fir.result %[[VAL_17]] : !fir.logical<4> +-// CHECK: } +-// CHECK: return %[[VAL_7]] : !fir.logical<4> +-// CHECK: } +- +-func.func @dot_product_known_dim(%arg0: !hlfir.expr<10xf32>, %arg1: !hlfir.expr) -> f32 { +- %res1 = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr<10xf32>, !hlfir.expr) -> f32 +- %res2 = 
hlfir.dot_product %arg1 %arg0 : (!hlfir.expr, !hlfir.expr<10xf32>) -> f32 +- %res = arith.addf %res1, %res2 : f32 +- return %res : f32 +-} +-// CHECK-LABEL: func.func @dot_product_known_dim( +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_4:.*]] = arith.constant 10 : index +-// CHECK: fir.do_loop %{{.*}} = %[[VAL_2]] to %[[VAL_4]] step %[[VAL_2]] +-// CHECK: fir.do_loop %{{.*}} = %[[VAL_2]] to %[[VAL_4]] step %[[VAL_2]] +diff -ruN --strip-trailing-cr a/libcxx/include/__config b/libcxx/include/__config +--- a/libcxx/include/__config ++++ b/libcxx/include/__config +@@ -1166,9 +1166,7 @@ + # define _LIBCPP_NOESCAPE + # endif + +-// FIXME: Expand this to [[__gnu__::__nodebug__]] again once the testcase reported in +-// https://github.com/llvm/llvm-project/pull/118710 has been analyzed +-# define _LIBCPP_NODEBUG ++# define _LIBCPP_NODEBUG [[__gnu__::__nodebug__]] + + # if __has_attribute(__standalone_debug__) + # define _LIBCPP_STANDALONE_DEBUG __attribute__((__standalone_debug__)) +diff -ruN --strip-trailing-cr a/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp b/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp +--- a/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp ++++ b/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp +@@ -27,7 +27,7 @@ + check_factories.registerCheck("libcpp-header-exportable-declarations"); + check_factories.registerCheck("libcpp-hide-from-abi"); + check_factories.registerCheck("libcpp-internal-ftms"); +- // check_factories.registerCheck("libcpp-nodebug-on-aliases"); ++ check_factories.registerCheck("libcpp-nodebug-on-aliases"); + check_factories.registerCheck("libcpp-cpp-version-check"); + check_factories.registerCheck("libcpp-robust-against-adl"); + check_factories.registerCheck("libcpp-uglify-attributes"); +diff -ruN --strip-trailing-cr a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp ++++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +@@ -1140,8 +1140,6 @@ + + setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); + +- setTargetDAGCombine(ISD::SHL); +- + // In case of strict alignment, avoid an excessive number of byte wide stores. + MaxStoresPerMemsetOptSize = 8; + MaxStoresPerMemset = +@@ -26473,43 +26471,6 @@ + return NVCAST; } --TEST(ValueRangeTest, ValueConstructable) { -- MLIRContext context; -- Builder builder(&context); +-/// If the operand is a bitwise AND with a constant RHS, and the shift has a +-/// constant RHS and is the only use, we can pull it out of the shift, i.e. +-/// +-/// (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2)) +-/// +-/// We prefer this canonical form to match existing isel patterns. +-static SDValue performSHLCombine(SDNode *N, +- TargetLowering::DAGCombinerInfo &DCI, +- SelectionDAG &DAG) { +- if (DCI.isBeforeLegalizeOps()) +- return SDValue(); - -- Operation *useOp = -- createOp(&context, /*operands=*/std::nullopt, builder.getIntegerType(16)); -- // Valid construction despite a temporary 'OpResult'. 
-- ValueRange operands = useOp->getResult(0); +- SDValue Op0 = N->getOperand(0); +- if (Op0.getOpcode() != ISD::AND || !Op0.hasOneUse()) +- return SDValue(); - -- useOp->setOperands(operands); -- EXPECT_EQ(useOp->getNumOperands(), 1u); -- EXPECT_EQ(useOp->getOperand(0), useOp->getResult(0)); +- SDValue C1 = Op0->getOperand(1); +- SDValue C2 = N->getOperand(1); +- if (!isa(C1) || !isa(C2)) +- return SDValue(); - -- useOp->dropAllUses(); -- useOp->destroy(); +- // Might be folded into shifted op, do not lower. +- if (N->hasOneUse()) { +- unsigned UseOpc = N->user_begin()->getOpcode(); +- if (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC || +- UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS) +- return SDValue(); +- } +- +- SDLoc DL(N); +- EVT VT = N->getValueType(0); +- SDValue X = Op0->getOperand(0); +- SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, C1, C2); +- SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, X, C2); +- return DAG.getNode(ISD::AND, DL, VT, NewShift, NewRHS); +-} +- + SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; +@@ -26855,8 +26816,6 @@ + return performCTLZCombine(N, DAG, Subtarget); + case ISD::SCALAR_TO_VECTOR: + return performScalarToVectorCombine(N, DCI, DAG); +- case ISD::SHL: +- return performSHLCombine(N, DCI, DAG); + } + return SDValue(); + } +diff -ruN --strip-trailing-cr a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp ++++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +@@ -4979,7 +4979,7 @@ + // the subvector length. + const unsigned VecVF = getNumElements(Vec->getType()); + SmallVector Mask(VecVF, PoisonMaskElem); +- std::iota(Mask.begin(), std::next(Mask.begin(), Index), 0); ++ std::iota(Mask.begin(), Mask.end(), 0); + for (unsigned I : seq(SubVecVF)) + Mask[I + Index] = I + VecVF; + if (Generator) { +@@ -13956,11 +13956,12 @@ + Instruction *InsElt; + if (auto *VecTy = dyn_cast(Scalar->getType())) { + assert(SLPReVec && "FixedVectorType is not expected."); +- Vec = InsElt = cast(createInsertVector( +- Builder, Vec, Scalar, Pos * getNumElements(VecTy))); +- auto *II = dyn_cast(InsElt); ++ Vec = ++ createInsertVector(Builder, Vec, Scalar, Pos * getNumElements(VecTy)); ++ auto *II = dyn_cast(Vec); + if (!II || II->getIntrinsicID() != Intrinsic::vector_insert) + return Vec; ++ InsElt = II; + } else { + Vec = Builder.CreateInsertElement(Vec, Scalar, Builder.getInt32(Pos)); + InsElt = dyn_cast(Vec); +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll +--- a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll ++++ b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll +@@ -190,7 +190,8 @@ + define i8 @test_i8_7_mask_shl_1(i8 %a0) { + ; CHECK-LABEL: test_i8_7_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #1, #3 ++; CHECK-NEXT: and w8, w0, #0x7 ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 7 + %t1 = shl i8 %t0, 1 +@@ -199,7 +200,8 @@ + define i8 @test_i8_7_mask_shl_4(i8 %a0) { + ; CHECK-LABEL: test_i8_7_mask_shl_4: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #4, #3 ++; CHECK-NEXT: and w8, w0, #0x7 ++; CHECK-NEXT: lsl w0, w8, #4 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 7 + %t1 = shl i8 %t0, 4 +@@ -227,8 +229,8 @@ + define i8 @test_i8_28_mask_shl_1(i8 %a0) { + ; CHECK-LABEL: test_i8_28_mask_shl_1: + ; CHECK: // 
%bb.0: +-; CHECK-NEXT: lsl w8, w0, #1 +-; CHECK-NEXT: and w0, w8, #0x38 ++; CHECK-NEXT: and w8, w0, #0x1c ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 28 + %t1 = shl i8 %t0, 1 +@@ -237,8 +239,8 @@ + define i8 @test_i8_28_mask_shl_2(i8 %a0) { + ; CHECK-LABEL: test_i8_28_mask_shl_2: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #2 +-; CHECK-NEXT: and w0, w8, #0x70 ++; CHECK-NEXT: and w8, w0, #0x1c ++; CHECK-NEXT: lsl w0, w8, #2 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 28 + %t1 = shl i8 %t0, 2 +@@ -247,8 +249,8 @@ + define i8 @test_i8_28_mask_shl_3(i8 %a0) { + ; CHECK-LABEL: test_i8_28_mask_shl_3: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #3 +-; CHECK-NEXT: and w0, w8, #0xe0 ++; CHECK-NEXT: and w8, w0, #0x1c ++; CHECK-NEXT: lsl w0, w8, #3 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 28 + %t1 = shl i8 %t0, 3 +@@ -257,8 +259,8 @@ + define i8 @test_i8_28_mask_shl_4(i8 %a0) { + ; CHECK-LABEL: test_i8_28_mask_shl_4: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #4 +-; CHECK-NEXT: and w0, w8, #0xc0 ++; CHECK-NEXT: and w8, w0, #0xc ++; CHECK-NEXT: lsl w0, w8, #4 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 28 + %t1 = shl i8 %t0, 4 +@@ -268,8 +270,8 @@ + define i8 @test_i8_224_mask_shl_1(i8 %a0) { + ; CHECK-LABEL: test_i8_224_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #1 +-; CHECK-NEXT: and w0, w8, #0xc0 ++; CHECK-NEXT: and w8, w0, #0x60 ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 224 + %t1 = shl i8 %t0, 1 +@@ -463,7 +465,8 @@ + define i16 @test_i16_127_mask_shl_1(i16 %a0) { + ; CHECK-LABEL: test_i16_127_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #1, #7 ++; CHECK-NEXT: and w8, w0, #0x7f ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 127 + %t1 = shl i16 %t0, 1 +@@ -472,7 +475,8 @@ + define i16 @test_i16_127_mask_shl_8(i16 %a0) { + ; CHECK-LABEL: test_i16_127_mask_shl_8: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #8, #7 ++; CHECK-NEXT: and w8, w0, #0x7f ++; CHECK-NEXT: lsl w0, w8, #8 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 127 + %t1 = shl i16 %t0, 8 +@@ -500,8 +504,8 @@ + define i16 @test_i16_2032_mask_shl_3(i16 %a0) { + ; CHECK-LABEL: test_i16_2032_mask_shl_3: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #3 +-; CHECK-NEXT: and w0, w8, #0x3f80 ++; CHECK-NEXT: and w8, w0, #0x7f0 ++; CHECK-NEXT: lsl w0, w8, #3 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 2032 + %t1 = shl i16 %t0, 3 +@@ -510,8 +514,8 @@ + define i16 @test_i16_2032_mask_shl_4(i16 %a0) { + ; CHECK-LABEL: test_i16_2032_mask_shl_4: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #4 +-; CHECK-NEXT: and w0, w8, #0x7f00 ++; CHECK-NEXT: and w8, w0, #0x7f0 ++; CHECK-NEXT: lsl w0, w8, #4 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 2032 + %t1 = shl i16 %t0, 4 +@@ -520,8 +524,8 @@ + define i16 @test_i16_2032_mask_shl_5(i16 %a0) { + ; CHECK-LABEL: test_i16_2032_mask_shl_5: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #5 +-; CHECK-NEXT: and w0, w8, #0xfe00 ++; CHECK-NEXT: and w8, w0, #0x7f0 ++; CHECK-NEXT: lsl w0, w8, #5 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 2032 + %t1 = shl i16 %t0, 5 +@@ -530,8 +534,8 @@ + define i16 @test_i16_2032_mask_shl_6(i16 %a0) { + ; CHECK-LABEL: test_i16_2032_mask_shl_6: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #6 +-; CHECK-NEXT: and w0, w8, #0xfc00 ++; CHECK-NEXT: and w8, w0, #0x3f0 ++; CHECK-NEXT: lsl w0, w8, #6 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 2032 + %t1 = shl i16 %t0, 6 +@@ -541,8 +545,8 @@ + define i16 @test_i16_65024_mask_shl_1(i16 %a0) { + ; CHECK-LABEL: 
test_i16_65024_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #1 +-; CHECK-NEXT: and w0, w8, #0xfc00 ++; CHECK-NEXT: and w8, w0, #0x7e00 ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 65024 + %t1 = shl i16 %t0, 1 +@@ -736,7 +740,8 @@ + define i32 @test_i32_32767_mask_shl_1(i32 %a0) { + ; CHECK-LABEL: test_i32_32767_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #1, #15 ++; CHECK-NEXT: and w8, w0, #0x7fff ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 32767 + %t1 = shl i32 %t0, 1 +@@ -745,7 +750,8 @@ + define i32 @test_i32_32767_mask_shl_16(i32 %a0) { + ; CHECK-LABEL: test_i32_32767_mask_shl_16: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #16, #15 ++; CHECK-NEXT: and w8, w0, #0x7fff ++; CHECK-NEXT: lsl w0, w8, #16 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 32767 + %t1 = shl i32 %t0, 16 +@@ -773,8 +779,8 @@ + define i32 @test_i32_8388352_mask_shl_7(i32 %a0) { + ; CHECK-LABEL: test_i32_8388352_mask_shl_7: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #7 +-; CHECK-NEXT: and w0, w8, #0x3fff8000 ++; CHECK-NEXT: and w8, w0, #0x7fff00 ++; CHECK-NEXT: lsl w0, w8, #7 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 8388352 + %t1 = shl i32 %t0, 7 +@@ -783,8 +789,8 @@ + define i32 @test_i32_8388352_mask_shl_8(i32 %a0) { + ; CHECK-LABEL: test_i32_8388352_mask_shl_8: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #8 +-; CHECK-NEXT: and w0, w8, #0x7fff0000 ++; CHECK-NEXT: and w8, w0, #0x7fff00 ++; CHECK-NEXT: lsl w0, w8, #8 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 8388352 + %t1 = shl i32 %t0, 8 +@@ -793,8 +799,8 @@ + define i32 @test_i32_8388352_mask_shl_9(i32 %a0) { + ; CHECK-LABEL: test_i32_8388352_mask_shl_9: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #9 +-; CHECK-NEXT: and w0, w8, #0xfffe0000 ++; CHECK-NEXT: and w8, w0, #0x7fff00 ++; CHECK-NEXT: lsl w0, w8, #9 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 8388352 + %t1 = shl i32 %t0, 9 +@@ -803,8 +809,8 @@ + define i32 @test_i32_8388352_mask_shl_10(i32 %a0) { + ; CHECK-LABEL: test_i32_8388352_mask_shl_10: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #10 +-; CHECK-NEXT: and w0, w8, #0xfffc0000 ++; CHECK-NEXT: and w8, w0, #0x3fff00 ++; CHECK-NEXT: lsl w0, w8, #10 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 8388352 + %t1 = shl i32 %t0, 10 +@@ -814,8 +820,8 @@ + define i32 @test_i32_4294836224_mask_shl_1(i32 %a0) { + ; CHECK-LABEL: test_i32_4294836224_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #1 +-; CHECK-NEXT: and w0, w8, #0xfffc0000 ++; CHECK-NEXT: and w8, w0, #0x7ffe0000 ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 4294836224 + %t1 = shl i32 %t0, 1 +@@ -1009,7 +1015,8 @@ + define i64 @test_i64_2147483647_mask_shl_1(i64 %a0) { + ; CHECK-LABEL: test_i64_2147483647_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w0, w0, #1 ++; CHECK-NEXT: and x8, x0, #0x7fffffff ++; CHECK-NEXT: lsl x0, x8, #1 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 2147483647 + %t1 = shl i64 %t0, 1 +@@ -1047,8 +1054,8 @@ + define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) { + ; CHECK-LABEL: test_i64_140737488289792_mask_shl_15: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl x8, x0, #15 +-; CHECK-NEXT: and x0, x8, #0x3fffffff80000000 ++; CHECK-NEXT: and x8, x0, #0x7fffffff0000 ++; CHECK-NEXT: lsl x0, x8, #15 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 140737488289792 + %t1 = shl i64 %t0, 15 +@@ -1057,8 +1064,8 @@ + define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) { + ; CHECK-LABEL: test_i64_140737488289792_mask_shl_16: + ; CHECK: // %bb.0: +-; 
CHECK-NEXT: lsl x8, x0, #16 +-; CHECK-NEXT: and x0, x8, #0x7fffffff00000000 ++; CHECK-NEXT: and x8, x0, #0x7fffffff0000 ++; CHECK-NEXT: lsl x0, x8, #16 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 140737488289792 + %t1 = shl i64 %t0, 16 +@@ -1067,8 +1074,8 @@ + define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) { + ; CHECK-LABEL: test_i64_140737488289792_mask_shl_17: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl x8, x0, #17 +-; CHECK-NEXT: and x0, x8, #0xfffffffe00000000 ++; CHECK-NEXT: and x8, x0, #0x7fffffff0000 ++; CHECK-NEXT: lsl x0, x8, #17 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 140737488289792 + %t1 = shl i64 %t0, 17 +@@ -1077,8 +1084,8 @@ + define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) { + ; CHECK-LABEL: test_i64_140737488289792_mask_shl_18: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl x8, x0, #18 +-; CHECK-NEXT: and x0, x8, #0xfffffffc00000000 ++; CHECK-NEXT: and x8, x0, #0x3fffffff0000 ++; CHECK-NEXT: lsl x0, x8, #18 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 140737488289792 + %t1 = shl i64 %t0, 18 +@@ -1088,8 +1095,8 @@ + define i64 @test_i64_18446744065119617024_mask_shl_1(i64 %a0) { + ; CHECK-LABEL: test_i64_18446744065119617024_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl x8, x0, #1 +-; CHECK-NEXT: and x0, x8, #0xfffffffc00000000 ++; CHECK-NEXT: and x8, x0, #0x7ffffffe00000000 ++; CHECK-NEXT: lsl x0, x8, #1 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 18446744065119617024 + %t1 = shl i64 %t0, 1 +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll +--- a/llvm/test/CodeGen/AArch64/extract-bits.ll ++++ b/llvm/test/CodeGen/AArch64/extract-bits.ll +@@ -1013,8 +1013,8 @@ + define i32 @c2_i32(i32 %arg) nounwind { + ; CHECK-LABEL: c2_i32: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsr w8, w0, #17 +-; CHECK-NEXT: and w0, w8, #0xffc ++; CHECK-NEXT: ubfx w8, w0, #19, #10 ++; CHECK-NEXT: lsl w0, w8, #2 + ; CHECK-NEXT: ret + %tmp0 = lshr i32 %arg, 19 + %tmp1 = and i32 %tmp0, 1023 +@@ -1063,8 +1063,8 @@ + define i64 @c2_i64(i64 %arg) nounwind { + ; CHECK-LABEL: c2_i64: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsr x8, x0, #49 +-; CHECK-NEXT: and x0, x8, #0xffc ++; CHECK-NEXT: ubfx x8, x0, #51, #10 ++; CHECK-NEXT: lsl x0, x8, #2 + ; CHECK-NEXT: ret + %tmp0 = lshr i64 %arg, 51 + %tmp1 = and i64 %tmp0, 1023 +@@ -1120,8 +1120,8 @@ + define void @c7_i32(i32 %arg, ptr %ptr) nounwind { + ; CHECK-LABEL: c7_i32: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsr w8, w0, #17 +-; CHECK-NEXT: and w8, w8, #0xffc ++; CHECK-NEXT: ubfx w8, w0, #19, #10 ++; CHECK-NEXT: lsl w8, w8, #2 + ; CHECK-NEXT: str w8, [x1] + ; CHECK-NEXT: ret + %tmp0 = lshr i32 %arg, 19 +@@ -1163,8 +1163,8 @@ + define void @c7_i64(i64 %arg, ptr %ptr) nounwind { + ; CHECK-LABEL: c7_i64: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsr x8, x0, #49 +-; CHECK-NEXT: and x8, x8, #0xffc ++; CHECK-NEXT: ubfx x8, x0, #51, #10 ++; CHECK-NEXT: lsl x8, x8, #2 + ; CHECK-NEXT: str x8, [x1] + ; CHECK-NEXT: ret + %tmp0 = lshr i64 %arg, 51 +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/fpenv.ll b/llvm/test/CodeGen/AArch64/fpenv.ll +--- a/llvm/test/CodeGen/AArch64/fpenv.ll ++++ b/llvm/test/CodeGen/AArch64/fpenv.ll +@@ -4,11 +4,11 @@ + define void @func_set_rounding_dyn(i32 %rm) { + ; CHECK-LABEL: func_set_rounding_dyn: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w9, w0, #22 ++; CHECK-NEXT: sub w9, w0, #1 + ; CHECK-NEXT: mrs x8, FPCR ++; CHECK-NEXT: and w9, w9, #0x3 + ; CHECK-NEXT: and x8, x8, #0xffffffffff3fffff +-; CHECK-NEXT: sub w9, w9, #1024, lsl #12 // =4194304 +-; CHECK-NEXT: and w9, w9, 
#0xc00000 ++; CHECK-NEXT: lsl w9, w9, #22 + ; CHECK-NEXT: orr x8, x8, x9 + ; CHECK-NEXT: msr FPCR, x8 + ; CHECK-NEXT: ret +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/xbfiz.ll b/llvm/test/CodeGen/AArch64/xbfiz.ll +--- a/llvm/test/CodeGen/AArch64/xbfiz.ll ++++ b/llvm/test/CodeGen/AArch64/xbfiz.ll +@@ -69,19 +69,3 @@ + %and = and i64 %shl, 4294967295 + ret i64 %and + } +- +-define i64 @lsl_zext_i8_i64(i8 %b) { +-; CHECK-LABEL: lsl_zext_i8_i64: +-; CHECK: ubfiz x0, x0, #1, #8 +- %1 = zext i8 %b to i64 +- %2 = shl i64 %1, 1 +- ret i64 %2 -} - - } // namespace -diff -ruN --strip-trailing-cr a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel ---- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel -+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel -@@ -43,10 +43,7 @@ - - gentbl( - name = "diagnostic_defs_gen", -- tbl_outs = [( -- "-gen-clang-diags-defs -clang-component=%s" % c, -- "include/clang/Basic/Diagnostic%sKinds.inc" % c, -- ) for c in [ -+ tbl_outs = [out for c in [ - "AST", - "Analysis", - "Comment", -@@ -60,6 +57,15 @@ - "Refactoring", - "Sema", - "Serialization", -+ ] for out in [ -+ ( -+ "-gen-clang-diags-defs -clang-component=%s" % c, -+ "include/clang/Basic/Diagnostic%sKinds.inc" % c, -+ ), -+ ( -+ "-gen-clang-diags-enums -clang-component=%s" % c, -+ "include/clang/Basic/Diagnostic%sEnums.inc" % c, -+ ), - ]] + [ - ( - "-gen-clang-diag-groups", +-define i64 @lsl_zext_i16_i64(i16 %b) { +-; CHECK-LABEL: lsl_zext_i16_i64: +-; CHECK: ubfiz x0, x0, #1, #16 +- %1 = zext i16 %b to i64 +- %2 = shl i64 %1, 1 +- ret i64 %2 +-} +diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll +--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll ++++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll +@@ -0,0 +1,81 @@ ++; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ++; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s ++ ++define <16 x double> @test(ptr %x, double %v, double %a) { ++; CHECK-LABEL: define <16 x double> @test( ++; CHECK-SAME: ptr [[X:%.*]], double [[V:%.*]], double [[A:%.*]]) { ++; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8 ++; CHECK-NEXT: [[GEP8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 9 ++; CHECK-NEXT: [[TMP1:%.*]] = load <6 x double>, ptr [[X]], align 4 ++; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, ptr [[GEP6]], align 4 ++; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr [[GEP8]], align 4 ++; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x double> poison, double [[A]], i32 0 ++; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x double> [[TMP4]], <16 x double> poison, <16 x i32> zeroinitializer ++; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> poison, double [[V]], i32 0 ++; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <4 x i32> zeroinitializer ++; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[V]], i32 0 ++; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> poison, <2 x i32> zeroinitializer ++; CHECK-NEXT: [[TMP10:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v6f64(<16 x double> poison, <6 x double> [[TMP1]], i64 0) ++; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <16 x i32> ++; CHECK-NEXT: [[TMP12:%.*]] = 
shufflevector <16 x double> [[TMP10]], <16 x double> [[TMP11]], <16 x i32> ++; CHECK-NEXT: [[TMP13:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP12]], <2 x double> [[TMP6]], i64 6) ++; CHECK-NEXT: [[TMP14:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP13]], <2 x double> [[TMP7]], i64 8) ++; CHECK-NEXT: [[TMP15:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP14]], <2 x double> [[TMP9]], i64 10) ++; CHECK-NEXT: [[TMP16:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP15]], <2 x double> [[TMP9]], i64 12) ++; CHECK-NEXT: [[TMP17:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP16]], <2 x double> [[TMP9]], i64 14) ++; CHECK-NEXT: [[TMP18:%.*]] = fadd <16 x double> [[TMP5]], [[TMP17]] ++; CHECK-NEXT: ret <16 x double> [[TMP18]] ++; ++ %gep1 = getelementptr inbounds double, ptr %x, i64 1 ++ %gep2 = getelementptr inbounds double, ptr %x, i64 2 ++ %gep3 = getelementptr inbounds double, ptr %x, i64 3 ++ %gep4 = getelementptr inbounds double, ptr %x, i64 4 ++ %gep5 = getelementptr inbounds double, ptr %x, i64 5 ++ %gep6 = getelementptr inbounds double, ptr %x, i64 8 ++ %gep7 = getelementptr inbounds double, ptr %x, i64 9 ++ %gep8 = getelementptr inbounds double, ptr %x, i64 9 ++ %gep9 = getelementptr inbounds double, ptr %x, i64 10 ++ %x0 = load double, ptr %x, align 4 ++ %x1 = load double, ptr %gep1, align 4 ++ %x2 = load double, ptr %gep2, align 4 ++ %x3 = load double, ptr %gep3, align 4 ++ %x4 = load double, ptr %gep4, align 4 ++ %x5 = load double, ptr %gep5, align 4 ++ %x6 = load double, ptr %gep6, align 4 ++ %x7 = load double, ptr %gep7, align 4 ++ %x8 = load double, ptr %gep8, align 4 ++ %x9 = load double, ptr %gep9, align 4 ++ %add1 = fadd double %a, %x0 ++ %add2 = fadd double %a, %x1 ++ %add3 = fadd double %a, %x2 ++ %add4 = fadd double %a, %x3 ++ %add5 = fadd double %a, %x4 ++ %add6 = fadd double %a, %x5 ++ %add7 = fadd double %a, %x6 ++ %add8 = fadd double %a, %x7 ++ %add9 = fadd double %a, %x8 ++ %add10 = fadd double %a, %x9 ++ %add11 = fadd double %a, %v ++ %add12 = fadd double %a, %v ++ %add13 = fadd double %a, %v ++ %add14 = fadd double %a, %v ++ %add15 = fadd double %a, %v ++ %add16 = fadd double %a, %v ++ %i0 = insertelement <16 x double> poison, double %add1, i32 0 ++ %i1 = insertelement <16 x double> %i0, double %add2, i32 1 ++ %i2 = insertelement <16 x double> %i1, double %add3, i32 2 ++ %i3 = insertelement <16 x double> %i2, double %add4, i32 3 ++ %i4 = insertelement <16 x double> %i3, double %add5, i32 4 ++ %i5 = insertelement <16 x double> %i4, double %add6, i32 5 ++ %i6 = insertelement <16 x double> %i5, double %add7, i32 6 ++ %i7 = insertelement <16 x double> %i6, double %add8, i32 7 ++ %i8 = insertelement <16 x double> %i7, double %add9, i32 8 ++ %i9 = insertelement <16 x double> %i8, double %add10, i32 9 ++ %i10 = insertelement <16 x double> %i9, double %add11, i32 10 ++ %i11 = insertelement <16 x double> %i10, double %add12, i32 11 ++ %i12 = insertelement <16 x double> %i11, double %add13, i32 12 ++ %i13 = insertelement <16 x double> %i12, double %add14, i32 13 ++ %i14 = insertelement <16 x double> %i13, double %add15, i32 14 ++ %i15 = insertelement <16 x double> %i14, double %add16, i32 15 ++ ret <16 x double> %i15 ++} diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl index 4706c63c0e1cc..cb092919de358 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 
+4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" - LLVM_COMMIT = "bf17016a92bc8a23d2cdd2b51355dd4eb5019c68" - LLVM_SHA256 = "ba09f12e5019f5aca531b1733275f0a10b181d6f894deb1a4610e017f76b172a" + LLVM_COMMIT = "13c761789753862a7cc31a2a26f23010afa668b9" + LLVM_SHA256 = "587f3eda6d00d751cbfc69fa5a15475ae4232e191ace04031b343e4e8ae16355" tf_http_archive( name = name, diff --git a/third_party/shardy/temporary.patch b/third_party/shardy/temporary.patch index d68a9c7c5255c..6a074a44f8e2c 100644 --- a/third_party/shardy/temporary.patch +++ b/third_party/shardy/temporary.patch @@ -1,246 +1,1390 @@ diff --git a/third_party/llvm/generated.patch b/third_party/llvm/generated.patch -index 2331b44..3d2a252 100644 +index 3d2a252..8b54ffb 100644 --- a/third_party/llvm/generated.patch +++ b/third_party/llvm/generated.patch -@@ -1,22 +1,207 @@ +@@ -1,207 +1,1156 @@ Auto generated patch. Do not edit or delete it, even if empty. --diff -ruN --strip-trailing-cr a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp ----- a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp --+++ b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp --@@ -1291,7 +1291,7 @@ -- /// bitwise ops to avoid leaving LLVM to scramble with peephole optimizations. -- static Value rewriteI4ToI8Ext(PatternRewriter &rewriter, Location loc, -- Value srcValue, const ExtractNBitsFn &extFn) { --- auto srcVecType = cast(srcValue.getType()); --+ [[maybe_unused]] auto srcVecType = cast(srcValue.getType()); -- assert(srcVecType.getElementType().isSignlessInteger(4) && -- "Expected i4 type"); -+diff -ruN --strip-trailing-cr a/mlir/include/mlir/IR/TypeRange.h b/mlir/include/mlir/IR/TypeRange.h -+--- a/mlir/include/mlir/IR/TypeRange.h -++++ b/mlir/include/mlir/IR/TypeRange.h -+@@ -29,12 +29,11 @@ -+ /// a SmallVector/std::vector. This class should be used in places that are not -+ /// suitable for a more derived type (e.g. ArrayRef) or a template range -+ /// parameter. -+-class TypeRange -+- : public llvm::detail::indexed_accessor_range_base< -+- TypeRange, -+- llvm::PointerUnion, -+- Type, Type, Type> { -++class TypeRange : public llvm::detail::indexed_accessor_range_base< -++ TypeRange, -++ llvm::PointerUnion, -++ Type, Type, Type> { -+ public: -+ using RangeBaseT::RangeBaseT; -+ TypeRange(ArrayRef types = std::nullopt); -+@@ -45,11 +44,8 @@ -+ TypeRange(ValueTypeRange values) -+ : TypeRange(ValueRange(ValueRangeT(values.begin().getCurrent(), -+ values.end().getCurrent()))) {} -+- -+- TypeRange(Type type) : TypeRange(type, /*count=*/1) {} -+- template , Arg> && -+- !std::is_constructible_v>> -++ template , Arg>::value>> -+ TypeRange(Arg &&arg) : TypeRange(ArrayRef(std::forward(arg))) {} -+ TypeRange(std::initializer_list types) -+ : TypeRange(ArrayRef(types)) {} -+@@ -60,9 +56,8 @@ -+ /// * A pointer to the first element of an array of types. -+ /// * A pointer to the first element of an array of operands. -+ /// * A pointer to the first element of an array of results. -+- /// * A single 'Type' instance. -+ using OwnerT = llvm::PointerUnion; -++ detail::OpResultImpl *>; - --@@ -1311,7 +1311,7 @@ -- /// bitwise ops to avoid leaving LLVM to scramble with peephole optimizations. 
-- static Value rewriteI2ToI8Ext(PatternRewriter &rewriter, Location loc, -- Value srcValue, const ExtractNBitsFn &extFn) { --- VectorType srcVecType = cast(srcValue.getType()); --+ [[maybe_unused]] VectorType srcVecType = cast(srcValue.getType()); -- assert(srcVecType.getElementType().isSignlessInteger(2) && -- "Expected i2 type"); -+ /// See `llvm::detail::indexed_accessor_range_base` for details. -+ static OwnerT offset_base(OwnerT object, ptrdiff_t index); -+diff -ruN --strip-trailing-cr a/mlir/include/mlir/IR/ValueRange.h b/mlir/include/mlir/IR/ValueRange.h -+--- a/mlir/include/mlir/IR/ValueRange.h -++++ b/mlir/include/mlir/IR/ValueRange.h -+@@ -374,16 +374,16 @@ -+ /// SmallVector/std::vector. This class should be used in places that are not -+ /// suitable for a more derived type (e.g. ArrayRef) or a template range -+ /// parameter. -+-class ValueRange final : public llvm::detail::indexed_accessor_range_base< -+- ValueRange, -+- PointerUnion, -+- Value, Value, Value> { -++class ValueRange final -++ : public llvm::detail::indexed_accessor_range_base< -++ ValueRange, -++ PointerUnion, -++ Value, Value, Value> { +-diff -ruN --strip-trailing-cr a/mlir/include/mlir/IR/TypeRange.h b/mlir/include/mlir/IR/TypeRange.h +---- a/mlir/include/mlir/IR/TypeRange.h +-+++ b/mlir/include/mlir/IR/TypeRange.h +-@@ -29,12 +29,11 @@ +- /// a SmallVector/std::vector. This class should be used in places that are not +- /// suitable for a more derived type (e.g. ArrayRef) or a template range +- /// parameter. +--class TypeRange +-- : public llvm::detail::indexed_accessor_range_base< +-- TypeRange, +-- llvm::PointerUnion, +-- Type, Type, Type> { +-+class TypeRange : public llvm::detail::indexed_accessor_range_base< +-+ TypeRange, +-+ llvm::PointerUnion, +-+ Type, Type, Type> { +- public: +- using RangeBaseT::RangeBaseT; +- TypeRange(ArrayRef types = std::nullopt); +-@@ -45,11 +44,8 @@ +- TypeRange(ValueTypeRange values) +- : TypeRange(ValueRange(ValueRangeT(values.begin().getCurrent(), +- values.end().getCurrent()))) {} +-- +-- TypeRange(Type type) : TypeRange(type, /*count=*/1) {} +-- template , Arg> && +-- !std::is_constructible_v>> +-+ template , Arg>::value>> +- TypeRange(Arg &&arg) : TypeRange(ArrayRef(std::forward(arg))) {} +- TypeRange(std::initializer_list types) +- : TypeRange(ArrayRef(types)) {} +-@@ -60,9 +56,8 @@ +- /// * A pointer to the first element of an array of types. +- /// * A pointer to the first element of an array of operands. +- /// * A pointer to the first element of an array of results. +-- /// * A single 'Type' instance. +- using OwnerT = llvm::PointerUnion; +-+ detail::OpResultImpl *>; +- +- /// See `llvm::detail::indexed_accessor_range_base` for details. +- static OwnerT offset_base(OwnerT object, ptrdiff_t index); +-diff -ruN --strip-trailing-cr a/mlir/include/mlir/IR/ValueRange.h b/mlir/include/mlir/IR/ValueRange.h +---- a/mlir/include/mlir/IR/ValueRange.h +-+++ b/mlir/include/mlir/IR/ValueRange.h +-@@ -374,16 +374,16 @@ +- /// SmallVector/std::vector. This class should be used in places that are not +- /// suitable for a more derived type (e.g. ArrayRef) or a template range +- /// parameter. +--class ValueRange final : public llvm::detail::indexed_accessor_range_base< +-- ValueRange, +-- PointerUnion, +-- Value, Value, Value> { +-+class ValueRange final +-+ : public llvm::detail::indexed_accessor_range_base< +-+ ValueRange, +-+ PointerUnion, +-+ Value, Value, Value> { +- public: +- /// The type representing the owner of a ValueRange. 
This is either a list of +-- /// values, operands, or results or a single value. +-+ /// values, operands, or results. +- using OwnerT = +-- PointerUnion; +-+ PointerUnion; +- +- using RangeBaseT::RangeBaseT; +- +-@@ -392,7 +392,7 @@ +- std::is_constructible, Arg>::value && +- !std::is_convertible::value>> +- ValueRange(Arg &&arg) : ValueRange(ArrayRef(std::forward(arg))) {} +-- ValueRange(Value value) : ValueRange(value, /*count=*/1) {} +-+ ValueRange(const Value &value) : ValueRange(&value, /*count=*/1) {} +- ValueRange(const std::initializer_list &values) +- : ValueRange(ArrayRef(values)) {} +- ValueRange(iterator_range values) +-diff -ruN --strip-trailing-cr a/mlir/lib/IR/OperationSupport.cpp b/mlir/lib/IR/OperationSupport.cpp +---- a/mlir/lib/IR/OperationSupport.cpp +-+++ b/mlir/lib/IR/OperationSupport.cpp +-@@ -653,15 +653,6 @@ +- /// See `llvm::detail::indexed_accessor_range_base` for details. +- ValueRange::OwnerT ValueRange::offset_base(const OwnerT &owner, +- ptrdiff_t index) { +-- if (llvm::isa_and_nonnull(owner)) { +-- // Prevent out-of-bounds indexing for single values. +-- // Note that we do allow an index of 1 as is required by 'slice'ing that +-- // returns an empty range. This also matches the usual rules of C++ of being +-- // allowed to index past the last element of an array. +-- assert(index <= 1 && "out-of-bound offset into single-value 'ValueRange'"); +-- // Return nullptr to quickly cause segmentation faults on misuse. +-- return index == 0 ? owner : nullptr; +-- } +- if (const auto *value = llvm::dyn_cast_if_present(owner)) +- return {value + index}; +- if (auto *operand = llvm::dyn_cast_if_present(owner)) +-@@ -670,10 +661,6 @@ ++diff -ruN --strip-trailing-cr a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h ++--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h +++++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h ++@@ -513,12 +513,6 @@ ++ Entity loadElementAt(mlir::Location loc, fir::FirOpBuilder &builder, ++ Entity entity, mlir::ValueRange oneBasedIndices); ++ ++-/// Return a vector of extents for the given entity. ++-/// The function creates new operations, but tries to clean-up ++-/// after itself. ++-llvm::SmallVector ++-genExtentsVector(mlir::Location loc, fir::FirOpBuilder &builder, Entity entity); ++- ++ } // namespace hlfir ++ ++ #endif // FORTRAN_OPTIMIZER_BUILDER_HLFIRTOOLS_H ++diff -ruN --strip-trailing-cr a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp ++--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp +++++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp ++@@ -1421,15 +1421,3 @@ ++ return loadTrivialScalar(loc, builder, ++ getElementAt(loc, builder, entity, oneBasedIndices)); + } +- /// See `llvm::detail::indexed_accessor_range_base` for details. 
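
dereference_iterator below is the other half of the contract these nested hunks toggle: when the owner union carried a single inline Value, both offset_base and dereference_iterator needed special cases, and the alternative constructor ValueRange(const Value &value) : ValueRange(&value, /*count=*/1) instead stores a pointer to a caller-owned value so the generic pointer arithmetic covers everything. A standalone sketch of the two owner models, in plain C++17 with none of the MLIR types:

#include <cassert>
#include <cstddef>
#include <variant>

class IntRange {
public:
  IntRange(const int *data, std::size_t count) : owner(data), count(count) {}
  // Inline single-value owner, analogous to the 'Value' union member.
  explicit IntRange(int value) : owner(value), count(1) {}

  int operator[](std::size_t index) const {
    if (const int *inlineValue = std::get_if<int>(&owner)) {
      // The special case the hunks above delete: only index 0 exists.
      assert(index == 0 && "cannot offset into single-value range");
      return *inlineValue;
    }
    assert(index < count && "out-of-bounds index");
    return std::get<const int *>(owner)[index];
  }

private:
  std::variant<const int *, int> owner;
  std::size_t count;
};

int main() {
  int xs[] = {1, 2, 3};
  IntRange many(xs, 3);
  IntRange one(42);
  return (many[2] == 3 && one[0] == 42) ? 0 : 1;
}

The pointer-plus-count model keeps the range trivially cheap, at the cost that the referenced value must outlive the range; that lifetime question is exactly what the deleted ValueConstructable unit test further down exercised with a temporary OpResult.
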
+- Value ValueRange::dereference_iterator(const OwnerT &owner, ptrdiff_t index) { +-- if (auto value = llvm::dyn_cast_if_present(owner)) { +-- assert(index == 0 && "cannot offset into single-value 'ValueRange'"); +-- return value; ++- ++-llvm::SmallVector ++-hlfir::genExtentsVector(mlir::Location loc, fir::FirOpBuilder &builder, ++- hlfir::Entity entity) { ++- entity = hlfir::derefPointersAndAllocatables(loc, builder, entity); ++- mlir::Value shape = hlfir::genShape(loc, builder, entity); ++- llvm::SmallVector extents = ++- hlfir::getExplicitExtentsFromShape(shape, builder); ++- if (shape.getUses().empty()) ++- shape.getDefiningOp()->erase(); ++- return extents; ++-} ++diff -ruN --strip-trailing-cr a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp ++--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +++++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp ++@@ -37,79 +37,6 @@ ++ ++ namespace { ++ ++-// Helper class to generate operations related to computing ++-// product of values. ++-class ProductFactory { ++-public: ++- ProductFactory(mlir::Location loc, fir::FirOpBuilder &builder) ++- : loc(loc), builder(builder) {} ++- ++- // Generate an update of the inner product value: ++- // acc += v1 * v2, OR ++- // acc += CONJ(v1) * v2, OR ++- // acc ||= v1 && v2 ++- // ++- // CONJ parameter specifies whether the first complex product argument ++- // needs to be conjugated. ++- template ++- mlir::Value genAccumulateProduct(mlir::Value acc, mlir::Value v1, ++- mlir::Value v2) { ++- mlir::Type resultType = acc.getType(); ++- acc = castToProductType(acc, resultType); ++- v1 = castToProductType(v1, resultType); ++- v2 = castToProductType(v2, resultType); ++- mlir::Value result; ++- if (mlir::isa(resultType)) { ++- result = builder.create( ++- loc, acc, builder.create(loc, v1, v2)); ++- } else if (mlir::isa(resultType)) { ++- if constexpr (CONJ) ++- result = fir::IntrinsicLibrary{builder, loc}.genConjg(resultType, v1); ++- else ++- result = v1; ++- ++- result = builder.create( ++- loc, acc, builder.create(loc, result, v2)); ++- } else if (mlir::isa(resultType)) { ++- result = builder.create( ++- loc, acc, builder.create(loc, v1, v2)); ++- } else if (mlir::isa(resultType)) { ++- result = builder.create( ++- loc, acc, builder.create(loc, v1, v2)); ++- } else { ++- llvm_unreachable("unsupported type"); ++- } ++- ++- return builder.createConvert(loc, resultType, result); + - } +- if (const auto *value = llvm::dyn_cast_if_present(owner)) +- return value[index]; +- if (auto *operand = llvm::dyn_cast_if_present(owner)) +-diff -ruN --strip-trailing-cr a/mlir/lib/IR/TypeRange.cpp b/mlir/lib/IR/TypeRange.cpp +---- a/mlir/lib/IR/TypeRange.cpp +-+++ b/mlir/lib/IR/TypeRange.cpp +-@@ -31,23 +31,12 @@ +- this->base = result; +- else if (auto *operand = llvm::dyn_cast_if_present(owner)) +- this->base = operand; +-- else if (auto value = llvm::dyn_cast_if_present(owner)) +-- this->base = value.getType(); +- else +- this->base = cast(owner); +- } ++- ++-private: ++- mlir::Location loc; ++- fir::FirOpBuilder &builder; ++- ++- mlir::Value castToProductType(mlir::Value value, mlir::Type type) { ++- if (mlir::isa(type)) ++- return builder.createConvert(loc, builder.getIntegerType(1), value); ++- ++- // TODO: the multiplications/additions by/of zero resulting from ++- // complex * real are optimized by LLVM under -fno-signed-zeros ++- // -fno-honor-nans. 
++- // We can make them disappear by default if we: ++- // * either expand the complex multiplication into real ++- // operations, OR ++- // * set nnan nsz fast-math flags to the complex operations. ++- if (fir::isa_complex(type) && !fir::isa_complex(value.getType())) { ++- mlir::Value zeroCmplx = fir::factory::createZeroValue(builder, loc, type); ++- fir::factory::Complex helper(builder, loc); ++- mlir::Type partType = helper.getComplexPartType(type); ++- return helper.insertComplexPart(zeroCmplx, ++- castToProductType(value, partType), ++- /*isImagPart=*/false); ++- } ++- return builder.createConvert(loc, type, value); ++- } ++-}; ++- ++ class TransposeAsElementalConversion ++ : public mlir::OpRewritePattern { + public: -+ /// The type representing the owner of a ValueRange. This is either a list of -+- /// values, operands, or results or a single value. -++ /// values, operands, or results. -+ using OwnerT = -+- PointerUnion; -++ PointerUnion; ++@@ -163,8 +90,11 @@ ++ static mlir::Value genResultShape(mlir::Location loc, ++ fir::FirOpBuilder &builder, ++ hlfir::Entity array) { ++- llvm::SmallVector inExtents = ++- hlfir::genExtentsVector(loc, builder, array); +++ mlir::Value inShape = hlfir::genShape(loc, builder, array); +++ llvm::SmallVector inExtents = +++ hlfir::getExplicitExtentsFromShape(inShape, builder); +++ if (inShape.getUses().empty()) +++ inShape.getDefiningOp()->erase(); ++ ++ // transpose indices ++ assert(inExtents.size() == 2 && "checked in TransposeOp::validate"); ++@@ -207,7 +137,7 @@ ++ mlir::Value resultShape, dimExtent; ++ llvm::SmallVector arrayExtents; ++ if (isTotalReduction) ++- arrayExtents = hlfir::genExtentsVector(loc, builder, array); +++ arrayExtents = genArrayExtents(loc, builder, array); ++ else ++ std::tie(resultShape, dimExtent) = ++ genResultShapeForPartialReduction(loc, builder, array, dimVal); ++@@ -233,8 +163,7 @@ ++ // If DIM is not present, do total reduction. ++ ++ // Initial value for the reduction. ++- mlir::Value reductionInitValue = ++- fir::factory::createZeroValue(builder, loc, elementType); +++ mlir::Value reductionInitValue = genInitValue(loc, builder, elementType); ++ ++ // The reduction loop may be unordered if FastMathFlags::reassoc ++ // transformations are allowed. The integer reduction is always ++@@ -335,6 +264,17 @@ ++ } ++ ++ private: +++ static llvm::SmallVector +++ genArrayExtents(mlir::Location loc, fir::FirOpBuilder &builder, +++ hlfir::Entity array) { +++ mlir::Value inShape = hlfir::genShape(loc, builder, array); +++ llvm::SmallVector inExtents = +++ hlfir::getExplicitExtentsFromShape(inShape, builder); +++ if (inShape.getUses().empty()) +++ inShape.getDefiningOp()->erase(); +++ return inExtents; +++ } +++ ++ // Return fir.shape specifying the shape of the result ++ // of a SUM reduction with DIM=dimVal. The second return value ++ // is the extent of the DIM dimension. ++@@ -343,7 +283,7 @@ ++ fir::FirOpBuilder &builder, ++ hlfir::Entity array, int64_t dimVal) { ++ llvm::SmallVector inExtents = ++- hlfir::genExtentsVector(loc, builder, array); +++ genArrayExtents(loc, builder, array); ++ assert(dimVal > 0 && dimVal <= static_cast(inExtents.size()) && ++ "DIM must be present and a positive constant not exceeding " ++ "the array's rank"); ++@@ -353,6 +293,26 @@ ++ return {builder.create(loc, inExtents), dimExtent}; ++ } ++ +++ // Generate the initial value for a SUM reduction with the given +++ // data type. 
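
The genInitValue helper that follows builds the additive identity with FIR builders; in plain-C++ terms (a sketch only, with std::complex standing in for the FIR complex type, and assuming, as the llvm_unreachable below does, that only floating, complex, and integer element types reach a SUM reduction):

#include <complex>
#include <type_traits>

// Additive identity for a SUM accumulator: +0.0 for floats (matching
// APFloat::getZero's positive zero), zero real and imaginary parts for
// complex, and plain 0 for integers.
template <typename T>
T sumInitValue() {
  if constexpr (std::is_floating_point_v<T>)
    return T(0);
  else if constexpr (std::is_integral_v<T>)
    return T(0);
  else
    return T(typename T::value_type(0), typename T::value_type(0));
}

int main() {
  return (sumInitValue<double>() == 0.0 && sumInitValue<int>() == 0 &&
          sumInitValue<std::complex<float>>() == std::complex<float>(0, 0))
             ? 0
             : 1;
}
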
+++ static mlir::Value genInitValue(mlir::Location loc, +++ fir::FirOpBuilder &builder, +++ mlir::Type elementType) { +++ if (auto ty = mlir::dyn_cast(elementType)) { +++ const llvm::fltSemantics &sem = ty.getFloatSemantics(); +++ return builder.createRealConstant(loc, elementType, +++ llvm::APFloat::getZero(sem)); +++ } else if (auto ty = mlir::dyn_cast(elementType)) { +++ mlir::Value initValue = genInitValue(loc, builder, ty.getElementType()); +++ return fir::factory::Complex{builder, loc}.createComplex(ty, initValue, +++ initValue); +++ } else if (mlir::isa(elementType)) { +++ return builder.createIntegerConstant(loc, elementType, 0); +++ } +++ +++ llvm_unreachable("unsupported SUM reduction type"); +++ } +++ ++ // Generate scalar addition of the two values (of the same data type). ++ static mlir::Value genScalarAdd(mlir::Location loc, ++ fir::FirOpBuilder &builder, ++@@ -610,10 +570,16 @@ ++ static std::tuple ++ genResultShape(mlir::Location loc, fir::FirOpBuilder &builder, ++ hlfir::Entity input1, hlfir::Entity input2) { ++- llvm::SmallVector input1Extents = ++- hlfir::genExtentsVector(loc, builder, input1); ++- llvm::SmallVector input2Extents = ++- hlfir::genExtentsVector(loc, builder, input2); +++ mlir::Value input1Shape = hlfir::genShape(loc, builder, input1); +++ llvm::SmallVector input1Extents = +++ hlfir::getExplicitExtentsFromShape(input1Shape, builder); +++ if (input1Shape.getUses().empty()) +++ input1Shape.getDefiningOp()->erase(); +++ mlir::Value input2Shape = hlfir::genShape(loc, builder, input2); +++ llvm::SmallVector input2Extents = +++ hlfir::getExplicitExtentsFromShape(input2Shape, builder); +++ if (input2Shape.getUses().empty()) +++ input2Shape.getDefiningOp()->erase(); -+ using RangeBaseT::RangeBaseT; -+ -+@@ -392,7 +392,7 @@ -+ std::is_constructible, Arg>::value && -+ !std::is_convertible::value>> -+ ValueRange(Arg &&arg) : ValueRange(ArrayRef(std::forward(arg))) {} -+- ValueRange(Value value) : ValueRange(value, /*count=*/1) {} -++ ValueRange(const Value &value) : ValueRange(&value, /*count=*/1) {} -+ ValueRange(const std::initializer_list &values) -+ : ValueRange(ArrayRef(values)) {} -+ ValueRange(iterator_range values) -+diff -ruN --strip-trailing-cr a/mlir/lib/IR/OperationSupport.cpp b/mlir/lib/IR/OperationSupport.cpp -+--- a/mlir/lib/IR/OperationSupport.cpp -++++ b/mlir/lib/IR/OperationSupport.cpp -+@@ -653,15 +653,6 @@ -+ /// See `llvm::detail::indexed_accessor_range_base` for details. -+ ValueRange::OwnerT ValueRange::offset_base(const OwnerT &owner, -+ ptrdiff_t index) { -+- if (llvm::isa_and_nonnull(owner)) { -+- // Prevent out-of-bounds indexing for single values. -+- // Note that we do allow an index of 1 as is required by 'slice'ing that -+- // returns an empty range. This also matches the usual rules of C++ of being -+- // allowed to index past the last element of an array. -+- assert(index <= 1 && "out-of-bound offset into single-value 'ValueRange'"); -+- // Return nullptr to quickly cause segmentation faults on misuse. -+- return index == 0 ? owner : nullptr; -+- } -+ if (const auto *value = llvm::dyn_cast_if_present(owner)) -+ return {value + index}; -+ if (auto *operand = llvm::dyn_cast_if_present(owner)) -+@@ -670,10 +661,6 @@ -+ } -+ /// See `llvm::detail::indexed_accessor_range_base` for details. 
-+ Value ValueRange::dereference_iterator(const OwnerT &owner, ptrdiff_t index) { -+- if (auto value = llvm::dyn_cast_if_present(owner)) { -+- assert(index == 0 && "cannot offset into single-value 'ValueRange'"); -+- return value; -+- } -+ if (const auto *value = llvm::dyn_cast_if_present(owner)) -+ return value[index]; -+ if (auto *operand = llvm::dyn_cast_if_present(owner)) -+diff -ruN --strip-trailing-cr a/mlir/lib/IR/TypeRange.cpp b/mlir/lib/IR/TypeRange.cpp -+--- a/mlir/lib/IR/TypeRange.cpp -++++ b/mlir/lib/IR/TypeRange.cpp -+@@ -31,23 +31,12 @@ -+ this->base = result; -+ else if (auto *operand = llvm::dyn_cast_if_present(owner)) -+ this->base = operand; -+- else if (auto value = llvm::dyn_cast_if_present(owner)) -+- this->base = value.getType(); -+ else -+ this->base = cast(owner); -+ } +- /// See `llvm::detail::indexed_accessor_range_base` for details. +- TypeRange::OwnerT TypeRange::offset_base(OwnerT object, ptrdiff_t index) { +-- if (llvm::isa_and_nonnull(object)) { +-- // Prevent out-of-bounds indexing for single values. +-- // Note that we do allow an index of 1 as is required by 'slice'ing that +-- // returns an empty range. This also matches the usual rules of C++ of being +-- // allowed to index past the last element of an array. +-- assert(index <= 1 && "out-of-bound offset into single-value 'ValueRange'"); +-- // Return nullptr to quickly cause segmentation faults on misuse. +-- return index == 0 ? object : nullptr; ++ llvm::SmallVector newExtents; ++ mlir::Value innerProduct1Extent, innerProduct2Extent; ++@@ -661,6 +627,60 @@ ++ innerProductExtent[0]}; ++ } + -+ /// See `llvm::detail::indexed_accessor_range_base` for details. -+ TypeRange::OwnerT TypeRange::offset_base(OwnerT object, ptrdiff_t index) { -+- if (llvm::isa_and_nonnull(object)) { -+- // Prevent out-of-bounds indexing for single values. -+- // Note that we do allow an index of 1 as is required by 'slice'ing that -+- // returns an empty range. This also matches the usual rules of C++ of being -+- // allowed to index past the last element of an array. -+- assert(index <= 1 && "out-of-bound offset into single-value 'ValueRange'"); -+- // Return nullptr to quickly cause segmentation faults on misuse. -+- return index == 0 ? object : nullptr; -+- } -+ if (const auto *value = llvm::dyn_cast_if_present(object)) -+ return {value + index}; -+ if (auto *operand = llvm::dyn_cast_if_present(object)) -+@@ -59,10 +48,6 @@ -+ -+ /// See `llvm::detail::indexed_accessor_range_base` for details. 
-+ Type TypeRange::dereference_iterator(OwnerT object, ptrdiff_t index) { -+- if (auto type = llvm::dyn_cast_if_present(object)) { -+- assert(index == 0 && "cannot offset into single-value 'TypeRange'"); -+- return type; -+- } -+ if (const auto *value = llvm::dyn_cast_if_present(object)) -+ return (value + index)->getType(); -+ if (auto *operand = llvm::dyn_cast_if_present(object)) -+diff -ruN --strip-trailing-cr a/mlir/unittests/IR/OperationSupportTest.cpp b/mlir/unittests/IR/OperationSupportTest.cpp -+--- a/mlir/unittests/IR/OperationSupportTest.cpp -++++ b/mlir/unittests/IR/OperationSupportTest.cpp -+@@ -313,21 +313,4 @@ -+ op2->destroy(); -+ } +++ static mlir::Value castToProductType(mlir::Location loc, +++ fir::FirOpBuilder &builder, +++ mlir::Value value, mlir::Type type) { +++ if (mlir::isa(type)) +++ return builder.createConvert(loc, builder.getIntegerType(1), value); +++ +++ // TODO: the multiplications/additions by/of zero resulting from +++ // complex * real are optimized by LLVM under -fno-signed-zeros +++ // -fno-honor-nans. +++ // We can make them disappear by default if we: +++ // * either expand the complex multiplication into real +++ // operations, OR +++ // * set nnan nsz fast-math flags to the complex operations. +++ if (fir::isa_complex(type) && !fir::isa_complex(value.getType())) { +++ mlir::Value zeroCmplx = fir::factory::createZeroValue(builder, loc, type); +++ fir::factory::Complex helper(builder, loc); +++ mlir::Type partType = helper.getComplexPartType(type); +++ return helper.insertComplexPart( +++ zeroCmplx, castToProductType(loc, builder, value, partType), +++ /*isImagPart=*/false); +++ } +++ return builder.createConvert(loc, type, value); +++ } +++ +++ // Generate an update of the inner product value: +++ // acc += v1 * v2, OR +++ // acc ||= v1 && v2 +++ static mlir::Value genAccumulateProduct(mlir::Location loc, +++ fir::FirOpBuilder &builder, +++ mlir::Type resultType, +++ mlir::Value acc, mlir::Value v1, +++ mlir::Value v2) { +++ acc = castToProductType(loc, builder, acc, resultType); +++ v1 = castToProductType(loc, builder, v1, resultType); +++ v2 = castToProductType(loc, builder, v2, resultType); +++ mlir::Value result; +++ if (mlir::isa(resultType)) +++ result = builder.create( +++ loc, acc, builder.create(loc, v1, v2)); +++ else if (mlir::isa(resultType)) +++ result = builder.create( +++ loc, acc, builder.create(loc, v1, v2)); +++ else if (mlir::isa(resultType)) +++ result = builder.create( +++ loc, acc, builder.create(loc, v1, v2)); +++ else if (mlir::isa(resultType)) +++ result = builder.create( +++ loc, acc, builder.create(loc, v1, v2)); +++ else +++ llvm_unreachable("unsupported type"); +++ +++ return builder.createConvert(loc, resultType, result); +++ } +++ ++ static mlir::LogicalResult ++ genContiguousMatmul(mlir::Location loc, fir::FirOpBuilder &builder, ++ hlfir::Entity result, mlir::Value resultShape, ++@@ -728,9 +748,9 @@ ++ hlfir::loadElementAt(loc, builder, lhs, {I, K}); ++ hlfir::Entity rhsElementValue = ++ hlfir::loadElementAt(loc, builder, rhs, {K, J}); ++- mlir::Value productValue = ++- ProductFactory{loc, builder}.genAccumulateProduct( ++- resultElementValue, lhsElementValue, rhsElementValue); +++ mlir::Value productValue = genAccumulateProduct( +++ loc, builder, resultElementType, resultElementValue, +++ lhsElementValue, rhsElementValue); ++ builder.create(loc, productValue, resultElement); ++ return {}; ++ }; ++@@ -765,9 +785,9 @@ ++ hlfir::loadElementAt(loc, builder, lhs, {J, K}); ++ hlfir::Entity rhsElementValue = ++ 
hlfir::loadElementAt(loc, builder, rhs, {K}); ++- mlir::Value productValue = ++- ProductFactory{loc, builder}.genAccumulateProduct( ++- resultElementValue, lhsElementValue, rhsElementValue); +++ mlir::Value productValue = genAccumulateProduct( +++ loc, builder, resultElementType, resultElementValue, +++ lhsElementValue, rhsElementValue); ++ builder.create(loc, productValue, resultElement); ++ return {}; ++ }; ++@@ -797,9 +817,9 @@ ++ hlfir::loadElementAt(loc, builder, lhs, {K}); ++ hlfir::Entity rhsElementValue = ++ hlfir::loadElementAt(loc, builder, rhs, {K, J}); ++- mlir::Value productValue = ++- ProductFactory{loc, builder}.genAccumulateProduct( ++- resultElementValue, lhsElementValue, rhsElementValue); +++ mlir::Value productValue = genAccumulateProduct( +++ loc, builder, resultElementType, resultElementValue, +++ lhsElementValue, rhsElementValue); ++ builder.create(loc, productValue, resultElement); ++ return {}; ++ }; ++@@ -865,9 +885,9 @@ ++ hlfir::loadElementAt(loc, builder, lhs, lhsIndices); ++ hlfir::Entity rhsElementValue = ++ hlfir::loadElementAt(loc, builder, rhs, rhsIndices); ++- mlir::Value productValue = ++- ProductFactory{loc, builder}.genAccumulateProduct( ++- reductionArgs[0], lhsElementValue, rhsElementValue); +++ mlir::Value productValue = genAccumulateProduct( +++ loc, builder, resultElementType, reductionArgs[0], lhsElementValue, +++ rhsElementValue); ++ return {productValue}; ++ }; ++ llvm::SmallVector innerProductValue = ++@@ -884,73 +904,6 @@ ++ } ++ }; ++ ++-class DotProductConversion ++- : public mlir::OpRewritePattern { ++-public: ++- using mlir::OpRewritePattern::OpRewritePattern; ++- ++- llvm::LogicalResult ++- matchAndRewrite(hlfir::DotProductOp product, ++- mlir::PatternRewriter &rewriter) const override { ++- hlfir::Entity op = hlfir::Entity{product}; ++- if (!op.isScalar()) ++- return rewriter.notifyMatchFailure(product, "produces non-scalar result"); ++- ++- mlir::Location loc = product.getLoc(); ++- fir::FirOpBuilder builder{rewriter, product.getOperation()}; ++- hlfir::Entity lhs = hlfir::Entity{product.getLhs()}; ++- hlfir::Entity rhs = hlfir::Entity{product.getRhs()}; ++- mlir::Type resultElementType = product.getType(); ++- bool isUnordered = mlir::isa(resultElementType) || ++- mlir::isa(resultElementType) || ++- static_cast(builder.getFastMathFlags() & ++- mlir::arith::FastMathFlags::reassoc); ++- ++- mlir::Value extent = genProductExtent(loc, builder, lhs, rhs); ++- ++- auto genBody = [&](mlir::Location loc, fir::FirOpBuilder &builder, ++- mlir::ValueRange oneBasedIndices, ++- mlir::ValueRange reductionArgs) ++- -> llvm::SmallVector { ++- hlfir::Entity lhsElementValue = ++- hlfir::loadElementAt(loc, builder, lhs, oneBasedIndices); ++- hlfir::Entity rhsElementValue = ++- hlfir::loadElementAt(loc, builder, rhs, oneBasedIndices); ++- mlir::Value productValue = ++- ProductFactory{loc, builder}.genAccumulateProduct( ++- reductionArgs[0], lhsElementValue, rhsElementValue); ++- return {productValue}; ++- }; ++- ++- mlir::Value initValue = ++- fir::factory::createZeroValue(builder, loc, resultElementType); ++- ++- llvm::SmallVector result = hlfir::genLoopNestWithReductions( ++- loc, builder, {extent}, ++- /*reductionInits=*/{initValue}, genBody, isUnordered); ++- ++- rewriter.replaceOp(product, result[0]); ++- return mlir::success(); + - } +- if (const auto *value = llvm::dyn_cast_if_present(object)) +- return {value + index}; +- if (auto *operand = llvm::dyn_cast_if_present(object)) +-@@ -59,10 +48,6 @@ +- +- /// See 
`llvm::detail::indexed_accessor_range_base` for details. +- Type TypeRange::dereference_iterator(OwnerT object, ptrdiff_t index) { +-- if (auto type = llvm::dyn_cast_if_present(object)) { +-- assert(index == 0 && "cannot offset into single-value 'TypeRange'"); +-- return type; ++- ++-private: ++- static mlir::Value genProductExtent(mlir::Location loc, ++- fir::FirOpBuilder &builder, ++- hlfir::Entity input1, ++- hlfir::Entity input2) { ++- llvm::SmallVector input1Extents = ++- hlfir::genExtentsVector(loc, builder, input1); ++- llvm::SmallVector input2Extents = ++- hlfir::genExtentsVector(loc, builder, input2); ++- ++- assert(input1Extents.size() == 1 && input2Extents.size() == 1 && ++- "hlfir.dot_product arguments must be vectors"); ++- llvm::SmallVector extent = ++- fir::factory::deduceOptimalExtents(input1Extents, input2Extents); ++- return extent[0]; + - } +- if (const auto *value = llvm::dyn_cast_if_present(object)) +- return (value + index)->getType(); +- if (auto *operand = llvm::dyn_cast_if_present(object)) +-diff -ruN --strip-trailing-cr a/mlir/unittests/IR/OperationSupportTest.cpp b/mlir/unittests/IR/OperationSupportTest.cpp +---- a/mlir/unittests/IR/OperationSupportTest.cpp +-+++ b/mlir/unittests/IR/OperationSupportTest.cpp +-@@ -313,21 +313,4 @@ +- op2->destroy(); ++-}; ++- ++ class SimplifyHLFIRIntrinsics ++ : public hlfir::impl::SimplifyHLFIRIntrinsicsBase { ++ public: ++@@ -986,8 +939,6 @@ ++ if (forceMatmulAsElemental || this->allowNewSideEffects) ++ patterns.insert>(context); + -+-TEST(ValueRangeTest, ValueConstructable) { -+- MLIRContext context; -+- Builder builder(&context); ++- patterns.insert(context); ++- ++ if (mlir::failed(mlir::applyPatternsGreedily( ++ getOperation(), std::move(patterns), config))) { ++ mlir::emitError(getOperation()->getLoc(), ++diff -ruN --strip-trailing-cr a/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir ++--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir +++++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir ++@@ -1,144 +0,0 @@ ++-// Test hlfir.dot_product simplification to a reduction loop: ++-// RUN: fir-opt --simplify-hlfir-intrinsics %s | FileCheck %s ++- ++-func.func @dot_product_integer(%arg0: !hlfir.expr, %arg1: !hlfir.expr) -> i32 { ++- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr) -> i32 ++- return %res : i32 ++-} ++-// CHECK-LABEL: func.func @dot_product_integer( ++-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, ++-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr) -> i32 { ++-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index ++-// CHECK: %[[VAL_3:.*]] = arith.constant 0 : i32 ++-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> ++-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index ++-// CHECK: %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_8:.*]] = %[[VAL_3]]) -> (i32) { ++-// CHECK: %[[VAL_9:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]] : (!hlfir.expr, index) -> i16 ++-// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_7]] : (!hlfir.expr, index) -> i32 ++-// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_9]] : (i16) -> i32 ++-// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_11]], %[[VAL_10]] : i32 ++-// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_8]], %[[VAL_12]] : i32 ++-// CHECK: fir.result %[[VAL_13]] : i32 ++-// CHECK: } ++-// CHECK: return %[[VAL_6]] : i32 ++-// CHECK: } ++- 
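
Before the floating-point variant below, it is worth spelling out what the CHECK lines above encode: for integer operands the retired DotProductConversion emitted an unordered fir.do_loop carrying one iter_args accumulator, converting the narrower element up before the multiply-accumulate. A plain-C++ sketch of that reduction (an illustration of the lowering, not the compiler's actual output):

#include <cstdint>
#include <vector>

// One accumulator, i16 element widened to i32 before multiply-accumulate,
// mirroring the fir.convert + arith.muli + arith.addi sequence checked above.
int32_t dotProduct(const std::vector<int16_t> &lhs,
                   const std::vector<int32_t> &rhs) {
  int32_t acc = 0;  // reduction init value
  for (std::size_t i = 0; i < lhs.size() && i < rhs.size(); ++i)
    acc += static_cast<int32_t>(lhs[i]) * rhs[i];
  return acc;
}

int main() {
  std::vector<int16_t> a{1, 2, 3};
  std::vector<int32_t> b{4, 5, 6};
  return dotProduct(a, b) == 32 ? 0 : 1;
}

Integer addition is associative, hence the unordered on the loop above; the f32 test that follows keeps the loop ordered unless reassociating fast-math flags are set.
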
++-func.func @dot_product_real(%arg0: !hlfir.expr, %arg1: !hlfir.expr) -> f32 { ++- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr) -> f32 ++- return %res : f32 ++-} ++-// CHECK-LABEL: func.func @dot_product_real( ++-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, ++-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr) -> f32 { ++-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index ++-// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 ++-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> ++-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index ++-// CHECK: %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_8:.*]] = %[[VAL_3]]) -> (f32) { ++-// CHECK: %[[VAL_9:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]] : (!hlfir.expr, index) -> f32 ++-// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_7]] : (!hlfir.expr, index) -> f16 ++-// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (f16) -> f32 ++-// CHECK: %[[VAL_12:.*]] = arith.mulf %[[VAL_9]], %[[VAL_11]] : f32 ++-// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_12]] : f32 ++-// CHECK: fir.result %[[VAL_13]] : f32 ++-// CHECK: } ++-// CHECK: return %[[VAL_6]] : f32 ++-// CHECK: } ++- ++-func.func @dot_product_complex(%arg0: !hlfir.expr>, %arg1: !hlfir.expr>) -> complex { ++- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr>, !hlfir.expr>) -> complex ++- return %res : complex ++-} ++-// CHECK-LABEL: func.func @dot_product_complex( ++-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr>, ++-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> complex { ++-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index ++-// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 ++-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr>) -> !fir.shape<1> ++-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index ++-// CHECK: %[[VAL_6:.*]] = fir.undefined complex ++-// CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_6]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex ++-// CHECK: %[[VAL_8:.*]] = fir.insert_value %[[VAL_7]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex ++-// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (complex) { ++-// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex ++-// CHECK: %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex ++-// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (complex) -> complex ++-// CHECK: %[[VAL_15:.*]] = fir.extract_value %[[VAL_12]], [1 : index] : (complex) -> f32 ++-// CHECK: %[[VAL_16:.*]] = arith.negf %[[VAL_15]] : f32 ++-// CHECK: %[[VAL_17:.*]] = fir.insert_value %[[VAL_12]], %[[VAL_16]], [1 : index] : (complex, f32) -> complex ++-// CHECK: %[[VAL_18:.*]] = fir.mulc %[[VAL_17]], %[[VAL_14]] : complex ++-// CHECK: %[[VAL_19:.*]] = fir.addc %[[VAL_11]], %[[VAL_18]] : complex ++-// CHECK: fir.result %[[VAL_19]] : complex ++-// CHECK: } ++-// CHECK: return %[[VAL_9]] : complex ++-// CHECK: } +- -+- Operation *useOp = -+- createOp(&context, /*operands=*/std::nullopt, builder.getIntegerType(16)); -+- // Valid construction despite a temporary 'OpResult'. 
-+- ValueRange operands = useOp->getResult(0); ++-func.func @dot_product_real_complex(%arg0: !hlfir.expr, %arg1: !hlfir.expr>) -> complex { ++- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr>) -> complex ++- return %res : complex ++-} ++-// CHECK-LABEL: func.func @dot_product_real_complex( ++-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, ++-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> complex { ++-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index ++-// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 ++-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> ++-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index ++-// CHECK: %[[VAL_6:.*]] = fir.undefined complex ++-// CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_6]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex ++-// CHECK: %[[VAL_8:.*]] = fir.insert_value %[[VAL_7]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex ++-// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (complex) { ++-// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]] : (!hlfir.expr, index) -> f32 ++-// CHECK: %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex ++-// CHECK: %[[VAL_14:.*]] = fir.undefined complex ++-// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex ++-// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex ++-// CHECK: %[[VAL_17:.*]] = fir.insert_value %[[VAL_16]], %[[VAL_12]], [0 : index] : (complex, f32) -> complex ++-// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_13]] : (complex) -> complex ++-// CHECK: %[[VAL_19:.*]] = fir.extract_value %[[VAL_17]], [1 : index] : (complex) -> f32 ++-// CHECK: %[[VAL_20:.*]] = arith.negf %[[VAL_19]] : f32 ++-// CHECK: %[[VAL_21:.*]] = fir.insert_value %[[VAL_17]], %[[VAL_20]], [1 : index] : (complex, f32) -> complex ++-// CHECK: %[[VAL_22:.*]] = fir.mulc %[[VAL_21]], %[[VAL_18]] : complex ++-// CHECK: %[[VAL_23:.*]] = fir.addc %[[VAL_11]], %[[VAL_22]] : complex ++-// CHECK: fir.result %[[VAL_23]] : complex ++-// CHECK: } ++-// CHECK: return %[[VAL_9]] : complex ++-// CHECK: } ++- ++-func.func @dot_product_logical(%arg0: !hlfir.expr>, %arg1: !hlfir.expr>) -> !fir.logical<4> { ++- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr>, !hlfir.expr>) -> !fir.logical<4> ++- return %res : !fir.logical<4> ++-} ++-// CHECK-LABEL: func.func @dot_product_logical( ++-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr>, ++-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> !fir.logical<4> { ++-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index ++-// CHECK: %[[VAL_3:.*]] = arith.constant false ++-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr>) -> !fir.shape<1> ++-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index ++-// CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4> ++-// CHECK: %[[VAL_7:.*]] = fir.do_loop %[[VAL_8:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_9:.*]] = %[[VAL_6]]) -> (!fir.logical<4>) { ++-// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_8]] : (!hlfir.expr>, index) -> !fir.logical<1> ++-// CHECK: %[[VAL_11:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_8]] : (!hlfir.expr>, index) -> !fir.logical<4> ++-// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_9]] : 
(!fir.logical<4>) -> i1 ++-// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_10]] : (!fir.logical<1>) -> i1 ++-// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1 ++-// CHECK: %[[VAL_15:.*]] = arith.andi %[[VAL_13]], %[[VAL_14]] : i1 ++-// CHECK: %[[VAL_16:.*]] = arith.ori %[[VAL_12]], %[[VAL_15]] : i1 ++-// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4> ++-// CHECK: fir.result %[[VAL_17]] : !fir.logical<4> ++-// CHECK: } ++-// CHECK: return %[[VAL_7]] : !fir.logical<4> ++-// CHECK: } ++- ++-func.func @dot_product_known_dim(%arg0: !hlfir.expr<10xf32>, %arg1: !hlfir.expr) -> f32 { ++- %res1 = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr<10xf32>, !hlfir.expr) -> f32 ++- %res2 = hlfir.dot_product %arg1 %arg0 : (!hlfir.expr, !hlfir.expr<10xf32>) -> f32 ++- %res = arith.addf %res1, %res2 : f32 ++- return %res : f32 ++-} ++-// CHECK-LABEL: func.func @dot_product_known_dim( ++-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index ++-// CHECK: %[[VAL_4:.*]] = arith.constant 10 : index ++-// CHECK: fir.do_loop %{{.*}} = %[[VAL_2]] to %[[VAL_4]] step %[[VAL_2]] ++-// CHECK: fir.do_loop %{{.*}} = %[[VAL_2]] to %[[VAL_4]] step %[[VAL_2]] ++diff -ruN --strip-trailing-cr a/libcxx/include/__config b/libcxx/include/__config ++--- a/libcxx/include/__config +++++ b/libcxx/include/__config ++@@ -1166,9 +1166,7 @@ ++ # define _LIBCPP_NOESCAPE ++ # endif ++ ++-// FIXME: Expand this to [[__gnu__::__nodebug__]] again once the testcase reported in ++-// https://github.com/llvm/llvm-project/pull/118710 has been analyzed ++-# define _LIBCPP_NODEBUG +++# define _LIBCPP_NODEBUG [[__gnu__::__nodebug__]] ++ ++ # if __has_attribute(__standalone_debug__) ++ # define _LIBCPP_STANDALONE_DEBUG __attribute__((__standalone_debug__)) ++diff -ruN --strip-trailing-cr a/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp b/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp ++--- a/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp +++++ b/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp ++@@ -27,7 +27,7 @@ ++ check_factories.registerCheck("libcpp-header-exportable-declarations"); ++ check_factories.registerCheck("libcpp-hide-from-abi"); ++ check_factories.registerCheck("libcpp-internal-ftms"); ++- // check_factories.registerCheck("libcpp-nodebug-on-aliases"); +++ check_factories.registerCheck("libcpp-nodebug-on-aliases"); ++ check_factories.registerCheck("libcpp-cpp-version-check"); ++ check_factories.registerCheck("libcpp-robust-against-adl"); ++ check_factories.registerCheck("libcpp-uglify-attributes"); ++diff -ruN --strip-trailing-cr a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp ++--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp ++@@ -1140,8 +1140,6 @@ ++ ++ setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); ++ ++- setTargetDAGCombine(ISD::SHL); +- -+- useOp->setOperands(operands); -+- EXPECT_EQ(useOp->getNumOperands(), 1u); -+- EXPECT_EQ(useOp->getOperand(0), useOp->getResult(0)); ++ // In case of strict alignment, avoid an excessive number of byte wide stores. ++ MaxStoresPerMemsetOptSize = 8; ++ MaxStoresPerMemset = ++@@ -26473,43 +26471,6 @@ ++ return NVCAST; + } + +--TEST(ValueRangeTest, ValueConstructable) { +-- MLIRContext context; +-- Builder builder(&context); ++-/// If the operand is a bitwise AND with a constant RHS, and the shift has a ++-/// constant RHS and is the only use, we can pull it out of the shift, i.e. 
++-/// ++-/// (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2)) ++-/// ++-/// We prefer this canonical form to match existing isel patterns. ++-static SDValue performSHLCombine(SDNode *N, ++- TargetLowering::DAGCombinerInfo &DCI, ++- SelectionDAG &DAG) { ++- if (DCI.isBeforeLegalizeOps()) ++- return SDValue(); + - +-- Operation *useOp = +-- createOp(&context, /*operands=*/std::nullopt, builder.getIntegerType(16)); +-- // Valid construction despite a temporary 'OpResult'. +-- ValueRange operands = useOp->getResult(0); ++- SDValue Op0 = N->getOperand(0); ++- if (Op0.getOpcode() != ISD::AND || !Op0.hasOneUse()) ++- return SDValue(); + - +-- useOp->setOperands(operands); +-- EXPECT_EQ(useOp->getNumOperands(), 1u); +-- EXPECT_EQ(useOp->getOperand(0), useOp->getResult(0)); ++- SDValue C1 = Op0->getOperand(1); ++- SDValue C2 = N->getOperand(1); ++- if (!isa(C1) || !isa(C2)) ++- return SDValue(); + - +-- useOp->dropAllUses(); +-- useOp->destroy(); ++- // Might be folded into shifted op, do not lower. ++- if (N->hasOneUse()) { ++- unsigned UseOpc = N->user_begin()->getOpcode(); ++- if (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC || ++- UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS) ++- return SDValue(); ++- } +- -+- useOp->dropAllUses(); -+- useOp->destroy(); ++- SDLoc DL(N); ++- EVT VT = N->getValueType(0); ++- SDValue X = Op0->getOperand(0); ++- SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, C1, C2); ++- SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, X, C2); ++- return DAG.getNode(ISD::AND, DL, VT, NewShift, NewRHS); +-} +- -+ } // namespace -+diff -ruN --strip-trailing-cr a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel -+--- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel -++++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel -+@@ -43,10 +43,7 @@ -+ -+ gentbl( -+ name = "diagnostic_defs_gen", -+- tbl_outs = [( -+- "-gen-clang-diags-defs -clang-component=%s" % c, -+- "include/clang/Basic/Diagnostic%sKinds.inc" % c, -+- ) for c in [ -++ tbl_outs = [out for c in [ -+ "AST", -+ "Analysis", -+ "Comment", -+@@ -60,6 +57,15 @@ -+ "Refactoring", -+ "Sema", -+ "Serialization", -++ ] for out in [ -++ ( -++ "-gen-clang-diags-defs -clang-component=%s" % c, -++ "include/clang/Basic/Diagnostic%sKinds.inc" % c, -++ ), -++ ( -++ "-gen-clang-diags-enums -clang-component=%s" % c, -++ "include/clang/Basic/Diagnostic%sEnums.inc" % c, -++ ), -+ ]] + [ -+ ( -+ "-gen-clang-diag-groups", ++ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, ++ DAGCombinerInfo &DCI) const { ++ SelectionDAG &DAG = DCI.DAG; ++@@ -26855,8 +26816,6 @@ ++ return performCTLZCombine(N, DAG, Subtarget); ++ case ISD::SCALAR_TO_VECTOR: ++ return performScalarToVectorCombine(N, DCI, DAG); ++- case ISD::SHL: ++- return performSHLCombine(N, DCI, DAG); ++ } ++ return SDValue(); ++ } ++diff -ruN --strip-trailing-cr a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp ++--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp ++@@ -4979,7 +4979,7 @@ ++ // the subvector length. 
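
The one-line SLPVectorizer change above is about how much of the shuffle mask receives the identity mapping before the subvector lanes are overlaid. A standalone sketch of the mask construction, reusing the hunk's names for clarity (an illustration, not the LLVM sources):

#include <cstddef>
#include <numeric>
#include <vector>

// Lane i of a VecVF-wide shuffle picks element Mask[i]: indices 0..VecVF-1
// refer to the original vector, VecVF.. to the subvector inserted at Index.
std::vector<int> insertSubvectorMask(std::size_t VecVF, std::size_t SubVecVF,
                                     std::size_t Index) {
  std::vector<int> Mask(VecVF);
  // Identity over the whole mask, so lanes past the inserted subvector keep
  // the original vector's elements; the earlier code filled only [0, Index)
  // and left the tail as poison sentinels.
  std::iota(Mask.begin(), Mask.end(), 0);
  for (std::size_t I = 0; I < SubVecVF; ++I)
    Mask[I + Index] = static_cast<int>(I + VecVF);
  return Mask;
}

int main() {
  // VecVF=8, SubVecVF=2, Index=4 -> {0, 1, 2, 3, 8, 9, 6, 7}
  std::vector<int> m = insertSubvectorMask(8, 2, 4);
  std::vector<int> expect{0, 1, 2, 3, 8, 9, 6, 7};
  return m == expect ? 0 : 1;
}
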
++ const unsigned VecVF = getNumElements(Vec->getType()); ++ SmallVector Mask(VecVF, PoisonMaskElem); ++- std::iota(Mask.begin(), std::next(Mask.begin(), Index), 0); +++ std::iota(Mask.begin(), Mask.end(), 0); ++ for (unsigned I : seq(SubVecVF)) ++ Mask[I + Index] = I + VecVF; ++ if (Generator) { ++@@ -13956,11 +13956,12 @@ ++ Instruction *InsElt; ++ if (auto *VecTy = dyn_cast(Scalar->getType())) { ++ assert(SLPReVec && "FixedVectorType is not expected."); ++- Vec = InsElt = cast(createInsertVector( ++- Builder, Vec, Scalar, Pos * getNumElements(VecTy))); ++- auto *II = dyn_cast(InsElt); +++ Vec = +++ createInsertVector(Builder, Vec, Scalar, Pos * getNumElements(VecTy)); +++ auto *II = dyn_cast(Vec); ++ if (!II || II->getIntrinsicID() != Intrinsic::vector_insert) ++ return Vec; +++ InsElt = II; ++ } else { ++ Vec = Builder.CreateInsertElement(Vec, Scalar, Builder.getInt32(Pos)); ++ InsElt = dyn_cast(Vec); ++diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll ++--- a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll +++++ b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll ++@@ -190,7 +190,8 @@ ++ define i8 @test_i8_7_mask_shl_1(i8 %a0) { ++ ; CHECK-LABEL: test_i8_7_mask_shl_1: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: ubfiz w0, w0, #1, #3 +++; CHECK-NEXT: and w8, w0, #0x7 +++; CHECK-NEXT: lsl w0, w8, #1 ++ ; CHECK-NEXT: ret ++ %t0 = and i8 %a0, 7 ++ %t1 = shl i8 %t0, 1 ++@@ -199,7 +200,8 @@ ++ define i8 @test_i8_7_mask_shl_4(i8 %a0) { ++ ; CHECK-LABEL: test_i8_7_mask_shl_4: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: ubfiz w0, w0, #4, #3 +++; CHECK-NEXT: and w8, w0, #0x7 +++; CHECK-NEXT: lsl w0, w8, #4 ++ ; CHECK-NEXT: ret ++ %t0 = and i8 %a0, 7 ++ %t1 = shl i8 %t0, 4 ++@@ -227,8 +229,8 @@ ++ define i8 @test_i8_28_mask_shl_1(i8 %a0) { ++ ; CHECK-LABEL: test_i8_28_mask_shl_1: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl w8, w0, #1 ++-; CHECK-NEXT: and w0, w8, #0x38 +++; CHECK-NEXT: and w8, w0, #0x1c +++; CHECK-NEXT: lsl w0, w8, #1 ++ ; CHECK-NEXT: ret ++ %t0 = and i8 %a0, 28 ++ %t1 = shl i8 %t0, 1 ++@@ -237,8 +239,8 @@ ++ define i8 @test_i8_28_mask_shl_2(i8 %a0) { ++ ; CHECK-LABEL: test_i8_28_mask_shl_2: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl w8, w0, #2 ++-; CHECK-NEXT: and w0, w8, #0x70 +++; CHECK-NEXT: and w8, w0, #0x1c +++; CHECK-NEXT: lsl w0, w8, #2 ++ ; CHECK-NEXT: ret ++ %t0 = and i8 %a0, 28 ++ %t1 = shl i8 %t0, 2 ++@@ -247,8 +249,8 @@ ++ define i8 @test_i8_28_mask_shl_3(i8 %a0) { ++ ; CHECK-LABEL: test_i8_28_mask_shl_3: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl w8, w0, #3 ++-; CHECK-NEXT: and w0, w8, #0xe0 +++; CHECK-NEXT: and w8, w0, #0x1c +++; CHECK-NEXT: lsl w0, w8, #3 ++ ; CHECK-NEXT: ret ++ %t0 = and i8 %a0, 28 ++ %t1 = shl i8 %t0, 3 ++@@ -257,8 +259,8 @@ ++ define i8 @test_i8_28_mask_shl_4(i8 %a0) { ++ ; CHECK-LABEL: test_i8_28_mask_shl_4: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl w8, w0, #4 ++-; CHECK-NEXT: and w0, w8, #0xc0 +++; CHECK-NEXT: and w8, w0, #0xc +++; CHECK-NEXT: lsl w0, w8, #4 ++ ; CHECK-NEXT: ret ++ %t0 = and i8 %a0, 28 ++ %t1 = shl i8 %t0, 4 ++@@ -268,8 +270,8 @@ ++ define i8 @test_i8_224_mask_shl_1(i8 %a0) { ++ ; CHECK-LABEL: test_i8_224_mask_shl_1: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl w8, w0, #1 ++-; CHECK-NEXT: and w0, w8, #0xc0 +++; CHECK-NEXT: and w8, w0, #0x60 +++; CHECK-NEXT: lsl w0, w8, #1 ++ ; CHECK-NEXT: ret ++ %t0 = and i8 %a0, 224 ++ %t1 = shl i8 %t0, 1 ++@@ -463,7 +465,8 @@ ++ define i16 @test_i16_127_mask_shl_1(i16 %a0) { ++ ; CHECK-LABEL: 
test_i16_127_mask_shl_1: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: ubfiz w0, w0, #1, #7 +++; CHECK-NEXT: and w8, w0, #0x7f +++; CHECK-NEXT: lsl w0, w8, #1 ++ ; CHECK-NEXT: ret ++ %t0 = and i16 %a0, 127 ++ %t1 = shl i16 %t0, 1 ++@@ -472,7 +475,8 @@ ++ define i16 @test_i16_127_mask_shl_8(i16 %a0) { ++ ; CHECK-LABEL: test_i16_127_mask_shl_8: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: ubfiz w0, w0, #8, #7 +++; CHECK-NEXT: and w8, w0, #0x7f +++; CHECK-NEXT: lsl w0, w8, #8 ++ ; CHECK-NEXT: ret ++ %t0 = and i16 %a0, 127 ++ %t1 = shl i16 %t0, 8 ++@@ -500,8 +504,8 @@ ++ define i16 @test_i16_2032_mask_shl_3(i16 %a0) { ++ ; CHECK-LABEL: test_i16_2032_mask_shl_3: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl w8, w0, #3 ++-; CHECK-NEXT: and w0, w8, #0x3f80 +++; CHECK-NEXT: and w8, w0, #0x7f0 +++; CHECK-NEXT: lsl w0, w8, #3 ++ ; CHECK-NEXT: ret ++ %t0 = and i16 %a0, 2032 ++ %t1 = shl i16 %t0, 3 ++@@ -510,8 +514,8 @@ ++ define i16 @test_i16_2032_mask_shl_4(i16 %a0) { ++ ; CHECK-LABEL: test_i16_2032_mask_shl_4: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl w8, w0, #4 ++-; CHECK-NEXT: and w0, w8, #0x7f00 +++; CHECK-NEXT: and w8, w0, #0x7f0 +++; CHECK-NEXT: lsl w0, w8, #4 ++ ; CHECK-NEXT: ret ++ %t0 = and i16 %a0, 2032 ++ %t1 = shl i16 %t0, 4 ++@@ -520,8 +524,8 @@ ++ define i16 @test_i16_2032_mask_shl_5(i16 %a0) { ++ ; CHECK-LABEL: test_i16_2032_mask_shl_5: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl w8, w0, #5 ++-; CHECK-NEXT: and w0, w8, #0xfe00 +++; CHECK-NEXT: and w8, w0, #0x7f0 +++; CHECK-NEXT: lsl w0, w8, #5 ++ ; CHECK-NEXT: ret ++ %t0 = and i16 %a0, 2032 ++ %t1 = shl i16 %t0, 5 ++@@ -530,8 +534,8 @@ ++ define i16 @test_i16_2032_mask_shl_6(i16 %a0) { ++ ; CHECK-LABEL: test_i16_2032_mask_shl_6: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl w8, w0, #6 ++-; CHECK-NEXT: and w0, w8, #0xfc00 +++; CHECK-NEXT: and w8, w0, #0x3f0 +++; CHECK-NEXT: lsl w0, w8, #6 ++ ; CHECK-NEXT: ret ++ %t0 = and i16 %a0, 2032 ++ %t1 = shl i16 %t0, 6 ++@@ -541,8 +545,8 @@ ++ define i16 @test_i16_65024_mask_shl_1(i16 %a0) { ++ ; CHECK-LABEL: test_i16_65024_mask_shl_1: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl w8, w0, #1 ++-; CHECK-NEXT: and w0, w8, #0xfc00 +++; CHECK-NEXT: and w8, w0, #0x7e00 +++; CHECK-NEXT: lsl w0, w8, #1 ++ ; CHECK-NEXT: ret ++ %t0 = and i16 %a0, 65024 ++ %t1 = shl i16 %t0, 1 ++@@ -736,7 +740,8 @@ ++ define i32 @test_i32_32767_mask_shl_1(i32 %a0) { ++ ; CHECK-LABEL: test_i32_32767_mask_shl_1: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: ubfiz w0, w0, #1, #15 +++; CHECK-NEXT: and w8, w0, #0x7fff +++; CHECK-NEXT: lsl w0, w8, #1 ++ ; CHECK-NEXT: ret ++ %t0 = and i32 %a0, 32767 ++ %t1 = shl i32 %t0, 1 ++@@ -745,7 +750,8 @@ ++ define i32 @test_i32_32767_mask_shl_16(i32 %a0) { ++ ; CHECK-LABEL: test_i32_32767_mask_shl_16: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: ubfiz w0, w0, #16, #15 +++; CHECK-NEXT: and w8, w0, #0x7fff +++; CHECK-NEXT: lsl w0, w8, #16 ++ ; CHECK-NEXT: ret ++ %t0 = and i32 %a0, 32767 ++ %t1 = shl i32 %t0, 16 ++@@ -773,8 +779,8 @@ ++ define i32 @test_i32_8388352_mask_shl_7(i32 %a0) { ++ ; CHECK-LABEL: test_i32_8388352_mask_shl_7: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl w8, w0, #7 ++-; CHECK-NEXT: and w0, w8, #0x3fff8000 +++; CHECK-NEXT: and w8, w0, #0x7fff00 +++; CHECK-NEXT: lsl w0, w8, #7 ++ ; CHECK-NEXT: ret ++ %t0 = and i32 %a0, 8388352 ++ %t1 = shl i32 %t0, 7 ++@@ -783,8 +789,8 @@ ++ define i32 @test_i32_8388352_mask_shl_8(i32 %a0) { ++ ; CHECK-LABEL: test_i32_8388352_mask_shl_8: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl w8, w0, #8 ++-; CHECK-NEXT: and w0, w8, #0x7fff0000 +++; CHECK-NEXT: and w8, w0, #0x7fff00 +++; 
CHECK-NEXT: lsl w0, w8, #8 ++ ; CHECK-NEXT: ret ++ %t0 = and i32 %a0, 8388352 ++ %t1 = shl i32 %t0, 8 ++@@ -793,8 +799,8 @@ ++ define i32 @test_i32_8388352_mask_shl_9(i32 %a0) { ++ ; CHECK-LABEL: test_i32_8388352_mask_shl_9: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl w8, w0, #9 ++-; CHECK-NEXT: and w0, w8, #0xfffe0000 +++; CHECK-NEXT: and w8, w0, #0x7fff00 +++; CHECK-NEXT: lsl w0, w8, #9 ++ ; CHECK-NEXT: ret ++ %t0 = and i32 %a0, 8388352 ++ %t1 = shl i32 %t0, 9 ++@@ -803,8 +809,8 @@ ++ define i32 @test_i32_8388352_mask_shl_10(i32 %a0) { ++ ; CHECK-LABEL: test_i32_8388352_mask_shl_10: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl w8, w0, #10 ++-; CHECK-NEXT: and w0, w8, #0xfffc0000 +++; CHECK-NEXT: and w8, w0, #0x3fff00 +++; CHECK-NEXT: lsl w0, w8, #10 ++ ; CHECK-NEXT: ret ++ %t0 = and i32 %a0, 8388352 ++ %t1 = shl i32 %t0, 10 ++@@ -814,8 +820,8 @@ ++ define i32 @test_i32_4294836224_mask_shl_1(i32 %a0) { ++ ; CHECK-LABEL: test_i32_4294836224_mask_shl_1: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl w8, w0, #1 ++-; CHECK-NEXT: and w0, w8, #0xfffc0000 +++; CHECK-NEXT: and w8, w0, #0x7ffe0000 +++; CHECK-NEXT: lsl w0, w8, #1 ++ ; CHECK-NEXT: ret ++ %t0 = and i32 %a0, 4294836224 ++ %t1 = shl i32 %t0, 1 ++@@ -1009,7 +1015,8 @@ ++ define i64 @test_i64_2147483647_mask_shl_1(i64 %a0) { ++ ; CHECK-LABEL: test_i64_2147483647_mask_shl_1: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl w0, w0, #1 +++; CHECK-NEXT: and x8, x0, #0x7fffffff +++; CHECK-NEXT: lsl x0, x8, #1 ++ ; CHECK-NEXT: ret ++ %t0 = and i64 %a0, 2147483647 ++ %t1 = shl i64 %t0, 1 ++@@ -1047,8 +1054,8 @@ ++ define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) { ++ ; CHECK-LABEL: test_i64_140737488289792_mask_shl_15: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl x8, x0, #15 ++-; CHECK-NEXT: and x0, x8, #0x3fffffff80000000 +++; CHECK-NEXT: and x8, x0, #0x7fffffff0000 +++; CHECK-NEXT: lsl x0, x8, #15 ++ ; CHECK-NEXT: ret ++ %t0 = and i64 %a0, 140737488289792 ++ %t1 = shl i64 %t0, 15 ++@@ -1057,8 +1064,8 @@ ++ define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) { ++ ; CHECK-LABEL: test_i64_140737488289792_mask_shl_16: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl x8, x0, #16 ++-; CHECK-NEXT: and x0, x8, #0x7fffffff00000000 +++; CHECK-NEXT: and x8, x0, #0x7fffffff0000 +++; CHECK-NEXT: lsl x0, x8, #16 ++ ; CHECK-NEXT: ret ++ %t0 = and i64 %a0, 140737488289792 ++ %t1 = shl i64 %t0, 16 ++@@ -1067,8 +1074,8 @@ ++ define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) { ++ ; CHECK-LABEL: test_i64_140737488289792_mask_shl_17: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl x8, x0, #17 ++-; CHECK-NEXT: and x0, x8, #0xfffffffe00000000 +++; CHECK-NEXT: and x8, x0, #0x7fffffff0000 +++; CHECK-NEXT: lsl x0, x8, #17 ++ ; CHECK-NEXT: ret ++ %t0 = and i64 %a0, 140737488289792 ++ %t1 = shl i64 %t0, 17 ++@@ -1077,8 +1084,8 @@ ++ define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) { ++ ; CHECK-LABEL: test_i64_140737488289792_mask_shl_18: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl x8, x0, #18 ++-; CHECK-NEXT: and x0, x8, #0xfffffffc00000000 +++; CHECK-NEXT: and x8, x0, #0x3fffffff0000 +++; CHECK-NEXT: lsl x0, x8, #18 ++ ; CHECK-NEXT: ret ++ %t0 = and i64 %a0, 140737488289792 ++ %t1 = shl i64 %t0, 18 ++@@ -1088,8 +1095,8 @@ ++ define i64 @test_i64_18446744065119617024_mask_shl_1(i64 %a0) { ++ ; CHECK-LABEL: test_i64_18446744065119617024_mask_shl_1: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl x8, x0, #1 ++-; CHECK-NEXT: and x0, x8, #0xfffffffc00000000 +++; CHECK-NEXT: and x8, x0, #0x7ffffffe00000000 +++; CHECK-NEXT: lsl x0, x8, #1 ++ ; CHECK-NEXT: ret ++ %t0 = and i64 
%a0, 18446744065119617024 ++ %t1 = shl i64 %t0, 1 ++diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll ++--- a/llvm/test/CodeGen/AArch64/extract-bits.ll +++++ b/llvm/test/CodeGen/AArch64/extract-bits.ll ++@@ -1013,8 +1013,8 @@ ++ define i32 @c2_i32(i32 %arg) nounwind { ++ ; CHECK-LABEL: c2_i32: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsr w8, w0, #17 ++-; CHECK-NEXT: and w0, w8, #0xffc +++; CHECK-NEXT: ubfx w8, w0, #19, #10 +++; CHECK-NEXT: lsl w0, w8, #2 ++ ; CHECK-NEXT: ret ++ %tmp0 = lshr i32 %arg, 19 ++ %tmp1 = and i32 %tmp0, 1023 ++@@ -1063,8 +1063,8 @@ ++ define i64 @c2_i64(i64 %arg) nounwind { ++ ; CHECK-LABEL: c2_i64: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsr x8, x0, #49 ++-; CHECK-NEXT: and x0, x8, #0xffc +++; CHECK-NEXT: ubfx x8, x0, #51, #10 +++; CHECK-NEXT: lsl x0, x8, #2 ++ ; CHECK-NEXT: ret ++ %tmp0 = lshr i64 %arg, 51 ++ %tmp1 = and i64 %tmp0, 1023 ++@@ -1120,8 +1120,8 @@ ++ define void @c7_i32(i32 %arg, ptr %ptr) nounwind { ++ ; CHECK-LABEL: c7_i32: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsr w8, w0, #17 ++-; CHECK-NEXT: and w8, w8, #0xffc +++; CHECK-NEXT: ubfx w8, w0, #19, #10 +++; CHECK-NEXT: lsl w8, w8, #2 ++ ; CHECK-NEXT: str w8, [x1] ++ ; CHECK-NEXT: ret ++ %tmp0 = lshr i32 %arg, 19 ++@@ -1163,8 +1163,8 @@ ++ define void @c7_i64(i64 %arg, ptr %ptr) nounwind { ++ ; CHECK-LABEL: c7_i64: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsr x8, x0, #49 ++-; CHECK-NEXT: and x8, x8, #0xffc +++; CHECK-NEXT: ubfx x8, x0, #51, #10 +++; CHECK-NEXT: lsl x8, x8, #2 ++ ; CHECK-NEXT: str x8, [x1] ++ ; CHECK-NEXT: ret ++ %tmp0 = lshr i64 %arg, 51 ++diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/fpenv.ll b/llvm/test/CodeGen/AArch64/fpenv.ll ++--- a/llvm/test/CodeGen/AArch64/fpenv.ll +++++ b/llvm/test/CodeGen/AArch64/fpenv.ll ++@@ -4,11 +4,11 @@ ++ define void @func_set_rounding_dyn(i32 %rm) { ++ ; CHECK-LABEL: func_set_rounding_dyn: ++ ; CHECK: // %bb.0: ++-; CHECK-NEXT: lsl w9, w0, #22 +++; CHECK-NEXT: sub w9, w0, #1 ++ ; CHECK-NEXT: mrs x8, FPCR +++; CHECK-NEXT: and w9, w9, #0x3 ++ ; CHECK-NEXT: and x8, x8, #0xffffffffff3fffff ++-; CHECK-NEXT: sub w9, w9, #1024, lsl #12 // =4194304 ++-; CHECK-NEXT: and w9, w9, #0xc00000 +++; CHECK-NEXT: lsl w9, w9, #22 ++ ; CHECK-NEXT: orr x8, x8, x9 ++ ; CHECK-NEXT: msr FPCR, x8 ++ ; CHECK-NEXT: ret ++diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/xbfiz.ll b/llvm/test/CodeGen/AArch64/xbfiz.ll ++--- a/llvm/test/CodeGen/AArch64/xbfiz.ll +++++ b/llvm/test/CodeGen/AArch64/xbfiz.ll ++@@ -69,19 +69,3 @@ ++ %and = and i64 %shl, 4294967295 ++ ret i64 %and ++ } ++- ++-define i64 @lsl_zext_i8_i64(i8 %b) { ++-; CHECK-LABEL: lsl_zext_i8_i64: ++-; CHECK: ubfiz x0, x0, #1, #8 ++- %1 = zext i8 %b to i64 ++- %2 = shl i64 %1, 1 ++- ret i64 %2 + -} + - +- } // namespace +-diff -ruN --strip-trailing-cr a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +---- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +-+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +-@@ -43,10 +43,7 @@ +- +- gentbl( +- name = "diagnostic_defs_gen", +-- tbl_outs = [( +-- "-gen-clang-diags-defs -clang-component=%s" % c, +-- "include/clang/Basic/Diagnostic%sKinds.inc" % c, +-- ) for c in [ +-+ tbl_outs = [out for c in [ +- "AST", +- "Analysis", +- "Comment", +-@@ -60,6 +57,15 @@ +- "Refactoring", +- "Sema", +- "Serialization", +-+ ] for out in [ +-+ ( +-+ "-gen-clang-diags-defs -clang-component=%s" % c, +-+ "include/clang/Basic/Diagnostic%sKinds.inc" % 
c, +-+ ), +-+ ( +-+ "-gen-clang-diags-enums -clang-component=%s" % c, +-+ "include/clang/Basic/Diagnostic%sEnums.inc" % c, +-+ ), +- ]] + [ +- ( +- "-gen-clang-diag-groups", ++-define i64 @lsl_zext_i16_i64(i16 %b) { ++-; CHECK-LABEL: lsl_zext_i16_i64: ++-; CHECK: ubfiz x0, x0, #1, #16 ++- %1 = zext i16 %b to i64 ++- %2 = shl i64 %1, 1 ++- ret i64 %2 ++-} ++diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll ++--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll +++++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll ++@@ -0,0 +1,81 @@ +++; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +++; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s +++ +++define <16 x double> @test(ptr %x, double %v, double %a) { +++; CHECK-LABEL: define <16 x double> @test( +++; CHECK-SAME: ptr [[X:%.*]], double [[V:%.*]], double [[A:%.*]]) { +++; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8 +++; CHECK-NEXT: [[GEP8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 9 +++; CHECK-NEXT: [[TMP1:%.*]] = load <6 x double>, ptr [[X]], align 4 +++; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, ptr [[GEP6]], align 4 +++; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr [[GEP8]], align 4 +++; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x double> poison, double [[A]], i32 0 +++; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x double> [[TMP4]], <16 x double> poison, <16 x i32> zeroinitializer +++; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> poison, double [[V]], i32 0 +++; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <4 x i32> zeroinitializer +++; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[V]], i32 0 +++; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> poison, <2 x i32> zeroinitializer +++; CHECK-NEXT: [[TMP10:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v6f64(<16 x double> poison, <6 x double> [[TMP1]], i64 0) +++; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <16 x i32> +++; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x double> [[TMP10]], <16 x double> [[TMP11]], <16 x i32> +++; CHECK-NEXT: [[TMP13:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP12]], <2 x double> [[TMP6]], i64 6) +++; CHECK-NEXT: [[TMP14:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP13]], <2 x double> [[TMP7]], i64 8) +++; CHECK-NEXT: [[TMP15:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP14]], <2 x double> [[TMP9]], i64 10) +++; CHECK-NEXT: [[TMP16:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP15]], <2 x double> [[TMP9]], i64 12) +++; CHECK-NEXT: [[TMP17:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP16]], <2 x double> [[TMP9]], i64 14) +++; CHECK-NEXT: [[TMP18:%.*]] = fadd <16 x double> [[TMP5]], [[TMP17]] +++; CHECK-NEXT: ret <16 x double> [[TMP18]] +++; +++ %gep1 = getelementptr inbounds double, ptr %x, i64 1 +++ %gep2 = getelementptr inbounds double, ptr %x, i64 2 +++ %gep3 = getelementptr inbounds double, ptr %x, i64 3 +++ %gep4 = getelementptr inbounds double, ptr %x, i64 4 +++ %gep5 = getelementptr inbounds double, ptr %x, i64 5 +++ %gep6 = getelementptr inbounds double, ptr %x, i64 8 +++ %gep7 = 
getelementptr inbounds double, ptr %x, i64 9 +++ %gep8 = getelementptr inbounds double, ptr %x, i64 9 +++ %gep9 = getelementptr inbounds double, ptr %x, i64 10 +++ %x0 = load double, ptr %x, align 4 +++ %x1 = load double, ptr %gep1, align 4 +++ %x2 = load double, ptr %gep2, align 4 +++ %x3 = load double, ptr %gep3, align 4 +++ %x4 = load double, ptr %gep4, align 4 +++ %x5 = load double, ptr %gep5, align 4 +++ %x6 = load double, ptr %gep6, align 4 +++ %x7 = load double, ptr %gep7, align 4 +++ %x8 = load double, ptr %gep8, align 4 +++ %x9 = load double, ptr %gep9, align 4 +++ %add1 = fadd double %a, %x0 +++ %add2 = fadd double %a, %x1 +++ %add3 = fadd double %a, %x2 +++ %add4 = fadd double %a, %x3 +++ %add5 = fadd double %a, %x4 +++ %add6 = fadd double %a, %x5 +++ %add7 = fadd double %a, %x6 +++ %add8 = fadd double %a, %x7 +++ %add9 = fadd double %a, %x8 +++ %add10 = fadd double %a, %x9 +++ %add11 = fadd double %a, %v +++ %add12 = fadd double %a, %v +++ %add13 = fadd double %a, %v +++ %add14 = fadd double %a, %v +++ %add15 = fadd double %a, %v +++ %add16 = fadd double %a, %v +++ %i0 = insertelement <16 x double> poison, double %add1, i32 0 +++ %i1 = insertelement <16 x double> %i0, double %add2, i32 1 +++ %i2 = insertelement <16 x double> %i1, double %add3, i32 2 +++ %i3 = insertelement <16 x double> %i2, double %add4, i32 3 +++ %i4 = insertelement <16 x double> %i3, double %add5, i32 4 +++ %i5 = insertelement <16 x double> %i4, double %add6, i32 5 +++ %i6 = insertelement <16 x double> %i5, double %add7, i32 6 +++ %i7 = insertelement <16 x double> %i6, double %add8, i32 7 +++ %i8 = insertelement <16 x double> %i7, double %add9, i32 8 +++ %i9 = insertelement <16 x double> %i8, double %add10, i32 9 +++ %i10 = insertelement <16 x double> %i9, double %add11, i32 10 +++ %i11 = insertelement <16 x double> %i10, double %add12, i32 11 +++ %i12 = insertelement <16 x double> %i11, double %add13, i32 12 +++ %i13 = insertelement <16 x double> %i12, double %add14, i32 13 +++ %i14 = insertelement <16 x double> %i13, double %add15, i32 14 +++ %i15 = insertelement <16 x double> %i14, double %add16, i32 15 +++ ret <16 x double> %i15 +++} diff --git a/third_party/llvm/workspace.bzl b/third_party/llvm/workspace.bzl -index 4602e35..4706c63 100644 +index 4706c63..cb09291 100644 --- a/third_party/llvm/workspace.bzl +++ b/third_party/llvm/workspace.bzl @@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive") def repo(name): """Imports LLVM.""" -- LLVM_COMMIT = "c24ce324d56328e4b91c8797ea4935545084303e" -- LLVM_SHA256 = "ef9f02427de91c37b2315203fc60fa71cac5caa385860fd2a1daa620b4867091" -+ LLVM_COMMIT = "bf17016a92bc8a23d2cdd2b51355dd4eb5019c68" -+ LLVM_SHA256 = "ba09f12e5019f5aca531b1733275f0a10b181d6f894deb1a4610e017f76b172a" +- LLVM_COMMIT = "bf17016a92bc8a23d2cdd2b51355dd4eb5019c68" +- LLVM_SHA256 = "ba09f12e5019f5aca531b1733275f0a10b181d6f894deb1a4610e017f76b172a" ++ LLVM_COMMIT = "13c761789753862a7cc31a2a26f23010afa668b9" ++ LLVM_SHA256 = "587f3eda6d00d751cbfc69fa5a15475ae4232e191ace04031b343e4e8ae16355" tf_http_archive( name = name, +diff --git a/third_party/stablehlo/temporary.patch b/third_party/stablehlo/temporary.patch +index d19d903..2dd4f17 100755 +--- a/third_party/stablehlo/temporary.patch ++++ b/third_party/stablehlo/temporary.patch +@@ -1,3 +1,15 @@ ++diff --ruN a/stablehlo/examples/c++/ExampleAdd.cpp b/stablehlo/examples/c++/ExampleAdd.cpp ++--- stablehlo/examples/c++/ExampleAdd.cpp +++++ stablehlo/examples/c++/ExampleAdd.cpp ++@@ -49,7 +49,7 @@ ++ /** create function **/ ++ // create 
function argument and result types. ++ auto tensorType = ++- mlir::RankedTensorType::get({3, 4}, mlir::FloatType::getF32(&context)); +++ mlir::RankedTensorType::get({3, 4}, mlir::Float32Type::get(&context)); ++ auto func_type = ++ mlir::FunctionType::get(&context, {tensorType, tensorType}, {tensorType}); ++ + diff --ruN a/stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir b/stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir + --- stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir + +++ stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir diff --git a/third_party/shardy/workspace.bzl b/third_party/shardy/workspace.bzl index 3acd9d8ab4ad2..1c4225dcd7134 100644 --- a/third_party/shardy/workspace.bzl +++ b/third_party/shardy/workspace.bzl @@ -3,8 +3,8 @@ load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls") def repo(): - SHARDY_COMMIT = "293e28a2b7c745c82fc5de99dad207e29340e7e0" - SHARDY_SHA256 = "36e38a2a7d23ba3c5385a4dc8651d682269c6a8dcf71b9a4cca5522cc32b7216" + SHARDY_COMMIT = "a45b0ae83803b4edb0602f3f5b342571a41b8e91" + SHARDY_SHA256 = "29f97d1838f463a6985f255fc29c80aa0517780a6b08fe1d01e3083a7f573942" tf_http_archive( name = "shardy", diff --git a/third_party/stablehlo/temporary.patch b/third_party/stablehlo/temporary.patch index d19d903fccbad..2dd4f1791a6a6 100755 --- a/third_party/stablehlo/temporary.patch +++ b/third_party/stablehlo/temporary.patch @@ -1,3 +1,15 @@ +diff --ruN a/stablehlo/examples/c++/ExampleAdd.cpp b/stablehlo/examples/c++/ExampleAdd.cpp +--- stablehlo/examples/c++/ExampleAdd.cpp ++++ stablehlo/examples/c++/ExampleAdd.cpp +@@ -49,7 +49,7 @@ + /** create function **/ + // create function argument and result types. + auto tensorType = +- mlir::RankedTensorType::get({3, 4}, mlir::FloatType::getF32(&context)); ++ mlir::RankedTensorType::get({3, 4}, mlir::Float32Type::get(&context)); + auto func_type = + mlir::FunctionType::get(&context, {tensorType, tensorType}, {tensorType}); + diff --ruN a/stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir b/stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir --- stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir +++ stablehlo/stablehlo/conversions/tosa/tests/nullary.mlir diff --git a/third_party/triton/llvm_integration/cl717293402.patch b/third_party/triton/llvm_integration/cl717293402.patch new file mode 100644 index 0000000000000..1d051c8b37f7a --- /dev/null +++ b/third_party/triton/llvm_integration/cl717293402.patch @@ -0,0 +1,127 @@ + +--- a/include/triton/Conversion/MLIRTypes.h 2024-07-03 07:14:55.000000000 -0700 ++++ b/include/triton/Conversion/MLIRTypes.h 2025-01-19 13:19:21.000000000 -0800 +@@ -21,10 +21,10 @@ + } + + // Float types +-inline Type f16Ty(MLIRContext *ctx) { return FloatType::getF16(ctx); } +-inline Type f32Ty(MLIRContext *ctx) { return FloatType::getF32(ctx); } +-inline Type f64Ty(MLIRContext *ctx) { return FloatType::getF64(ctx); } +-inline Type bf16Ty(MLIRContext *ctx) { return FloatType::getBF16(ctx); } ++inline Type f16Ty(MLIRContext *ctx) { return Float16Type::get(ctx); } ++inline Type f32Ty(MLIRContext *ctx) { return Float32Type::get(ctx); } ++inline Type f64Ty(MLIRContext *ctx) { return Float64Type::get(ctx); } ++inline Type bf16Ty(MLIRContext *ctx) { return BFloat16Type::get(ctx); } + + inline bool isFloat(Type type) { + return type.isF32() || type.isF64() || type.isF16() || type.isF128() || + +--- a/lib/Dialect/TritonGPU/IR/Ops.cpp 2025-01-15 12:52:52.000000000 -0800 ++++ b/lib/Dialect/TritonGPU/IR/Ops.cpp 2025-01-19 13:19:21.000000000 -0800 +@@ -15,7 +15,7 @@ 
+ auto xTy = getSrc().getType(); + auto scaleTy = getScale().getType(); + +- if (xTy.getElementType() != FloatType::getBF16(getContext()) && ++ if (xTy.getElementType() != BFloat16Type::get(getContext()) && + xTy.getElementType() != IntegerType::get(getContext(), 8)) { + return emitOpError("element type of the first operand must be bf16 or i8"); + } +@@ -111,7 +111,7 @@ + auto newShape = SmallVector(xShape); + if (!encoding) { + newShape.back() *= 2; +- retTy = RankedTensorType::get(xShape, FloatType::getBF16(ctx)); ++ retTy = RankedTensorType::get(xShape, BFloat16Type::get(ctx)); + } else { + auto oldEncoding = cast(encoding); + auto newVEncoding = DotOperandEncodingAttr::get( +@@ -123,7 +123,7 @@ + const bool hasBatch = xShape.size() == 3; + const int kIdx = (opIdx == 0 ? 1 : 0) + hasBatch; + newShape[kIdx] *= 2; +- retTy = RankedTensorType::get(newShape, FloatType::getBF16(ctx), ++ retTy = RankedTensorType::get(newShape, BFloat16Type::get(ctx), + newVEncoding); + } + inferredReturnTypes.push_back(retTy); + +--- a/third_party/nvidia/lib/NVGPUToLLVM/NVGPUToLLVMPass.cpp 2025-01-15 12:52:52.000000000 -0800 ++++ b/third_party/nvidia/lib/NVGPUToLLVM/NVGPUToLLVMPass.cpp 2025-01-19 13:19:22.000000000 -0800 +@@ -56,9 +56,9 @@ + else if (constraint == 'l') + ty = IntegerType::get(rewriter.getContext(), 64); + else if (constraint == 'f') +- ty = FloatType::getF32(rewriter.getContext()); ++ ty = Float32Type::get(rewriter.getContext()); + else if (constraint == 'd') +- ty = FloatType::getF64(rewriter.getContext()); ++ ty = Float64Type::get(rewriter.getContext()); + else { + assert(false && "Unsupported constraint"); + } + +--- a/unittest/Dialect/TritonGPU/DialectTest.cpp 2025-01-15 12:52:52.000000000 -0800 ++++ b/unittest/Dialect/TritonGPU/DialectTest.cpp 2025-01-19 13:19:23.000000000 -0800 +@@ -492,10 +492,10 @@ + llvm::to_vector(llvm::reverse(llvm::seq(rank)))); + + auto srcTy = RankedTensorType::get( +- srcShape, FloatType::getF32(&ctx), ++ srcShape, Float32Type::get(&ctx), + BlockedEncodingAttr::get(&ctx, sizePerThread, threadsPerWarp, + warpsPerCTA, order, ctaLayout)); +- auto dstTy = RankedTensorType::get(dstShape, FloatType::getF32(&ctx)); ++ auto dstTy = RankedTensorType::get(dstShape, Float32Type::get(&ctx)); + + bool couldReshape = false; + testReshape(srcTy, dstTy, /*expectedDstEnc=*/std::nullopt, +@@ -526,7 +526,7 @@ + ctx.getOrLoadDialect(); + ctaLayout = + triton::gpu::CTALayoutAttr::get(&ctx, ctaPerCGA, ctaSplit, ctaOrder); +- f16Ty = FloatType::getF16(&ctx); ++ f16Ty = Float16Type::get(&ctx); + } + + triton::gpu::AMDMfmaEncodingAttr createMFMA(int mDim, int nDim, +@@ -692,7 +692,7 @@ + ASSERT_EQ(linearLayout, expandedLL); + + // Test that methods of DistributedEncoding return the same values +- Type eltTy = FloatType::getF32(&ctx); ++ Type eltTy = Float32Type::get(&ctx); + + ASSERT_EQ(getOrder(distributedEncoding), linearEncoding.getRepOrder()); + ASSERT_EQ(cast(distributedEncoding) + +--- a/unittest/Dialect/TritonGPU/DumpLayoutTest.cpp 2024-10-31 04:36:20.000000000 -0700 ++++ b/unittest/Dialect/TritonGPU/DumpLayoutTest.cpp 2025-01-19 13:19:23.000000000 -0800 +@@ -182,7 +182,7 @@ + {1}, /* ord, row-major */ + {1}); /* cOrd */ + +- auto elemTy = FloatType::getF16(sharedLayout.getContext()); ++ auto elemTy = Float16Type::get(sharedLayout.getContext()); + auto tensorType = RankedTensorType::get({32}, elemTy, sharedLayout); + std::string layout = getLayoutStr(tensorType, /*useHWPointOfView=*/false); + assertSameStr(refStr, layout); +@@ -237,7 +237,7 @@ + {1, 0}, /* ord, row-major */ + 
{1, 0}); /* cOrd */ + +- auto elemTy = FloatType::getF16(sharedLayout.getContext()); ++ auto elemTy = Float16Type::get(sharedLayout.getContext()); + auto tensorType = RankedTensorType::get({8, 32}, elemTy, sharedLayout); + std::string layout = getLayoutStr(tensorType, /*useHWPointOfView=*/false); + assertSameStr(refStr, layout); +@@ -510,7 +510,7 @@ + {1, 0}, /* ord, row-major */ + {1, 0}); /* cOrd */ + +- auto elemTyHW = FloatType::getF16(sharedLayoutHW.getContext()); ++ auto elemTyHW = Float16Type::get(sharedLayoutHW.getContext()); + auto tensorTypeHW = RankedTensorType::get({8, 32}, elemTyHW, sharedLayoutHW); + + std::string layoutHW = getLayoutStr(tensorTypeHW, /*useHWPointOfView=*/true); diff --git a/third_party/triton/llvm_integration/series.bzl b/third_party/triton/llvm_integration/series.bzl index 656b9c894904d..be374e9d18868 100644 --- a/third_party/triton/llvm_integration/series.bzl +++ b/third_party/triton/llvm_integration/series.bzl @@ -8,5 +8,6 @@ LLVM nor MLIR integrator, please do not add any patches to this list. """ llvm_patch_list = [ + "//third_party/triton:llvm_integration/cl717293402.patch", # Add new patches just above this line ] diff --git a/third_party/tsl/third_party/llvm/generated.patch b/third_party/tsl/third_party/llvm/generated.patch index 3d2a2525c37a9..8b54ffba772b7 100644 --- a/third_party/tsl/third_party/llvm/generated.patch +++ b/third_party/tsl/third_party/llvm/generated.patch @@ -1,207 +1,1156 @@ Auto generated patch. Do not edit or delete it, even if empty. -diff -ruN --strip-trailing-cr a/mlir/include/mlir/IR/TypeRange.h b/mlir/include/mlir/IR/TypeRange.h ---- a/mlir/include/mlir/IR/TypeRange.h -+++ b/mlir/include/mlir/IR/TypeRange.h -@@ -29,12 +29,11 @@ - /// a SmallVector/std::vector. This class should be used in places that are not - /// suitable for a more derived type (e.g. ArrayRef) or a template range - /// parameter. --class TypeRange -- : public llvm::detail::indexed_accessor_range_base< -- TypeRange, -- llvm::PointerUnion, -- Type, Type, Type> { -+class TypeRange : public llvm::detail::indexed_accessor_range_base< -+ TypeRange, -+ llvm::PointerUnion, -+ Type, Type, Type> { - public: - using RangeBaseT::RangeBaseT; - TypeRange(ArrayRef types = std::nullopt); -@@ -45,11 +44,8 @@ - TypeRange(ValueTypeRange values) - : TypeRange(ValueRange(ValueRangeT(values.begin().getCurrent(), - values.end().getCurrent()))) {} -- -- TypeRange(Type type) : TypeRange(type, /*count=*/1) {} -- template , Arg> && -- !std::is_constructible_v>> -+ template , Arg>::value>> - TypeRange(Arg &&arg) : TypeRange(ArrayRef(std::forward(arg))) {} - TypeRange(std::initializer_list types) - : TypeRange(ArrayRef(types)) {} -@@ -60,9 +56,8 @@ - /// * A pointer to the first element of an array of types. - /// * A pointer to the first element of an array of operands. - /// * A pointer to the first element of an array of results. -- /// * A single 'Type' instance. - using OwnerT = llvm::PointerUnion; -+ detail::OpResultImpl *>; - - /// See `llvm::detail::indexed_accessor_range_base` for details. - static OwnerT offset_base(OwnerT object, ptrdiff_t index); -diff -ruN --strip-trailing-cr a/mlir/include/mlir/IR/ValueRange.h b/mlir/include/mlir/IR/ValueRange.h ---- a/mlir/include/mlir/IR/ValueRange.h -+++ b/mlir/include/mlir/IR/ValueRange.h -@@ -374,16 +374,16 @@ - /// SmallVector/std::vector. This class should be used in places that are not - /// suitable for a more derived type (e.g. ArrayRef) or a template range - /// parameter. 
--class ValueRange final : public llvm::detail::indexed_accessor_range_base< -- ValueRange, -- PointerUnion, -- Value, Value, Value> { -+class ValueRange final -+ : public llvm::detail::indexed_accessor_range_base< -+ ValueRange, -+ PointerUnion, -+ Value, Value, Value> { - public: - /// The type representing the owner of a ValueRange. This is either a list of -- /// values, operands, or results or a single value. -+ /// values, operands, or results. - using OwnerT = -- PointerUnion; -+ PointerUnion; - - using RangeBaseT::RangeBaseT; - -@@ -392,7 +392,7 @@ - std::is_constructible, Arg>::value && - !std::is_convertible::value>> - ValueRange(Arg &&arg) : ValueRange(ArrayRef(std::forward(arg))) {} -- ValueRange(Value value) : ValueRange(value, /*count=*/1) {} -+ ValueRange(const Value &value) : ValueRange(&value, /*count=*/1) {} - ValueRange(const std::initializer_list &values) - : ValueRange(ArrayRef(values)) {} - ValueRange(iterator_range values) -diff -ruN --strip-trailing-cr a/mlir/lib/IR/OperationSupport.cpp b/mlir/lib/IR/OperationSupport.cpp ---- a/mlir/lib/IR/OperationSupport.cpp -+++ b/mlir/lib/IR/OperationSupport.cpp -@@ -653,15 +653,6 @@ - /// See `llvm::detail::indexed_accessor_range_base` for details. - ValueRange::OwnerT ValueRange::offset_base(const OwnerT &owner, - ptrdiff_t index) { -- if (llvm::isa_and_nonnull(owner)) { -- // Prevent out-of-bounds indexing for single values. -- // Note that we do allow an index of 1 as is required by 'slice'ing that -- // returns an empty range. This also matches the usual rules of C++ of being -- // allowed to index past the last element of an array. -- assert(index <= 1 && "out-of-bound offset into single-value 'ValueRange'"); -- // Return nullptr to quickly cause segmentation faults on misuse. -- return index == 0 ? owner : nullptr; -- } - if (const auto *value = llvm::dyn_cast_if_present(owner)) - return {value + index}; - if (auto *operand = llvm::dyn_cast_if_present(owner)) -@@ -670,10 +661,6 @@ +diff -ruN --strip-trailing-cr a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h +--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h ++++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h +@@ -513,12 +513,6 @@ + Entity loadElementAt(mlir::Location loc, fir::FirOpBuilder &builder, + Entity entity, mlir::ValueRange oneBasedIndices); + +-/// Return a vector of extents for the given entity. +-/// The function creates new operations, but tries to clean-up +-/// after itself. +-llvm::SmallVector +-genExtentsVector(mlir::Location loc, fir::FirOpBuilder &builder, Entity entity); +- + } // namespace hlfir + + #endif // FORTRAN_OPTIMIZER_BUILDER_HLFIRTOOLS_H +diff -ruN --strip-trailing-cr a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp +--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp ++++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp +@@ -1421,15 +1421,3 @@ + return loadTrivialScalar(loc, builder, + getElementAt(loc, builder, entity, oneBasedIndices)); } - /// See `llvm::detail::indexed_accessor_range_base` for details. 
- Value ValueRange::dereference_iterator(const OwnerT &owner, ptrdiff_t index) { -- if (auto value = llvm::dyn_cast_if_present(owner)) { -- assert(index == 0 && "cannot offset into single-value 'ValueRange'"); -- return value; +- +-llvm::SmallVector +-hlfir::genExtentsVector(mlir::Location loc, fir::FirOpBuilder &builder, +- hlfir::Entity entity) { +- entity = hlfir::derefPointersAndAllocatables(loc, builder, entity); +- mlir::Value shape = hlfir::genShape(loc, builder, entity); +- llvm::SmallVector extents = +- hlfir::getExplicitExtentsFromShape(shape, builder); +- if (shape.getUses().empty()) +- shape.getDefiningOp()->erase(); +- return extents; +-} +diff -ruN --strip-trailing-cr a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp ++++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp +@@ -37,79 +37,6 @@ + + namespace { + +-// Helper class to generate operations related to computing +-// product of values. +-class ProductFactory { +-public: +- ProductFactory(mlir::Location loc, fir::FirOpBuilder &builder) +- : loc(loc), builder(builder) {} +- +- // Generate an update of the inner product value: +- // acc += v1 * v2, OR +- // acc += CONJ(v1) * v2, OR +- // acc ||= v1 && v2 +- // +- // CONJ parameter specifies whether the first complex product argument +- // needs to be conjugated. +- template +- mlir::Value genAccumulateProduct(mlir::Value acc, mlir::Value v1, +- mlir::Value v2) { +- mlir::Type resultType = acc.getType(); +- acc = castToProductType(acc, resultType); +- v1 = castToProductType(v1, resultType); +- v2 = castToProductType(v2, resultType); +- mlir::Value result; +- if (mlir::isa(resultType)) { +- result = builder.create( +- loc, acc, builder.create(loc, v1, v2)); +- } else if (mlir::isa(resultType)) { +- if constexpr (CONJ) +- result = fir::IntrinsicLibrary{builder, loc}.genConjg(resultType, v1); +- else +- result = v1; +- +- result = builder.create( +- loc, acc, builder.create(loc, result, v2)); +- } else if (mlir::isa(resultType)) { +- result = builder.create( +- loc, acc, builder.create(loc, v1, v2)); +- } else if (mlir::isa(resultType)) { +- result = builder.create( +- loc, acc, builder.create(loc, v1, v2)); +- } else { +- llvm_unreachable("unsupported type"); +- } +- +- return builder.createConvert(loc, resultType, result); - } - if (const auto *value = llvm::dyn_cast_if_present(owner)) - return value[index]; - if (auto *operand = llvm::dyn_cast_if_present(owner)) -diff -ruN --strip-trailing-cr a/mlir/lib/IR/TypeRange.cpp b/mlir/lib/IR/TypeRange.cpp ---- a/mlir/lib/IR/TypeRange.cpp -+++ b/mlir/lib/IR/TypeRange.cpp -@@ -31,23 +31,12 @@ - this->base = result; - else if (auto *operand = llvm::dyn_cast_if_present(owner)) - this->base = operand; -- else if (auto value = llvm::dyn_cast_if_present(owner)) -- this->base = value.getType(); - else - this->base = cast(owner); - } +- +-private: +- mlir::Location loc; +- fir::FirOpBuilder &builder; +- +- mlir::Value castToProductType(mlir::Value value, mlir::Type type) { +- if (mlir::isa(type)) +- return builder.createConvert(loc, builder.getIntegerType(1), value); +- +- // TODO: the multiplications/additions by/of zero resulting from +- // complex * real are optimized by LLVM under -fno-signed-zeros +- // -fno-honor-nans. 
+- // We can make them disappear by default if we: +- // * either expand the complex multiplication into real +- // operations, OR +- // * set nnan nsz fast-math flags to the complex operations. +- if (fir::isa_complex(type) && !fir::isa_complex(value.getType())) { +- mlir::Value zeroCmplx = fir::factory::createZeroValue(builder, loc, type); +- fir::factory::Complex helper(builder, loc); +- mlir::Type partType = helper.getComplexPartType(type); +- return helper.insertComplexPart(zeroCmplx, +- castToProductType(value, partType), +- /*isImagPart=*/false); +- } +- return builder.createConvert(loc, type, value); +- } +-}; +- + class TransposeAsElementalConversion + : public mlir::OpRewritePattern { + public: +@@ -163,8 +90,11 @@ + static mlir::Value genResultShape(mlir::Location loc, + fir::FirOpBuilder &builder, + hlfir::Entity array) { +- llvm::SmallVector inExtents = +- hlfir::genExtentsVector(loc, builder, array); ++ mlir::Value inShape = hlfir::genShape(loc, builder, array); ++ llvm::SmallVector inExtents = ++ hlfir::getExplicitExtentsFromShape(inShape, builder); ++ if (inShape.getUses().empty()) ++ inShape.getDefiningOp()->erase(); + + // transpose indices + assert(inExtents.size() == 2 && "checked in TransposeOp::validate"); +@@ -207,7 +137,7 @@ + mlir::Value resultShape, dimExtent; + llvm::SmallVector arrayExtents; + if (isTotalReduction) +- arrayExtents = hlfir::genExtentsVector(loc, builder, array); ++ arrayExtents = genArrayExtents(loc, builder, array); + else + std::tie(resultShape, dimExtent) = + genResultShapeForPartialReduction(loc, builder, array, dimVal); +@@ -233,8 +163,7 @@ + // If DIM is not present, do total reduction. + + // Initial value for the reduction. +- mlir::Value reductionInitValue = +- fir::factory::createZeroValue(builder, loc, elementType); ++ mlir::Value reductionInitValue = genInitValue(loc, builder, elementType); + + // The reduction loop may be unordered if FastMathFlags::reassoc + // transformations are allowed. The integer reduction is always +@@ -335,6 +264,17 @@ + } + + private: ++ static llvm::SmallVector ++ genArrayExtents(mlir::Location loc, fir::FirOpBuilder &builder, ++ hlfir::Entity array) { ++ mlir::Value inShape = hlfir::genShape(loc, builder, array); ++ llvm::SmallVector inExtents = ++ hlfir::getExplicitExtentsFromShape(inShape, builder); ++ if (inShape.getUses().empty()) ++ inShape.getDefiningOp()->erase(); ++ return inExtents; ++ } ++ + // Return fir.shape specifying the shape of the result + // of a SUM reduction with DIM=dimVal. The second return value + // is the extent of the DIM dimension. +@@ -343,7 +283,7 @@ + fir::FirOpBuilder &builder, + hlfir::Entity array, int64_t dimVal) { + llvm::SmallVector inExtents = +- hlfir::genExtentsVector(loc, builder, array); ++ genArrayExtents(loc, builder, array); + assert(dimVal > 0 && dimVal <= static_cast(inExtents.size()) && + "DIM must be present and a positive constant not exceeding " + "the array's rank"); +@@ -353,6 +293,26 @@ + return {builder.create(loc, inExtents), dimExtent}; + } + ++ // Generate the initial value for a SUM reduction with the given ++ // data type. 
++ static mlir::Value genInitValue(mlir::Location loc, ++ fir::FirOpBuilder &builder, ++ mlir::Type elementType) { ++ if (auto ty = mlir::dyn_cast(elementType)) { ++ const llvm::fltSemantics &sem = ty.getFloatSemantics(); ++ return builder.createRealConstant(loc, elementType, ++ llvm::APFloat::getZero(sem)); ++ } else if (auto ty = mlir::dyn_cast(elementType)) { ++ mlir::Value initValue = genInitValue(loc, builder, ty.getElementType()); ++ return fir::factory::Complex{builder, loc}.createComplex(ty, initValue, ++ initValue); ++ } else if (mlir::isa(elementType)) { ++ return builder.createIntegerConstant(loc, elementType, 0); ++ } ++ ++ llvm_unreachable("unsupported SUM reduction type"); ++ } ++ + // Generate scalar addition of the two values (of the same data type). + static mlir::Value genScalarAdd(mlir::Location loc, + fir::FirOpBuilder &builder, +@@ -610,10 +570,16 @@ + static std::tuple + genResultShape(mlir::Location loc, fir::FirOpBuilder &builder, + hlfir::Entity input1, hlfir::Entity input2) { +- llvm::SmallVector input1Extents = +- hlfir::genExtentsVector(loc, builder, input1); +- llvm::SmallVector input2Extents = +- hlfir::genExtentsVector(loc, builder, input2); ++ mlir::Value input1Shape = hlfir::genShape(loc, builder, input1); ++ llvm::SmallVector input1Extents = ++ hlfir::getExplicitExtentsFromShape(input1Shape, builder); ++ if (input1Shape.getUses().empty()) ++ input1Shape.getDefiningOp()->erase(); ++ mlir::Value input2Shape = hlfir::genShape(loc, builder, input2); ++ llvm::SmallVector input2Extents = ++ hlfir::getExplicitExtentsFromShape(input2Shape, builder); ++ if (input2Shape.getUses().empty()) ++ input2Shape.getDefiningOp()->erase(); - /// See `llvm::detail::indexed_accessor_range_base` for details. - TypeRange::OwnerT TypeRange::offset_base(OwnerT object, ptrdiff_t index) { -- if (llvm::isa_and_nonnull(object)) { -- // Prevent out-of-bounds indexing for single values. -- // Note that we do allow an index of 1 as is required by 'slice'ing that -- // returns an empty range. This also matches the usual rules of C++ of being -- // allowed to index past the last element of an array. -- assert(index <= 1 && "out-of-bound offset into single-value 'ValueRange'"); -- // Return nullptr to quickly cause segmentation faults on misuse. -- return index == 0 ? object : nullptr; + llvm::SmallVector newExtents; + mlir::Value innerProduct1Extent, innerProduct2Extent; +@@ -661,6 +627,60 @@ + innerProductExtent[0]}; + } + ++ static mlir::Value castToProductType(mlir::Location loc, ++ fir::FirOpBuilder &builder, ++ mlir::Value value, mlir::Type type) { ++ if (mlir::isa(type)) ++ return builder.createConvert(loc, builder.getIntegerType(1), value); ++ ++ // TODO: the multiplications/additions by/of zero resulting from ++ // complex * real are optimized by LLVM under -fno-signed-zeros ++ // -fno-honor-nans. ++ // We can make them disappear by default if we: ++ // * either expand the complex multiplication into real ++ // operations, OR ++ // * set nnan nsz fast-math flags to the complex operations. 
++ if (fir::isa_complex(type) && !fir::isa_complex(value.getType())) { ++ mlir::Value zeroCmplx = fir::factory::createZeroValue(builder, loc, type); ++ fir::factory::Complex helper(builder, loc); ++ mlir::Type partType = helper.getComplexPartType(type); ++ return helper.insertComplexPart( ++ zeroCmplx, castToProductType(loc, builder, value, partType), ++ /*isImagPart=*/false); ++ } ++ return builder.createConvert(loc, type, value); ++ } ++ ++ // Generate an update of the inner product value: ++ // acc += v1 * v2, OR ++ // acc ||= v1 && v2 ++ static mlir::Value genAccumulateProduct(mlir::Location loc, ++ fir::FirOpBuilder &builder, ++ mlir::Type resultType, ++ mlir::Value acc, mlir::Value v1, ++ mlir::Value v2) { ++ acc = castToProductType(loc, builder, acc, resultType); ++ v1 = castToProductType(loc, builder, v1, resultType); ++ v2 = castToProductType(loc, builder, v2, resultType); ++ mlir::Value result; ++ if (mlir::isa(resultType)) ++ result = builder.create( ++ loc, acc, builder.create(loc, v1, v2)); ++ else if (mlir::isa(resultType)) ++ result = builder.create( ++ loc, acc, builder.create(loc, v1, v2)); ++ else if (mlir::isa(resultType)) ++ result = builder.create( ++ loc, acc, builder.create(loc, v1, v2)); ++ else if (mlir::isa(resultType)) ++ result = builder.create( ++ loc, acc, builder.create(loc, v1, v2)); ++ else ++ llvm_unreachable("unsupported type"); ++ ++ return builder.createConvert(loc, resultType, result); ++ } ++ + static mlir::LogicalResult + genContiguousMatmul(mlir::Location loc, fir::FirOpBuilder &builder, + hlfir::Entity result, mlir::Value resultShape, +@@ -728,9 +748,9 @@ + hlfir::loadElementAt(loc, builder, lhs, {I, K}); + hlfir::Entity rhsElementValue = + hlfir::loadElementAt(loc, builder, rhs, {K, J}); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- resultElementValue, lhsElementValue, rhsElementValue); ++ mlir::Value productValue = genAccumulateProduct( ++ loc, builder, resultElementType, resultElementValue, ++ lhsElementValue, rhsElementValue); + builder.create(loc, productValue, resultElement); + return {}; + }; +@@ -765,9 +785,9 @@ + hlfir::loadElementAt(loc, builder, lhs, {J, K}); + hlfir::Entity rhsElementValue = + hlfir::loadElementAt(loc, builder, rhs, {K}); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- resultElementValue, lhsElementValue, rhsElementValue); ++ mlir::Value productValue = genAccumulateProduct( ++ loc, builder, resultElementType, resultElementValue, ++ lhsElementValue, rhsElementValue); + builder.create(loc, productValue, resultElement); + return {}; + }; +@@ -797,9 +817,9 @@ + hlfir::loadElementAt(loc, builder, lhs, {K}); + hlfir::Entity rhsElementValue = + hlfir::loadElementAt(loc, builder, rhs, {K, J}); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- resultElementValue, lhsElementValue, rhsElementValue); ++ mlir::Value productValue = genAccumulateProduct( ++ loc, builder, resultElementType, resultElementValue, ++ lhsElementValue, rhsElementValue); + builder.create(loc, productValue, resultElement); + return {}; + }; +@@ -865,9 +885,9 @@ + hlfir::loadElementAt(loc, builder, lhs, lhsIndices); + hlfir::Entity rhsElementValue = + hlfir::loadElementAt(loc, builder, rhs, rhsIndices); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- reductionArgs[0], lhsElementValue, rhsElementValue); ++ mlir::Value productValue = genAccumulateProduct( ++ loc, builder, resultElementType, reductionArgs[0], 
lhsElementValue, ++ rhsElementValue); + return {productValue}; + }; + llvm::SmallVector innerProductValue = +@@ -884,73 +904,6 @@ + } + }; + +-class DotProductConversion +- : public mlir::OpRewritePattern { +-public: +- using mlir::OpRewritePattern::OpRewritePattern; +- +- llvm::LogicalResult +- matchAndRewrite(hlfir::DotProductOp product, +- mlir::PatternRewriter &rewriter) const override { +- hlfir::Entity op = hlfir::Entity{product}; +- if (!op.isScalar()) +- return rewriter.notifyMatchFailure(product, "produces non-scalar result"); +- +- mlir::Location loc = product.getLoc(); +- fir::FirOpBuilder builder{rewriter, product.getOperation()}; +- hlfir::Entity lhs = hlfir::Entity{product.getLhs()}; +- hlfir::Entity rhs = hlfir::Entity{product.getRhs()}; +- mlir::Type resultElementType = product.getType(); +- bool isUnordered = mlir::isa(resultElementType) || +- mlir::isa(resultElementType) || +- static_cast(builder.getFastMathFlags() & +- mlir::arith::FastMathFlags::reassoc); +- +- mlir::Value extent = genProductExtent(loc, builder, lhs, rhs); +- +- auto genBody = [&](mlir::Location loc, fir::FirOpBuilder &builder, +- mlir::ValueRange oneBasedIndices, +- mlir::ValueRange reductionArgs) +- -> llvm::SmallVector { +- hlfir::Entity lhsElementValue = +- hlfir::loadElementAt(loc, builder, lhs, oneBasedIndices); +- hlfir::Entity rhsElementValue = +- hlfir::loadElementAt(loc, builder, rhs, oneBasedIndices); +- mlir::Value productValue = +- ProductFactory{loc, builder}.genAccumulateProduct( +- reductionArgs[0], lhsElementValue, rhsElementValue); +- return {productValue}; +- }; +- +- mlir::Value initValue = +- fir::factory::createZeroValue(builder, loc, resultElementType); +- +- llvm::SmallVector result = hlfir::genLoopNestWithReductions( +- loc, builder, {extent}, +- /*reductionInits=*/{initValue}, genBody, isUnordered); +- +- rewriter.replaceOp(product, result[0]); +- return mlir::success(); - } - if (const auto *value = llvm::dyn_cast_if_present(object)) - return {value + index}; - if (auto *operand = llvm::dyn_cast_if_present(object)) -@@ -59,10 +48,6 @@ - - /// See `llvm::detail::indexed_accessor_range_base` for details. 
- Type TypeRange::dereference_iterator(OwnerT object, ptrdiff_t index) { -- if (auto type = llvm::dyn_cast_if_present(object)) { -- assert(index == 0 && "cannot offset into single-value 'TypeRange'"); -- return type; +- +-private: +- static mlir::Value genProductExtent(mlir::Location loc, +- fir::FirOpBuilder &builder, +- hlfir::Entity input1, +- hlfir::Entity input2) { +- llvm::SmallVector input1Extents = +- hlfir::genExtentsVector(loc, builder, input1); +- llvm::SmallVector input2Extents = +- hlfir::genExtentsVector(loc, builder, input2); +- +- assert(input1Extents.size() == 1 && input2Extents.size() == 1 && +- "hlfir.dot_product arguments must be vectors"); +- llvm::SmallVector extent = +- fir::factory::deduceOptimalExtents(input1Extents, input2Extents); +- return extent[0]; - } - if (const auto *value = llvm::dyn_cast_if_present(object)) - return (value + index)->getType(); - if (auto *operand = llvm::dyn_cast_if_present(object)) -diff -ruN --strip-trailing-cr a/mlir/unittests/IR/OperationSupportTest.cpp b/mlir/unittests/IR/OperationSupportTest.cpp ---- a/mlir/unittests/IR/OperationSupportTest.cpp -+++ b/mlir/unittests/IR/OperationSupportTest.cpp -@@ -313,21 +313,4 @@ - op2->destroy(); +-}; +- + class SimplifyHLFIRIntrinsics + : public hlfir::impl::SimplifyHLFIRIntrinsicsBase { + public: +@@ -986,8 +939,6 @@ + if (forceMatmulAsElemental || this->allowNewSideEffects) + patterns.insert>(context); + +- patterns.insert(context); +- + if (mlir::failed(mlir::applyPatternsGreedily( + getOperation(), std::move(patterns), config))) { + mlir::emitError(getOperation()->getLoc(), +diff -ruN --strip-trailing-cr a/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir +--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir ++++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-dotproduct.fir +@@ -1,144 +0,0 @@ +-// Test hlfir.dot_product simplification to a reduction loop: +-// RUN: fir-opt --simplify-hlfir-intrinsics %s | FileCheck %s +- +-func.func @dot_product_integer(%arg0: !hlfir.expr, %arg1: !hlfir.expr) -> i32 { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr) -> i32 +- return %res : i32 +-} +-// CHECK-LABEL: func.func @dot_product_integer( +-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr) -> i32 { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant 0 : i32 +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_8:.*]] = %[[VAL_3]]) -> (i32) { +-// CHECK: %[[VAL_9:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]] : (!hlfir.expr, index) -> i16 +-// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_7]] : (!hlfir.expr, index) -> i32 +-// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_9]] : (i16) -> i32 +-// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_11]], %[[VAL_10]] : i32 +-// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_8]], %[[VAL_12]] : i32 +-// CHECK: fir.result %[[VAL_13]] : i32 +-// CHECK: } +-// CHECK: return %[[VAL_6]] : i32 +-// CHECK: } +- +-func.func @dot_product_real(%arg0: !hlfir.expr, %arg1: !hlfir.expr) -> f32 { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr) -> f32 +- return %res : f32 +-} +-// CHECK-LABEL: func.func @dot_product_real( +-// 
CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr) -> f32 { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_8:.*]] = %[[VAL_3]]) -> (f32) { +-// CHECK: %[[VAL_9:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]] : (!hlfir.expr, index) -> f32 +-// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_7]] : (!hlfir.expr, index) -> f16 +-// CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (f16) -> f32 +-// CHECK: %[[VAL_12:.*]] = arith.mulf %[[VAL_9]], %[[VAL_11]] : f32 +-// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_12]] : f32 +-// CHECK: fir.result %[[VAL_13]] : f32 +-// CHECK: } +-// CHECK: return %[[VAL_6]] : f32 +-// CHECK: } +- +-func.func @dot_product_complex(%arg0: !hlfir.expr>, %arg1: !hlfir.expr>) -> complex { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr>, !hlfir.expr>) -> complex +- return %res : complex +-} +-// CHECK-LABEL: func.func @dot_product_complex( +-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr>, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> complex { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr>) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.undefined complex +-// CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_6]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_8:.*]] = fir.insert_value %[[VAL_7]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (complex) { +-// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex +-// CHECK: %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex +-// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (complex) -> complex +-// CHECK: %[[VAL_15:.*]] = fir.extract_value %[[VAL_12]], [1 : index] : (complex) -> f32 +-// CHECK: %[[VAL_16:.*]] = arith.negf %[[VAL_15]] : f32 +-// CHECK: %[[VAL_17:.*]] = fir.insert_value %[[VAL_12]], %[[VAL_16]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_18:.*]] = fir.mulc %[[VAL_17]], %[[VAL_14]] : complex +-// CHECK: %[[VAL_19:.*]] = fir.addc %[[VAL_11]], %[[VAL_18]] : complex +-// CHECK: fir.result %[[VAL_19]] : complex +-// CHECK: } +-// CHECK: return %[[VAL_9]] : complex +-// CHECK: } +- +-func.func @dot_product_real_complex(%arg0: !hlfir.expr, %arg1: !hlfir.expr>) -> complex { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr, !hlfir.expr>) -> complex +- return %res : complex +-} +-// CHECK-LABEL: func.func @dot_product_real_complex( +-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> complex { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : 
index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.undefined complex +-// CHECK: %[[VAL_7:.*]] = fir.insert_value %[[VAL_6]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_8:.*]] = fir.insert_value %[[VAL_7]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (complex) { +-// CHECK: %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]] : (!hlfir.expr, index) -> f32 +-// CHECK: %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr>, index) -> complex +-// CHECK: %[[VAL_14:.*]] = fir.undefined complex +-// CHECK: %[[VAL_15:.*]] = fir.insert_value %[[VAL_14]], %[[VAL_3]], [0 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_16:.*]] = fir.insert_value %[[VAL_15]], %[[VAL_3]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_17:.*]] = fir.insert_value %[[VAL_16]], %[[VAL_12]], [0 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_13]] : (complex) -> complex +-// CHECK: %[[VAL_19:.*]] = fir.extract_value %[[VAL_17]], [1 : index] : (complex) -> f32 +-// CHECK: %[[VAL_20:.*]] = arith.negf %[[VAL_19]] : f32 +-// CHECK: %[[VAL_21:.*]] = fir.insert_value %[[VAL_17]], %[[VAL_20]], [1 : index] : (complex, f32) -> complex +-// CHECK: %[[VAL_22:.*]] = fir.mulc %[[VAL_21]], %[[VAL_18]] : complex +-// CHECK: %[[VAL_23:.*]] = fir.addc %[[VAL_11]], %[[VAL_22]] : complex +-// CHECK: fir.result %[[VAL_23]] : complex +-// CHECK: } +-// CHECK: return %[[VAL_9]] : complex +-// CHECK: } +- +-func.func @dot_product_logical(%arg0: !hlfir.expr>, %arg1: !hlfir.expr>) -> !fir.logical<4> { +- %res = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr>, !hlfir.expr>) -> !fir.logical<4> +- return %res : !fir.logical<4> +-} +-// CHECK-LABEL: func.func @dot_product_logical( +-// CHECK-SAME: %[[VAL_0:.*]]: !hlfir.expr>, +-// CHECK-SAME: %[[VAL_1:.*]]: !hlfir.expr>) -> !fir.logical<4> { +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_3:.*]] = arith.constant false +-// CHECK: %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr>) -> !fir.shape<1> +-// CHECK: %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<1>) -> index +-// CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4> +-// CHECK: %[[VAL_7:.*]] = fir.do_loop %[[VAL_8:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_9:.*]] = %[[VAL_6]]) -> (!fir.logical<4>) { +-// CHECK: %[[VAL_10:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_8]] : (!hlfir.expr>, index) -> !fir.logical<1> +-// CHECK: %[[VAL_11:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_8]] : (!hlfir.expr>, index) -> !fir.logical<4> +-// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_9]] : (!fir.logical<4>) -> i1 +-// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_10]] : (!fir.logical<1>) -> i1 +-// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_11]] : (!fir.logical<4>) -> i1 +-// CHECK: %[[VAL_15:.*]] = arith.andi %[[VAL_13]], %[[VAL_14]] : i1 +-// CHECK: %[[VAL_16:.*]] = arith.ori %[[VAL_12]], %[[VAL_15]] : i1 +-// CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (i1) -> !fir.logical<4> +-// CHECK: fir.result %[[VAL_17]] : !fir.logical<4> +-// CHECK: } +-// CHECK: return %[[VAL_7]] : !fir.logical<4> +-// CHECK: } +- +-func.func @dot_product_known_dim(%arg0: !hlfir.expr<10xf32>, %arg1: !hlfir.expr) -> f32 { +- %res1 = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr<10xf32>, !hlfir.expr) -> f32 +- %res2 = 
hlfir.dot_product %arg1 %arg0 : (!hlfir.expr, !hlfir.expr<10xf32>) -> f32 +- %res = arith.addf %res1, %res2 : f32 +- return %res : f32 +-} +-// CHECK-LABEL: func.func @dot_product_known_dim( +-// CHECK: %[[VAL_2:.*]] = arith.constant 1 : index +-// CHECK: %[[VAL_4:.*]] = arith.constant 10 : index +-// CHECK: fir.do_loop %{{.*}} = %[[VAL_2]] to %[[VAL_4]] step %[[VAL_2]] +-// CHECK: fir.do_loop %{{.*}} = %[[VAL_2]] to %[[VAL_4]] step %[[VAL_2]] +diff -ruN --strip-trailing-cr a/libcxx/include/__config b/libcxx/include/__config +--- a/libcxx/include/__config ++++ b/libcxx/include/__config +@@ -1166,9 +1166,7 @@ + # define _LIBCPP_NOESCAPE + # endif + +-// FIXME: Expand this to [[__gnu__::__nodebug__]] again once the testcase reported in +-// https://github.com/llvm/llvm-project/pull/118710 has been analyzed +-# define _LIBCPP_NODEBUG ++# define _LIBCPP_NODEBUG [[__gnu__::__nodebug__]] + + # if __has_attribute(__standalone_debug__) + # define _LIBCPP_STANDALONE_DEBUG __attribute__((__standalone_debug__)) +diff -ruN --strip-trailing-cr a/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp b/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp +--- a/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp ++++ b/libcxx/test/tools/clang_tidy_checks/libcpp_module.cpp +@@ -27,7 +27,7 @@ + check_factories.registerCheck("libcpp-header-exportable-declarations"); + check_factories.registerCheck("libcpp-hide-from-abi"); + check_factories.registerCheck("libcpp-internal-ftms"); +- // check_factories.registerCheck("libcpp-nodebug-on-aliases"); ++ check_factories.registerCheck("libcpp-nodebug-on-aliases"); + check_factories.registerCheck("libcpp-cpp-version-check"); + check_factories.registerCheck("libcpp-robust-against-adl"); + check_factories.registerCheck("libcpp-uglify-attributes"); +diff -ruN --strip-trailing-cr a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp ++++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +@@ -1140,8 +1140,6 @@ + + setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); + +- setTargetDAGCombine(ISD::SHL); +- + // In case of strict alignment, avoid an excessive number of byte wide stores. + MaxStoresPerMemsetOptSize = 8; + MaxStoresPerMemset = +@@ -26473,43 +26471,6 @@ + return NVCAST; } --TEST(ValueRangeTest, ValueConstructable) { -- MLIRContext context; -- Builder builder(&context); +-/// If the operand is a bitwise AND with a constant RHS, and the shift has a +-/// constant RHS and is the only use, we can pull it out of the shift, i.e. +-/// +-/// (shl (and X, C1), C2) -> (and (shl X, C2), (shl C1, C2)) +-/// +-/// We prefer this canonical form to match existing isel patterns. +-static SDValue performSHLCombine(SDNode *N, +- TargetLowering::DAGCombinerInfo &DCI, +- SelectionDAG &DAG) { +- if (DCI.isBeforeLegalizeOps()) +- return SDValue(); - -- Operation *useOp = -- createOp(&context, /*operands=*/std::nullopt, builder.getIntegerType(16)); -- // Valid construction despite a temporary 'OpResult'. 
-- ValueRange operands = useOp->getResult(0); +- SDValue Op0 = N->getOperand(0); +- if (Op0.getOpcode() != ISD::AND || !Op0.hasOneUse()) +- return SDValue(); - -- useOp->setOperands(operands); -- EXPECT_EQ(useOp->getNumOperands(), 1u); -- EXPECT_EQ(useOp->getOperand(0), useOp->getResult(0)); +- SDValue C1 = Op0->getOperand(1); +- SDValue C2 = N->getOperand(1); +- if (!isa(C1) || !isa(C2)) +- return SDValue(); - -- useOp->dropAllUses(); -- useOp->destroy(); +- // Might be folded into shifted op, do not lower. +- if (N->hasOneUse()) { +- unsigned UseOpc = N->user_begin()->getOpcode(); +- if (UseOpc == ISD::ADD || UseOpc == ISD::SUB || UseOpc == ISD::SETCC || +- UseOpc == AArch64ISD::ADDS || UseOpc == AArch64ISD::SUBS) +- return SDValue(); +- } +- +- SDLoc DL(N); +- EVT VT = N->getValueType(0); +- SDValue X = Op0->getOperand(0); +- SDValue NewRHS = DAG.getNode(ISD::SHL, DL, VT, C1, C2); +- SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, X, C2); +- return DAG.getNode(ISD::AND, DL, VT, NewShift, NewRHS); +-} +- + SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; +@@ -26855,8 +26816,6 @@ + return performCTLZCombine(N, DAG, Subtarget); + case ISD::SCALAR_TO_VECTOR: + return performScalarToVectorCombine(N, DCI, DAG); +- case ISD::SHL: +- return performSHLCombine(N, DCI, DAG); + } + return SDValue(); + } +diff -ruN --strip-trailing-cr a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp ++++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +@@ -4979,7 +4979,7 @@ + // the subvector length. + const unsigned VecVF = getNumElements(Vec->getType()); + SmallVector Mask(VecVF, PoisonMaskElem); +- std::iota(Mask.begin(), std::next(Mask.begin(), Index), 0); ++ std::iota(Mask.begin(), Mask.end(), 0); + for (unsigned I : seq(SubVecVF)) + Mask[I + Index] = I + VecVF; + if (Generator) { +@@ -13956,11 +13956,12 @@ + Instruction *InsElt; + if (auto *VecTy = dyn_cast(Scalar->getType())) { + assert(SLPReVec && "FixedVectorType is not expected."); +- Vec = InsElt = cast(createInsertVector( +- Builder, Vec, Scalar, Pos * getNumElements(VecTy))); +- auto *II = dyn_cast(InsElt); ++ Vec = ++ createInsertVector(Builder, Vec, Scalar, Pos * getNumElements(VecTy)); ++ auto *II = dyn_cast(Vec); + if (!II || II->getIntrinsicID() != Intrinsic::vector_insert) + return Vec; ++ InsElt = II; + } else { + Vec = Builder.CreateInsertElement(Vec, Scalar, Builder.getInt32(Pos)); + InsElt = dyn_cast(Vec); +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll +--- a/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll ++++ b/llvm/test/CodeGen/AArch64/const-shift-of-constmasked.ll +@@ -190,7 +190,8 @@ + define i8 @test_i8_7_mask_shl_1(i8 %a0) { + ; CHECK-LABEL: test_i8_7_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #1, #3 ++; CHECK-NEXT: and w8, w0, #0x7 ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 7 + %t1 = shl i8 %t0, 1 +@@ -199,7 +200,8 @@ + define i8 @test_i8_7_mask_shl_4(i8 %a0) { + ; CHECK-LABEL: test_i8_7_mask_shl_4: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #4, #3 ++; CHECK-NEXT: and w8, w0, #0x7 ++; CHECK-NEXT: lsl w0, w8, #4 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 7 + %t1 = shl i8 %t0, 4 +@@ -227,8 +229,8 @@ + define i8 @test_i8_28_mask_shl_1(i8 %a0) { + ; CHECK-LABEL: test_i8_28_mask_shl_1: + ; CHECK: // 
%bb.0: +-; CHECK-NEXT: lsl w8, w0, #1 +-; CHECK-NEXT: and w0, w8, #0x38 ++; CHECK-NEXT: and w8, w0, #0x1c ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 28 + %t1 = shl i8 %t0, 1 +@@ -237,8 +239,8 @@ + define i8 @test_i8_28_mask_shl_2(i8 %a0) { + ; CHECK-LABEL: test_i8_28_mask_shl_2: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #2 +-; CHECK-NEXT: and w0, w8, #0x70 ++; CHECK-NEXT: and w8, w0, #0x1c ++; CHECK-NEXT: lsl w0, w8, #2 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 28 + %t1 = shl i8 %t0, 2 +@@ -247,8 +249,8 @@ + define i8 @test_i8_28_mask_shl_3(i8 %a0) { + ; CHECK-LABEL: test_i8_28_mask_shl_3: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #3 +-; CHECK-NEXT: and w0, w8, #0xe0 ++; CHECK-NEXT: and w8, w0, #0x1c ++; CHECK-NEXT: lsl w0, w8, #3 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 28 + %t1 = shl i8 %t0, 3 +@@ -257,8 +259,8 @@ + define i8 @test_i8_28_mask_shl_4(i8 %a0) { + ; CHECK-LABEL: test_i8_28_mask_shl_4: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #4 +-; CHECK-NEXT: and w0, w8, #0xc0 ++; CHECK-NEXT: and w8, w0, #0xc ++; CHECK-NEXT: lsl w0, w8, #4 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 28 + %t1 = shl i8 %t0, 4 +@@ -268,8 +270,8 @@ + define i8 @test_i8_224_mask_shl_1(i8 %a0) { + ; CHECK-LABEL: test_i8_224_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #1 +-; CHECK-NEXT: and w0, w8, #0xc0 ++; CHECK-NEXT: and w8, w0, #0x60 ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i8 %a0, 224 + %t1 = shl i8 %t0, 1 +@@ -463,7 +465,8 @@ + define i16 @test_i16_127_mask_shl_1(i16 %a0) { + ; CHECK-LABEL: test_i16_127_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #1, #7 ++; CHECK-NEXT: and w8, w0, #0x7f ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 127 + %t1 = shl i16 %t0, 1 +@@ -472,7 +475,8 @@ + define i16 @test_i16_127_mask_shl_8(i16 %a0) { + ; CHECK-LABEL: test_i16_127_mask_shl_8: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #8, #7 ++; CHECK-NEXT: and w8, w0, #0x7f ++; CHECK-NEXT: lsl w0, w8, #8 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 127 + %t1 = shl i16 %t0, 8 +@@ -500,8 +504,8 @@ + define i16 @test_i16_2032_mask_shl_3(i16 %a0) { + ; CHECK-LABEL: test_i16_2032_mask_shl_3: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #3 +-; CHECK-NEXT: and w0, w8, #0x3f80 ++; CHECK-NEXT: and w8, w0, #0x7f0 ++; CHECK-NEXT: lsl w0, w8, #3 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 2032 + %t1 = shl i16 %t0, 3 +@@ -510,8 +514,8 @@ + define i16 @test_i16_2032_mask_shl_4(i16 %a0) { + ; CHECK-LABEL: test_i16_2032_mask_shl_4: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #4 +-; CHECK-NEXT: and w0, w8, #0x7f00 ++; CHECK-NEXT: and w8, w0, #0x7f0 ++; CHECK-NEXT: lsl w0, w8, #4 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 2032 + %t1 = shl i16 %t0, 4 +@@ -520,8 +524,8 @@ + define i16 @test_i16_2032_mask_shl_5(i16 %a0) { + ; CHECK-LABEL: test_i16_2032_mask_shl_5: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #5 +-; CHECK-NEXT: and w0, w8, #0xfe00 ++; CHECK-NEXT: and w8, w0, #0x7f0 ++; CHECK-NEXT: lsl w0, w8, #5 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 2032 + %t1 = shl i16 %t0, 5 +@@ -530,8 +534,8 @@ + define i16 @test_i16_2032_mask_shl_6(i16 %a0) { + ; CHECK-LABEL: test_i16_2032_mask_shl_6: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #6 +-; CHECK-NEXT: and w0, w8, #0xfc00 ++; CHECK-NEXT: and w8, w0, #0x3f0 ++; CHECK-NEXT: lsl w0, w8, #6 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 2032 + %t1 = shl i16 %t0, 6 +@@ -541,8 +545,8 @@ + define i16 @test_i16_65024_mask_shl_1(i16 %a0) { + ; CHECK-LABEL: 
test_i16_65024_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #1 +-; CHECK-NEXT: and w0, w8, #0xfc00 ++; CHECK-NEXT: and w8, w0, #0x7e00 ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i16 %a0, 65024 + %t1 = shl i16 %t0, 1 +@@ -736,7 +740,8 @@ + define i32 @test_i32_32767_mask_shl_1(i32 %a0) { + ; CHECK-LABEL: test_i32_32767_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #1, #15 ++; CHECK-NEXT: and w8, w0, #0x7fff ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 32767 + %t1 = shl i32 %t0, 1 +@@ -745,7 +750,8 @@ + define i32 @test_i32_32767_mask_shl_16(i32 %a0) { + ; CHECK-LABEL: test_i32_32767_mask_shl_16: + ; CHECK: // %bb.0: +-; CHECK-NEXT: ubfiz w0, w0, #16, #15 ++; CHECK-NEXT: and w8, w0, #0x7fff ++; CHECK-NEXT: lsl w0, w8, #16 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 32767 + %t1 = shl i32 %t0, 16 +@@ -773,8 +779,8 @@ + define i32 @test_i32_8388352_mask_shl_7(i32 %a0) { + ; CHECK-LABEL: test_i32_8388352_mask_shl_7: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #7 +-; CHECK-NEXT: and w0, w8, #0x3fff8000 ++; CHECK-NEXT: and w8, w0, #0x7fff00 ++; CHECK-NEXT: lsl w0, w8, #7 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 8388352 + %t1 = shl i32 %t0, 7 +@@ -783,8 +789,8 @@ + define i32 @test_i32_8388352_mask_shl_8(i32 %a0) { + ; CHECK-LABEL: test_i32_8388352_mask_shl_8: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #8 +-; CHECK-NEXT: and w0, w8, #0x7fff0000 ++; CHECK-NEXT: and w8, w0, #0x7fff00 ++; CHECK-NEXT: lsl w0, w8, #8 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 8388352 + %t1 = shl i32 %t0, 8 +@@ -793,8 +799,8 @@ + define i32 @test_i32_8388352_mask_shl_9(i32 %a0) { + ; CHECK-LABEL: test_i32_8388352_mask_shl_9: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #9 +-; CHECK-NEXT: and w0, w8, #0xfffe0000 ++; CHECK-NEXT: and w8, w0, #0x7fff00 ++; CHECK-NEXT: lsl w0, w8, #9 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 8388352 + %t1 = shl i32 %t0, 9 +@@ -803,8 +809,8 @@ + define i32 @test_i32_8388352_mask_shl_10(i32 %a0) { + ; CHECK-LABEL: test_i32_8388352_mask_shl_10: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #10 +-; CHECK-NEXT: and w0, w8, #0xfffc0000 ++; CHECK-NEXT: and w8, w0, #0x3fff00 ++; CHECK-NEXT: lsl w0, w8, #10 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 8388352 + %t1 = shl i32 %t0, 10 +@@ -814,8 +820,8 @@ + define i32 @test_i32_4294836224_mask_shl_1(i32 %a0) { + ; CHECK-LABEL: test_i32_4294836224_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w8, w0, #1 +-; CHECK-NEXT: and w0, w8, #0xfffc0000 ++; CHECK-NEXT: and w8, w0, #0x7ffe0000 ++; CHECK-NEXT: lsl w0, w8, #1 + ; CHECK-NEXT: ret + %t0 = and i32 %a0, 4294836224 + %t1 = shl i32 %t0, 1 +@@ -1009,7 +1015,8 @@ + define i64 @test_i64_2147483647_mask_shl_1(i64 %a0) { + ; CHECK-LABEL: test_i64_2147483647_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w0, w0, #1 ++; CHECK-NEXT: and x8, x0, #0x7fffffff ++; CHECK-NEXT: lsl x0, x8, #1 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 2147483647 + %t1 = shl i64 %t0, 1 +@@ -1047,8 +1054,8 @@ + define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) { + ; CHECK-LABEL: test_i64_140737488289792_mask_shl_15: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl x8, x0, #15 +-; CHECK-NEXT: and x0, x8, #0x3fffffff80000000 ++; CHECK-NEXT: and x8, x0, #0x7fffffff0000 ++; CHECK-NEXT: lsl x0, x8, #15 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 140737488289792 + %t1 = shl i64 %t0, 15 +@@ -1057,8 +1064,8 @@ + define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) { + ; CHECK-LABEL: test_i64_140737488289792_mask_shl_16: + ; CHECK: // %bb.0: +-; 
CHECK-NEXT: lsl x8, x0, #16 +-; CHECK-NEXT: and x0, x8, #0x7fffffff00000000 ++; CHECK-NEXT: and x8, x0, #0x7fffffff0000 ++; CHECK-NEXT: lsl x0, x8, #16 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 140737488289792 + %t1 = shl i64 %t0, 16 +@@ -1067,8 +1074,8 @@ + define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) { + ; CHECK-LABEL: test_i64_140737488289792_mask_shl_17: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl x8, x0, #17 +-; CHECK-NEXT: and x0, x8, #0xfffffffe00000000 ++; CHECK-NEXT: and x8, x0, #0x7fffffff0000 ++; CHECK-NEXT: lsl x0, x8, #17 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 140737488289792 + %t1 = shl i64 %t0, 17 +@@ -1077,8 +1084,8 @@ + define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) { + ; CHECK-LABEL: test_i64_140737488289792_mask_shl_18: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl x8, x0, #18 +-; CHECK-NEXT: and x0, x8, #0xfffffffc00000000 ++; CHECK-NEXT: and x8, x0, #0x3fffffff0000 ++; CHECK-NEXT: lsl x0, x8, #18 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 140737488289792 + %t1 = shl i64 %t0, 18 +@@ -1088,8 +1095,8 @@ + define i64 @test_i64_18446744065119617024_mask_shl_1(i64 %a0) { + ; CHECK-LABEL: test_i64_18446744065119617024_mask_shl_1: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl x8, x0, #1 +-; CHECK-NEXT: and x0, x8, #0xfffffffc00000000 ++; CHECK-NEXT: and x8, x0, #0x7ffffffe00000000 ++; CHECK-NEXT: lsl x0, x8, #1 + ; CHECK-NEXT: ret + %t0 = and i64 %a0, 18446744065119617024 + %t1 = shl i64 %t0, 1 +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll +--- a/llvm/test/CodeGen/AArch64/extract-bits.ll ++++ b/llvm/test/CodeGen/AArch64/extract-bits.ll +@@ -1013,8 +1013,8 @@ + define i32 @c2_i32(i32 %arg) nounwind { + ; CHECK-LABEL: c2_i32: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsr w8, w0, #17 +-; CHECK-NEXT: and w0, w8, #0xffc ++; CHECK-NEXT: ubfx w8, w0, #19, #10 ++; CHECK-NEXT: lsl w0, w8, #2 + ; CHECK-NEXT: ret + %tmp0 = lshr i32 %arg, 19 + %tmp1 = and i32 %tmp0, 1023 +@@ -1063,8 +1063,8 @@ + define i64 @c2_i64(i64 %arg) nounwind { + ; CHECK-LABEL: c2_i64: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsr x8, x0, #49 +-; CHECK-NEXT: and x0, x8, #0xffc ++; CHECK-NEXT: ubfx x8, x0, #51, #10 ++; CHECK-NEXT: lsl x0, x8, #2 + ; CHECK-NEXT: ret + %tmp0 = lshr i64 %arg, 51 + %tmp1 = and i64 %tmp0, 1023 +@@ -1120,8 +1120,8 @@ + define void @c7_i32(i32 %arg, ptr %ptr) nounwind { + ; CHECK-LABEL: c7_i32: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsr w8, w0, #17 +-; CHECK-NEXT: and w8, w8, #0xffc ++; CHECK-NEXT: ubfx w8, w0, #19, #10 ++; CHECK-NEXT: lsl w8, w8, #2 + ; CHECK-NEXT: str w8, [x1] + ; CHECK-NEXT: ret + %tmp0 = lshr i32 %arg, 19 +@@ -1163,8 +1163,8 @@ + define void @c7_i64(i64 %arg, ptr %ptr) nounwind { + ; CHECK-LABEL: c7_i64: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsr x8, x0, #49 +-; CHECK-NEXT: and x8, x8, #0xffc ++; CHECK-NEXT: ubfx x8, x0, #51, #10 ++; CHECK-NEXT: lsl x8, x8, #2 + ; CHECK-NEXT: str x8, [x1] + ; CHECK-NEXT: ret + %tmp0 = lshr i64 %arg, 51 +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/fpenv.ll b/llvm/test/CodeGen/AArch64/fpenv.ll +--- a/llvm/test/CodeGen/AArch64/fpenv.ll ++++ b/llvm/test/CodeGen/AArch64/fpenv.ll +@@ -4,11 +4,11 @@ + define void @func_set_rounding_dyn(i32 %rm) { + ; CHECK-LABEL: func_set_rounding_dyn: + ; CHECK: // %bb.0: +-; CHECK-NEXT: lsl w9, w0, #22 ++; CHECK-NEXT: sub w9, w0, #1 + ; CHECK-NEXT: mrs x8, FPCR ++; CHECK-NEXT: and w9, w9, #0x3 + ; CHECK-NEXT: and x8, x8, #0xffffffffff3fffff +-; CHECK-NEXT: sub w9, w9, #1024, lsl #12 // =4194304 +-; CHECK-NEXT: and w9, w9, 
#0xc00000 ++; CHECK-NEXT: lsl w9, w9, #22 + ; CHECK-NEXT: orr x8, x8, x9 + ; CHECK-NEXT: msr FPCR, x8 + ; CHECK-NEXT: ret +diff -ruN --strip-trailing-cr a/llvm/test/CodeGen/AArch64/xbfiz.ll b/llvm/test/CodeGen/AArch64/xbfiz.ll +--- a/llvm/test/CodeGen/AArch64/xbfiz.ll ++++ b/llvm/test/CodeGen/AArch64/xbfiz.ll +@@ -69,19 +69,3 @@ + %and = and i64 %shl, 4294967295 + ret i64 %and + } +- +-define i64 @lsl_zext_i8_i64(i8 %b) { +-; CHECK-LABEL: lsl_zext_i8_i64: +-; CHECK: ubfiz x0, x0, #1, #8 +- %1 = zext i8 %b to i64 +- %2 = shl i64 %1, 1 +- ret i64 %2 -} - - } // namespace -diff -ruN --strip-trailing-cr a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel ---- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel -+++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel -@@ -43,10 +43,7 @@ - - gentbl( - name = "diagnostic_defs_gen", -- tbl_outs = [( -- "-gen-clang-diags-defs -clang-component=%s" % c, -- "include/clang/Basic/Diagnostic%sKinds.inc" % c, -- ) for c in [ -+ tbl_outs = [out for c in [ - "AST", - "Analysis", - "Comment", -@@ -60,6 +57,15 @@ - "Refactoring", - "Sema", - "Serialization", -+ ] for out in [ -+ ( -+ "-gen-clang-diags-defs -clang-component=%s" % c, -+ "include/clang/Basic/Diagnostic%sKinds.inc" % c, -+ ), -+ ( -+ "-gen-clang-diags-enums -clang-component=%s" % c, -+ "include/clang/Basic/Diagnostic%sEnums.inc" % c, -+ ), - ]] + [ - ( - "-gen-clang-diag-groups", +-define i64 @lsl_zext_i16_i64(i16 %b) { +-; CHECK-LABEL: lsl_zext_i16_i64: +-; CHECK: ubfiz x0, x0, #1, #16 +- %1 = zext i16 %b to i64 +- %2 = shl i64 %1, 1 +- ret i64 %2 +-} +diff -ruN --strip-trailing-cr a/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll +--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll ++++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll +@@ -0,0 +1,81 @@ ++; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ++; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s ++ ++define <16 x double> @test(ptr %x, double %v, double %a) { ++; CHECK-LABEL: define <16 x double> @test( ++; CHECK-SAME: ptr [[X:%.*]], double [[V:%.*]], double [[A:%.*]]) { ++; CHECK-NEXT: [[GEP6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 8 ++; CHECK-NEXT: [[GEP8:%.*]] = getelementptr inbounds double, ptr [[X]], i64 9 ++; CHECK-NEXT: [[TMP1:%.*]] = load <6 x double>, ptr [[X]], align 4 ++; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, ptr [[GEP6]], align 4 ++; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr [[GEP8]], align 4 ++; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x double> poison, double [[A]], i32 0 ++; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x double> [[TMP4]], <16 x double> poison, <16 x i32> zeroinitializer ++; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> poison, double [[V]], i32 0 ++; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <4 x i32> zeroinitializer ++; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[V]], i32 0 ++; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> poison, <2 x i32> zeroinitializer ++; CHECK-NEXT: [[TMP10:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v6f64(<16 x double> poison, <6 x double> [[TMP1]], i64 0) ++; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <16 x i32> ++; CHECK-NEXT: [[TMP12:%.*]] = 
shufflevector <16 x double> [[TMP10]], <16 x double> [[TMP11]], <16 x i32> ++; CHECK-NEXT: [[TMP13:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP12]], <2 x double> [[TMP6]], i64 6) ++; CHECK-NEXT: [[TMP14:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP13]], <2 x double> [[TMP7]], i64 8) ++; CHECK-NEXT: [[TMP15:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP14]], <2 x double> [[TMP9]], i64 10) ++; CHECK-NEXT: [[TMP16:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP15]], <2 x double> [[TMP9]], i64 12) ++; CHECK-NEXT: [[TMP17:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP16]], <2 x double> [[TMP9]], i64 14) ++; CHECK-NEXT: [[TMP18:%.*]] = fadd <16 x double> [[TMP5]], [[TMP17]] ++; CHECK-NEXT: ret <16 x double> [[TMP18]] ++; ++ %gep1 = getelementptr inbounds double, ptr %x, i64 1 ++ %gep2 = getelementptr inbounds double, ptr %x, i64 2 ++ %gep3 = getelementptr inbounds double, ptr %x, i64 3 ++ %gep4 = getelementptr inbounds double, ptr %x, i64 4 ++ %gep5 = getelementptr inbounds double, ptr %x, i64 5 ++ %gep6 = getelementptr inbounds double, ptr %x, i64 8 ++ %gep7 = getelementptr inbounds double, ptr %x, i64 9 ++ %gep8 = getelementptr inbounds double, ptr %x, i64 9 ++ %gep9 = getelementptr inbounds double, ptr %x, i64 10 ++ %x0 = load double, ptr %x, align 4 ++ %x1 = load double, ptr %gep1, align 4 ++ %x2 = load double, ptr %gep2, align 4 ++ %x3 = load double, ptr %gep3, align 4 ++ %x4 = load double, ptr %gep4, align 4 ++ %x5 = load double, ptr %gep5, align 4 ++ %x6 = load double, ptr %gep6, align 4 ++ %x7 = load double, ptr %gep7, align 4 ++ %x8 = load double, ptr %gep8, align 4 ++ %x9 = load double, ptr %gep9, align 4 ++ %add1 = fadd double %a, %x0 ++ %add2 = fadd double %a, %x1 ++ %add3 = fadd double %a, %x2 ++ %add4 = fadd double %a, %x3 ++ %add5 = fadd double %a, %x4 ++ %add6 = fadd double %a, %x5 ++ %add7 = fadd double %a, %x6 ++ %add8 = fadd double %a, %x7 ++ %add9 = fadd double %a, %x8 ++ %add10 = fadd double %a, %x9 ++ %add11 = fadd double %a, %v ++ %add12 = fadd double %a, %v ++ %add13 = fadd double %a, %v ++ %add14 = fadd double %a, %v ++ %add15 = fadd double %a, %v ++ %add16 = fadd double %a, %v ++ %i0 = insertelement <16 x double> poison, double %add1, i32 0 ++ %i1 = insertelement <16 x double> %i0, double %add2, i32 1 ++ %i2 = insertelement <16 x double> %i1, double %add3, i32 2 ++ %i3 = insertelement <16 x double> %i2, double %add4, i32 3 ++ %i4 = insertelement <16 x double> %i3, double %add5, i32 4 ++ %i5 = insertelement <16 x double> %i4, double %add6, i32 5 ++ %i6 = insertelement <16 x double> %i5, double %add7, i32 6 ++ %i7 = insertelement <16 x double> %i6, double %add8, i32 7 ++ %i8 = insertelement <16 x double> %i7, double %add9, i32 8 ++ %i9 = insertelement <16 x double> %i8, double %add10, i32 9 ++ %i10 = insertelement <16 x double> %i9, double %add11, i32 10 ++ %i11 = insertelement <16 x double> %i10, double %add12, i32 11 ++ %i12 = insertelement <16 x double> %i11, double %add13, i32 12 ++ %i13 = insertelement <16 x double> %i12, double %add14, i32 13 ++ %i14 = insertelement <16 x double> %i13, double %add15, i32 14 ++ %i15 = insertelement <16 x double> %i14, double %add16, i32 15 ++ ret <16 x double> %i15 ++} diff --git a/third_party/tsl/third_party/llvm/workspace.bzl b/third_party/tsl/third_party/llvm/workspace.bzl index 4706c63c0e1cc..cb092919de358 100644 --- a/third_party/tsl/third_party/llvm/workspace.bzl 
+++ b/third_party/tsl/third_party/llvm/workspace.bzl
@@ -4,8 +4,8 @@ load("//third_party:repo.bzl", "tf_http_archive")
 
 def repo(name):
 """Imports LLVM."""
- LLVM_COMMIT = "bf17016a92bc8a23d2cdd2b51355dd4eb5019c68"
- LLVM_SHA256 = "ba09f12e5019f5aca531b1733275f0a10b181d6f894deb1a4610e017f76b172a"
+ LLVM_COMMIT = "13c761789753862a7cc31a2a26f23010afa668b9"
+ LLVM_SHA256 = "587f3eda6d00d751cbfc69fa5a15475ae4232e191ace04031b343e4e8ae16355"
 tf_http_archive(
 name = name,
diff --git a/xla/backends/gpu/codegen/emitters/transforms/vectorize_loads_stores.cc b/xla/backends/gpu/codegen/emitters/transforms/vectorize_loads_stores.cc
index 2c14e7e299772..a493ffcd2c4bd 100644
--- a/xla/backends/gpu/codegen/emitters/transforms/vectorize_loads_stores.cc
+++ b/xla/backends/gpu/codegen/emitters/transforms/vectorize_loads_stores.cc
@@ -176,8 +176,7 @@ mlir::VectorType GetVectorTypeForAtomicRMW(mlir::RankedTensorType tensor_type,
 return nullptr;
 }
 
- if (tensor_type.getElementType() !=
- mlir::FloatType::getF32(loop.getContext()))
+ if (tensor_type.getElementType() != mlir::Float32Type::get(loop.getContext()))
 return nullptr;
 
 if (mlir::getConstantIntValue(loop.getStep()) != 1 ||