// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
// RUN: %clang_cc1                               -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1                        -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefix CHECK-CXX
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS        -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme-f16f16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefix CHECK-CXX

// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme-b16b16 -target-feature +sme2p1 -target-feature +sme-f16f16 -O2 -S -Werror -Wall %s -o /dev/null

// REQUIRES: aarch64-registered-target
#include <arm_sme.h>

#ifdef SME_OVERLOADED_FORMS
#define SME_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED, A5) A1##A3##A5
#else
#define SME_ACLE_FUNC(A1, A2, A3, A4, A5) A1##A2##A3##A4##A5
#endif

// CHECK-LABEL: define dso_local void @test_svmla_single_za16_f16_vg1x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZM]])
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z32test_svmla_single_za16_f16_vg1x2j13svfloat16x2_tu13__SVFloat16_t(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZM]])
// CHECK-CXX-NEXT:    ret void
//
void test_svmla_single_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmla,_single,_za16,_f16,_vg1x2)(slice, zn, zm);
}

// CHECK-LABEL: define dso_local void @test_svmla_single_za16_f16_vg1x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZN_COERCE2]], <vscale x 8 x half> [[ZN_COERCE3]], <vscale x 8 x half> [[ZM]])
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z32test_svmla_single_za16_f16_vg1x4j13svfloat16x4_tu13__SVFloat16_t(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZN_COERCE2]], <vscale x 8 x half> [[ZN_COERCE3]], <vscale x 8 x half> [[ZM]])
// CHECK-CXX-NEXT:    ret void
//
void test_svmla_single_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmla,_single,_za16,_f16,_vg1x4)(slice, zn, zm);
}

// CHECK-LABEL: define dso_local void @test_svmls_single_za16_f16_vg1x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZM]])
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z32test_svmls_single_za16_f16_vg1x2j13svfloat16x2_tu13__SVFloat16_t(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZM]])
// CHECK-CXX-NEXT:    ret void
//
void test_svmls_single_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmls,_single,_za16,_f16,_vg1x2)(slice, zn, zm);
}

// CHECK-LABEL: define dso_local void @test_svmls_single_za16_f16_vg1x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZN_COERCE2]], <vscale x 8 x half> [[ZN_COERCE3]], <vscale x 8 x half> [[ZM]])
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z32test_svmls_single_za16_f16_vg1x4j13svfloat16x4_tu13__SVFloat16_t(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZN_COERCE2]], <vscale x 8 x half> [[ZN_COERCE3]], <vscale x 8 x half> [[ZM]])
// CHECK-CXX-NEXT:    ret void
//
void test_svmls_single_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmls,_single,_za16,_f16,_vg1x4)(slice, zn, zm);
}

// CHECK-LABEL: define dso_local void @test_svmla_za16_f16_vg1x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZM_COERCE0]], <vscale x 8 x half> [[ZM_COERCE1]])
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z25test_svmla_za16_f16_vg1x2j13svfloat16x2_tS_(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZM_COERCE0]], <vscale x 8 x half> [[ZM_COERCE1]])
// CHECK-CXX-NEXT:    ret void
//
void test_svmla_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmla,,_za16,_f16,_vg1x2)(slice, zn, zm);
}

// CHECK-LABEL: define dso_local void @test_svmla_za16_f16_vg1x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE2:%.*]], <vscale x 8 x half> [[ZM_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZN_COERCE2]], <vscale x 8 x half> [[ZN_COERCE3]], <vscale x 8 x half> [[ZM_COERCE0]], <vscale x 8 x half> [[ZM_COERCE1]], <vscale x 8 x half> [[ZM_COERCE2]], <vscale x 8 x half> [[ZM_COERCE3]])
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z25test_svmla_za16_f16_vg1x4j13svfloat16x4_tS_(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE2:%.*]], <vscale x 8 x half> [[ZM_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZN_COERCE2]], <vscale x 8 x half> [[ZN_COERCE3]], <vscale x 8 x half> [[ZM_COERCE0]], <vscale x 8 x half> [[ZM_COERCE1]], <vscale x 8 x half> [[ZM_COERCE2]], <vscale x 8 x half> [[ZM_COERCE3]])
// CHECK-CXX-NEXT:    ret void
//
void test_svmla_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat16x4_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmla,,_za16,_f16,_vg1x4)(slice, zn, zm);
}

// CHECK-LABEL: define dso_local void @test_svmls_za16_f16_vg1x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZM_COERCE0]], <vscale x 8 x half> [[ZM_COERCE1]])
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z25test_svmls_za16_f16_vg1x2j13svfloat16x2_tS_(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZM_COERCE0]], <vscale x 8 x half> [[ZM_COERCE1]])
// CHECK-CXX-NEXT:    ret void
//
void test_svmls_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat16x2_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmls,,_za16,_f16,_vg1x2)(slice, zn, zm);
}

// CHECK-LABEL: define dso_local void @test_svmls_za16_f16_vg1x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE2:%.*]], <vscale x 8 x half> [[ZM_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZN_COERCE2]], <vscale x 8 x half> [[ZN_COERCE3]], <vscale x 8 x half> [[ZM_COERCE0]], <vscale x 8 x half> [[ZM_COERCE1]], <vscale x 8 x half> [[ZM_COERCE2]], <vscale x 8 x half> [[ZM_COERCE3]])
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z25test_svmls_za16_f16_vg1x4j13svfloat16x4_tS_(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZM_COERCE1:%.*]], <vscale x 8 x half> [[ZM_COERCE2:%.*]], <vscale x 8 x half> [[ZM_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZN_COERCE2]], <vscale x 8 x half> [[ZN_COERCE3]], <vscale x 8 x half> [[ZM_COERCE0]], <vscale x 8 x half> [[ZM_COERCE1]], <vscale x 8 x half> [[ZM_COERCE2]], <vscale x 8 x half> [[ZM_COERCE3]])
// CHECK-CXX-NEXT:    ret void
//
void test_svmls_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat16x4_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmls,,_za16,_f16,_vg1x4)(slice, zn, zm);
}

// CHECK-LABEL: define dso_local void @test_svmla_lane_za16_f16_vg1x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZM]], i32 7)
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z30test_svmla_lane_za16_f16_vg1x2j13svfloat16x2_tu13__SVFloat16_t(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZM]], i32 7)
// CHECK-CXX-NEXT:    ret void
//
void test_svmla_lane_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmla_lane,,_za16,_f16,_vg1x2)(slice, zn, zm, 7);
}

// CHECK-LABEL: define dso_local void @test_svmla_lane_za16_f16_vg1x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZN_COERCE2]], <vscale x 8 x half> [[ZN_COERCE3]], <vscale x 8 x half> [[ZM]], i32 7)
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z30test_svmla_lane_za16_f16_vg1x4j13svfloat16x4_tu13__SVFloat16_t(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZN_COERCE2]], <vscale x 8 x half> [[ZN_COERCE3]], <vscale x 8 x half> [[ZM]], i32 7)
// CHECK-CXX-NEXT:    ret void
//
void test_svmla_lane_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmla_lane,,_za16,_f16,_vg1x4)(slice, zn, zm, 7);
}

// CHECK-LABEL: define dso_local void @test_svmls_lane_za16_f16_vg1x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZM]], i32 7)
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z30test_svmls_lane_za16_f16_vg1x2j13svfloat16x2_tu13__SVFloat16_t(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZM]], i32 7)
// CHECK-CXX-NEXT:    ret void
//
void test_svmls_lane_za16_f16_vg1x2(uint32_t slice, svfloat16x2_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmls_lane,,_za16,_f16,_vg1x2)(slice, zn, zm, 7);
}

// CHECK-LABEL: define dso_local void @test_svmls_lane_za16_f16_vg1x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZN_COERCE2]], <vscale x 8 x half> [[ZN_COERCE3]], <vscale x 8 x half> [[ZM]], i32 7)
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z30test_svmls_lane_za16_f16_vg1x4j13svfloat16x4_tu13__SVFloat16_t(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]], <vscale x 8 x half> [[ZN_COERCE2:%.*]], <vscale x 8 x half> [[ZN_COERCE3:%.*]], <vscale x 8 x half> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv8f16(i32 [[SLICE]], <vscale x 8 x half> [[ZN_COERCE0]], <vscale x 8 x half> [[ZN_COERCE1]], <vscale x 8 x half> [[ZN_COERCE2]], <vscale x 8 x half> [[ZN_COERCE3]], <vscale x 8 x half> [[ZM]], i32 7)
// CHECK-CXX-NEXT:    ret void
//
void test_svmls_lane_za16_f16_vg1x4(uint32_t slice, svfloat16x4_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmls_lane,,_za16,_f16,_vg1x4)(slice, zn, zm, 7);
}

// CHECK-LABEL: define dso_local void @test_svmla_single_za16_bf16_vg1x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z33test_svmla_single_za16_bf16_vg1x2j14svbfloat16x2_tu14__SVBfloat16_t(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-CXX-NEXT:    ret void
//
void test_svmla_single_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmla, _single, _za16, _bf16, _vg1x2)(slice, zn, zm);
}

// CHECK-LABEL: define dso_local void @test_svmla_single_za16_bf16_vg1x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZN_COERCE2]], <vscale x 8 x bfloat> [[ZN_COERCE3]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z33test_svmla_single_za16_bf16_vg1x4j14svbfloat16x4_tu14__SVBfloat16_t(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZN_COERCE2]], <vscale x 8 x bfloat> [[ZN_COERCE3]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-CXX-NEXT:    ret void
//
void test_svmla_single_za16_bf16_vg1x4(uint32_t slice, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmla, _single, _za16, _bf16, _vg1x4)(slice, zn, zm);
}

// CHECK-LABEL: define dso_local void @test_svmls_single_za16_bf16_vg1x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z33test_svmls_single_za16_bf16_vg1x2j14svbfloat16x2_tu14__SVBfloat16_t(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-CXX-NEXT:    ret void
//
void test_svmls_single_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmls, _single, _za16, _bf16, _vg1x2)(slice, zn, zm);
}

// CHECK-LABEL: define dso_local void @test_svmls_single_za16_bf16_vg1x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZN_COERCE2]], <vscale x 8 x bfloat> [[ZN_COERCE3]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z33test_svmls_single_za16_bf16_vg1x4j14svbfloat16x4_tu14__SVBfloat16_t(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZN_COERCE2]], <vscale x 8 x bfloat> [[ZN_COERCE3]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-CXX-NEXT:    ret void
//
void test_svmls_single_za16_bf16_vg1x4(uint32_t slice, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmls, _single, _za16, _bf16, _vg1x4)(slice, zn, zm);
}

// CHECK-LABEL: define dso_local void @test_svmla_za16_bf16_vg1x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZM_COERCE0]], <vscale x 8 x bfloat> [[ZM_COERCE1]])
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z26test_svmla_za16_bf16_vg1x2j14svbfloat16x2_tS_(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZM_COERCE0]], <vscale x 8 x bfloat> [[ZM_COERCE1]])
// CHECK-CXX-NEXT:    ret void
//
void test_svmla_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmla, , _za16, _bf16, _vg1x2)(slice, zn, zm);
}

// CHECK-LABEL: define dso_local void @test_svmla_za16_bf16_vg1x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZN_COERCE2]], <vscale x 8 x bfloat> [[ZN_COERCE3]], <vscale x 8 x bfloat> [[ZM_COERCE0]], <vscale x 8 x bfloat> [[ZM_COERCE1]], <vscale x 8 x bfloat> [[ZM_COERCE2]], <vscale x 8 x bfloat> [[ZM_COERCE3]])
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z26test_svmla_za16_bf16_vg1x4j14svbfloat16x4_tS_(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZN_COERCE2]], <vscale x 8 x bfloat> [[ZN_COERCE3]], <vscale x 8 x bfloat> [[ZM_COERCE0]], <vscale x 8 x bfloat> [[ZM_COERCE1]], <vscale x 8 x bfloat> [[ZM_COERCE2]], <vscale x 8 x bfloat> [[ZM_COERCE3]])
// CHECK-CXX-NEXT:    ret void
//
void test_svmla_za16_bf16_vg1x4(uint32_t slice, svbfloat16x4_t zn, svbfloat16x4_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmla, , _za16, _bf16, _vg1x4)(slice, zn, zm);
}

// CHECK-LABEL: define dso_local void @test_svmls_za16_bf16_vg1x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZM_COERCE0]], <vscale x 8 x bfloat> [[ZM_COERCE1]])
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z26test_svmls_za16_bf16_vg1x2j14svbfloat16x2_tS_(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE1:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmls.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZM_COERCE0]], <vscale x 8 x bfloat> [[ZM_COERCE1]])
// CHECK-CXX-NEXT:    ret void
//
void test_svmls_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbfloat16x2_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmls, , _za16, _bf16, _vg1x2)(slice, zn, zm);
}

// CHECK-LABEL: define dso_local void @test_svmls_za16_bf16_vg1x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZN_COERCE2]], <vscale x 8 x bfloat> [[ZN_COERCE3]], <vscale x 8 x bfloat> [[ZM_COERCE0]], <vscale x 8 x bfloat> [[ZM_COERCE1]], <vscale x 8 x bfloat> [[ZM_COERCE2]], <vscale x 8 x bfloat> [[ZM_COERCE3]])
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z26test_svmls_za16_bf16_vg1x4j14svbfloat16x4_tS_(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZM_COERCE3:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmls.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZN_COERCE2]], <vscale x 8 x bfloat> [[ZN_COERCE3]], <vscale x 8 x bfloat> [[ZM_COERCE0]], <vscale x 8 x bfloat> [[ZM_COERCE1]], <vscale x 8 x bfloat> [[ZM_COERCE2]], <vscale x 8 x bfloat> [[ZM_COERCE3]])
// CHECK-CXX-NEXT:    ret void
//
void test_svmls_za16_bf16_vg1x4(uint32_t slice, svbfloat16x4_t zn, svbfloat16x4_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmls, , _za16, _bf16, _vg1x4)(slice, zn, zm);
}

// CHECK-LABEL: define dso_local void @test_svmla_lane_za16_bf16_vg1x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZM]], i32 7)
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z31test_svmla_lane_za16_bf16_vg1x2j14svbfloat16x2_tu14__SVBfloat16_t(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZM]], i32 7)
// CHECK-CXX-NEXT:    ret void
//
void test_svmla_lane_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmla_lane, , _za16, _bf16, _vg1x2)(slice, zn, zm, 7);
}

// CHECK-LABEL: define dso_local void @test_svmla_lane_za16_bf16_vg1x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZN_COERCE2]], <vscale x 8 x bfloat> [[ZN_COERCE3]], <vscale x 8 x bfloat> [[ZM]], i32 7)
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z31test_svmla_lane_za16_bf16_vg1x4j14svbfloat16x4_tu14__SVBfloat16_t(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZN_COERCE2]], <vscale x 8 x bfloat> [[ZN_COERCE3]], <vscale x 8 x bfloat> [[ZM]], i32 7)
// CHECK-CXX-NEXT:    ret void
//
void test_svmla_lane_za16_bf16_vg1x4(uint32_t slice, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmla_lane, , _za16, _bf16, _vg1x4)(slice, zn, zm, 7);
}

// CHECK-LABEL: define dso_local void @test_svmls_lane_za16_bf16_vg1x2(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZM]], i32 7)
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z31test_svmls_lane_za16_bf16_vg1x2j14svbfloat16x2_tu14__SVBfloat16_t(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZM]], i32 7)
// CHECK-CXX-NEXT:    ret void
//
void test_svmls_lane_za16_bf16_vg1x2(uint32_t slice, svbfloat16x2_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmls_lane, , _za16, _bf16, _vg1x2)(slice, zn, zm, 7);
}

// CHECK-LABEL: define dso_local void @test_svmls_lane_za16_bf16_vg1x4(
// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZN_COERCE2]], <vscale x 8 x bfloat> [[ZN_COERCE3]], <vscale x 8 x bfloat> [[ZM]], i32 7)
// CHECK-NEXT:    ret void
//
// CHECK-CXX-LABEL: define dso_local void @_Z31test_svmls_lane_za16_bf16_vg1x4j14svbfloat16x4_tu14__SVBfloat16_t(
// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE2:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE3:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-CXX-NEXT:  entry:
// CHECK-CXX-NEXT:    tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv8bf16(i32 [[SLICE]], <vscale x 8 x bfloat> [[ZN_COERCE0]], <vscale x 8 x bfloat> [[ZN_COERCE1]], <vscale x 8 x bfloat> [[ZN_COERCE2]], <vscale x 8 x bfloat> [[ZN_COERCE3]], <vscale x 8 x bfloat> [[ZM]], i32 7)
// CHECK-CXX-NEXT:    ret void
//
void test_svmls_lane_za16_bf16_vg1x4(uint32_t slice, svbfloat16x4_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") {
  SME_ACLE_FUNC(svmls_lane, , _za16, _bf16, _vg1x4)(slice, zn, zm, 7);
}
