[Feature](mlu-ops): access variables in the tensor struct through functions in kernels and logging.h #1190

Merged · 4 commits · Jan 8, 2025

Changes from all commits
23 changes: 12 additions & 11 deletions core/logging.h
@@ -95,7 +95,7 @@
cnrtGetLastError(); \
kernel; \
cnrtRet_t ret = cnrtPeekAtLastError(); \
-  if (MLUOP_PREDICT_FALSE(cnrtSuccess != ret)) {                         \
+  if (MLUOP_PREDICT_FALSE(cnrtSuccess != ret)) {                         \
LOG(ERROR) << "Check failed: Found " << cnrtGetErrorStr(ret) \
<< " after invoke kernel " #kernel; \
return MLUOP_STATUS_EXECUTION_FAILED; \
@@ -188,15 +188,15 @@
return MLUOP_STATUS_NOT_SUPPORTED; \
}

-#define TENSOR_DIM_SIZE_CHECK(api, desc, max_num, reason, ...)            \
-  for (int i = 0; i < desc->dim; i++) {                                   \
-    if (!(desc->dims[i] < max_num)) {                                     \
-      LOG(ERROR) << api << " overflow max supported tensor dim size "     \
-                 << max_num - 1 << ", "                                   \
-                 << "now tensor's dims[" << i << "] is " << desc->dims[i] \
-                 << ". " << reason;                                       \
-      return MLUOP_STATUS_NOT_SUPPORTED;                                  \
-    }                                                                     \
+#define TENSOR_DIM_SIZE_CHECK(api, desc, max_num, reason, ...)            \
+  for (int i = 0; i < desc->getDim(); i++) {                              \
+    if (!(desc->getDimIndex(i) < max_num)) {                              \
+      LOG(ERROR) << api << " overflow max supported tensor dim size "     \
+                 << max_num - 1 << ", "                                   \
+                 << "now tensor's dims[" << i << "] is "                  \
+                 << desc->getDimIndex(i) << ". " << reason;               \
+      return MLUOP_STATUS_NOT_SUPPORTED;                                  \
+    }                                                                     \
   }

extern bool mluop_check_large_tensor_dim_size_;
@@ -222,7 +222,8 @@ extern bool mluop_check_large_tensor_dim_size_;
   if (MLUOP_PREDICT_TRUE(desc != NULL)) {                                 \
     if (MLUOP_PREDICT_FALSE(                                              \
             MLUOP_PREDICT_TRUE(0 != mluOpGetTensorElementNum(desc)) &&    \
-            isStrideTensor(desc->dim, desc->dims, desc->strides))) {      \
+            isStrideTensor(desc->getDim(), desc->getDims(),               \
+                           desc->getStrides()))) {                        \
       LOG(ERROR) << api << " stride tensor is not supported. " << reason; \
       return MLUOP_STATUS_NOT_SUPPORTED;                                  \
     }                                                                     \
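Note on the logging.h change: after this commit the macros touch the descriptor only through its accessors, so they compile against any type that provides getDim()/getDimIndex(). Below is a minimal standalone sketch of the dim-size check, with a hypothetical MockTensorDesc standing in for the real mluOpTensorStruct and a plain int error code standing in for mluOpStatus_t; it illustrates the accessor-only contract, not the actual mlu-ops implementation.

// Minimal sketch: the macro only needs getDim()/getDimIndex(), so any
// descriptor-like type works. MockTensorDesc is hypothetical.
#include <cstdint>
#include <iostream>
#include <vector>

struct MockTensorDesc {
  std::vector<int64_t> dims_;
  int getDim() const { return static_cast<int>(dims_.size()); }
  int64_t getDimIndex(int i) const { return dims_[i]; }
};

// Same shape as TENSOR_DIM_SIZE_CHECK above, with `return 1` standing in
// for MLUOP_STATUS_NOT_SUPPORTED.
#define TENSOR_DIM_SIZE_CHECK_SKETCH(api, desc, max_num)                 \
  for (int i = 0; i < (desc)->getDim(); i++) {                           \
    if (!((desc)->getDimIndex(i) < (max_num))) {                         \
      std::cerr << (api) << " overflow max supported tensor dim size "   \
                << (max_num)-1 << ", now tensor's dims[" << i << "] is " \
                << (desc)->getDimIndex(i) << ".\n";                      \
      return 1;                                                          \
    }                                                                    \
  }

int main() {
  MockTensorDesc desc{{4, 1024, 1 << 20}};
  TENSOR_DIM_SIZE_CHECK_SKETCH("[demo]", &desc, 65536);  // trips on dims[2]
  return 0;
}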
58 changes: 29 additions & 29 deletions core/tensor.h
@@ -103,6 +103,15 @@ struct alignas(64) mluOpTensorStruct {
inline bool isCpuScalar() const;

public:
+  /* Offset - 52 */
+  /* TODO(2025.1.3): variables to be removed */
+  int position = 0;
+  float scale = 1;
+  int offset = 0;
+  std::vector<int> positions;
+  std::vector<float> scales;
+  std::vector<int> offsets;
+
inline mluOpTensorLayout_t getLayout() const { return this->layout; }
inline void setLayout(mluOpTensorLayout_t newLayout) {
this->layout = newLayout;
@@ -203,35 +212,26 @@
mluOpPointerMode_t *pointer_mode);

uint64_t getTensorElementNum() { return this->total_element_num; }
-  // private:
-  /* Try to pack and align the struct */
-  /* ------------------- 64 Bytes - 1 -------------------*/
-  int64_t normal_dims[MLUOP_DIM_MAX];
-
-  /* ------------------- 64 Bytes - 2 -------------------*/
-  int64_t normal_strides[MLUOP_DIM_MAX];
-
-  /* ------------------- 64 Bytes - 3 -------------------*/
-  /* Offset - 0 */
-  uint64_t total_element_num = 0;
-  uint64_t total_tensor_size = 0;
-  int64_t *dims = normal_dims;        // point the normal dims as default
-  int64_t *strides = normal_strides;  // point the normal strides as default
-  /* Offset - 32 */
-  int dim = 0;
-  mluOpDataType_t dtype = MLUOP_DTYPE_FLOAT;
-  mluOpDataType_t onchip_dtype = MLUOP_DTYPE_INVALID;
-  mluOpTensorLayout_t layout = MLUOP_LAYOUT_ARRAY;
-  mluOpPointerMode_t pointer_mode = MLUOP_POINTER_MODE_DEVICE;
-
-  /* Offset - 52 */
-  /* To be removed*/
-  int position = 0;
-  float scale = 1;
-  int offset = 0;
-  std::vector<int> positions;
-  std::vector<float> scales;
-  std::vector<int> offsets;
+ private:
+  /* Try to pack and align the struct */
+  /* ------------------- 64 Bytes - 1 -------------------*/
+  int64_t normal_dims[MLUOP_DIM_MAX];
+
+  /* ------------------- 64 Bytes - 2 -------------------*/
+  int64_t normal_strides[MLUOP_DIM_MAX];
+
+  /* ------------------- 64 Bytes - 3 -------------------*/
+  /* Offset - 0 */
+  uint64_t total_element_num = 0;
+  uint64_t total_tensor_size = 0;
+  int64_t *dims = normal_dims;        // point the normal dims as default
+  int64_t *strides = normal_strides;  // point the normal strides as default
+  /* Offset - 32 */
+  int dim = 0;
+  mluOpDataType_t dtype = MLUOP_DTYPE_FLOAT;
+  mluOpDataType_t onchip_dtype = MLUOP_DTYPE_INVALID;
+  mluOpTensorLayout_t layout = MLUOP_LAYOUT_ARRAY;
+  mluOpPointerMode_t pointer_mode = MLUOP_POINTER_MODE_DEVICE;
};

// dim_set(rnn) [layer_num, direction, cap_of_cell]
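Note on the tensor.h change: the packed field block previously sat behind a commented-out "// private:", so kernels could still reach into the struct; this commit makes the section genuinely private and hoists the deprecated quantization fields (position/scale/offset and their vector forms) into the public area with a TODO until callers stop using them. A minimal sketch of the resulting accessor pattern, with hypothetical names in place of mluOpTensorStruct and MLUOP_DIM_MAX:

#include <cstdint>

constexpr int kDimMaxSketch = 8;  // hypothetical stand-in for MLUOP_DIM_MAX

class TensorMetaSketch {
 public:
  // Cheap inline accessors: callers never touch the packed fields directly,
  // so the private layout below can change without breaking kernels.
  int getDim() const { return dim_; }
  int64_t getDimIndex(int i) const { return dims_[i]; }
  const int64_t *getDims() const { return dims_; }
  const int64_t *getStrides() const { return strides_; }
  uint64_t getTotalTensorSize() const { return total_tensor_size_; }

  void setShape(int dim, const int64_t *dims) {
    dim_ = dim;
    for (int i = 0; i < dim; ++i) dims_[i] = dims[i];
  }

 private:
  /* packed and aligned, as in the real struct */
  int64_t dims_[kDimMaxSketch] = {};
  int64_t strides_[kDimMaxSketch] = {};
  uint64_t total_tensor_size_ = 0;
  int dim_ = 0;
};

With this shape, a later change such as heap-allocating dims for high-rank tensors stays local to the class, which is presumably why the PR routes logging.h and the kernels through getDims()/getStrides() rather than the raw pointers.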
37 changes: 19 additions & 18 deletions kernels/abs/abs.cpp
@@ -45,40 +45,40 @@ static mluOpStatus_t mluOpAbsParamCheck(mluOpHandle_t handle,
PARAM_CHECK(op_name, x_desc != NULL);
PARAM_CHECK(op_name, y_desc != NULL);
// check dim and dtype
-  if (x_desc->dtype == MLUOP_DTYPE_COMPLEX_FLOAT) {
-    PARAM_CHECK_EQ(op_name, y_desc->dtype, MLUOP_DTYPE_FLOAT);
+  if (x_desc->getDtype() == MLUOP_DTYPE_COMPLEX_FLOAT) {
+    PARAM_CHECK_EQ(op_name, y_desc->getDtype(), MLUOP_DTYPE_FLOAT);
   } else {
-    PARAM_CHECK_EQ(op_name, x_desc->dtype, y_desc->dtype);
+    PARAM_CHECK_EQ(op_name, x_desc->getDtype(), y_desc->getDtype());
   }
-  PARAM_CHECK_EQ(op_name, x_desc->dim, y_desc->dim);
+  PARAM_CHECK_EQ(op_name, x_desc->getDim(), y_desc->getDim());
// check data type
mluOpStatus_t param_check;
if (handle->arch >= MLUOP_MLU590) {
mluOpDataType_t support_type[5] = {MLUOP_DTYPE_HALF, MLUOP_DTYPE_BFLOAT16,
MLUOP_DTYPE_FLOAT, MLUOP_DTYPE_INT32,
MLUOP_DTYPE_COMPLEX_FLOAT};
-    if (!isAbsSupportType(x_desc->dtype, support_type, 5)) {
+    if (!isAbsSupportType(x_desc->getDtype(), support_type, 5)) {
LOG(ERROR) << op_name << ":x_desc's data type is not supported.";
return MLUOP_STATUS_BAD_PARAM;
}
} else {
mluOpDataType_t support_type[4] = {MLUOP_DTYPE_HALF, MLUOP_DTYPE_FLOAT,
MLUOP_DTYPE_INT32,
MLUOP_DTYPE_COMPLEX_FLOAT};
-    if (!isAbsSupportType(x_desc->dtype, support_type, 4)) {
+    if (!isAbsSupportType(x_desc->getDtype(), support_type, 4)) {
LOG(ERROR) << op_name << ":x_desc's data type is not supported.";
return MLUOP_STATUS_BAD_PARAM;
}
}

-  PARAM_CHECK_GT(op_name, x_desc->dim, 0);
-  PARAM_CHECK_GT(op_name, y_desc->dim, 0);
-  for (int i = 0; i < x_desc->dim; i++) {
-    if (x_desc->dims[i] != y_desc->dims[i]) {
+  PARAM_CHECK_GT(op_name, x_desc->getDim(), 0);
+  PARAM_CHECK_GT(op_name, y_desc->getDim(), 0);
+  for (int i = 0; i < x_desc->getDim(); i++) {
+    if (x_desc->getDimIndex(i) != y_desc->getDimIndex(i)) {
       LOG(ERROR) << op_name << ":The shape of x should be equal to y"
                  << ". But now x_desc's shape[" << i << "] is "
-                 << x_desc->dims[i] << ", y_desc's shape[" << i << "] is "
-                 << y_desc->dims[i] << ".";
+                 << x_desc->getDimIndex(i) << ", y_desc's shape[" << i
+                 << "] is " << y_desc->getDimIndex(i) << ".";
return MLUOP_STATUS_BAD_PARAM;
}
}
@@ -98,7 +98,7 @@ static mluOpStatus_t mluOpAbsParamCheck(mluOpHandle_t handle,
}

if (needStrideProcess(x_desc, y_desc)) {
-    PARAM_CHECK(op_name, x_desc->dim <= MLUOP_DIM_MAX);
+    PARAM_CHECK(op_name, x_desc->getDim() <= MLUOP_DIM_MAX);
if (handle->arch < MLUOP_MLU590) {
// num_with_stride affects offset (related with mul op, which cannot
// exceed 32-bit on MLU300)
@@ -154,18 +154,19 @@ mluOpStatus_t MLUOP_WIN_API mluOpAbs(mluOpHandle_t handle,
}
if (if_stride_kernel) {
VLOG(5) << "kernel Kernel3StagePipelineWithStrideAbs";
-    PARAM_CHECK(op_name, x_desc->dim <= MLUOP_DIM_MAX);
+    PARAM_CHECK(op_name, x_desc->getDim() <= MLUOP_DIM_MAX);
mluop::TensorShape x_shape;
mluop::TensorShape y_shape;
mluop::getTensorShape(x_desc, &x_shape);
mluop::getTensorShape(y_desc, &y_shape);
CHECK_RETURN(op_name, Kernel3StagePipelineWithStrideAbs(
-                              k_dim, k_type, handle->queue, x_desc->dtype, x,
-                              x_shape, y, y_shape, dim_x));
+                              k_dim, k_type, handle->queue, x_desc->getDtype(),
+                              x, x_shape, y, y_shape, dim_x));
} else {
VLOG(5) << "kernel Kernel3StagePipelineAbs";
-    CHECK_RETURN(op_name, Kernel3StagePipelineAbs(k_dim, k_type, handle->queue,
-                                                  x_desc->dtype, x, y, dim_x));
+    CHECK_RETURN(op_name,
+                 Kernel3StagePipelineAbs(k_dim, k_type, handle->queue,
+                                         x_desc->getDtype(), x, y, dim_x));
}
GEN_CASE_END();
return MLUOP_STATUS_SUCCESS;
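Note on the abs.cpp change: the checks now read rank, per-dimension sizes, and dtype exclusively through getDim()/getDimIndex()/getDtype(). A standalone sketch of the shape-equality loop, where Desc is a hypothetical mock rather than the real mluOpTensorDescriptor_t:

#include <cstdint>
#include <iostream>
#include <vector>

struct Desc {  // hypothetical mock of a tensor descriptor
  std::vector<int64_t> dims;
  int getDim() const { return static_cast<int>(dims.size()); }
  int64_t getDimIndex(int i) const { return dims[i]; }
};

// Mirrors the loop in mluOpAbsParamCheck: x and y must have the same
// positive rank and agree in every dimension.
bool sameShape(const Desc &x, const Desc &y) {
  if (x.getDim() <= 0 || x.getDim() != y.getDim()) return false;
  for (int i = 0; i < x.getDim(); i++) {
    if (x.getDimIndex(i) != y.getDimIndex(i)) {
      std::cerr << "shape[" << i << "] mismatch: " << x.getDimIndex(i)
                << " vs " << y.getDimIndex(i) << "\n";
      return false;
    }
  }
  return true;
}

int main() {
  Desc x{{2, 3, 4}};
  Desc y{{2, 3, 5}};
  std::cout << (sameShape(x, y) ? "ok" : "mismatch") << "\n";  // mismatch
  return 0;
}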
77 changes: 43 additions & 34 deletions kernels/active_rotated_filter/active_rotated_filter.cpp
@@ -42,7 +42,7 @@ mluOpStatus_t MLUOP_WIN_API mluOpGetActiveRotatedFilterForwardWorkspaceSize(
PARAM_CHECK(api_name, input_desc != NULL);
PARAM_CHECK(api_name, workspace_size != NULL);

-  *workspace_size = input_desc->total_tensor_size;
+  *workspace_size = input_desc->getTotalTensorSize();
return MLUOP_STATUS_SUCCESS;
}

@@ -59,31 +59,40 @@ static mluOpStatus_t activeRotatedFilterForwardParamCheck(
PARAM_CHECK(api_name, output_desc != NULL);

// check tensor dim
-  PARAM_CHECK(api_name, input_desc->dim == 5);
-  PARAM_CHECK(api_name, indices_desc->dim == 4);
-  PARAM_CHECK(api_name, output_desc->dim == 4);
+  PARAM_CHECK(api_name, input_desc->getDim() == 5);
+  PARAM_CHECK(api_name, indices_desc->getDim() == 4);
+  PARAM_CHECK(api_name, output_desc->getDim() == 4);

// check dim
-  PARAM_CHECK(api_name, input_desc->dims[2] == indices_desc->dims[0]);
-  PARAM_CHECK(api_name, input_desc->dims[3] == input_desc->dims[4]);
-  PARAM_CHECK(api_name, input_desc->dims[3] == indices_desc->dims[1]);
-  PARAM_CHECK(api_name, input_desc->dims[3] == output_desc->dims[2]);
-  PARAM_CHECK(api_name, input_desc->dims[4] == indices_desc->dims[2]);
-  PARAM_CHECK(api_name, input_desc->dims[4] == output_desc->dims[3]);
   PARAM_CHECK(api_name,
-              (input_desc->dims[2] > 0 && input_desc->dims[2] <= 128));
+              input_desc->getDimIndex(2) == indices_desc->getDimIndex(0));
+  PARAM_CHECK(api_name,
+              input_desc->getDimIndex(3) == input_desc->getDimIndex(4));
+  PARAM_CHECK(api_name,
+              input_desc->getDimIndex(3) == indices_desc->getDimIndex(1));
+  PARAM_CHECK(api_name,
+              input_desc->getDimIndex(3) == output_desc->getDimIndex(2));
+  PARAM_CHECK(api_name,
+              input_desc->getDimIndex(4) == indices_desc->getDimIndex(2));
+  PARAM_CHECK(api_name,
+              input_desc->getDimIndex(4) == output_desc->getDimIndex(3));
+  PARAM_CHECK(api_name, (input_desc->getDimIndex(2) > 0 &&
+                         input_desc->getDimIndex(2) <= 128));
PARAM_CHECK_V2(api_name,
-                 int(log(float(input_desc->dims[2])) / log(2.0f)) ==
-                     log(float(input_desc->dims[2])) / log(2.0f),
-                 "input_desc->dims[2] should be the power of 2.");
-  PARAM_CHECK(api_name, (input_desc->dims[3] == 3 || input_desc->dims[3] == 1));
+                 int(log(float(input_desc->getDimIndex(2))) / log(2.0f)) ==
+                     log(float(input_desc->getDimIndex(2))) / log(2.0f),
+                 "input_desc->getDimIndex(2) should be the power of 2.");
+  PARAM_CHECK(api_name, (input_desc->getDimIndex(3) == 3 ||
+                         input_desc->getDimIndex(3) == 1));
+  PARAM_CHECK(api_name, (indices_desc->getDimIndex(3) == 2 ||
+                         indices_desc->getDimIndex(3) == 4 ||
+                         indices_desc->getDimIndex(3) == 8));
+  PARAM_CHECK(api_name,
+              (output_desc->getDimIndex(0) ==
+               input_desc->getDimIndex(0) * indices_desc->getDimIndex(3)));
   PARAM_CHECK(api_name,
-              (indices_desc->dims[3] == 2 || indices_desc->dims[3] == 4 ||
-               indices_desc->dims[3] == 8));
-  PARAM_CHECK(api_name, (output_desc->dims[0] ==
-                         input_desc->dims[0] * indices_desc->dims[3]));
-  PARAM_CHECK(api_name, (output_desc->dims[1] ==
-                         input_desc->dims[1] * input_desc->dims[2]));
+              (output_desc->getDimIndex(1) ==
+               input_desc->getDimIndex(1) * input_desc->getDimIndex(2)));

// check stride
STRIDE_TENSOR_CHECK(api_name + ":", input_desc,
@@ -95,17 +104,17 @@

// check tensor datatype, support float16 and float32
PARAM_CHECK_V2(api_name,
-                 (input_desc->dtype == MLUOP_DTYPE_HALF) ||
-                     (input_desc->dtype == MLUOP_DTYPE_FLOAT),
+                 (input_desc->getDtype() == MLUOP_DTYPE_HALF) ||
+                     (input_desc->getDtype() == MLUOP_DTYPE_FLOAT),
                  "Only half and float are supported in input tensor, but the "
                  "data type of tensor is "
-                     << mluOpGetNameOfDataType(input_desc->dtype) << ".");
-  PARAM_CHECK(api_name, input_desc->dtype == output_desc->dtype);
+                     << mluOpGetNameOfDataType(input_desc->getDtype()) << ".");
+  PARAM_CHECK(api_name, input_desc->getDtype() == output_desc->getDtype());

PARAM_CHECK_V2(
-      api_name, (indices_desc->dtype == MLUOP_DTYPE_INT32),
+      api_name, (indices_desc->getDtype() == MLUOP_DTYPE_INT32),
"Only int32 are supported in indices idx, but the data type of tensor is "
-          << mluOpGetNameOfDataType(indices_desc->dtype) << ".");
+          << mluOpGetNameOfDataType(indices_desc->getDtype()) << ".");

const size_t input_element_num = mluOpGetTensorElementNum(input_desc);
const size_t indices_element_num = mluOpGetTensorElementNum(indices_desc);
@@ -149,12 +158,12 @@ mluOpStatus_t MLUOP_WIN_API mluOpActiveRotatedFilterForward(
if (status_paramcheck != MLUOP_STATUS_SUCCESS) {
return status_paramcheck;
}
-  const int output_planes = input_desc->dims[0];
-  const int input_planes = input_desc->dims[1];
-  const int orientations = input_desc->dims[2];
-  const int kH = input_desc->dims[3];
-  const int kW = input_desc->dims[4];
-  const int rotations = indices_desc->dims[3];
+  const int output_planes = input_desc->getDimIndex(0);
+  const int input_planes = input_desc->getDimIndex(1);
+  const int orientations = input_desc->getDimIndex(2);
+  const int kH = input_desc->getDimIndex(3);
+  const int kW = input_desc->getDimIndex(4);
+  const int rotations = indices_desc->getDimIndex(3);

// generate mluOpActiveRotatedFilterForward prototxt start!
if (MLUOP_GEN_CASE_ON_NEW) {
@@ -168,7 +177,7 @@ mluOpStatus_t MLUOP_WIN_API mluOpActiveRotatedFilterForward(
GEN_CASE_TEST_PARAM_NEW(false, false, true, 0.003, 0.003, 0);
}

-  mluOpDataType_t input_dtype = input_desc->dtype;
+  mluOpDataType_t input_dtype = input_desc->getDtype();

// start UX task, occupy all available clusters
cnrtDim3_t k_dims;
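Note on the active_rotated_filter checks: the orientation count (getDimIndex(2)) must be a power of two, which the code tests by comparing a truncated log2 against the exact quotient. A standalone sketch of that test, plus the integer bit-trick equivalent for comparison (the bit-trick is not part of the PR):

#include <cmath>
#include <iostream>

// Float-based power-of-two test, as in activeRotatedFilterForwardParamCheck:
// truncating log2(n) changes nothing exactly when n is a power of two.
bool isPowerOfTwoViaLog(int n) {
  return int(std::log(float(n)) / std::log(2.0f)) ==
         std::log(float(n)) / std::log(2.0f);
}

// Integer equivalent (shown for comparison only): a positive power of two
// has exactly one set bit, so n & (n - 1) clears it to zero.
bool isPowerOfTwoViaBits(int n) { return n > 0 && (n & (n - 1)) == 0; }

int main() {
  for (int n : {1, 2, 3, 64, 96, 128}) {
    std::cout << n << ": log=" << isPowerOfTwoViaLog(n)
              << " bits=" << isPowerOfTwoViaBits(n) << "\n";
  }
  return 0;
}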
10 changes: 5 additions & 5 deletions kernels/adam_w/adam_w.cpp
@@ -97,11 +97,11 @@ mluOpAdamW(mluOpHandle_t handle, const mluOpAdamWDescriptor_t adamw_desc,
PARAM_CHECK("[mluOpAdamW]", momentum_desc != nullptr);
PARAM_CHECK("[mluOpAdamW]", velocity_desc != nullptr);
PARAM_CHECK("[mluOpAdamW]", grad_desc != nullptr);
PARAM_CHECK("[mluOpAdamW]", param_desc->dtype == MLUOP_DTYPE_FLOAT);
PARAM_CHECK("[mluOpAdamW]", paramh_desc->dtype == MLUOP_DTYPE_BFLOAT16);
PARAM_CHECK("[mluOpAdamW]", momentum_desc->dtype == MLUOP_DTYPE_FLOAT);
PARAM_CHECK("[mluOpAdamW]", velocity_desc->dtype == MLUOP_DTYPE_FLOAT);
PARAM_CHECK("[mluOpAdamW]", grad_desc->dtype == MLUOP_DTYPE_BFLOAT16);
PARAM_CHECK("[mluOpAdamW]", param_desc->getDtype() == MLUOP_DTYPE_FLOAT);
PARAM_CHECK("[mluOpAdamW]", paramh_desc->getDtype() == MLUOP_DTYPE_BFLOAT16);
PARAM_CHECK("[mluOpAdamW]", momentum_desc->getDtype() == MLUOP_DTYPE_FLOAT);
PARAM_CHECK("[mluOpAdamW]", velocity_desc->getDtype() == MLUOP_DTYPE_FLOAT);
PARAM_CHECK("[mluOpAdamW]", grad_desc->getDtype() == MLUOP_DTYPE_BFLOAT16);

PARAM_CHECK_LE("[mluOpAdamW]", beta1, 1.0);
PARAM_CHECK_GE("[mluOpAdamW]", beta1, 0.0);
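Note on the adam_w change: it is the same mechanical rewrite, dtype field access replaced by getDtype(), applied to AdamW's mixed-precision contract (fp32 param/momentum/velocity, bf16 paramh/grad). A minimal sketch of a PARAM_CHECK-style guard over such a contract, where DataType and Desc are hypothetical stand-ins:

#include <iostream>

enum DataType { DTYPE_FLOAT, DTYPE_BFLOAT16 };  // hypothetical mini enum

struct Desc {  // hypothetical descriptor mock
  DataType dtype;
  DataType getDtype() const { return dtype; }
};

// PARAM_CHECK-style guard: log the failed condition and bail out.
#define PARAM_CHECK_SKETCH(api, cond)                   \
  if (!(cond)) {                                        \
    std::cerr << (api) << " check failed: " #cond "\n"; \
    return 1;                                           \
  }

int main() {
  Desc param{DTYPE_FLOAT};
  Desc grad{DTYPE_BFLOAT16};
  // Mirrors mluOpAdamW: fp32 master weights, bf16 gradients.
  PARAM_CHECK_SKETCH("[sketchAdamW]", param.getDtype() == DTYPE_FLOAT);
  PARAM_CHECK_SKETCH("[sketchAdamW]", grad.getDtype() == DTYPE_BFLOAT16);
  std::cout << "dtype checks passed\n";
  return 0;
}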