Commit f8027a4c authored by 王明贵

Fix sigmoid activation bug

Parent b964e890
@@ -43,8 +43,19 @@ int LRelu(const float *src, int length, float *dst, float alpha) {
 }
 
 int Sigmoid(const float *src, int length, float *dst) {
+  const float upper_bound = 16.619047164916992188f;
+  const float lower_bound = -9.0f;
   for (int i = 0; i < length; ++i) {
-    dst[i] = 1.0f / (1.0f + exp(-src[i]));
+    float input_val = src[i];
+    float result;
+    if (input_val > upper_bound) {
+      result = 1.0f;
+    } else if (input_val < lower_bound) {
+      result = exp(input_val);
+    } else {
+      result = 1.0f / (1.0f + exp(-input_val));
+    }
+    dst[i] = result;
   }
   return NNACL_OK;
 }
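For context on the activation change: when x is above the new upper bound, exp(-x) is roughly one float ulp of 1.0f or smaller, so the exact quotient 1.0f / (1.0f + exp(-x)) already agrees with 1.0f to within about one ulp; when x is below the lower bound, sigmoid(x) = exp(x) / (1 + exp(x)) ≈ exp(x), with relative error under exp(-9) ≈ 1.2e-4. A minimal standalone sketch of that behavior (the BoundedSigmoid helper and probe values are illustrative, not part of the commit), comparing the bounded form against a double-precision reference:

```cpp
#include <cmath>
#include <cstdio>

// Illustrative copy of the bounded sigmoid from the patch (not the nnacl source itself).
static float BoundedSigmoid(float x) {
  const float upper_bound = 16.619047164916992188f;
  const float lower_bound = -9.0f;
  if (x > upper_bound) {
    return 1.0f;           // upper tail saturates to 1 within single precision
  } else if (x < lower_bound) {
    return std::exp(x);    // sigmoid(x) ~ exp(x) when exp(x) << 1
  }
  return 1.0f / (1.0f + std::exp(-x));
}

int main() {
  const float probes[] = {-20.0f, -9.5f, -1.0f, 0.0f, 1.0f, 16.7f, 30.0f};
  for (float x : probes) {
    // Double-precision reference shows how little the tail shortcuts drift.
    double ref = 1.0 / (1.0 + std::exp(-static_cast<double>(x)));
    std::printf("x = %7.2f  bounded = %.9g  reference = %.9g\n", x, BoundedSigmoid(x), ref);
  }
  return 0;
}
```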
@@ -31,8 +31,6 @@ using mindspore::schema::PrimitiveType_Gather;
 namespace mindspore::kernel {
 
 int GatherCPUKernel::Init() {
-  axis_ = (reinterpret_cast<GatherParameter *>(op_parameter_))->axis_;
-  batchDims_ = (reinterpret_cast<GatherParameter *>(op_parameter_))->batchDims_;
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -47,7 +45,7 @@ int GatherCPUKernel::DoGather(int task_id) {
   auto out_tensor = out_tensors_.at(0);
   auto input_ptr = reinterpret_cast<float *>(input_tensor->Data());
-  auto indices_ptr = reinterpret_cast<int *>(indices_tensor->Data());
+  auto indices_ptr = reinterpret_cast<float *>(indices_tensor->Data());
   auto output_ptr = reinterpret_cast<float *>(out_tensor->Data());
   auto input_int32 = reinterpret_cast<int32_t *>(input_tensor->Data());
@@ -56,26 +54,25 @@ int GatherCPUKernel::DoGather(int task_id) {
   auto in_shape = input_tensor->shape();
   int in_rank = in_shape.size();
   int indices_element_size = indices_tensor->ElementsNum();
-  const int limit = in_shape[axis_];
+  auto axis = (reinterpret_cast<GatherParameter *>(op_parameter_))->axis_;
+  const int limit = in_shape[axis];
   for (int i = 0; i < indices_element_size; ++i) {
-    if (indices_ptr[i] >= limit) {
-      MS_LOG(ERROR) << " indice data: " << indices_ptr[i] << " is not in [ 0, " << limit - 1 << " ]";
+    indices_data_[i] = static_cast<int>(indices_ptr[i]);
+    if (indices_data_[i] >= limit) {
+      MS_LOG(ERROR) << " indice data: " << indices_data_[i] << " is not in [ 0, " << limit - 1 << " ]";
       return RET_ERROR;
     }
   }
-  int outer_size = 1;
-  for (int i = 0; i < axis_; ++i) {
+  int outer_size = 1, inner_size = 1;
+  for (int i = 0; i < axis; ++i) {
     outer_size *= in_shape[i];
   }
-  int inner_size = 1;
-  for (int i = axis_ + 1; i < in_rank; ++i) {
+  for (int i = axis + 1; i < in_rank; ++i) {
     inner_size *= in_shape[i];
   }
-  int stride = UP_DIV(outer_size, thread_count_);
+  int stride = UP_DIV(outer_size, op_parameter_->thread_num_);
   int count = MSMIN(stride, outer_size - stride * task_id);
   auto thread_stride = stride * task_id;
@@ -83,17 +80,13 @@ int GatherCPUKernel::DoGather(int task_id) {
   if (input_tensor->data_type() == kNumberTypeInt32) {
     input_int32 += thread_stride * limit;
     output_int32 += thread_stride * indices_element_size;
-    error_code = GatherInt32(input_int32, count, inner_size, limit, indices_ptr, indices_element_size, output_int32);
+    error_code = GatherInt32(input_int32, count, inner_size, limit, indices_data_, indices_element_size, output_int32);
   } else {
     input_ptr += thread_stride * limit;
     output_ptr += thread_stride * indices_element_size;
-    error_code = Gather(input_ptr, count, inner_size, limit, indices_ptr, indices_element_size, output_ptr);
+    error_code = Gather(input_ptr, count, inner_size, limit, indices_data_, indices_element_size, output_ptr);
   }
-  if (error_code != RET_OK) {
-    return RET_ERROR;
-  }
-  return RET_OK;
+  return error_code;
 }
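The DoGather hunks above derive outer_size (product of dimensions before the gather axis), inner_size (product of dimensions after it) and limit (the size of the axis dimension), convert the float indices into the indices_data_ buffer, and hand everything to the nnacl Gather/GatherInt32 routines, whose bodies are not part of this diff. A minimal sketch of the memory layout those parameters imply (the GatherSketch name and loop structure are assumptions about the expected semantics, not the library's code):

```cpp
#include <cstring>

// Sketch of the layout implied by (outer_size, inner_size, limit, indices):
// input is [outer_size, limit, inner_size], output is [outer_size, indices_num, inner_size].
static int GatherSketch(const float *input, int outer_size, int inner_size, int limit,
                        const int *indices, int indices_num, float *output) {
  for (int o = 0; o < outer_size; ++o) {
    for (int i = 0; i < indices_num; ++i) {
      int idx = indices[i];
      if (idx < 0 || idx >= limit) {
        return -1;  // out-of-range index, mirroring the bounds check in DoGather
      }
      std::memcpy(output + (o * indices_num + i) * inner_size,
                  input + (o * limit + idx) * inner_size,
                  inner_size * sizeof(float));
    }
  }
  return 0;
}
```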
@@ -101,9 +94,8 @@ int GatherRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
   auto error_code = gather_kernel->DoGather(task_id);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "GatherRun error task_id[" << task_id << "] error_code[" << error_code << "]";
-    return RET_ERROR;
   }
-  return RET_OK;
+  return error_code;
 }
@@ -112,12 +104,19 @@ int GatherCPUKernel::Run() {
     MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret;
     return prepare_ret;
   }
-  int error_code = LiteBackendParallelLaunch(GatherRun, this, thread_count_);
+  auto indices_tensor = in_tensors_.at(1);
+  indices_data_ = reinterpret_cast<int *>(context_->allocator->Malloc(indices_tensor->ElementsNum() * sizeof(int)));
+  if (indices_data_ == nullptr) {
+    MS_LOG(ERROR) << "Memory allocation failed";
+    context_->allocator->Free(indices_data_);
+    return RET_ERROR;
+  }
+  int error_code = LiteBackendParallelLaunch(GatherRun, this, op_parameter_->thread_num_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Gather function error error_code[" << error_code << "]";
-    return RET_ERROR;
   }
-  return RET_OK;
+  return error_code;
 }
 
 kernel::LiteKernel *CpuGatherFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
@@ -27,7 +27,7 @@ class GatherCPUKernel : public LiteKernel {
   GatherCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                   const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                   const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), thread_count_(ctx->thread_num_) {}
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~GatherCPUKernel() override = default;
 
   int Init() override;
@@ -36,9 +36,7 @@ class GatherCPUKernel : public LiteKernel {
   int DoGather(int task_id);
 
  private:
-  int thread_count_;
-  int batchDims_;
-  int axis_;
+  int *indices_data_;
 };
 }  // namespace mindspore::kernel