Commit 3bf8d511 authored by yangruoqi713

fix bug: arm cpu fp32 op scale

Parent bb776efe
@@ -376,7 +376,7 @@ table BNGradInput {
     channels: int;
 }

 table Scale {
-    format: Format = 0;
+    axis: int;
 }
 table Eltwise {
...
@@ -28,12 +28,16 @@ int Nchw2Nhwc::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::Tensor *> outputs_) {
   auto output = outputs_.front();
   MS_ASSERT(output != nullptr);
   std::vector<int> nchw_shape = input->shape();
-  std::vector<int> nhwc_shape{nchw_shape};
-  nhwc_shape[NHWC_N] = nchw_shape[NCHW_N];
-  nhwc_shape[NHWC_H] = nchw_shape[NCHW_H];
-  nhwc_shape[NHWC_W] = nchw_shape[NCHW_W];
-  nhwc_shape[NHWC_C] = nchw_shape[NCHW_C];
-  output->set_shape(nhwc_shape);
+  if (nchw_shape.size() != 4) {
+    output->set_shape(nchw_shape);
+  } else {
+    std::vector<int> nhwc_shape{nchw_shape};
+    nhwc_shape[NHWC_N] = nchw_shape[NCHW_N];
+    nhwc_shape[NHWC_H] = nchw_shape[NCHW_H];
+    nhwc_shape[NHWC_W] = nchw_shape[NCHW_W];
+    nhwc_shape[NHWC_C] = nchw_shape[NCHW_C];
+    output->set_shape(nhwc_shape);
+  }
   output->SetFormat(schema::Format_NHWC);
   output->set_data_type(input->data_type());
   return RET_OK;
...
@@ -28,15 +28,18 @@ int Nhwc2Nchw::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::Tensor *> outputs_) {
   auto output = outputs_.front();
   MS_ASSERT(output != nullptr);
   std::vector<int> nhwc_shape = input->shape();
-  std::vector<int> nchw_shape{nhwc_shape};
-  nchw_shape[NCHW_N] = nhwc_shape[NHWC_N];
-  nchw_shape[NCHW_C] = nhwc_shape[NHWC_C];
-  nchw_shape[NCHW_H] = nhwc_shape[NHWC_H];
-  nchw_shape[NCHW_W] = nhwc_shape[NHWC_W];
-  output->set_shape(nchw_shape);
+  if (nhwc_shape.size() != 4) {
+    output->set_shape(nhwc_shape);
+  } else {
+    std::vector<int> nchw_shape{nhwc_shape};
+    nchw_shape[NCHW_N] = nhwc_shape[NHWC_N];
+    nchw_shape[NCHW_C] = nhwc_shape[NHWC_C];
+    nchw_shape[NCHW_H] = nhwc_shape[NHWC_H];
+    nchw_shape[NCHW_W] = nhwc_shape[NHWC_W];
+    output->set_shape(nchw_shape);
+  }
   output->SetFormat(schema::Format_NCHW);
   output->set_data_type(input->data_type());
   return RET_OK;
 }
 }  // namespace mindspore::lite
@@ -86,7 +86,7 @@ void StridedSlice::ApplyBeginMask() {
 void StridedSlice::ApplyEndMask() {
   for (int i = 0; i < ndim_; i++) {
-    if (ends_.at(i)) {
+    if (ends_mask_.at(i)) {
       ends_.at(i) = in_shape_.at(i);
     }
   }
...
@@ -753,15 +753,7 @@ OpParameter *PopulateScaleParameter(const lite::Primitive *primitive) {
     MS_LOG(ERROR) << "value_as_Scale return nullptr";
     return nullptr;
   }
-  // NCHW todo use enum
-  if (param->format() == schema::Format_NCHW) {
-    scale_param->axis_ = 1;
-    scale_param->num_axis_ = 1;
-  } else if (param->format() == schema::Format_NHWC) {
-    scale_param->axis_ = 3;
-    scale_param->num_axis_ = 1;
-  }
+  scale_param->axis_ = param->axis();
   return reinterpret_cast<OpParameter *>(scale_param);
 }
...
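Note: with the schema field changed from format to axis, PopulateScaleParameter no longer guesses the scale axis from the tensor layout; it forwards the axis the converter recorded. The removed branch amounted to the per-channel special case below (a hedged illustration, not code from the patch; the helper name is made up):

// Hypothetical helper showing what the deleted format-based branch computed:
// the channel axis is 1 for NCHW tensors and 3 for NHWC tensors.
int ChannelAxisForFormat(bool is_nhwc) { return is_nhwc ? 3 : 1; }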
@@ -278,7 +278,7 @@ int Convolution3x3FP16CPUKernel::Run() {
   auto out_tensor = outputs_.at(kOutputIndex);
   auto output_addr = reinterpret_cast<float *>(out_tensor->Data());
   for (int j = 0; j < out_tensor->ElementsNum(); ++j) {
-    output_addr[j] = (float)fp16_out_[j];
+    output_addr[j] = (reinterpret_cast<float *>(fp16_out_))[j];
   }
   return RET_OK;
 }
...
@@ -29,85 +29,91 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Scale;

 namespace mindspore::kernel {
-namespace {
-constexpr int kScaleInputNum = 1;
-constexpr int kScaleOutputNum = 1;
-}  // namespace
-
-int ScaleCPUKernel::Init() {
+int ScaleCPUKernel::InitScaleOffset() {
   auto param = reinterpret_cast<ScaleParameter *>(opParameter);
-  auto in_tensor = inputs_.front();
-  auto scale = inputs_.at(1);
-
-  if (inputs_.size() < 2 || inputs_.size() > 3) {
-    MS_LOG(ERROR) << "inputs to Scale operator should be 2 or 3, but " << inputs_.size() << " is given.";
-    return RET_ERROR;
+  auto scale_tensor = inputs_.at(1);
+  float *scale_ptr = reinterpret_cast<float *>(inputs_.at(1)->Data());
+  if (scale_ptr != nullptr) {
+    scale_ = reinterpret_cast<float *>(malloc(scale_tensor->ElementsNum() * sizeof(float)));
+    if (scale_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc buffer failed.";
+      return RET_ERROR;
+    }
+    memcpy(scale_, scale_ptr, scale_tensor->ElementsNum() * sizeof(float));
+  } else {
+    scale_ = nullptr;
   }
-  if (param->axis_ < 0) {
-    MS_LOG(ERROR) << "axis illegal.";
-    return RET_ERROR;
+  if (inputs_.size() == 3) {
+    auto offset_tensor = inputs_.at(1);
+    offset_ = reinterpret_cast<float *>(malloc(offset_tensor->ElementsNum() * sizeof(float)));
+    if (offset_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc buffer failed.";
+      return RET_ERROR;
+    }
+    param->has_offset_ = true;
+  } else {
+    offset_ = nullptr;
+    param->has_offset_ = false;
   }
-  if (param->num_axis_ < 1 || param->num_axis_ + param->axis_ >= in_tensor->shape().size()) {
-    MS_LOG(ERROR) << "number of axis illegal";
+  return RET_OK;
+}
+
+int ScaleCPUKernel::InitParameter() {
+  auto param = reinterpret_cast<ScaleParameter *>(opParameter);
+  auto in_tensor = inputs_.at(0);
+  auto in_shape = in_tensor->shape();
+  auto scale_tensor = inputs_.at(1);
+  auto scale_shape = scale_tensor->shape();
+
+  if (scale_shape.size() + param->axis_ > in_shape.size()) {
+    MS_LOG(ERROR) << "Scale tensor shape is incorrect.";
     return RET_ERROR;
   }
-
-  param->channel_ = 1;
-  param->out_count_ = 1;
-  param->in_stride_ = 1;
-  int cur_axis;
-  for (cur_axis = 0; cur_axis < param->axis_; cur_axis++) {
-    param->out_count_ *= in_tensor->shape()[cur_axis];
+  param->outer_size_ = 1;
+  param->axis_size_ = 1;
+  param->inner_size_ = 1;
+  for (int i = 0; i < param->axis_; i++) {
+    param->outer_size_ *= in_shape[i];
   }
-  for (int i = 0; i < param->num_axis_; i++) {
-    param->channel_ *= in_tensor->shape()[(cur_axis++)];
+  for (int i = 0; i < scale_shape.size(); i++) {
+    if (in_shape[i + param->axis_] != scale_shape[i]) {
+      MS_LOG(ERROR) << "Scale tensor shape is incorrect.";
+      return RET_ERROR;
+    }
+    param->axis_size_ *= in_shape[i + param->axis_];
   }
-  for (int i = cur_axis; i < in_tensor->shape().size(); i++) {
-    param->in_stride_ *= in_tensor->shape()[cur_axis];
+  for (int i = param->axis_ + scale_shape.size(); i < in_shape.size(); i++) {
+    param->inner_size_ *= in_shape[i];
   }
-  if (scale->shape().back() != param->channel_ || scale->shape().size() > 2) {
-    MS_LOG(ERROR) << "scale shape illegal.";
+  return RET_OK;
+}
+
+int ScaleCPUKernel::Init() {
+  if (inputs_.size() < 2 || inputs_.size() > 3) {
+    MS_LOG(ERROR) << "inputs to Scale operator should be 2 or 3, but " << inputs_.size() << " is given.";
     return RET_ERROR;
   }
-  if (inputs_.size() == 3) {
-    if ((inputs_.at(2))->shape().back() != param->channel_ || (inputs_.at(2))->shape().size() > 2) {
-      MS_LOG(ERROR) << "offset shape illegal.";
-      return RET_ERROR;
-    }
-  }
-
-  input_ptr_ = reinterpret_cast<float *>(inputs_.front()->Data());
-  scale_ = reinterpret_cast<float *>(inputs_.at(1)->Data());
-  if (inputs_.size() == 3) {
-    offset_ = reinterpret_cast<float *>(inputs_.at(2)->Data());
-    has_offset_ = true;
-  } else {
-    offset_ = nullptr;
-    has_offset_ = false;
+
+  auto ret = InitParameter();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Scale fp32 InitParameter failed.";
+    return RET_ERROR;
   }
-  output_ptr_ = reinterpret_cast<float *>(outputs_.front()->Data());
-  num_unit_ = param->out_count_ * param->channel_;
-  unit_size_ = param->in_stride_;
-  thread_n_num_ = MSMIN(thread_num_, num_unit_);
-  thread_n_stride_ = UP_DIV(num_unit_, thread_n_num_);
+
+  ret = InitScaleOffset();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Scale fp32 InitScaleOffset failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }

+int ScaleCPUKernel::ReSize() { return RET_OK; }
+
 int ScaleCPUKernel::Scale(int task_id) {
-  int num_unit_thread = MSMIN(thread_n_stride_, num_unit_ - task_id * thread_n_stride_);
-  if (num_unit_thread <= 0) {
-    return RET_OK;
-  }
-  int thread_offset = task_id * thread_n_stride_;
-  int ret;
-  if (has_offset_) {
-    ret = DoScale(input_ptr_, output_ptr_, scale_, offset_, thread_offset, num_unit_thread,
-                  reinterpret_cast<ScaleParameter *>(opParameter));
-  } else {
-    ret = DoScale(input_ptr_, output_ptr_, scale_, thread_offset, num_unit_thread,
-                  reinterpret_cast<ScaleParameter *>(opParameter));
-  }
+  auto ret =
+    DoScale(input_ptr_, output_ptr_, scale_, offset_, task_id, reinterpret_cast<ScaleParameter *>(opParameter));
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale error task_id[" << task_id << "] error_code[" << ret << "]";
@@ -116,11 +122,9 @@ int ScaleCPUKernel::Scale(int task_id) {
   return RET_OK;
 }

-int ScaleCPUKernel::ReSize() { return RET_OK; }
-
 int ScaleRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
-  auto g_kernel = reinterpret_cast<ScaleCPUKernel *>(cdata);
-  auto ret = g_kernel->Scale(task_id);
+  auto scale = reinterpret_cast<ScaleCPUKernel *>(cdata);
+  auto ret = scale->Scale(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ScaleRun error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
@@ -129,7 +133,16 @@ int ScaleRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
 }

 int ScaleCPUKernel::Run() {
-  int ret = LiteBackendParallelLaunch(ScaleRun, this, thread_n_num_);
+  auto in_tensor = inputs_.front();
+  input_ptr_ = reinterpret_cast<float *>(in_tensor->Data());
+  if (scale_ == nullptr) {
+    auto scale_tensor = inputs_[1];
+    scale_ = reinterpret_cast<float *>(scale_tensor->Data());
+  }
+  auto out_tensor = outputs_.front();
+  output_ptr_ = reinterpret_cast<float *>(out_tensor->Data());
+
+  int ret = LiteBackendParallelLaunch(ScaleRun, this, opParameter->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
     return RET_ERROR;
@@ -160,7 +173,6 @@ kernel::LiteKernel *CpuScaleFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
     delete kernel;
     return nullptr;
   }
-
   return kernel;
 }
...
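For reference, the new InitParameter() folds the input shape into three sizes around the scale axis: dimensions before axis_ go into outer_size_, the dimensions covered by the scale tensor into axis_size_, and the rest into inner_size_. A minimal standalone sketch of that decomposition (shapes and the helper name are illustrative, not taken from the patch):

#include <cstdio>
#include <vector>

// Mirrors the size computation in ScaleCPUKernel::InitParameter().
// Example: in_shape = {2, 3, 4, 5}, axis = 1, scale_shape = {3, 4}
//   -> outer_size = 2, axis_size = 3 * 4 = 12, inner_size = 5.
void DecomposeForScale(const std::vector<int> &in_shape, const std::vector<int> &scale_shape, int axis,
                       int *outer_size, int *axis_size, int *inner_size) {
  *outer_size = 1;
  *axis_size = 1;
  *inner_size = 1;
  for (int i = 0; i < axis; i++) {
    *outer_size *= in_shape[i];
  }
  for (size_t i = 0; i < scale_shape.size(); i++) {
    *axis_size *= in_shape[axis + i];  // the kernel also checks in_shape[axis + i] == scale_shape[i]
  }
  for (size_t i = axis + scale_shape.size(); i < in_shape.size(); i++) {
    *inner_size *= in_shape[i];
  }
}

int main() {
  int outer = 0, axis_size = 0, inner = 0;
  DecomposeForScale({2, 3, 4, 5}, {3, 4}, 1, &outer, &axis_size, &inner);
  printf("outer=%d axis=%d inner=%d\n", outer, axis_size, inner);  // prints: outer=2 axis=12 inner=5
  return 0;
}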
@@ -26,27 +26,24 @@ class ScaleCPUKernel : public LiteKernel {
  public:
   explicit ScaleCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                           const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), thread_num_(ctx->thread_num_) {}
+      : LiteKernel(parameter, inputs, outputs) {
+    opParameter->thread_num_ = ctx->thread_num_;
+  }
   ~ScaleCPUKernel() override = default;

   int Init() override;
   int ReSize() override;
   int Run() override;
+  int InitParameter();
+  int InitScaleOffset();
   int Scale(int task_id);

  private:
-  int thread_num_;
-  int thread_n_stride_;
-  int thread_n_num_;
-  int num_unit_;
-  int unit_size_;
   float *input_ptr_;
   float *scale_;
   float *offset_;
   float *output_ptr_;
-  bool has_offset_;
 };
 }  // namespace mindspore::kernel

 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SCALE_H_
@@ -17,37 +17,33 @@
 #include "src/runtime/kernel/arm/opclib/scale.h"
 #include "src/runtime/kernel/arm/opclib/errorcode.h"

-int DoScale(float *in_data, float *out_data, float *scale, float *offset, int units_offset, int num_unit,
-            ScaleParameter *scale_param) {
+int DoScale(float *in_data, float *out_data, float *scale, float *offset, int task_id, ScaleParameter *scale_param) {
   if (in_data == nullptr || out_data == nullptr || scale == nullptr || offset == nullptr || scale_param == nullptr) {
     return OPCLIB_ERR;
   }
-  int in_stride_j = units_offset * scale_param->in_stride_;
-  for (int j = units_offset; j < units_offset + num_unit; j++) {
-    int channel = j % scale_param->channel_;
-    for (int k = 0; k < scale_param->in_stride_; k++) {
-      out_data[in_stride_j + k] = in_data[in_stride_j + k] * scale[channel] + offset[channel];
-    }
-    in_stride_j = in_stride_j + scale_param->in_stride_;
-  }
-  return OPCLIB_OK;
-}
-
-int DoScale(float *in_data, float *out_data, float *scale, int units_offset, int num_unit,
-            ScaleParameter *scale_param) {
-  if (in_data == nullptr || out_data == nullptr || scale == nullptr || scale_param == nullptr) {
-    return OPCLIB_ERR;
-  }
-  int in_stride_j = units_offset * scale_param->in_stride_;
-  for (int j = units_offset; j < units_offset + num_unit; j++) {
-    int channel = j % scale_param->channel_;
-    for (int k = 0; k < scale_param->in_stride_; k++) {
-      out_data[in_stride_j + k] = in_data[in_stride_j + k] * scale[channel];
+  if (scale_param->has_offset_) {
+    for (int out = task_id; out < scale_param->outer_size_; out += scale_param->op_parameter_.thread_num_) {
+      int out_offset = out * scale_param->axis_size_ * scale_param->inner_size_;
+      for (int i = 0; i < scale_param->axis_size_; i++) {
+        int axis_offset = out_offset + i * scale_param->inner_size_;
+        for (int in = 0; in < scale_param->inner_size_; in++) {
+          int in_offset = axis_offset + in;
+          out_data[in_offset] = in_data[in_offset] * scale[i] + offset[i];
+        }
+      }
+    }
+  } else {
+    for (int out = task_id; out < scale_param->outer_size_; out += scale_param->op_parameter_.thread_num_) {
+      int out_offset = out * scale_param->axis_size_ * scale_param->inner_size_;
+      for (int i = 0; i < scale_param->axis_size_; i++) {
+        int axis_offset = out_offset + i * scale_param->inner_size_;
+        for (int in = 0; in < scale_param->inner_size_; in++) {
+          int in_offset = axis_offset + in;
+          out_data[in_offset] = in_data[in_offset] * scale[i];
+        }
+      }
     }
-    in_stride_j = in_stride_j + scale_param->in_stride_;
   }
   return OPCLIB_OK;
 }
@@ -21,15 +21,13 @@
 struct ScaleParameter {
   OpParameter op_parameter_;
-  int out_count_;
-  int channel_;
-  int in_stride_;
+  int outer_size_;
+  int axis_size_;
+  int inner_size_;
   int axis_;
-  int num_axis_;
-  // todo yangruoqi: axis
+  bool has_offset_;
 };

-int DoScale(float *in_data, float *out_data, float *scale, float *offset, int units_offset, int num_unit,
-            ScaleParameter *scale_param);
-int DoScale(float *in_data, float *out_data, float *scale, int units_offset, int num_unit, ScaleParameter *scale_param);
+int DoScale(float *in_data, float *out_data, float *scale, float *offset, int task_id, ScaleParameter *scale_param);

 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_SCALE_H_
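With those fields, the rewritten DoScale() lets each task handle outer indices task_id, task_id + thread_num, task_id + 2 * thread_num, and so on, then walks the axis and inner dimensions with flat offset arithmetic. A single-threaded reference loop, written only to make the indexing explicit (names are illustrative, not from the patch):

// out[(o * axis_size + i) * inner_size + k] = in[same index] * scale[i] + offset[i]
void ScaleReference(const float *in, float *out, const float *scale, const float *offset,
                    int outer_size, int axis_size, int inner_size) {
  for (int o = 0; o < outer_size; o++) {
    int out_offset = o * axis_size * inner_size;
    for (int i = 0; i < axis_size; i++) {
      int axis_offset = out_offset + i * inner_size;
      for (int k = 0; k < inner_size; k++) {
        // offset may be absent (has_offset_ == false); treat it as zero in that case
        out[axis_offset + k] = in[axis_offset + k] * scale[i] + (offset != nullptr ? offset[i] : 0.0f);
      }
    }
  }
}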
@@ -22,12 +22,9 @@ const int32_t DIM_DEFAULT_SIZE = 4;
 namespace mindspore {
 namespace lite {
-STATUS CaffeScaleParser::Parse(const caffe::LayerParameter &proto,
-                               const caffe::LayerParameter &weight,
-                               schema::CNodeT *op,
-                               std::vector<schema::TensorT *> *weightVec) {
+STATUS CaffeScaleParser::Parse(const caffe::LayerParameter &proto, const caffe::LayerParameter &weight,
+                               schema::CNodeT *op, std::vector<schema::TensorT *> *weightVec) {
   std::unique_ptr<schema::ScaleT> attr(new schema::ScaleT());
-  attr->format = schema::Format_NCHW;
   if (weight.blobs_size() + weight.bottom_size() < 2) {
     // MS_LOGE("Scale bottom size:%d, blobs size:%d invalid in layer %s", weight.bottom_size(), weight.blobs_size(),
@@ -36,12 +33,14 @@ STATUS CaffeScaleParser::Parse(const caffe::LayerParameter &proto,
   }

   const caffe::ScaleParameter scaleParam = weight.scale_param();
-  int32_t axis = scaleParam.axis();  // NCHW_DIM_C;
-  uint32_t axis_index = NCHW_DIM_C;
-
-  if (GetAxisIndex(axis, &axis_index)) {
-    // MS_LOGE("scale get axis failed for layer %s.", weight.name().c_str());
+  int axis = NCHW_DIM_C;
+  if (scaleParam.has_axis()) {
+    uint32_t axis_index = NCHW_DIM_C;
+    if (GetAxisIndex(scaleParam.axis(), &axis_index)) {
+      // MS_LOGE("scale get axis failed for layer %s.", weight.name().c_str());
+    }
   }
+  attr->axis = axis;

   // parse scale
   // todo expect only weight as scale not bias
@@ -94,4 +93,3 @@ STATUS CaffeScaleParser::GetAxisIndex(const int32_t &axis, uint32_t *axis_index) {
 CaffeNodeRegistrar g_caffeScaleParser("Scale", new CaffeScaleParser());
 }  // namespace lite
 }  // namespace mindspore