Commit ecaa82d1 authored by Ruilong Liu, committed by GitHub

Merge pull request #515 from halsay/develop

fix lrn op in mali
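The recurring change across this diff: each MALI GPU kernel now calls `Bypass_acl` (which in turn calls `InitAclLayer`) once in `Init`, instead of re-checking bypass and rebuilding the ACL layer on every `Compute`. A minimal, self-contained sketch of that lifecycle change, using toy stand-ins rather than the real paddle-mobile classes:

```cpp
#include <iostream>

// Toy stand-ins for the ACL operator wrappers touched in this PR; the real
// classes live in paddle-mobile's operators/kernel/mali/acl_* files.
struct ToyAclOp {
  bool force_bypass = false;
  bool Bypass_acl() {
    // In this PR, Bypass_acl() now also calls InitAclLayer(), so the ACL
    // layer is configured exactly once, at Init() time.
    return force_bypass;  // true => ACL cannot (or should not) run this op
  }
  void RunAcl(const float* in, float* out) { *out = *in + 1.f; }  // stub
};

bool Init(ToyAclOp* op) {
  if (op->Bypass_acl()) {  // one-time check, moved here from Compute()
    std::cout << "init acl failed" << std::endl;
    return false;
  }
  return true;
}

void Compute(ToyAclOp* op, const float* in, float* out) {
  op->RunAcl(in, out);  // hot path: no per-call InitAclLayer/Bypass_acl
}

int main() {
  ToyAclOp op;
  float x = 1.f, y = 0.f;
  if (Init(&op)) Compute(&op, &x, &y);
  std::cout << y << std::endl;  // prints 2
  return 0;
}
```

With setup hoisted into `Init`, each `Compute` below reduces to (per-batch) `RunAcl` calls on the cached arguments.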
@@ -35,7 +35,6 @@ namespace ops = paddle_mobile::operators;
 REGISTER_OPERATOR_CPU(batch_norm, ops::BatchNormOp);
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
-USE_OP_MALI_GPU(batch_norm);
 REGISTER_OPERATOR_MALI_GPU(batch_norm, ops::BatchNormOp);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
......
@@ -76,7 +76,7 @@ namespace ops = paddle_mobile::operators;
 REGISTER_OPERATOR_CPU(fusion_conv_add, ops::FusionConvAddOp);
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
-REGISTER_OPERATOR_MALI_GPU(conv_add, ops::FusionConvAddOp);
+REGISTER_OPERATOR_MALI_GPU(fusion_conv_add, ops::FusionConvAddOp);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
 #endif
......
@@ -96,7 +96,7 @@ static framework::FusionOpRegistrar convadd_registrar(
 USE_OP_CPU(fusion_conv_add);
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
-USE_OP_MALI_GPU(conv_add);
+USE_OP_MALI_GPU(fusion_conv_add);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
 #endif
......
@@ -80,7 +80,7 @@ namespace ops = paddle_mobile::operators;
 REGISTER_OPERATOR_CPU(fusion_fc, ops::FusionFcOp);
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
-REGISTER_OPERATOR_MALI_GPU(fc, ops::FusionFcOp);
+REGISTER_OPERATOR_MALI_GPU(fusion_fc, ops::FusionFcOp);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
 #endif
......
@@ -78,7 +78,7 @@ extern framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
 #ifndef CONV_CPU_REGISTER
 #define CONV_CPU_REGISTER
-static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
+extern framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
 #endif
 #endif
@@ -93,7 +93,7 @@ static framework::FusionOpRegistrar fc_registrar(new FusionFcMatcher());
 USE_OP_CPU(fusion_fc);
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
-USE_OP_MALI_GPU(fc);
+USE_OP_MALI_GPU(fusion_fc);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
 #endif
......
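The `conv_add` → `fusion_conv_add` and `fc` → `fusion_fc` renames above matter because a `USE_OP_*` macro must reference exactly the symbol that the matching `REGISTER_OPERATOR_*` macro defines; with mismatched names the registration can be dropped at link time. A toy illustration of that symbol-pairing idea, using hypothetical simplified macros (the real paddle-mobile registry macros are more involved):

```cpp
#include <iostream>

// Hypothetical, simplified stand-ins for REGISTER_OPERATOR_* / USE_OP_*.
// REGISTER defines a uniquely named global whose initialization would insert
// the op into a registry; USE references that global so the linker keeps the
// translation unit that performed the registration.
#define REGISTER_OP(name) int op_registrar_##name = 1;
#define USE_OP(name)               \
  extern int op_registrar_##name;  \
  static int* use_op_##name = &op_registrar_##name;

REGISTER_OP(fusion_fc)  // defines op_registrar_fusion_fc
USE_OP(fusion_fc)       // OK: the token matches the registered symbol
// USE_OP(fc)           // would not link: op_registrar_fc is never defined

int main() {
  std::cout << *use_op_fusion_fc << std::endl;  // 1: registration was kept
  return 0;
}
```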
File mode changed from 100644 to 100755
@@ -225,6 +225,7 @@ class AclParameters {
   bool is_global_pool;
   bool is_channel_concat;
+  bool is_bypass;
   std::vector<framework::LoDTensor *> in_tensor;
 };
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
@@ -71,6 +71,7 @@ class AclBatchNormOp : public acl::ACLOperator {
   bool Bypass_acl(const BatchNormParam& param) {
     bool bypass_acl = false;
     AclParametersByContext(param);
+    InitAclLayer(param);
     // for performance, more groups impact GPU performance
     if (this->force_bypass_acl_path_) {
       bypass_acl = true;
@@ -135,6 +136,10 @@ bool BatchNormKernel<GPU_MALI, float>::Init(BatchNormParam* param) {
     acl_op = new AclBatchNormOp<GPU_MALI, float>();
     this->SetAclOp((void*)acl_op, (void*)this);
   }
+  if (acl_op->Bypass_acl(*param)) {
+    std::cout << "init acl failed" << std::endl;
+    return false;
+  }
   return true;
 }
@@ -147,15 +152,8 @@ void BatchNormKernel<GPU_MALI, float>::Compute(
   if (acl_op == nullptr) {
     return;
   }
-  if (acl_op->Bypass_acl(param)) {
-    std::cout << "init acl failed" << std::endl;
-    return;
-  }
   acl::AclParameters& args = acl_op->getargs();
-  const float* input_data = (const float*)args.input_data;
-  const float* output_data = (const float*)args.output_data;
-  acl_op->InitAclLayer(param);
-  acl_op->RunAcl((void*)input_data, (void*)output_data);
+  acl_op->RunAcl(args.input_data, args.output_data);
 }
 template class BatchNormKernel<GPU_MALI, float>;
......
@@ -50,8 +50,6 @@ class AclConcatOp : public acl::ACLOperator {
     T type;
     for (int i = 0; i < input_data->size(); i++) {
-      const T* idata = (*input_data)[i]->data<T>();
-      const T* pdata = (*input_data)[i]->data<T>();
       int in_batch = (*input_data)[i]->dims()[0];
       int in_channels = (*input_data)[i]->dims()[1];
       int in_width = (*input_data)[i]->dims()[2];
@@ -75,6 +73,7 @@ class AclConcatOp : public acl::ACLOperator {
   bool Bypass_acl(const ConcatParam& param) {
     bool bypass_acl = false;
     AclParametersByContext(param);
+    InitAclLayer(param);
     // for performance, more groups impact GPU performance
     if (this->force_bypass_acl_path_ || !args.is_channel_concat) {
       bypass_acl = true;
@@ -103,13 +102,17 @@ class AclConcatOp : public acl::ACLOperator {
 };
 template <>
-bool ConcatKernel<GPU_MALI, float>::Init(const ConcatParam& param) const {
+bool ConcatKernel<GPU_MALI, float>::Init(ConcatParam* param) {
   AclConcatOp<GPU_MALI, float>* acl_op =
       reinterpret_cast<AclConcatOp<GPU_MALI, float>*>(this->GetAclOp());
   if (acl_op == nullptr) {
     acl_op = new AclConcatOp<GPU_MALI, float>();
     this->SetAclOp((void*)acl_op, (void*)this);
   }
+  if (acl_op->Bypass_acl(*param)) {
+    std::cout << "init acl failed" << std::endl;
+    return false;
+  }
   return true;
 }
@@ -121,15 +124,8 @@ void ConcatKernel<GPU_MALI, float>::Compute(const ConcatParam& param) const {
   if (acl_op == nullptr) {
     return;
   }
-  if (acl_op->Bypass_acl(param)) {
-    std::cout << "init acl failed" << std::endl;
-    return;
-  }
   acl::AclParameters& args = acl_op->getargs();
-  std::vector<framework::LoDTensor*> temp_data = args.in_tensor;
-  const float* output_data = (const float*)args.output_data;
-  acl_op->InitAclLayer(param);
-  acl_op->RunAcl(temp_data, (void*)output_data);
+  acl_op->RunAcl(args.in_tensor, args.output_data);
 }
 template class ConcatKernel<GPU_MALI, float>;
......
@@ -55,7 +55,8 @@ class AclConvAddOp : public acl::ACLOperator {
     set_operator_init_done();
     this->force_bypass_acl_path_ = false;
-    check_direct_conv();
+    // check_direct_conv();
+    group() = args.num_group;
     //[kernel_x, kernel_y, IFM, OFM]
     new_tensor(weights(), weights_shape, args.weight_data);
     //[OFM]
@@ -63,8 +64,6 @@ class AclConvAddOp : public acl::ACLOperator {
       new_tensor(biases(), biases_shape, args.biases_data);
     }
-    group() = args.num_group;
     //[width, height, IFM]
     new_tensor(input(), input_shape, args.input_data);
     //[width, height, OFM]
@@ -79,6 +78,7 @@ class AclConvAddOp : public acl::ACLOperator {
   bool Bypass_acl(const FusionConvAddParam& param) {
     bool bypass_acl = false;
     AclParametersByContext(param);
+    InitAclLayer(param);
     // for performance, more groups impact GPU performance
     if (this->force_bypass_acl_path_ || args.num_group >= 5) {
       bypass_acl = true;
@@ -196,14 +196,17 @@ class AclConvAddOp : public acl::ACLOperator {
 };
 template <>
-bool ConvAddKernel<GPU_MALI, float>::Init(
-    const FusionConvAddParam& param) const {
+bool ConvAddKernel<GPU_MALI, float>::Init(FusionConvAddParam* param) {
   AclConvAddOp<GPU_MALI, float>* acl_op =
       reinterpret_cast<AclConvAddOp<GPU_MALI, float>*>(this->GetAclOp());
   if (acl_op == nullptr) {
     acl_op = new AclConvAddOp<GPU_MALI, float>();
     this->SetAclOp((void*)acl_op, (void*)this);
   }
+  if (acl_op->Bypass_acl(*param)) {
+    std::cout << "init acl failed" << std::endl;
+    return false;
+  }
   return true;
 }
@@ -216,15 +219,9 @@ void ConvAddKernel<GPU_MALI, float>::Compute(
   if (acl_op == nullptr) {
     return;
   }
-  if (acl_op->Bypass_acl(param)) {
-    std::cout << "init acl failed" << std::endl;
-    return;
-  }
   acl::AclParameters& args = acl_op->getargs();
-  const float* input_data = (const float*)args.input_data;
-  const float* output_data = (const float*)args.output_data;
-  acl_op->InitAclLayer(param);
-  acl_op->RunAcl((void*)input_data, (void*)output_data);
+  acl_op->RunAcl(args.input_data, args.output_data);
 }
 template class ConvAddKernel<GPU_MALI, float>;
......
@@ -79,6 +79,7 @@ class AclConvOp : public acl::ACLOperator {
   bool Bypass_acl(const ConvParam& param) {
     bool bypass_acl = false;
     AclParametersByContext(param);
+    InitAclLayer(param);
     // for performance, more groups impact GPU performance
     if (this->force_bypass_acl_path_ || args.num_group >= 5) {
       bypass_acl = true;
@@ -202,6 +203,10 @@ bool ConvKernel<GPU_MALI, float>::Init(ConvParam* param) {
     acl_op = new AclConvOp<GPU_MALI, float>();
     this->SetAclOp((void*)acl_op, (void*)this);
   }
+  if (acl_op->Bypass_acl(*param)) {
+    std::cout << "init acl failed" << std::endl;
+    return false;
+  }
   return true;
 }
@@ -213,15 +218,8 @@ void ConvKernel<GPU_MALI, float>::Compute(const ConvParam& param) const {
   if (acl_op == nullptr) {
     return;
   }
-  if (acl_op->Bypass_acl(param)) {
-    std::cout << "init acl failed" << std::endl;
-    return;
-  }
   acl::AclParameters& args = acl_op->getargs();
-  const float* input_data = (const float*)args.input_data;
-  const float* output_data = (const float*)args.output_data;
-  acl_op->InitAclLayer(param);
-  acl_op->RunAcl((void*)input_data, (void*)output_data);
+  acl_op->RunAcl(args.input_data, args.output_data);
 }
 template class ConvKernel<GPU_MALI, float>;
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
@@ -20,6 +20,7 @@ limitations under the License. */
 #ifdef PADDLE_MOBILE_MALI_GPU
 #include "acl_operator.h"
 #include "framework/operator.h"
+#include "operators/kernel/central-arm-func/lrn_arm_func.h"
 #include "operators/op_param.h"
 namespace paddle_mobile {
@@ -59,12 +60,15 @@ class AclLrnOp : public acl::ACLOperator {
     acl_configure(lrn, this, norm_info);
   }
+  void Set_bypass(bool bypass) { args.is_bypass = bypass; }
   void RunAcl(void* input, void* output) {
     acl::ACLOperator::acl_run(input, output);
   }
   bool Bypass_acl(const LrnParam& param) {
     bool bypass_acl = false;
     AclParametersByContext(param);
+    InitAclLayer(param);
     // for performance, more groups impact GPU performance
     if (this->force_bypass_acl_path_) {
       bypass_acl = true;
@@ -107,13 +111,18 @@ class AclLrnOp : public acl::ACLOperator {
 };
 template <>
-bool LrnKernel<GPU_MALI, float>::Init(const LrnParam& param) const {
+bool LrnKernel<GPU_MALI, float>::Init(LrnParam* param) {
   AclLrnOp<GPU_MALI, float>* acl_op =
       reinterpret_cast<AclLrnOp<GPU_MALI, float>*>(this->GetAclOp());
   if (acl_op == nullptr) {
     acl_op = new AclLrnOp<GPU_MALI, float>();
     this->SetAclOp((void*)acl_op, (void*)this);
   }
+  if (acl_op->Bypass_acl(*param)) {
+    acl_op->Set_bypass(true);
+    std::cout << "init acl failed" << std::endl;
+    return true;
+  }
   return true;
 }
@@ -125,14 +134,14 @@ void LrnKernel<GPU_MALI, float>::Compute(const LrnParam& param) const {
   if (acl_op == nullptr) {
     return;
   }
-  if (acl_op->Bypass_acl(param)) {
-    std::cout << "init acl failed" << std::endl;
+  acl::AclParameters& args = acl_op->getargs();
+  if (args.is_bypass) {
+    std::cout << "bypass op" << std::endl;
+    LrnCompute<float>(param);
     return;
   }
-  acl::AclParameters& args = acl_op->getargs();
   const float* input_data = (const float*)args.input_data;
   const float* output_data = (const float*)args.output_data;
-  acl_op->InitAclLayer(param);
   for (int n = 0; n < args.batch; ++n) {
     acl_op->RunAcl((void*)input_data, (void*)output_data);
     input_data += args.in_depth * args.in_cols * args.in_rows;
......
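Note that LRN is the one kernel where a failed ACL setup does not fail `Init`: it records the decision in the new `is_bypass` flag and still returns true, and `Compute` then falls back to the CPU path `LrnCompute<float>` pulled in via the new lrn_arm_func.h include. A toy sketch of that dispatch, with stub functions in place of the real implementations:

```cpp
#include <iostream>

struct Args { bool is_bypass = false; };  // mirrors AclParameters::is_bypass

// Stand-ins: the real CPU fallback is LrnCompute<float> from
// operators/kernel/central-arm-func/lrn_arm_func.h.
void LrnComputeCpu() { std::cout << "cpu lrn" << std::endl; }
void RunAclLrn() { std::cout << "gpu lrn" << std::endl; }

void Compute(const Args& args) {
  if (args.is_bypass) {  // decision was made once, in Init()
    LrnComputeCpu();     // CPU fallback keeps the op functional
    return;
  }
  RunAclLrn();
}

int main() {
  Compute(Args{});      // prints "gpu lrn"
  Compute(Args{true});  // prints "cpu lrn"
  return 0;
}
```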
File mode changed from 100644 to 100755
@@ -82,6 +82,7 @@ class AclPoolOp : public acl::ACLOperator {
   bool Bypass_acl(const PoolParam& param) {
     bool bypass_acl = false;
     AclParametersByContext(param);
+    InitAclLayer(param);
     // for performance, more groups impact GPU performance
     if (this->force_bypass_acl_path_) {
       bypass_acl = true;
@@ -179,13 +180,17 @@ class AclPoolOp : public acl::ACLOperator {
 };
 template <>
-bool PoolKernel<GPU_MALI, float>::Init(const PoolParam& param) const {
+bool PoolKernel<GPU_MALI, float>::Init(PoolParam* param) {
   AclPoolOp<GPU_MALI, float>* acl_op =
       reinterpret_cast<AclPoolOp<GPU_MALI, float>*>(this->GetAclOp());
   if (acl_op == nullptr) {
     acl_op = new AclPoolOp<GPU_MALI, float>();
     this->SetAclOp((void*)acl_op, (void*)this);
   }
+  if (acl_op->Bypass_acl(*param)) {
+    std::cout << "init acl failed" << std::endl;
+    return false;
+  }
   return true;
 }
@@ -197,14 +202,9 @@ void PoolKernel<GPU_MALI, float>::Compute(const PoolParam& param) const {
   if (acl_op == nullptr) {
     return;
   }
-  if (acl_op->Bypass_acl(param)) {
-    std::cout << "init acl failed" << std::endl;
-    return;
-  }
   acl::AclParameters& args = acl_op->getargs();
   const float* input_data = (const float*)args.input_data;
   const float* output_data = (const float*)args.output_data;
-  acl_op->InitAclLayer(param);
   for (int n = 0; n < args.batch; ++n) {
     acl_op->RunAcl((void*)input_data, (void*)output_data);
     input_data += args.in_depth * args.in_cols * args.in_rows;
......
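The pooling, LRN, and softmax Compute paths share a pattern: run ACL once per batch item (or per row, for softmax) and advance raw pointers by one sample's element count. A small standalone sketch of that striding; the stub `RunAclOnce` just copies, and the shape values are made up:

```cpp
#include <cstddef>
#include <vector>

// Stub for acl_op->RunAcl on one sample; the real call launches an ACL
// OpenCL kernel. Here it just copies, to keep the sketch runnable.
void RunAclOnce(const float* in, float* out, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) out[i] = in[i];
}

int main() {
  const int batch = 2, in_depth = 3, in_rows = 4, in_cols = 4;  // illustrative
  const std::size_t sample =
      static_cast<std::size_t>(in_depth) * in_rows * in_cols;
  std::vector<float> in(batch * sample, 1.f), out(batch * sample, 0.f);

  const float* input_data = in.data();
  float* output_data = out.data();
  for (int n = 0; n < batch; ++n) {  // same loop shape as the kernels above
    RunAclOnce(input_data, output_data, sample);
    input_data += sample;  // in_depth * in_cols * in_rows
    output_data += sample;
  }
  return 0;
}
```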
@@ -41,10 +41,10 @@ class AclReluOp : public acl::ACLOperator {
   acl::AclParameters& getargs() { return args; }
   void InitAclLayer(const ReluParam& param) {
     setTargetHint(acl::TargetHint::OPENCL);
-    arm_compute::TensorShape input_shape(args.in_cols * args.in_rows *
-                                         args.in_depth * args.batch);
+    arm_compute::TensorShape input_shape(args.in_cols, args.in_rows,
+                                         args.in_depth, args.batch);
-    arm_compute::TensorShape output_shape(args.in_cols * args.in_rows *
-                                          args.in_depth * args.out_num);
+    arm_compute::TensorShape output_shape(args.in_cols, args.in_rows,
+                                          args.in_depth, args.out_num);
     // arm_compute::TensorShape weights_shape(
     //     args.filter_cols, args.filter_rows, args.in_depth, args.out_depth);
     // arm_compute::TensorShape biases_shape(args.out_depth);
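The ReLU fix above replaces one flattened dimension (all extents multiplied together) with four separate extents. In the ARM Compute Library, `TensorShape` takes extents fastest-moving axis first, i.e. (width, height, channels, batch), matching the `[width, height, IFM]` comments used throughout these kernels. A minimal check (requires the ACL headers; the numbers are arbitrary):

```cpp
#include <iostream>
#include "arm_compute/core/TensorShape.h"

int main() {
  // Four distinct extents, fastest-moving axis first: (W, H, C, N).
  arm_compute::TensorShape shape(8u, 6u, 3u, 2u);
  // A 4-D shape with 288 elements, rather than a single 288-wide 1-D blob.
  std::cout << shape.num_dimensions() << " dims, "
            << shape.total_size() << " elements\n";  // 4 dims, 288 elements
  return 0;
}
```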
@@ -71,6 +71,7 @@ class AclReluOp : public acl::ACLOperator {
   bool Bypass_acl(const ReluParam& param) {
     bool bypass_acl = false;
     AclParametersByContext(param);
+    InitAclLayer(param);
     // for performance, more groups impact GPU performance
     if (this->force_bypass_acl_path_) {
       bypass_acl = true;
@@ -99,13 +100,17 @@ class AclReluOp : public acl::ACLOperator {
 };
 template <>
-bool ReluKernel<GPU_MALI, float>::Init(const ReluParam& param) const {
+bool ReluKernel<GPU_MALI, float>::Init(ReluParam* param) {
   AclReluOp<GPU_MALI, float>* acl_op =
       reinterpret_cast<AclReluOp<GPU_MALI, float>*>(this->GetAclOp());
   if (acl_op == nullptr) {
     acl_op = new AclReluOp<GPU_MALI, float>();
     this->SetAclOp((void*)acl_op, (void*)this);
   }
+  if (acl_op->Bypass_acl(*param)) {
+    std::cout << "init acl failed" << std::endl;
+    return false;
+  }
   return true;
 }
@@ -117,15 +122,8 @@ void ReluKernel<GPU_MALI, float>::Compute(const ReluParam& param) const {
   if (acl_op == nullptr) {
     return;
   }
-  if (acl_op->Bypass_acl(param)) {
-    std::cout << "init acl failed" << std::endl;
-    return;
-  }
   acl::AclParameters& args = acl_op->getargs();
-  const float* input_data = (const float*)args.input_data;
-  const float* output_data = (const float*)args.output_data;
-  acl_op->InitAclLayer(param);
-  acl_op->RunAcl((void*)input_data, (void*)output_data);
+  acl_op->RunAcl(args.input_data, args.output_data);
 }
 template class ReluKernel<GPU_MALI, float>;
......
File mode changed from 100644 to 100755
@@ -61,6 +61,7 @@ class AclSoftmaxOp : public acl::ACLOperator {
   bool Bypass_acl(const SoftmaxParam& param) {
     bool bypass_acl = false;
     AclParametersByContext(param);
+    InitAclLayer(param);
     // for performance, more groups impact GPU performance
     if (this->force_bypass_acl_path_) {
       bypass_acl = true;
@@ -96,13 +97,17 @@ class AclSoftmaxOp : public acl::ACLOperator {
 };
 template <>
-bool SoftmaxKernel<GPU_MALI, float>::Init(const SoftmaxParam& param) const {
+bool SoftmaxKernel<GPU_MALI, float>::Init(SoftmaxParam* param) {
   AclSoftmaxOp<GPU_MALI, float>* acl_op =
       reinterpret_cast<AclSoftmaxOp<GPU_MALI, float>*>(this->GetAclOp());
   if (acl_op == nullptr) {
     acl_op = new AclSoftmaxOp<GPU_MALI, float>();
     this->SetAclOp((void*)acl_op, (void*)this);
   }
+  if (acl_op->Bypass_acl(*param)) {
+    std::cout << "init acl failed" << std::endl;
+    return false;
+  }
   return true;
 }
@@ -114,14 +119,10 @@ void SoftmaxKernel<GPU_MALI, float>::Compute(const SoftmaxParam& param) const {
   if (acl_op == nullptr) {
     return;
   }
-  if (acl_op->Bypass_acl(param)) {
-    std::cout << "init acl failed" << std::endl;
-    return;
-  }
   acl::AclParameters& args = acl_op->getargs();
   const float* input_data = (const float*)args.input_data;
   const float* output_data = (const float*)args.output_data;
-  acl_op->InitAclLayer(param);
   for (int n = 0; n < args.out_num; ++n) {
     acl_op->RunAcl((void*)input_data, (void*)output_data);
     input_data += args.in_depth;
......