diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h index 7983541a221fb63f573dfa8186599934cd97387b..6439c4cea94fbc8474caa7115afbc58b964e21ad 100644 --- a/src/io/paddle_mobile.h +++ b/src/io/paddle_mobile.h @@ -46,7 +46,7 @@ class PaddleMobile { PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on"); #endif } - ~PaddleMobile() {} + virtual ~PaddleMobile() { Clear(); } PMStatus Load(const std::string &dirname, const bool optimize = false, const bool quantification = false, const int batch_size = 1, diff --git a/src/operators/activation_op.cpp b/src/operators/activation_op.cpp index ab129690fe95127fec8c36e6cd6e27abc4b8505e..6cb1c3c0403fd093c8566415b8fe81ca017fb732 100644 --- a/src/operators/activation_op.cpp +++ b/src/operators/activation_op.cpp @@ -17,12 +17,11 @@ limitations under the License. */ namespace paddle_mobile { namespace operators { -#define DEFINE_ACTIVATION_INFERSHAPE(OpName) \ - template \ - void OpName##Op::InferShape() const { \ - const auto &input_dims = this->param_.InputX()->dims(); \ - this->param_.Out()->Resize(input_dims); \ - this->param_.Out()->set_lod(this->param_.InputX()->lod()); \ +#define DEFINE_ACTIVATION_INFERSHAPE(OpName) \ + template \ + void OpName##Op::InferShape() const { \ + const auto &input_dims = this->param_.InputX()->dims(); \ + this->param_.Out()->Resize(input_dims); \ } #ifdef RELU_OP diff --git a/src/operators/elementwise_add_op.cpp b/src/operators/elementwise_add_op.cpp index f694a56621399e923d54da82027a73e064d310ed..9c1b612e1036ccf431e7155db7b3bdb4086e1bfd 100644 --- a/src/operators/elementwise_add_op.cpp +++ b/src/operators/elementwise_add_op.cpp @@ -23,7 +23,6 @@ template void ElementwiseAddOp::InferShape() const { auto x_dim = this->param_.InputX()->dims(); this->param_.Out()->Resize(x_dim); - this->param_.Out()->set_lod(this->param_.InputX()->lod()); } } // namespace operators diff --git a/src/operators/kernel/arm/activation_kernel.cpp b/src/operators/kernel/arm/activation_kernel.cpp index 4aac8a4732d66455a1fba5a1d8a766cf3215335f..73018c886885d8c8ae209dde808310c725363fab 100644 --- a/src/operators/kernel/arm/activation_kernel.cpp +++ b/src/operators/kernel/arm/activation_kernel.cpp @@ -71,9 +71,10 @@ bool ReluKernel::Init(ReluParam *param) { template <> void ReluKernel::Compute(const ReluParam ¶m) { - const Tensor *input = param.InputX(); - Tensor *output = param.Out(); + const LoDTensor *input = param.InputX(); + LoDTensor *output = param.Out(); ActivationCompute()(input, output); + output->set_lod(input->lod()); } template <> @@ -83,9 +84,10 @@ bool Relu6Kernel::Init(ReluParam *param) { template <> void Relu6Kernel::Compute(const ReluParam ¶m) { - const Tensor *input = param.InputX(); - Tensor *output = param.Out(); + const LoDTensor *input = param.InputX(); + LoDTensor *output = param.Out(); ActivationCompute()(input, output); + output->set_lod(input->lod()); } #endif @@ -97,9 +99,10 @@ bool SigmoidKernel::Init(SigmoidParam *param) { template <> void SigmoidKernel::Compute(const SigmoidParam ¶m) { - const Tensor *input = param.InputX(); - Tensor *output = param.Out(); + const LoDTensor *input = param.InputX(); + LoDTensor *output = param.Out(); ActivationCompute()(input, output); + output->set_lod(input->lod()); } #endif @@ -111,9 +114,10 @@ bool TanhKernel::Init(TanhParam *param) { template <> void TanhKernel::Compute(const TanhParam ¶m) { - const Tensor *input = param.InputX(); - Tensor *output = param.Out(); + const LoDTensor *input = param.InputX(); + LoDTensor *output = param.Out(); ActivationCompute()(input, output); + output->set_lod(input->lod()); } #endif @@ -125,9 +129,10 @@ bool LogKernel::Init(ReluParam *param) { template <> void LogKernel::Compute(const ReluParam ¶m) { - const Tensor *input = param.InputX(); - Tensor *output = param.Out(); + const LoDTensor *input = param.InputX(); + LoDTensor *output = param.Out(); ActivationCompute()(input, output); + output->set_lod(input->lod()); } #endif diff --git a/src/operators/kernel/arm/softmax_kernel.cpp b/src/operators/kernel/arm/softmax_kernel.cpp index d5a1009fd79d57d8815d313ed61bbc5d7bf32bbe..bdb05656d44fa8b5cc61e3eda0eb7f2759f826c4 100644 --- a/src/operators/kernel/arm/softmax_kernel.cpp +++ b/src/operators/kernel/arm/softmax_kernel.cpp @@ -28,6 +28,7 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) { template <> void SoftmaxKernel::Compute(const SoftmaxParam ¶m) { SoftmaxCompute(param); + param.Out()->set_lod(param.InputX()->lod()); } template class SoftmaxKernel; diff --git a/src/operators/kernel/cl/feed_kernel.cpp b/src/operators/kernel/cl/feed_kernel.cpp index 3f33a863dbb2cd9df9327349fd409c5d66fd7560..c8c94038aa3536431c1fc9dcf982e6714b6484bb 100644 --- a/src/operators/kernel/cl/feed_kernel.cpp +++ b/src/operators/kernel/cl/feed_kernel.cpp @@ -27,13 +27,14 @@ bool FeedKernel::Init(FeedParam *param) { template <> void FeedKernel::Compute(const FeedParam ¶m) { + const int col = param.Col(); auto kernel = this->cl_helper_.KernelAt(0); auto default_work_size = this->cl_helper_.DefaultWorkSize(*(param.Out())); cl_int status; param.Out()->InitEmptyImage(cl_helper_.CLContext(), cl_helper_.CLCommandQueue(), param.Out()->dims()); auto output = param.Out(); - const Tensor *input = param.InputX(); + const Tensor *input = ¶m.InputX()->at(col); // DLOG << *input; const float *input_data = input->data(); int numel = input->numel(); diff --git a/src/operators/kernel/cl/fetch_kernel.cpp b/src/operators/kernel/cl/fetch_kernel.cpp index e13fbcaed6cd6caec495cafa31b9147a89d54753..4a477f081e89b6fe7b1dbd34ab80cacfea2c21fd 100644 --- a/src/operators/kernel/cl/fetch_kernel.cpp +++ b/src/operators/kernel/cl/fetch_kernel.cpp @@ -35,8 +35,9 @@ void FetchKernel::Compute(const FetchParam ¶m) { auto kernel = this->cl_helper_.KernelAt(0); auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.InputX()); + const int col = param.Col(); auto input = param.InputX()->GetCLImage(); - auto *out = param.Out(); + auto *out = ¶m.Out()->at(col); out->Resize(param.InputX()->dims()); out->mutable_data(); const auto &dim = param.InputX()->dims(); diff --git a/src/operators/math/gemm/gemm_kernel.h b/src/operators/math/gemm/gemm_kernel.h index 7cbbb09270acc9a58ccb414464e11879d97e2292..eea54114786ff14f21318fba50c83303f08a8dab 100644 --- a/src/operators/math/gemm/gemm_kernel.h +++ b/src/operators/math/gemm/gemm_kernel.h @@ -57,12 +57,12 @@ void sgemm_6x16(const float *lhs, const float *rhs, const int k, float *output, "dup v28.4s, wzr \n\t" "dup v29.4s, wzr \n\t" - "subs %[kc1], %[kc1], #1 \n\t" - "blt 2f \n\t" - "1: \n\t" + "subs %[kc1], %[kc1], #1 \n\t" + "blt 2f \n\t" + "1: \n\t" - "prfm pldl1keep, [%[lhs], #24] \n\t" - "prfm pldl1keep, [%[rhs], #64] \n\t" + "prfm pldl1keep, [%[lhs], #32] \n\t" + "prfm pldl1keep, [%[rhs], #64] \n\t" "ld1 {v0.4s, v1.4s}, [%[lhs]], %[step1] \n\t" "ld1 {v2.4s, v3.4s, v4.4s, v5.4s}, [%[rhs]], #64 \n\t" @@ -109,9 +109,10 @@ void sgemm_6x16(const float *lhs, const float *rhs, const int k, float *output, "st1 {v26.4s, v27.4s, v28.4s, v29.4s}, [%[c]], %[step] \n\t" : [lhs] "+r"(lhs), [rhs] "+r"(rhs), [c] "+r"(output), [kc1] "+r"(kc1) : [step] "r"(step), [step1] "r"(step1) - : "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", - "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", - "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29"); + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", + "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", + "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", + "v29"); } #else void sgemm_6x8(const float *lhs, const float *rhs, const int k, float *output, diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 95cb7d675370ca2afc5fae21311ed2bbbb27cc0c..6b14ef4736e0882eaf8aa67a3f29d869d0a2129e 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -1224,19 +1224,19 @@ class FeedParam : public OpParam { FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs, const AttributeMap &attrs, Scope *scope) : OpParam(inputs, outputs, attrs, scope) { - input_x_ = InputXFrom(inputs, *scope); + input_x_ = InputXFrom>(inputs, *scope); out_ = OutFrom(outputs, *scope); col_ = GetAttr("col", attrs); auto var = scope->FindVar("batch_size"); batch_size = var->GetValue(); } - const framework::LoDTensorArray *InputX() const { return input_x_; } + const std::vector *InputX() const { return input_x_; } GType *Out() const { return out_; } const int Col() const { return col_; } const int BatchSize() const { return batch_size; } private: - framework::LoDTensorArray *input_x_; + std::vector *input_x_; GType *out_; int col_; int batch_size; @@ -1251,18 +1251,18 @@ class FetchParam : public OpParam { FetchParam(const VariableNameMap &inputs, const VariableNameMap &outputs, const AttributeMap &attrs, Scope *scope) : OpParam(inputs, outputs, attrs, scope) { - input_x_ = InputXFrom(inputs, *scope); - out_ = OutFrom(outputs, *scope); + input_x_ = InputXFrom(inputs, *scope); + out_ = OutFrom>(outputs, *scope); col_ = GetAttr("col", attrs); } - const framework::LoDTensor *InputX() const { return input_x_; } - framework::LoDTensorArray *Out() const { return out_; } + const GType *InputX() const { return input_x_; } + std::vector *Out() const { return out_; } const int Col() const { return col_; } private: - framework::LoDTensor *input_x_; - framework::LoDTensorArray *out_; + GType *input_x_; + std::vector *out_; int col_; #ifdef PADDLE_MOBILE_FPGA @@ -2371,6 +2371,15 @@ class ConvTransposeParam : public OpParam { const int &Groups() const { return groups; } + enum ExecMode { + EXEC_INVALID = 0, + EXEC_GEMM_FLOAT, + EXEC_DECONV3X3_FLOAT, + EXEC_DECONV4X4_FLOAT, + }; + + ExecMode &ExecMode() const { return exec_mode_; } + private: GType *input_; GType *output_; @@ -2379,6 +2388,7 @@ class ConvTransposeParam : public OpParam { vector paddings_; vector dilations_; int groups; + mutable enum ExecMode exec_mode_; #ifdef PADDLE_MOBILE_FPGA @@ -3214,43 +3224,46 @@ class LogicalUnaryParam : public OpParam { #ifdef WRITE_TO_ARRAY_OP template class WriteToArrayParam : public OpParam { + typedef typename DtypeTensorTrait::gtype GType; + typedef typename DtypeTensorTrait::rtype RType; + public: WriteToArrayParam(const VariableNameMap &inputs, const VariableNameMap &outputs, const AttributeMap &attrs, Scope *scope) : OpParam(inputs, outputs, attrs, scope) { - input_ = OpParam::GetVarValue("X", inputs, *scope); - index_ = OpParam::GetVarValue("I", inputs, *scope); - output_ = - OpParam::GetVarValue("Out", outputs, *scope); + input_ = OpParam::GetVarValue("X", inputs, *scope); + index_ = OpParam::GetVarValue("I", inputs, *scope); + output_ = OpParam::GetVarValue>("Out", outputs, *scope); } public: - framework::LoDTensor *input_; - framework::LoDTensor *index_; - framework::LoDTensorArray *output_; + GType *input_; + GType *index_; + std::vector *output_; }; #endif #ifdef READ_FROM_ARRAY_OP template class ReadFromArrayParam : public OpParam { + typedef typename DtypeTensorTrait::gtype GType; + typedef typename DtypeTensorTrait::rtype RType; + public: ReadFromArrayParam(const VariableNameMap &inputs, const VariableNameMap &outputs, const AttributeMap &attrs, Scope *scope) : OpParam(inputs, outputs, attrs, scope) { - input_ = - OpParam::GetVarValue("X", inputs, *scope); - index_ = OpParam::GetVarValue("I", inputs, *scope); - output_ = - OpParam::GetVarValue("Out", outputs, *scope); + input_ = OpParam::GetVarValue>("X", inputs, *scope); + index_ = OpParam::GetVarValue("I", inputs, *scope); + output_ = OpParam::GetVarValue("Out", outputs, *scope); } public: - framework::LoDTensorArray *input_; - framework::LoDTensor *index_; - framework::LoDTensor *output_; + std::vector *input_; + GType *index_; + GType *output_; }; #endif diff --git a/src/operators/softmax_op.cpp b/src/operators/softmax_op.cpp index caa967bb9420989d33708baf08b5fb4aa08708e0..902a2f4326a849585372f09df7259fbad2f2f57f 100644 --- a/src/operators/softmax_op.cpp +++ b/src/operators/softmax_op.cpp @@ -21,7 +21,6 @@ namespace operators { template void SoftmaxOp::InferShape() const { this->param_.Out()->Resize(this->param_.InputX()->dims()); - this->param_.Out()->set_lod(this->param_.InputX()->lod()); } } // namespace operators