Commit b427fa29 authored by hjchen2

Fix compile with opencl

Parent 56ec9579
@@ -46,7 +46,7 @@ class PaddleMobile {
     PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
 #endif
   }
-  ~PaddleMobile() {}
+  virtual ~PaddleMobile() { Clear(); }
   PMStatus Load(const std::string &dirname, const bool optimize = false,
                 const bool quantification = false, const int batch_size = 1,
......
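PaddleMobile now gets a virtual destructor that calls Clear(), so resources are released even when the engine is deleted through a base-class pointer. A minimal standalone sketch of the C++ rule this relies on (hypothetical Engine/ClEngine names, not the real classes):

#include <iostream>
#include <memory>

struct Engine {
  virtual ~Engine() { std::cout << "Engine released\n"; }  // virtual: derived dtor runs first
};

struct ClEngine : Engine {
  ~ClEngine() override { std::cout << "OpenCL resources released\n"; }
};

int main() {
  std::unique_ptr<Engine> e(new ClEngine());
  // With a non-virtual ~Engine(), this delete through the base pointer would skip ~ClEngine().
  e.reset();
  return 0;
}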
@@ -17,12 +17,11 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace operators {
 #define DEFINE_ACTIVATION_INFERSHAPE(OpName)                      \
   template <typename Dtype, typename T>                           \
   void OpName##Op<Dtype, T>::InferShape() const {                 \
     const auto &input_dims = this->param_.InputX()->dims();       \
     this->param_.Out()->Resize(input_dims);                       \
-    this->param_.Out()->set_lod(this->param_.InputX()->lod());    \
   }
 #ifdef RELU_OP
......
@@ -23,7 +23,6 @@ template <typename Dtype, typename T>
 void ElementwiseAddOp<Dtype, T>::InferShape() const {
   auto x_dim = this->param_.InputX()->dims();
   this->param_.Out()->Resize(x_dim);
-  this->param_.Out()->set_lod(this->param_.InputX()->lod());
 }
 }  // namespace operators
......
@@ -71,9 +71,10 @@ bool ReluKernel<CPU, float>::Init(ReluParam<CPU> *param) {
 template <>
 void ReluKernel<CPU, float>::Compute(const ReluParam<CPU> &param) {
-  const Tensor *input = param.InputX();
-  Tensor *output = param.Out();
+  const LoDTensor *input = param.InputX();
+  LoDTensor *output = param.Out();
   ActivationCompute<float, RELU>()(input, output);
+  output->set_lod(input->lod());
 }
 template <>
@@ -83,9 +84,10 @@ bool Relu6Kernel<CPU, float>::Init(ReluParam<CPU> *param) {
 template <>
 void Relu6Kernel<CPU, float>::Compute(const ReluParam<CPU> &param) {
-  const Tensor *input = param.InputX();
-  Tensor *output = param.Out();
+  const LoDTensor *input = param.InputX();
+  LoDTensor *output = param.Out();
   ActivationCompute<float, RELU6>()(input, output);
+  output->set_lod(input->lod());
 }
 #endif
@@ -97,9 +99,10 @@ bool SigmoidKernel<CPU, float>::Init(SigmoidParam<CPU> *param) {
 template <>
 void SigmoidKernel<CPU, float>::Compute(const SigmoidParam<CPU> &param) {
-  const Tensor *input = param.InputX();
-  Tensor *output = param.Out();
+  const LoDTensor *input = param.InputX();
+  LoDTensor *output = param.Out();
   ActivationCompute<float, SIGMOID>()(input, output);
+  output->set_lod(input->lod());
 }
 #endif
@@ -111,9 +114,10 @@ bool TanhKernel<CPU, float>::Init(TanhParam<CPU> *param) {
 template <>
 void TanhKernel<CPU, float>::Compute(const TanhParam<CPU> &param) {
-  const Tensor *input = param.InputX();
-  Tensor *output = param.Out();
+  const LoDTensor *input = param.InputX();
+  LoDTensor *output = param.Out();
   ActivationCompute<float, TANH>()(input, output);
+  output->set_lod(input->lod());
 }
 #endif
@@ -125,9 +129,10 @@ bool LogKernel<CPU, float>::Init(ReluParam<CPU> *param) {
 template <>
 void LogKernel<CPU, float>::Compute(const ReluParam<CPU> &param) {
-  const Tensor *input = param.InputX();
-  Tensor *output = param.Out();
+  const LoDTensor *input = param.InputX();
+  LoDTensor *output = param.Out();
   ActivationCompute<float, LOG>()(input, output);
+  output->set_lod(input->lod());
 }
 #endif
......
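The pattern across these kernels is the same: the LoD (the sequence-offset metadata attached to a LoDTensor) is no longer copied in InferShape (see the removals above and below), and instead each CPU Compute() forwards it from its actual input after running, via output->set_lod(input->lod()). A minimal sketch of that propagation in plain C++ (stub types and a toy relu, not the framework classes):

#include <cstddef>
#include <vector>

struct SimpleLoDTensor {
  std::vector<float> data;
  // Sequence offsets, e.g. {{0, 2, 5}}: two sequences of lengths 2 and 3 packed together.
  std::vector<std::vector<std::size_t>> lod;
};

void ReluForward(const SimpleLoDTensor &in, SimpleLoDTensor *out) {
  out->data.resize(in.data.size());
  for (std::size_t i = 0; i < in.data.size(); ++i) {
    out->data[i] = in.data[i] > 0.f ? in.data[i] : 0.f;  // element-wise, packing unchanged
  }
  out->lod = in.lod;  // analogous to output->set_lod(input->lod()) in the kernels above
}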
@@ -28,6 +28,7 @@ bool SoftmaxKernel<CPU, float>::Init(SoftmaxParam<CPU> *param) {
 template <>
 void SoftmaxKernel<CPU, float>::Compute(const SoftmaxParam<CPU> &param) {
   SoftmaxCompute<float>(param);
+  param.Out()->set_lod(param.InputX()->lod());
 }
 template class SoftmaxKernel<CPU, float>;
......
@@ -27,13 +27,14 @@ bool FeedKernel<GPU_CL, float>::Init(FeedParam<GPU_CL> *param) {
 template <>
 void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
+  const int col = param.Col();
   auto kernel = this->cl_helper_.KernelAt(0);
   auto default_work_size = this->cl_helper_.DefaultWorkSize(*(param.Out()));
   cl_int status;
   param.Out()->InitEmptyImage(cl_helper_.CLContext(),
                               cl_helper_.CLCommandQueue(), param.Out()->dims());
   auto output = param.Out();
-  const Tensor *input = param.InputX();
+  const Tensor *input = &param.InputX()->at(col);
   //  DLOG << *input;
   const float *input_data = input->data<float>();
   int numel = input->numel();
......
@@ -35,8 +35,9 @@ void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) {
   auto kernel = this->cl_helper_.KernelAt(0);
   auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.InputX());
+  const int col = param.Col();
   auto input = param.InputX()->GetCLImage();
-  auto *out = param.Out();
+  auto *out = &param.Out()->at(col);
   out->Resize(param.InputX()->dims());
   out->mutable_data<float>();
   const auto &dim = param.InputX()->dims();
......
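Both OpenCL I/O kernels now read the op's col attribute: feed copies the col-th host LoDTensor into the CL image, and fetch writes its result into the col-th slot of the fetch list, matching the std::vector<LoDTensor> feed/fetch variables in op_param.h below. A small sketch of what the indexing means (plain C++ stubs, not the CL kernels):

#include <vector>

struct HostTensorStub { std::vector<float> data; };

// feed: pick the col-th model input, same as &param.InputX()->at(col) above.
const HostTensorStub &FeedSlot(const std::vector<HostTensorStub> &feeds, int col) {
  return feeds.at(col);  // at() throws if col is out of range
}

// fetch: write into the col-th model output, same as &param.Out()->at(col) above.
HostTensorStub *FetchSlot(std::vector<HostTensorStub> *fetches, int col) {
  return &fetches->at(col);
}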
@@ -57,12 +57,12 @@ void sgemm_6x16(const float *lhs, const float *rhs, const int k, float *output,
       "dup      v28.4s,     wzr                     \n\t"
       "dup      v29.4s,     wzr                     \n\t"
       "subs     %[kc1], %[kc1], #1          \n\t"
       "blt      2f                          \n\t"
       "1:                                   \n\t"
-      "prfm     pldl1keep,  [%[lhs],  #24]  \n\t"
+      "prfm     pldl1keep,  [%[lhs],  #32]  \n\t"
       "prfm     pldl1keep,  [%[rhs],  #64]  \n\t"
       "ld1      {v0.4s, v1.4s},   [%[lhs]],  %[step1]           \n\t"
       "ld1      {v2.4s, v3.4s, v4.4s, v5.4s},  [%[rhs]],  #64   \n\t"
@@ -109,9 +109,10 @@ void sgemm_6x16(const float *lhs, const float *rhs, const int k, float *output,
       "st1      {v26.4s, v27.4s, v28.4s, v29.4s},   [%[c]],   %[step]  \n\t"
       : [lhs] "+r"(lhs), [rhs] "+r"(rhs), [c] "+r"(output), [kc1] "+r"(kc1)
       : [step] "r"(step), [step1] "r"(step1)
-      : "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
-        "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
-        "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29");
+      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
+        "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
+        "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28",
+        "v29");
 }
 #else
 void sgemm_6x8(const float *lhs, const float *rhs, const int k, float *output,
......
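Two fixes to the AArch64 sgemm_6x16 inline assembly: the lhs prefetch distance moves from #24 to #32 bytes, and the clobber list gains "cc", because subs updates the NZCV condition flags that the following conditional branch consumes, so the compiler must be told the flags change. A small standalone example of the clobber rule (a toy countdown loop, not the sgemm kernel):

#include <cstdio>

long count_down(long n) {
  if (n <= 0) return 0;
#if defined(__aarch64__)
  asm volatile(
      "1:                        \n\t"
      "subs   %[n], %[n], #1     \n\t"  // decrement and update the NZCV flags
      "bgt    1b                 \n\t"  // this branch reads the flags subs just set
      : [n] "+r"(n)
      :
      : "cc");  // declare the flags clobbered, as the diff above now does
#else
  while (n > 0) --n;  // portable fallback for non-AArch64 builds
#endif
  return n;
}

int main() { std::printf("%ld\n", count_down(5)); return 0; }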
@@ -1224,19 +1224,19 @@ class FeedParam : public OpParam {
   FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
             const AttributeMap &attrs, Scope *scope)
       : OpParam(inputs, outputs, attrs, scope) {
-    input_x_ = InputXFrom<framework::LoDTensorArray>(inputs, *scope);
+    input_x_ = InputXFrom<std::vector<LoDTensor>>(inputs, *scope);
     out_ = OutFrom<GType>(outputs, *scope);
     col_ = GetAttr<int>("col", attrs);
     auto var = scope->FindVar("batch_size");
     batch_size = var->GetValue<int>();
   }
-  const framework::LoDTensorArray *InputX() const { return input_x_; }
+  const std::vector<LoDTensor> *InputX() const { return input_x_; }
   GType *Out() const { return out_; }
   const int Col() const { return col_; }
   const int BatchSize() const { return batch_size; }
  private:
-  framework::LoDTensorArray *input_x_;
+  std::vector<LoDTensor> *input_x_;
   GType *out_;
   int col_;
   int batch_size;
@@ -1251,18 +1251,18 @@ class FetchParam : public OpParam {
   FetchParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
             const AttributeMap &attrs, Scope *scope)
       : OpParam(inputs, outputs, attrs, scope) {
-    input_x_ = InputXFrom<framework::LoDTensor>(inputs, *scope);
-    out_ = OutFrom<framework::LoDTensorArray>(outputs, *scope);
+    input_x_ = InputXFrom<GType>(inputs, *scope);
+    out_ = OutFrom<std::vector<LoDTensor>>(outputs, *scope);
     col_ = GetAttr<int>("col", attrs);
   }
-  const framework::LoDTensor *InputX() const { return input_x_; }
-  framework::LoDTensorArray *Out() const { return out_; }
+  const GType *InputX() const { return input_x_; }
+  std::vector<LoDTensor> *Out() const { return out_; }
   const int Col() const { return col_; }
  private:
-  framework::LoDTensor *input_x_;
-  framework::LoDTensorArray *out_;
+  GType *input_x_;
+  std::vector<LoDTensor> *out_;
   int col_;
 #ifdef PADDLE_MOBILE_FPGA
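FeedParam and FetchParam drop the hard-coded framework::LoDTensorArray / framework::LoDTensor types in favor of std::vector<LoDTensor> for the feed/fetch lists and the device-dependent GType for the per-op tensor, which is presumably what unblocks the OpenCL build, since GType resolves to the CL image type rather than a host LoDTensor there. A simplified sketch of that trait-based dispatch (stub types, not the real DtypeTensorTrait):

#include <vector>

struct CPU {};
struct GPU_CL {};
struct HostLoDTensorStub {};  // stands in for framework::LoDTensor
struct CLImageStub {};        // stands in for the OpenCL image type

template <typename Dtype> struct DtypeTensorTraitStub;
template <> struct DtypeTensorTraitStub<CPU>    { typedef HostLoDTensorStub gtype; };
template <> struct DtypeTensorTraitStub<GPU_CL> { typedef CLImageStub gtype; };

template <typename Dtype>
struct FetchParamStub {
  typedef typename DtypeTensorTraitStub<Dtype>::gtype GType;
  GType *input_x_;                       // device-dependent tensor/image
  std::vector<HostLoDTensorStub> *out_;  // fetch list stays a host-side vector
};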
@@ -2371,6 +2371,15 @@ class ConvTransposeParam : public OpParam {
   const int &Groups() const { return groups; }
 
+  enum ExecMode {
+    EXEC_INVALID = 0,
+    EXEC_GEMM_FLOAT,
+    EXEC_DECONV3X3_FLOAT,
+    EXEC_DECONV4X4_FLOAT,
+  };
+
+  ExecMode &ExecMode() const { return exec_mode_; }
+
  private:
   GType *input_;
   GType *output_;
@@ -2379,6 +2388,7 @@ class ConvTransposeParam : public OpParam {
   vector<int> paddings_;
   vector<int> dilations_;
   int groups;
+  mutable enum ExecMode exec_mode_;
 #ifdef PADDLE_MOBILE_FPGA
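ConvTransposeParam additionally gains an ExecMode enum and a mutable accessor, so a kernel's Init() can choose a specialized deconvolution path once and Compute() can simply dispatch on it. A hypothetical sketch of how such a mode could be chosen from the filter shape (the actual selection logic is not part of this diff):

// Hypothetical helper, for illustration only; the real kernel may use other criteria.
enum ExecModeStub {
  EXEC_INVALID = 0,
  EXEC_GEMM_FLOAT,
  EXEC_DECONV3X3_FLOAT,
  EXEC_DECONV4X4_FLOAT,
};

ExecModeStub ChooseExecMode(int kernel_h, int kernel_w, int stride) {
  if (kernel_h == 3 && kernel_w == 3 && stride == 2) return EXEC_DECONV3X3_FLOAT;
  if (kernel_h == 4 && kernel_w == 4 && stride == 2) return EXEC_DECONV4X4_FLOAT;
  return EXEC_GEMM_FLOAT;  // generic fallback: col2im plus GEMM
}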
@@ -3214,43 +3224,46 @@ class LogicalUnaryParam : public OpParam {
 #ifdef WRITE_TO_ARRAY_OP
 template <typename Dtype>
 class WriteToArrayParam : public OpParam {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+
  public:
   WriteToArrayParam(const VariableNameMap &inputs,
                     const VariableNameMap &outputs, const AttributeMap &attrs,
                     Scope *scope)
       : OpParam(inputs, outputs, attrs, scope) {
-    input_ = OpParam::GetVarValue<framework::LoDTensor>("X", inputs, *scope);
-    index_ = OpParam::GetVarValue<framework::LoDTensor>("I", inputs, *scope);
-    output_ =
-        OpParam::GetVarValue<framework::LoDTensorArray>("Out", outputs, *scope);
+    input_ = OpParam::GetVarValue<GType>("X", inputs, *scope);
+    index_ = OpParam::GetVarValue<GType>("I", inputs, *scope);
+    output_ = OpParam::GetVarValue<std::vector<GType>>("Out", outputs, *scope);
   }
 
  public:
-  framework::LoDTensor *input_;
-  framework::LoDTensor *index_;
-  framework::LoDTensorArray *output_;
+  GType *input_;
+  GType *index_;
+  std::vector<GType> *output_;
 };
 #endif
 
 #ifdef READ_FROM_ARRAY_OP
 template <typename Dtype>
 class ReadFromArrayParam : public OpParam {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+
  public:
   ReadFromArrayParam(const VariableNameMap &inputs,
                      const VariableNameMap &outputs, const AttributeMap &attrs,
                      Scope *scope)
       : OpParam(inputs, outputs, attrs, scope) {
-    input_ =
-        OpParam::GetVarValue<framework::LoDTensorArray>("X", inputs, *scope);
-    index_ = OpParam::GetVarValue<framework::LoDTensor>("I", inputs, *scope);
-    output_ =
-        OpParam::GetVarValue<framework::LoDTensor>("Out", outputs, *scope);
+    input_ = OpParam::GetVarValue<std::vector<GType>>("X", inputs, *scope);
+    index_ = OpParam::GetVarValue<GType>("I", inputs, *scope);
+    output_ = OpParam::GetVarValue<GType>("Out", outputs, *scope);
   }
 
  public:
-  framework::LoDTensorArray *input_;
-  framework::LoDTensor *index_;
-  framework::LoDTensor *output_;
+  std::vector<GType> *input_;
+  GType *index_;
+  GType *output_;
 };
 #endif
......
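WriteToArrayParam and ReadFromArrayParam likewise resolve their tensors through the DtypeTensorTrait GType typedef instead of hard-coded framework types. As a reminder of the semantics these params feed: write_to_array stores tensor X at position I of a tensor array, and read_from_array reads position I back out. A minimal sketch of that behavior on a plain std::vector (not the framework implementation):

#include <cstddef>
#include <vector>

template <typename TensorT>
void WriteToArray(const TensorT &x, std::size_t i, std::vector<TensorT> *array) {
  if (array->size() <= i) array->resize(i + 1);  // grow so slot i exists
  (*array)[i] = x;
}

template <typename TensorT>
TensorT ReadFromArray(const std::vector<TensorT> &array, std::size_t i) {
  return array.at(i);  // bounds-checked read of slot i
}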
@@ -21,7 +21,6 @@ namespace operators {
 template <typename DeviceType, typename T>
 void SoftmaxOp<DeviceType, T>::InferShape() const {
   this->param_.Out()->Resize(this->param_.InputX()->dims());
-  this->param_.Out()->set_lod(this->param_.InputX()->lod());
 }
 }  // namespace operators
......