Commit 4ba33e24 authored by xiebaiyuan, committed by GitHub

Merge pull request #1500 from hjchen2/backup

Fix compile with opencl
......@@ -46,7 +46,7 @@ class PaddleMobile {
PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
#endif
}
-  ~PaddleMobile() {}
+  virtual ~PaddleMobile() { Clear(); }
PMStatus Load(const std::string &dirname, const bool optimize = false,
const bool quantification = false, const int batch_size = 1,
......
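Note on the hunk above: making ~PaddleMobile virtual ensures that deleting an instance through a base-class pointer runs the derived destructor, and calling Clear() there releases loaded state. A minimal sketch of why the virtual qualifier matters (Base/Derived are hypothetical stand-ins, not from the patch):

    #include <iostream>
    #include <memory>

    struct Base {
      virtual ~Base() { std::cout << "Base cleaned up\n"; }  // virtual: safe polymorphic delete
    };

    struct Derived : Base {
      ~Derived() override { std::cout << "Derived cleaned up\n"; }  // runs first
    };

    int main() {
      std::unique_ptr<Base> p = std::make_unique<Derived>();
      // Both destructors run on destruction; without `virtual` this would be
      // undefined behavior and Derived's cleanup could be skipped.
    }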
......@@ -17,12 +17,11 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
-#define DEFINE_ACTIVATION_INFERSHAPE(OpName)                  \
-  template <typename Dtype, typename T>                       \
-  void OpName##Op<Dtype, T>::InferShape() const {             \
-    const auto &input_dims = this->param_.InputX()->dims();   \
-    this->param_.Out()->Resize(input_dims);                   \
-    this->param_.Out()->set_lod(this->param_.InputX()->lod()); \
+#define DEFINE_ACTIVATION_INFERSHAPE(OpName)                  \
+  template <typename Dtype, typename T>                       \
+  void OpName##Op<Dtype, T>::InferShape() const {             \
+    const auto &input_dims = this->param_.InputX()->dims();   \
+    this->param_.Out()->Resize(input_dims);                   \
}
#ifdef RELU_OP
......
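The rewritten macro drops the set_lod call from InferShape; LoD propagation moves into each kernel's Compute, as the kernel hunks below show. A rough, self-contained analogue of the token-pasting pattern the macro relies on (FakeTensor and the stamped functions are stand-ins, not the real framework types):

    #include <cassert>
    #include <vector>

    // Simplified stand-in for the framework tensor, for illustration only.
    struct FakeTensor {
      std::vector<int> dims;
      void Resize(const std::vector<int> &d) { dims = d; }
    };

    // Analogue of DEFINE_ACTIVATION_INFERSHAPE: OpName##InferShape stamps out
    // one shape-inference function per activation; output mirrors the input.
    #define DEFINE_ACTIVATION_INFERSHAPE(OpName)                       \
      void OpName##InferShape(const FakeTensor &in, FakeTensor *out) { \
        out->Resize(in.dims);                                          \
      }

    DEFINE_ACTIVATION_INFERSHAPE(Relu)
    DEFINE_ACTIVATION_INFERSHAPE(Tanh)

    int main() {
      FakeTensor in{{1, 3, 224, 224}}, out;
      ReluInferShape(in, &out);  // expanded from the macro
      assert(out.dims == in.dims);
    }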
......@@ -23,7 +23,6 @@ template <typename Dtype, typename T>
void ElementwiseAddOp<Dtype, T>::InferShape() const {
auto x_dim = this->param_.InputX()->dims();
this->param_.Out()->Resize(x_dim);
-  this->param_.Out()->set_lod(this->param_.InputX()->lod());
}
} // namespace operators
......
......@@ -71,9 +71,10 @@ bool ReluKernel<CPU, float>::Init(ReluParam<CPU> *param) {
template <>
void ReluKernel<CPU, float>::Compute(const ReluParam<CPU> &param) {
-  const Tensor *input = param.InputX();
-  Tensor *output = param.Out();
+  const LoDTensor *input = param.InputX();
+  LoDTensor *output = param.Out();
   ActivationCompute<float, RELU>()(input, output);
+  output->set_lod(input->lod());
}
template <>
......@@ -83,9 +84,10 @@ bool Relu6Kernel<CPU, float>::Init(ReluParam<CPU> *param) {
template <>
void Relu6Kernel<CPU, float>::Compute(const ReluParam<CPU> &param) {
-  const Tensor *input = param.InputX();
-  Tensor *output = param.Out();
+  const LoDTensor *input = param.InputX();
+  LoDTensor *output = param.Out();
   ActivationCompute<float, RELU6>()(input, output);
+  output->set_lod(input->lod());
}
#endif
......@@ -97,9 +99,10 @@ bool SigmoidKernel<CPU, float>::Init(SigmoidParam<CPU> *param) {
template <>
void SigmoidKernel<CPU, float>::Compute(const SigmoidParam<CPU> &param) {
-  const Tensor *input = param.InputX();
-  Tensor *output = param.Out();
+  const LoDTensor *input = param.InputX();
+  LoDTensor *output = param.Out();
   ActivationCompute<float, SIGMOID>()(input, output);
+  output->set_lod(input->lod());
}
#endif
......@@ -111,9 +114,10 @@ bool TanhKernel<CPU, float>::Init(TanhParam<CPU> *param) {
template <>
void TanhKernel<CPU, float>::Compute(const TanhParam<CPU> &param) {
-  const Tensor *input = param.InputX();
-  Tensor *output = param.Out();
+  const LoDTensor *input = param.InputX();
+  LoDTensor *output = param.Out();
   ActivationCompute<float, TANH>()(input, output);
+  output->set_lod(input->lod());
}
#endif
......@@ -125,9 +129,10 @@ bool LogKernel<CPU, float>::Init(ReluParam<CPU> *param) {
template <>
void LogKernel<CPU, float>::Compute(const ReluParam<CPU> &param) {
-  const Tensor *input = param.InputX();
-  Tensor *output = param.Out();
+  const LoDTensor *input = param.InputX();
+  LoDTensor *output = param.Out();
   ActivationCompute<float, LOG>()(input, output);
+  output->set_lod(input->lod());
}
#endif
......
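Each activation Compute above now takes LoDTensor instead of Tensor and copies the input's LoD to the output, so sequence offsets survive elementwise ops. A minimal sketch of that pattern with stand-in types (FakeLoDTensor is illustrative, not the framework class):

    #include <cassert>
    #include <vector>

    using Lod = std::vector<std::vector<size_t>>;  // per-level sequence offsets

    struct FakeLoDTensor {
      std::vector<float> data;
      Lod lod;
      void set_lod(const Lod &l) { lod = l; }
    };

    // Elementwise activation: values change, sequence structure must not.
    void ReluCompute(const FakeLoDTensor &in, FakeLoDTensor *out) {
      out->data.resize(in.data.size());
      for (size_t i = 0; i < in.data.size(); ++i)
        out->data[i] = in.data[i] > 0.f ? in.data[i] : 0.f;
      out->set_lod(in.lod);  // the propagation step this patch adds
    }

    int main() {
      FakeLoDTensor in{{-1.f, 2.f, -3.f, 4.f}, {{0, 2, 4}}}, out;
      ReluCompute(in, &out);
      assert(out.lod == in.lod);
    }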
......@@ -28,6 +28,7 @@ bool SoftmaxKernel<CPU, float>::Init(SoftmaxParam<CPU> *param) {
template <>
void SoftmaxKernel<CPU, float>::Compute(const SoftmaxParam<CPU> &param) {
SoftmaxCompute<float>(param);
+  param.Out()->set_lod(param.InputX()->lod());
}
template class SoftmaxKernel<CPU, float>;
......
......@@ -27,13 +27,14 @@ bool FeedKernel<GPU_CL, float>::Init(FeedParam<GPU_CL> *param) {
template <>
void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
+  const int col = param.Col();
auto kernel = this->cl_helper_.KernelAt(0);
auto default_work_size = this->cl_helper_.DefaultWorkSize(*(param.Out()));
cl_int status;
param.Out()->InitEmptyImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue(), param.Out()->dims());
auto output = param.Out();
-  const Tensor *input = param.InputX();
+  const Tensor *input = &param.InputX()->at(col);
// DLOG << *input;
const float *input_data = input->data<float>();
int numel = input->numel();
......
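With feed variables now held as a vector of tensors, the GPU_CL feed kernel selects the entry for its `col` attribute instead of taking a single tensor. A sketch of the indexing scheme (FakeTensor and SelectFeed are hypothetical):

    #include <cassert>
    #include <vector>

    struct FakeTensor { std::vector<float> data; };

    // The feed variable holds one tensor per model input; `col` selects which
    // one this feed op copies to the device, mirroring &param.InputX()->at(col).
    const FakeTensor *SelectFeed(const std::vector<FakeTensor> &feeds, int col) {
      return &feeds.at(col);  // at() bounds-checks the column index
    }

    int main() {
      std::vector<FakeTensor> feeds = {{{1.f}}, {{2.f, 3.f}}};
      assert(SelectFeed(feeds, 1)->data.size() == 2);
    }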
......@@ -35,8 +35,9 @@ void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0);
auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.InputX());
+  const int col = param.Col();
auto input = param.InputX()->GetCLImage();
-  auto *out = param.Out();
+  auto *out = &param.Out()->at(col);
out->Resize(param.InputX()->dims());
out->mutable_data<float>();
const auto &dim = param.InputX()->dims();
......
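The fetch side mirrors the feed side: the kernel resizes its `col` slot of the fetch vector to the producer's shape and allocates host storage before copying back. A stand-in sketch of that resize-then-allocate step (FakeTensor is hypothetical):

    #include <cassert>
    #include <vector>

    struct FakeTensor {
      std::vector<float> data;
      std::vector<int> dims;
      void Resize(const std::vector<int> &d) { dims = d; }
      float *mutable_data() {  // allocate to match dims, then expose raw storage
        size_t n = 1;
        for (int d : dims) n *= d;
        data.resize(n);
        return data.data();
      }
    };

    int main() {
      // One slot per model output; the kernel writes into slot `col`.
      std::vector<FakeTensor> fetches(2);
      FakeTensor *out = &fetches.at(1);
      out->Resize({2, 3});
      float *dst = out->mutable_data();
      dst[0] = 42.f;
      assert(out->data.size() == 6);
    }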
......@@ -57,12 +57,12 @@ void sgemm_6x16(const float *lhs, const float *rhs, const int k, float *output,
"dup v28.4s, wzr \n\t"
"dup v29.4s, wzr \n\t"
"subs %[kc1], %[kc1], #1 \n\t"
"blt 2f \n\t"
"1: \n\t"
"subs %[kc1], %[kc1], #1 \n\t"
"blt 2f \n\t"
"1: \n\t"
"prfm pldl1keep, [%[lhs], #24] \n\t"
"prfm pldl1keep, [%[rhs], #64] \n\t"
"prfm pldl1keep, [%[lhs], #32] \n\t"
"prfm pldl1keep, [%[rhs], #64] \n\t"
"ld1 {v0.4s, v1.4s}, [%[lhs]], %[step1] \n\t"
"ld1 {v2.4s, v3.4s, v4.4s, v5.4s}, [%[rhs]], #64 \n\t"
......@@ -109,9 +109,10 @@ void sgemm_6x16(const float *lhs, const float *rhs, const int k, float *output,
"st1 {v26.4s, v27.4s, v28.4s, v29.4s}, [%[c]], %[step] \n\t"
: [lhs] "+r"(lhs), [rhs] "+r"(rhs), [c] "+r"(output), [kc1] "+r"(kc1)
: [step] "r"(step), [step1] "r"(step1)
: "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
"v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
"v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29");
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
"v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
"v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28",
"v29");
}
#else
void sgemm_6x8(const float *lhs, const float *rhs, const int k, float *output,
......
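The widened clobber list matters because `subs` updates the condition flags that `blt` reads; without declaring "cc", the compiler may assume the flags survive across the asm statement and miscompile surrounding code. A minimal AArch64 inline-asm sketch of the same idiom (CountDown is illustrative, not from the patch):

    #include <cassert>
    #include <cstdint>

    // Counts down from n to zero with a flag-setting subtract, declaring "cc"
    // because subs clobbers the NZCV flags, just like the sgemm loop.
    int64_t CountDown(int64_t n) {
    #if defined(__aarch64__)
      asm volatile(
          "1:                  \n\t"
          "subs %[n], %[n], #1 \n\t"  // n -= 1, sets flags
          "b.gt 1b             \n\t"  // loop while n > 0
          : [n] "+r"(n)
          :
          : "cc");  // flags are modified: tell the compiler
    #else
      while (n > 0) --n;  // portable fallback on non-ARM64 builds
    #endif
      return n;
    }

    int main() { assert(CountDown(5) == 0); }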
......@@ -1224,19 +1224,19 @@ class FeedParam : public OpParam {
FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, Scope *scope)
: OpParam(inputs, outputs, attrs, scope) {
-    input_x_ = InputXFrom<framework::LoDTensorArray>(inputs, *scope);
+    input_x_ = InputXFrom<std::vector<LoDTensor>>(inputs, *scope);
out_ = OutFrom<GType>(outputs, *scope);
col_ = GetAttr<int>("col", attrs);
auto var = scope->FindVar("batch_size");
batch_size = var->GetValue<int>();
}
-  const framework::LoDTensorArray *InputX() const { return input_x_; }
+  const std::vector<LoDTensor> *InputX() const { return input_x_; }
GType *Out() const { return out_; }
const int Col() const { return col_; }
const int BatchSize() const { return batch_size; }
private:
-  framework::LoDTensorArray *input_x_;
+  std::vector<LoDTensor> *input_x_;
GType *out_;
int col_;
int batch_size;
......@@ -1251,18 +1251,18 @@ class FetchParam : public OpParam {
FetchParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, Scope *scope)
: OpParam(inputs, outputs, attrs, scope) {
-    input_x_ = InputXFrom<framework::LoDTensor>(inputs, *scope);
-    out_ = OutFrom<framework::LoDTensorArray>(outputs, *scope);
+    input_x_ = InputXFrom<GType>(inputs, *scope);
+    out_ = OutFrom<std::vector<LoDTensor>>(outputs, *scope);
col_ = GetAttr<int>("col", attrs);
}
-  const framework::LoDTensor *InputX() const { return input_x_; }
-  framework::LoDTensorArray *Out() const { return out_; }
+  const GType *InputX() const { return input_x_; }
+  std::vector<LoDTensor> *Out() const { return out_; }
const int Col() const { return col_; }
private:
-  framework::LoDTensor *input_x_;
-  framework::LoDTensorArray *out_;
+  GType *input_x_;
+  std::vector<LoDTensor> *out_;
int col_;
#ifdef PADDLE_MOBILE_FPGA
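Both params above swap framework::LoDTensorArray for std::vector<LoDTensor>, and FetchParam now names its input via the GType device trait rather than a hard-coded LoDTensor. A compressed, self-contained sketch of the trait idea with stand-in types (all names here are hypothetical):

    #include <vector>

    // Stand-ins illustrating the DtypeTensorTrait pattern: params resolve
    // tensor types per device, so one FetchParam works for CPU and GPU_CL.
    struct CpuTensor {};
    struct ClImage {};
    struct CPU {};
    struct GPU_CL {};

    template <typename Device> struct TensorTrait;  // primary template
    template <> struct TensorTrait<CPU> { using gtype = CpuTensor; };
    template <> struct TensorTrait<GPU_CL> { using gtype = ClImage; };

    template <typename Device>
    class FetchParamSketch {
      using GType = typename TensorTrait<Device>::gtype;

     public:
      const GType *InputX() const { return input_x_; }
      std::vector<CpuTensor> *Out() const { return out_; }  // result lives on host
      int Col() const { return col_; }

     private:
      GType *input_x_ = nullptr;
      std::vector<CpuTensor> *out_ = nullptr;
      int col_ = 0;
    };

    int main() {
      FetchParamSketch<CPU> cpu_param;     // InputX() yields CpuTensor*
      FetchParamSketch<GPU_CL> gpu_param;  // InputX() yields ClImage*
      (void)cpu_param; (void)gpu_param;
    }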
......@@ -2371,6 +2371,15 @@ class ConvTransposeParam : public OpParam {
const int &Groups() const { return groups; }
+  enum ExecMode {
+    EXEC_INVALID = 0,
+    EXEC_GEMM_FLOAT,
+    EXEC_DECONV3X3_FLOAT,
+    EXEC_DECONV4X4_FLOAT,
+  };
+  ExecMode &ExecMode() const { return exec_mode_; }
private:
GType *input_;
GType *output_;
......@@ -2379,6 +2388,7 @@ class ConvTransposeParam : public OpParam {
vector<int> paddings_;
vector<int> dilations_;
int groups;
+  mutable enum ExecMode exec_mode_;
#ifdef PADDLE_MOBILE_FPGA
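The new ExecMode enum lets the conv-transpose op select a kernel variant once and cache the choice; exec_mode_ is mutable so the const accessor can still hand back a writable reference during dispatch. A compressed sketch of that idiom (ParamSketch and Mode are hypothetical names):

    #include <cassert>

    class ParamSketch {
     public:
      enum ExecMode { EXEC_INVALID = 0, EXEC_GEMM_FLOAT, EXEC_DECONV3X3_FLOAT };

      // Const accessor returning a mutable reference: params are passed around
      // as const, but the kernel's Init() still records the chosen mode.
      ExecMode &Mode() const { return exec_mode_; }

     private:
      mutable ExecMode exec_mode_ = EXEC_INVALID;
    };

    int main() {
      const ParamSketch p;
      p.Mode() = ParamSketch::EXEC_GEMM_FLOAT;  // legal: exec_mode_ is mutable
      assert(p.Mode() == ParamSketch::EXEC_GEMM_FLOAT);
    }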
......@@ -3214,43 +3224,46 @@ class LogicalUnaryParam : public OpParam {
#ifdef WRITE_TO_ARRAY_OP
template <typename Dtype>
class WriteToArrayParam : public OpParam {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public:
WriteToArrayParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
Scope *scope)
: OpParam(inputs, outputs, attrs, scope) {
-    input_ = OpParam::GetVarValue<framework::LoDTensor>("X", inputs, *scope);
-    index_ = OpParam::GetVarValue<framework::LoDTensor>("I", inputs, *scope);
-    output_ =
-        OpParam::GetVarValue<framework::LoDTensorArray>("Out", outputs, *scope);
+    input_ = OpParam::GetVarValue<GType>("X", inputs, *scope);
+    index_ = OpParam::GetVarValue<GType>("I", inputs, *scope);
+    output_ = OpParam::GetVarValue<std::vector<GType>>("Out", outputs, *scope);
}
public:
-  framework::LoDTensor *input_;
-  framework::LoDTensor *index_;
-  framework::LoDTensorArray *output_;
+  GType *input_;
+  GType *index_;
+  std::vector<GType> *output_;
};
#endif
#ifdef READ_FROM_ARRAY_OP
template <typename Dtype>
class ReadFromArrayParam : public OpParam {
+  typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+  typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public:
ReadFromArrayParam(const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
Scope *scope)
: OpParam(inputs, outputs, attrs, scope) {
-    input_ =
-        OpParam::GetVarValue<framework::LoDTensorArray>("X", inputs, *scope);
-    index_ = OpParam::GetVarValue<framework::LoDTensor>("I", inputs, *scope);
-    output_ =
-        OpParam::GetVarValue<framework::LoDTensor>("Out", outputs, *scope);
+    input_ = OpParam::GetVarValue<std::vector<GType>>("X", inputs, *scope);
+    index_ = OpParam::GetVarValue<GType>("I", inputs, *scope);
+    output_ = OpParam::GetVarValue<GType>("Out", outputs, *scope);
}
public:
-  framework::LoDTensorArray *input_;
-  framework::LoDTensor *index_;
-  framework::LoDTensor *output_;
+  std::vector<GType> *input_;
+  GType *index_;
+  GType *output_;
};
#endif
......
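The two array params above move tensor arrays to std::vector<GType>: write_to_array stores a tensor at the position named by an index tensor (growing the array if needed), and read_from_array copies it back out. A minimal sketch of that pair with stand-in types (FakeTensor and both helpers are hypothetical):

    #include <cassert>
    #include <vector>

    struct FakeTensor { float value = 0.f; };

    // write_to_array semantics: grow the array if needed, then store at index.
    void WriteToArray(const FakeTensor &x, size_t index,
                      std::vector<FakeTensor> *out) {
      if (out->size() <= index) out->resize(index + 1);
      (*out)[index] = x;
    }

    // read_from_array semantics: copy the element at index back out.
    void ReadFromArray(const std::vector<FakeTensor> &arr, size_t index,
                       FakeTensor *out) {
      *out = arr.at(index);
    }

    int main() {
      std::vector<FakeTensor> arr;
      WriteToArray(FakeTensor{3.f}, 2, &arr);  // array grows to 3 slots
      FakeTensor y;
      ReadFromArray(arr, 2, &y);
      assert(y.value == 3.f);
    }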
......@@ -21,7 +21,6 @@ namespace operators {
template <typename DeviceType, typename T>
void SoftmaxOp<DeviceType, T>::InferShape() const {
this->param_.Out()->Resize(this->param_.InputX()->dims());
-  this->param_.Out()->set_lod(this->param_.InputX()->lod());
}
} // namespace operators
......