提交 ddbcabed 编写于 作者: S suiyang 提交者: GitHub

Merge branch 'develop' into develop

......@@ -40,9 +40,11 @@ const char *G_OP_TYPE_POOL2D = "pool2d";
const char *G_OP_TYPE_PRIOR_BOX = "prior_box";
const char *G_OP_TYPE_RELU = "relu";
const char *G_OP_TYPE_RESHAPE = "reshape";
const char *G_OP_TYPE_RESHAPE2 = "reshape2";
const char *G_OP_TYPE_SIGMOID = "sigmoid";
const char *G_OP_TYPE_SOFTMAX = "softmax";
const char *G_OP_TYPE_TRANSPOSE = "transpose";
const char *G_OP_TYPE_TRANSPOSE2 = "transpose2";
const char *G_OP_TYPE_SPLIT = "split";
const char *G_OP_TYPE_FEED = "feed";
const char *G_OP_TYPE_FETCH = "fetch";
......@@ -90,6 +92,7 @@ std::unordered_map<
{G_OP_TYPE_FEED, {{"X"}, {"Out"}}},
{G_OP_TYPE_FETCH, {{"X"}, {"Out"}}},
{G_OP_TYPE_TRANSPOSE, {{"X"}, {"Out"}}},
{G_OP_TYPE_TRANSPOSE2, {{"X"}, {"Out", "XShape"}}},
{G_OP_TYPE_BOX_CODER,
{{"PriorBox", "PriorBoxVar", "TargetBox"}, {"OutputBox"}}},
{G_OP_TYPE_FUSION_CONV_ADD_BN_RELU, {{"Input"}, {"Out"}}},
......@@ -99,6 +102,7 @@ std::unordered_map<
{G_OP_TYPE_POLYGON_BOX_TRANSFORM, {{"Input"}, {"Output"}}},
{G_OP_TYPE_FC, {{"X", "Y", "Z"}, {"Out"}}},
{G_OP_TYPE_RESHAPE, {{"X"}, {"Out"}}},
{G_OP_TYPE_RESHAPE2, {{"X"}, {"Out", "XShape"}}},
{G_OP_TYPE_DEPTHWISE_CONV, {{"Input"}, {"Output"}}},
{G_OP_TYPE_FILL_CONSTANT, {{}, {"Out"}}},
{G_OP_TYPE_FUSION_CONV_ADD_RELU, {{"Input"}, {"Out"}}},
......
......@@ -109,9 +109,15 @@ LOAD_FUSION_MATCHER(fusion_conv_add_bn_relu);
#ifdef RESHAPE_OP
LOAD_OP2(reshape, CPU, MALI_GPU);
#endif
#ifdef RESHAPE2_OP
LOAD_OP2(reshape2, CPU, MALI_GPU);
#endif
#ifdef TRANSPOSE_OP
LOAD_OP1(transpose, CPU);
#endif
#ifdef TRANSPOSE2_OP
LOAD_OP1(transpose2, CPU);
#endif
#ifdef PRIORBOX_OP
LOAD_OP1(prior_box, CPU);
#endif
......@@ -221,5 +227,9 @@ LOAD_FUSION_MATCHER(fusion_conv_bn);
#ifdef ELEMENTWISESUB_OP
LOAD_OP1(elementwise_sub, CPU)
#endif
#ifdef QUANT_OP
LOAD_OP1(quantize, CPU);
#endif
#ifdef DEQUANT_OP
LOAD_OP1(dequantize, CPU);
#endif
......@@ -135,11 +135,15 @@ static void quantize_round_to_even(const Tensor *input, const float scale,
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
size_t loop = size >> 4;
size_t remain = size & 0xF;
#pragma omp parallel for
for (size_t i = 0; i < loop; ++i) {
float32x4_t r0 = vld1q_f32(x);
float32x4_t r1 = vld1q_f32(x + 4);
float32x4_t r2 = vld1q_f32(x + 8);
float32x4_t r3 = vld1q_f32(x + 12);
const float *local_x = x + (i << 4);
int8_t *local_y = y + (i << 4);
float32x4_t r0 = vld1q_f32(local_x);
float32x4_t r1 = vld1q_f32(local_x + 4);
float32x4_t r2 = vld1q_f32(local_x + 8);
float32x4_t r3 = vld1q_f32(local_x + 12);
r0 = vmulq_n_f32(r0, scale);
r1 = vmulq_n_f32(r1, scale);
r2 = vmulq_n_f32(r2, scale);
......@@ -156,12 +160,12 @@ static void quantize_round_to_even(const Tensor *input, const float scale,
int16x8_t q6 = vcombine_s16(d2, d3);
int8x8_t d5 = vmovn_s16(q5);
int8x8_t d6 = vmovn_s16(q6);
vst1_s8(y, d5);
vst1_s8(y + 8, d6);
x += 16;
y += 16;
vst1_s8(local_y, d5);
vst1_s8(local_y + 8, d6);
}
size = remain;
x += (loop << 4);
y += (loop << 4);
#endif
for (size_t i = 0; i < size; ++i) {
float value = x[i] * scale;
......@@ -187,11 +191,15 @@ static void quantize_round_to_zero(const Tensor *input, const float scale,
#ifdef defined(__ARM_NEON__) || defined(__ARM_NEON)
size_t loop = size >> 4;
size_t remain = size & 0xF;
#pragma omp parallel for
for (size_t i = 0; i < loop; ++i) {
float32x4_t r0 = vld1q_f32(x);
float32x4_t r1 = vld1q_f32(x + 4);
float32x4_t r2 = vld1q_f32(x + 8);
float32x4_t r3 = vld1q_f32(x + 12);
const float *local_x = x + (i << 4);
int8_t *local_y = y + (i << 4);
float32x4_t r0 = vld1q_f32(local_x);
float32x4_t r1 = vld1q_f32(local_x + 4);
float32x4_t r2 = vld1q_f32(local_x + 8);
float32x4_t r3 = vld1q_f32(local_x + 12);
r0 = vmulq_n_f32(r0, scale);
r1 = vmulq_n_f32(r1, scale);
r2 = vmulq_n_f32(r2, scale);
......@@ -208,12 +216,12 @@ static void quantize_round_to_zero(const Tensor *input, const float scale,
int16x8_t q6 = vcombine_s16(d2, d3);
int8x8_t d5 = vmovn_s16(q5);
int8x8_t d6 = vmovn_s16(q6);
vst1_s8(y, d5);
vst1_s8(y + 8, d6);
x += 16;
y += 16;
vst1_s8(local_y, d5);
vst1_s8(local_y + 8, d6);
}
size = remain;
x += (loop << 4);
y += (loop << 4);
#endif
for (size_t i = 0; i < size; ++i) {
y[i] = trunc(x[i] * scale);
......@@ -228,11 +236,15 @@ static void quantize_round_to_nearest(const Tensor *input, const float scale,
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
size_t loop = size >> 4;
size_t remain = size & 0xF;
#pragma omp parallel for
for (size_t i = 0; i < loop; ++i) {
float32x4_t r0 = vld1q_f32(x);
float32x4_t r1 = vld1q_f32(x + 4);
float32x4_t r2 = vld1q_f32(x + 8);
float32x4_t r3 = vld1q_f32(x + 12);
const float *local_x = x + (i << 4);
int8_t *local_y = y + (i << 4);
float32x4_t r0 = vld1q_f32(local_x);
float32x4_t r1 = vld1q_f32(local_x + 4);
float32x4_t r2 = vld1q_f32(local_x + 8);
float32x4_t r3 = vld1q_f32(local_x + 12);
r0 = vmulq_n_f32(r0, scale);
r1 = vmulq_n_f32(r1, scale);
r2 = vmulq_n_f32(r2, scale);
......@@ -249,12 +261,12 @@ static void quantize_round_to_nearest(const Tensor *input, const float scale,
int16x8_t q6 = vcombine_s16(d2, d3);
int8x8_t d5 = vmovn_s16(q5);
int8x8_t d6 = vmovn_s16(q6);
vst1_s8(y, d5);
vst1_s8(y + 8, d6);
x += 16;
y += 16;
vst1_s8(local_y, d5);
vst1_s8(local_y + 8, d6);
}
size = remain;
x += (loop << 4);
y += (loop << 4);
#endif
for (size_t i = 0; i < size; ++i) {
y[i] = round(x[i] * scale);
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RESHAPE2_OP
#include "operators/kernel/reshape2_kernel.h"
#include "operators/kernel/central-arm-func/reshape2_arm_func.h"
namespace paddle_mobile {
namespace operators {
template <>
bool Reshape2Kernel<CPU, float>::Init(Reshape2Param<CPU> *param) {
return true;
}
template <>
void Reshape2Kernel<CPU, float>::Compute(
const Reshape2Param<CPU> &param) const {
Reshape2Compute<float>(param);
}
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef TRANSPOSE2_OP
#include "operators/kernel/transpose2_kernel.h"
#include "operators/kernel/central-arm-func/transpose2_arm_func.h"
namespace paddle_mobile {
namespace operators {
template <>
bool Transpose2Kernel<CPU, float>::Init(Transpose2Param<CPU> *param) {
return true;
}
template <>
void Transpose2Kernel<CPU, float>::Compute(
const Transpose2Param<CPU> &param) const {
Transpose2Compute<float>(param);
}
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -58,6 +58,7 @@ void ElementwiseAddCompute(const ElementwiseAddParam<CPU> &param) {
const float *input_data = input_x->data<float>();
float *output_data = Out->mutable_data<float>();
for (int i = 0; i < batch; ++i) {
#pragma omp parallel for
for (int j = 0; j < channels; ++j) {
size_t offset = (i * channels + j) * elementwise_num;
const float *input = input_data + offset;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RESHAPE2_OP
#pragma once
#include <vector>
#include "operators/kernel/reshape_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename P>
void Reshape2Compute(const Reshape2Param<CPU> &param) {
const auto *input_x = param.InputX();
const auto &input_x_dims = input_x->dims();
auto *out = param.Out();
framework::DDim out_dims = out->dims();
const auto *input_shape = param.InputShape();
if (input_shape) {
auto *shape_data = input_shape->data<int>();
framework::Tensor cpu_shape_tensor;
auto shape =
std::vector<int>(shape_data, shape_data + input_shape->numel());
out_dims = ValidateShape(shape, input_x->dims());
} else {
auto &shape = param.Shape();
out_dims = ValidateShape(shape, input_x_dims);
}
bool inplace = param.Inplace();
out->Resize(out_dims);
if (!inplace) {
out->mutable_data<float>();
framework::TensorCopy(*input_x, out);
out->Resize(out_dims);
} else {
out->ShareDataWith(*input_x);
out->Resize(out_dims);
}
}
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef TRANSPOSE2_OP
#pragma once
#include <vector>
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename P>
void Transpose2Compute(const Transpose2Param<CPU>& param) {
const auto* input_x = param.InputX();
const auto input_x_dims = input_x->dims();
auto* out = param.Out();
const auto axis = param.Axis();
const auto* input_x_data = input_x->data<float>();
auto* out_data = out->mutable_data<float>();
size_t ndim = axis.size();
std::vector<int> xdim(ndim);
std::vector<int> xstride(ndim);
std::vector<int> xout(ndim);
for (int i = 0; i < ndim; i++) {
int j = ndim - 1 - i;
xdim[j] = input_x_dims[axis[i]];
xstride[j] = 1;
for (int k = axis[i] + 1; k < ndim; k++) {
xstride[j] *= input_x_dims[k];
}
xout[j] = xstride[j] * xdim[j];
}
auto numel = input_x->numel();
size_t pind = 0;
std::vector<int> ind(ndim);
for (int i = 0; i < numel; i++) {
out_data[i] = input_x_data[pind];
ind[0]++;
pind += xstride[0];
for (int j = 0; j < ndim - 1; j++) {
if (ind[j] == xdim[j]) {
ind[j + 1]++;
ind[j] = 0;
pind += xstride[j + 1];
pind -= xout[j];
} else {
break;
}
}
}
}
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RESHAPE2_OP
#pragma once
#include <vector>
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class Reshape2Kernel
: public framework::OpKernelBase<DeviceType, Reshape2Param<DeviceType>> {
public:
void Compute(const Reshape2Param<DeviceType>& param) const;
bool Init(Reshape2Param<DeviceType>* param);
};
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef TRANSPOSE2_OP
#pragma once
#include <vector>
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class Transpose2Kernel
: public framework::OpKernelBase<DeviceType, Transpose2Param<DeviceType>> {
public:
void Compute(const Transpose2Param<DeviceType>& param) const;
bool Init(Transpose2Param<DeviceType>* param);
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -243,6 +243,12 @@ class OpParam {
return GetVarValue<T>("Y", outputs, scope);
}
template <typename T>
static T *OutputXShapeFrom(const VariableNameMap &outputs,
const Scope &scope) {
return GetVarValue<T>("XShape", outputs, scope);
}
template <typename T>
static T *OutputBoxesFrom(const VariableNameMap &outputs,
const Scope &scope) {
......@@ -1126,6 +1132,37 @@ class TransposeParam : public OpParam {
};
#endif
#ifdef TRANSPOSE2_OP
template <typename Dtype>
class Transpose2Param : public OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType;
typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public:
Transpose2Param(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, const Scope &scope) {
input_x_ = InputXFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
output_xshape_ = OutputXShapeFrom<GType>(outputs, scope);
axis_ = GetAttr<vector<int>>("axis", attrs);
}
const RType *InputX() const { return input_x_; }
RType *Out() const { return out_; }
RType *OutputXShape() const { return output_xshape_; }
const vector<int> &Axis() const { return axis_; }
private:
RType *input_x_;
RType *out_;
RType *output_xshape_;
vector<int> axis_;
};
#endif
#ifdef LOOKUP_OP
template <typename Dtype>
class LookupParam : public OpParam {
......@@ -1233,6 +1270,49 @@ class ReshapeParam : public OpParam {
};
#endif
#ifdef RESHAPE2_OP
template <typename Dtype>
class Reshape2Param : public OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType;
typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public:
Reshape2Param(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, const Scope &scope) {
input_x_ = InputXFrom<GType>(inputs, scope);
input_shape_ = InputShapeFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
output_xshape_ = OutputXShapeFrom<GType>(outputs, scope);
shape_ = GetAttr<vector<int>>("shape", attrs);
if (HasAttr("inplace", attrs)) {
inplace_ = GetAttr<bool>("inplace", attrs);
} else {
inplace_ = false;
}
}
const RType *InputX() const { return input_x_; }
const RType *InputShape() const { return input_shape_; }
RType *Out() const { return out_; }
RType *OutputXShape() const { return output_xshape_; }
const vector<int> &Shape() const { return shape_; }
const bool &Inplace() const { return inplace_; }
private:
RType *input_x_;
RType *input_shape_;
RType *out_;
RType *output_xshape_;
vector<int> shape_;
bool inplace_;
};
#endif
#ifdef SCALE_OP
template <typename Dtype>
class ScaleParam : public OpParam {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RESHAPE2_OP
#include "operators/reshape2_op.h"
#include <vector>
#include "operators/kernel/reshape_kernel.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void Reshape2Op<Dtype, T>::InferShape() const {
auto &shape = this->param_.Shape();
auto input_x_dims = this->param_.InputX()->dims();
auto out_dims = ValidateShape(shape, input_x_dims);
this->param_.Out()->Resize(out_dims);
std::vector<int64_t> xshape_dims(input_x_dims.size() + 1, 0);
for (int i = 0; i < input_x_dims.size(); ++i) {
xshape_dims[i + 1] = input_x_dims[i];
}
this->param_.OutputXShape()->Resize(framework::make_ddim(xshape_dims));
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(reshape2, ops::Reshape2Op);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(reshape2, ops::Reshape2Op);
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RESHAPE2_OP
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/reshape2_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using paddle_mobile::framework::Tensor;
template <typename DeviceType, typename T>
class Reshape2Op : public framework::OperatorWithKernel<
DeviceType, Reshape2Param<DeviceType>,
operators::Reshape2Kernel<DeviceType, T>> {
public:
Reshape2Op(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, Reshape2Param<DeviceType>,
operators::Reshape2Kernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, Reshape2Param<DeviceType>,
operators::Reshape2Kernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
protected:
};
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef TRANSPOSE2_OP
#include <vector>
#include "common/enforce.h"
#include "operators/transpose2_op.h"
namespace paddle_mobile {
namespace operators {
template <typename Dtype, typename T>
void Transpose2Op<Dtype, T>::InferShape() const {
auto input_x_dims = this->param_.InputX()->dims();
auto axis = this->param_.Axis();
size_t x_dims_size = input_x_dims.size();
size_t axis_size = axis.size();
PADDLE_MOBILE_ENFORCE((x_dims_size == axis_size),
"input_dims must "
"be equal to the axis_size. ")
std::vector<int> count(axis_size, 0);
for (size_t i = 0; i < axis_size; i++) {
PADDLE_MOBILE_ENFORCE(
axis[i] < static_cast<int>(axis_size) && ++count[axis[i]] == 1,
"Each element of Attribute axis should be a unique value "
"range from 0 to (dims - 1), "
"where the dims is the axis's size");
}
framework::DDim out_dims(input_x_dims);
for (size_t i = 0; i < axis_size; i++) {
out_dims[i] = input_x_dims[axis[i]];
}
this->param_.Out()->Resize(out_dims);
std::vector<int64_t> xshape_dims(input_x_dims.size() + 1, 0);
for (int i = 0; i < input_x_dims.size(); ++i) {
xshape_dims[i + 1] = input_x_dims[i];
}
this->param_.OutputXShape()->Resize(framework::make_ddim(xshape_dims));
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(transpose2, ops::Transpose2Op);
#endif
#endif // TRANSPOSE_OP
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef TRANSPOSE2_OP
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/transpose2_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using paddle_mobile::framework::Tensor;
template <typename DeviceType, typename T>
class Transpose2Op : public framework::OperatorWithKernel<
DeviceType, Transpose2Param<DeviceType>,
operators::Transpose2Kernel<DeviceType, T>> {
public:
Transpose2Op(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<
DeviceType, Transpose2Param<DeviceType>,
operators::Transpose2Kernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, Transpose2Param<DeviceType>,
operators::Transpose2Kernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override;
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -184,6 +184,10 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE(test-transpose-op operators/test_transpose_op.cpp test_helper.h test_include.h)
target_link_libraries(test-transpose-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-transpose2-op operators/test_transpose2_op.cpp test_helper.h test_include.h)
target_link_libraries(test-transpose2-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-multiclassnms-op operators/test_multiclass_nms_op.cpp test_helper.h test_include.h)
target_link_libraries(test-multiclassnms-op paddle-mobile)
......@@ -200,6 +204,10 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE(test-reshape-op operators/test_reshape_op.cpp test_helper.h test_include.h)
target_link_libraries(test-reshape-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-reshape2-op operators/test_reshape2_op.cpp test_helper.h test_include.h)
target_link_libraries(test-reshape2-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-relu-op operators/test_relu_op.cpp test_helper.h test_include.h)
target_link_libraries(test-relu-op paddle-mobile)
......
......@@ -25,8 +25,8 @@ int main() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
#endif
paddle_mobile.SetThreadNum(1);
bool optimize = false;
paddle_mobile.SetThreadNum(4);
bool optimize = true;
auto time1 = time();
if (paddle_mobile.Load(g_googlenet, optimize)) {
auto time2 = time();
......@@ -35,10 +35,10 @@ int main() {
std::vector<float> output;
std::vector<int64_t> dims{1, 3, 224, 224};
GetInput<float>(g_test_image_1x3x224x224, &input, dims);
// // 预热十次
// for (int i = 0; i < 10; ++i) {
// output = paddle_mobile.Predict(input, dims);
// }
// 预热十次
for (int i = 0; i < 10; ++i) {
output = paddle_mobile.Predict(input, dims);
}
auto time3 = time();
for (int i = 0; i < 10; ++i) {
output = paddle_mobile.Predict(input, dims);
......@@ -47,9 +47,6 @@ int main() {
std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
<< std::endl;
for (int i = 0; i < output.size(); ++i) {
DLOG << "result[" << i << "] = " << output[i];
}
}
return 0;
}
......@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/batchnorm_op.h"
......
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "../test_include.h"
#include "operators/box_coder_op.h"
......
......@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/elementwise_sub_op.h"
......
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "../test_include.h"
#include "operators/fill_constant_op.h"
......
......@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <framework/program/program-optimize/program_optimize.h>
#include "../test_include.h"
#include "operators/fusion_fc_op.h"
......
......@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/im2sequence_op.h"
......
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "../test_include.h"
#include "operators/multiclass_nms_op.h"
......@@ -31,14 +30,12 @@ class TestMultiClassNMSOp {
const std::vector<std::shared_ptr<BlockDesc>> blocks =
to_predict_program_->Blocks();
// DLOG << " **block size " << blocks.size();
for (auto block_desc : blocks) {
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
// DLOG << " ops " << ops.size();
for (auto op : ops) {
if (op->Type() == "multiclass_nms" &&
op->Input("BBoxes")[0] == "box_coder_0.tmp_0") {
DLOG << " mul attr size: " << op->GetAttrMap().size();
DLOG << " attr size: " << op->GetAttrMap().size();
DLOG << " inputs size: " << op->GetInputs().size();
DLOG << " outputs size: " << op->GetOutputs().size();
DLOG << " BBoxes is : " << op->Input("BBoxes")[0];
......@@ -55,14 +52,6 @@ class TestMultiClassNMSOp {
<< op->GetAttrMap().at("nms_top_k").Get<int>();
DLOG << " score_threshold : "
<< op->GetAttrMap().at("score_threshold").Get<float>();
// DLOG << " variances : " <<
// op->GetAttrMap().at("variances").Get<std::vector<float>>();
// DLOG << " aspect_ratios : " <<
// op->GetAttrMap().at("aspect_ratios").Get<std::vector<float>>();
// DLOG << " min_sizes : " <<
// op->GetAttrMap().at("min_sizes").Get<std::vector<float>>();
// DLOG << " max_sizes : " <<
// op->GetAttrMap().at("max_sizes").Get<std::vector<float>>();
std::shared_ptr<operators::MultiClassNMSOp<Dtype, float>> priorbox =
std::make_shared<operators::MultiClassNMSOp<Dtype, float>>(
op->Type(), op->GetInputs(), op->GetOutputs(),
......@@ -88,16 +77,12 @@ class TestMultiClassNMSOp {
auto *output_tensor = output->GetMutable<LoDTensor>();
output_tensor->mutable_data<float>({1917, 6});
// DLOG << typeid(output_tensor).name();
// DLOG << "output_tensor dims: " << output_tensor->dims();
std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
out_tensor.reset(output_tensor);
predict(t1, t2, 0);
return out_tensor;
// return outvars_tensor;
}
private:
......
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "../test_include.h"
#include "operators/polygon_box_transform_op.h"
......
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "../test_include.h"
#include "operators/prior_box_op.h"
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_include.h"
#include "operators/reshape2_op.h"
namespace paddle_mobile {
namespace framework {
template <typename Dtype>
class TestReshape2Op {
public:
explicit TestReshape2Op(const Program<Dtype> p) : program_(p) {
if (use_optimize_) {
to_predict_program_ = program_.optimizeProgram;
} else {
to_predict_program_ = program_.originProgram;
}
const std::vector<std::shared_ptr<BlockDesc>> blocks =
to_predict_program_->Blocks();
for (auto block_desc : blocks) {
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
for (auto op : ops) {
if (op->Type() == "reshape2") {
DLOG << " attr size: " << op->GetAttrMap().size();
std::unordered_map<std::string, Attribute> attrs = op->GetAttrMap();
for (std::unordered_map<std::string, Attribute>::iterator it =
attrs.begin();
it != attrs.end(); ++it) {
DLOG << " " << it->first << " " << it->second;
}
DLOG << " inputs size: " << op->GetInputs().size();
VariableNameMap inputs = op->GetInputs();
for (VariableNameMap::iterator it = inputs.begin();
it != inputs.end(); ++it) {
DLOG << " " << it->first << " " << it->second;
}
DLOG << " outputs size: " << op->GetOutputs().size();
VariableNameMap outputs = op->GetOutputs();
for (VariableNameMap::iterator it = outputs.begin();
it != outputs.end(); ++it) {
DLOG << " " << it->first << " " << it->second;
}
input_var_name = op->Input("X")[0];
output_var_name = op->Output("Out")[0];
std::shared_ptr<operators::Reshape2Op<Dtype, float>> op_ptr =
std::make_shared<operators::Reshape2Op<Dtype, float>>(
op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), program_.scope);
ops_of_block_[*block_desc.get()].push_back(op_ptr);
return;
}
}
}
}
std::shared_ptr<Tensor> predict(const Tensor &t) {
auto scope = program_.scope;
Variable *input_feed_value = scope->Var(input_var_name);
auto tensor_input = input_feed_value->GetMutable<LoDTensor>();
tensor_input->ShareDataWith(t);
Variable *output = scope->Var(output_var_name);
auto *output_tensor = output->GetMutable<LoDTensor>();
std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
out_tensor.reset(output_tensor);
predict(t, 0);
return out_tensor;
}
private:
const framework::Program<Dtype> program_;
std::shared_ptr<ProgramDesc> to_predict_program_;
std::map<framework::BlockDesc,
std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
ops_of_block_;
bool use_optimize_ = false;
string input_var_name;
string output_var_name;
void predict(const Tensor &t, int block_id) {
std::shared_ptr<BlockDesc> to_predict_block =
to_predict_program_->Block(block_id);
for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
auto op = ops_of_block_[*to_predict_block.get()][j];
op->Run();
}
}
};
template class TestReshape2Op<CPU>;
} // namespace framework
} // namespace paddle_mobile
int main() {
DLOG << "----------**********----------";
DLOG << "begin to run Reshape2 Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_ocr) + "/model",
std::string(g_ocr) + "/params");
paddle_mobile::framework::Tensor input;
SetupTensor<float>(&input, {1, 4, 4}, static_cast<float>(0),
static_cast<float>(1));
auto *input_ptr = input.data<float>();
for (int i = 0; i < 16; ++i) {
*(input_ptr + i) = i;
}
DLOG << "input : ";
for (int i = 0; i < input.numel(); ++i) {
DLOG << " index " << i << " : " << input_ptr[i];
}
paddle_mobile::framework::TestReshape2Op<paddle_mobile::CPU> testReshape2Op(
program);
auto output = testReshape2Op.predict(input);
auto *output_ptr = output->data<float>();
DLOG << "output : ";
for (int i = 0; i < output->numel(); ++i) {
DLOG << " index " << i << " : " << output_ptr[i];
}
return 0;
}
......@@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "../test_helper.h"
#include "../test_include.h"
#include "operators/sum_op.h"
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_include.h"
#include "operators/transpose2_op.h"
namespace paddle_mobile {
namespace framework {
template <typename Dtype>
class TestTranspose2Op {
public:
explicit TestTranspose2Op(const Program<Dtype> p) : program_(p) {
if (use_optimize_) {
to_predict_program_ = program_.optimizeProgram;
} else {
to_predict_program_ = program_.originProgram;
}
const std::vector<std::shared_ptr<BlockDesc>> blocks =
to_predict_program_->Blocks();
for (auto block_desc : blocks) {
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
for (auto op : ops) {
if (op->Type() == "transpose2") {
DLOG << " attr size: " << op->GetAttrMap().size();
std::unordered_map<std::string, Attribute> attrs = op->GetAttrMap();
for (std::unordered_map<std::string, Attribute>::iterator it =
attrs.begin();
it != attrs.end(); ++it) {
DLOG << " " << it->first << " " << it->second;
}
DLOG << " inputs size: " << op->GetInputs().size();
VariableNameMap inputs = op->GetInputs();
for (VariableNameMap::iterator it = inputs.begin();
it != inputs.end(); ++it) {
DLOG << " " << it->first << " " << it->second;
}
DLOG << " outputs size: " << op->GetOutputs().size();
VariableNameMap outputs = op->GetOutputs();
for (VariableNameMap::iterator it = outputs.begin();
it != outputs.end(); ++it) {
DLOG << " " << it->first << " " << it->second;
}
input_var_name = op->Input("X")[0];
output_var_name = op->Output("Out")[0];
std::shared_ptr<operators::Transpose2Op<Dtype, float>> op_ptr =
std::make_shared<operators::Transpose2Op<Dtype, float>>(
op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), program_.scope);
ops_of_block_[*block_desc.get()].push_back(op_ptr);
return;
}
}
}
}
std::shared_ptr<Tensor> predict(const Tensor &t) {
auto scope = program_.scope;
Variable *input_feed_value = scope->Var(input_var_name);
auto tensor_input = input_feed_value->GetMutable<LoDTensor>();
tensor_input->ShareDataWith(t);
Variable *output = scope->Var(output_var_name);
auto *output_tensor = output->GetMutable<LoDTensor>();
output_tensor->mutable_data<float>({1, 2, 8});
std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
out_tensor.reset(output_tensor);
predict(t, 0);
return out_tensor;
}
private:
const framework::Program<Dtype> program_;
std::shared_ptr<ProgramDesc> to_predict_program_;
std::map<framework::BlockDesc,
std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
ops_of_block_;
bool use_optimize_ = false;
string input_var_name;
string output_var_name;
void predict(const Tensor &t, int block_id) {
std::shared_ptr<BlockDesc> to_predict_block =
to_predict_program_->Block(block_id);
for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
auto op = ops_of_block_[*to_predict_block.get()][j];
op->Run();
}
}
};
template class TestTranspose2Op<CPU>;
} // namespace framework
} // namespace paddle_mobile
int main() {
DLOG << "----------**********----------";
DLOG << "begin to run Transpose2 Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_ocr) + "/model",
std::string(g_ocr) + "/params");
paddle_mobile::framework::Tensor input;
SetupTensor<float>(&input, {1, 8, 2}, static_cast<float>(0),
static_cast<float>(1));
auto *input_ptr = input.data<float>();
for (int i = 0; i < 16; ++i) {
*(input_ptr + i) = i;
}
DLOG << "input : ";
for (int i = 0; i < input.numel(); ++i) {
DLOG << " index " << i << " : " << input_ptr[i];
}
paddle_mobile::framework::TestTranspose2Op<paddle_mobile::CPU>
testTranspose2Op(program);
auto output = testTranspose2Op.predict(input);
auto *output_ptr = output->data<float>();
DLOG << "output : ";
for (int i = 0; i < output->numel(); ++i) {
DLOG << " index " << i << " : " << output_ptr[i];
}
return 0;
}
......@@ -201,9 +201,11 @@ if(NOT FOUND_MATCH)
set(PRIORBOX_OP ON)
set(RELU_OP ON)
set(RESHAPE_OP ON)
set(RESHAPE2_OP ON)
set(SIGMOID_OP ON)
set(SOFTMAX_OP ON)
set(TRANSPOSE_OP ON)
set(TRANSPOSE2_OP ON)
set(FUSION_CONVADDBNRELU_OP ON)
set(FUSION_CONVADDADDPRELU_OP ON)
set(FUSION_DWCONVBNRELU_OP ON)
......@@ -246,9 +248,11 @@ endif()
# option(PRIORBOX_OP "" ON)
# option(RELU_OP "" ON)
# option(RESHAPE_OP "" ON)
# option(RESHAPE2_OP "" ON)
# option(SIGMOID_OP "" ON)
# option(SOFTMAX_OP "" ON)
# option(TRANSPOSE_OP "" ON)
# option(TRANSPOSE2_OP "" ON)
# endif ()
if (BATCHNORM_OP)
......@@ -314,6 +318,9 @@ endif()
if (RESHAPE_OP)
add_definitions(-DRESHAPE_OP)
endif()
if (RESHAPE2_OP)
add_definitions(-DRESHAPE2_OP)
endif()
if (SIGMOID_OP)
add_definitions(-DSIGMOID_OP)
endif()
......@@ -323,6 +330,9 @@ endif()
if (TRANSPOSE_OP)
add_definitions(-DTRANSPOSE_OP)
endif()
if (TRANSPOSE2_OP)
add_definitions(-DTRANSPOSE2_OP)
endif()
if (FUSION_CONVADDBNRELU_OP)
add_definitions(-DFUSION_CONVADDBNRELU_OP)
endif()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册