提交 c1d76b0a 编写于 作者: L lijiancheng0614

Merge branch 'develop' of https://github.com/PaddlePaddle/paddle-mobile into transpose2-dev

......@@ -40,6 +40,7 @@ const char *G_OP_TYPE_POOL2D = "pool2d";
const char *G_OP_TYPE_PRIOR_BOX = "prior_box";
const char *G_OP_TYPE_RELU = "relu";
const char *G_OP_TYPE_RESHAPE = "reshape";
const char *G_OP_TYPE_RESHAPE2 = "reshape2";
const char *G_OP_TYPE_SIGMOID = "sigmoid";
const char *G_OP_TYPE_SOFTMAX = "softmax";
const char *G_OP_TYPE_TRANSPOSE = "transpose";
......@@ -101,6 +102,7 @@ std::unordered_map<
{G_OP_TYPE_POLYGON_BOX_TRANSFORM, {{"Input"}, {"Output"}}},
{G_OP_TYPE_FC, {{"X", "Y", "Z"}, {"Out"}}},
{G_OP_TYPE_RESHAPE, {{"X"}, {"Out"}}},
{G_OP_TYPE_RESHAPE2, {{"X"}, {"Out", "XShape"}}},
{G_OP_TYPE_DEPTHWISE_CONV, {{"Input"}, {"Output"}}},
{G_OP_TYPE_FILL_CONSTANT, {{}, {"Out"}}},
{G_OP_TYPE_FUSION_CONV_ADD_RELU, {{"Input"}, {"Out"}}},
......
......@@ -109,6 +109,9 @@ LOAD_FUSION_MATCHER(fusion_conv_add_bn_relu);
#ifdef RESHAPE_OP
LOAD_OP2(reshape, CPU, MALI_GPU);
#endif
#ifdef RESHAPE2_OP
LOAD_OP2(reshape2, CPU, MALI_GPU);
#endif
#ifdef TRANSPOSE_OP
LOAD_OP1(transpose, CPU);
#endif
......@@ -224,5 +227,9 @@ LOAD_FUSION_MATCHER(fusion_conv_bn);
#ifdef ELEMENTWISESUB_OP
LOAD_OP1(elementwise_sub, CPU)
#endif
#ifdef QUANT_OP
LOAD_OP1(quantize, CPU);
#endif
#ifdef DEQUANT_OP
LOAD_OP1(dequantize, CPU);
#endif
......@@ -135,11 +135,15 @@ static void quantize_round_to_even(const Tensor *input, const float scale,
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
size_t loop = size >> 4;
size_t remain = size & 0xF;
#pragma omp parallel for
for (size_t i = 0; i < loop; ++i) {
float32x4_t r0 = vld1q_f32(x);
float32x4_t r1 = vld1q_f32(x + 4);
float32x4_t r2 = vld1q_f32(x + 8);
float32x4_t r3 = vld1q_f32(x + 12);
const float *local_x = x + (i << 4);
int8_t *local_y = y + (i << 4);
float32x4_t r0 = vld1q_f32(local_x);
float32x4_t r1 = vld1q_f32(local_x + 4);
float32x4_t r2 = vld1q_f32(local_x + 8);
float32x4_t r3 = vld1q_f32(local_x + 12);
r0 = vmulq_n_f32(r0, scale);
r1 = vmulq_n_f32(r1, scale);
r2 = vmulq_n_f32(r2, scale);
......@@ -156,12 +160,12 @@ static void quantize_round_to_even(const Tensor *input, const float scale,
int16x8_t q6 = vcombine_s16(d2, d3);
int8x8_t d5 = vmovn_s16(q5);
int8x8_t d6 = vmovn_s16(q6);
vst1_s8(y, d5);
vst1_s8(y + 8, d6);
x += 16;
y += 16;
vst1_s8(local_y, d5);
vst1_s8(local_y + 8, d6);
}
size = remain;
x += (loop << 4);
y += (loop << 4);
#endif
for (size_t i = 0; i < size; ++i) {
float value = x[i] * scale;
......@@ -187,11 +191,15 @@ static void quantize_round_to_zero(const Tensor *input, const float scale,
#ifdef defined(__ARM_NEON__) || defined(__ARM_NEON)
size_t loop = size >> 4;
size_t remain = size & 0xF;
#pragma omp parallel for
for (size_t i = 0; i < loop; ++i) {
float32x4_t r0 = vld1q_f32(x);
float32x4_t r1 = vld1q_f32(x + 4);
float32x4_t r2 = vld1q_f32(x + 8);
float32x4_t r3 = vld1q_f32(x + 12);
const float *local_x = x + (i << 4);
int8_t *local_y = y + (i << 4);
float32x4_t r0 = vld1q_f32(local_x);
float32x4_t r1 = vld1q_f32(local_x + 4);
float32x4_t r2 = vld1q_f32(local_x + 8);
float32x4_t r3 = vld1q_f32(local_x + 12);
r0 = vmulq_n_f32(r0, scale);
r1 = vmulq_n_f32(r1, scale);
r2 = vmulq_n_f32(r2, scale);
......@@ -208,12 +216,12 @@ static void quantize_round_to_zero(const Tensor *input, const float scale,
int16x8_t q6 = vcombine_s16(d2, d3);
int8x8_t d5 = vmovn_s16(q5);
int8x8_t d6 = vmovn_s16(q6);
vst1_s8(y, d5);
vst1_s8(y + 8, d6);
x += 16;
y += 16;
vst1_s8(local_y, d5);
vst1_s8(local_y + 8, d6);
}
size = remain;
x += (loop << 4);
y += (loop << 4);
#endif
for (size_t i = 0; i < size; ++i) {
y[i] = trunc(x[i] * scale);
......@@ -228,11 +236,15 @@ static void quantize_round_to_nearest(const Tensor *input, const float scale,
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
size_t loop = size >> 4;
size_t remain = size & 0xF;
#pragma omp parallel for
for (size_t i = 0; i < loop; ++i) {
float32x4_t r0 = vld1q_f32(x);
float32x4_t r1 = vld1q_f32(x + 4);
float32x4_t r2 = vld1q_f32(x + 8);
float32x4_t r3 = vld1q_f32(x + 12);
const float *local_x = x + (i << 4);
int8_t *local_y = y + (i << 4);
float32x4_t r0 = vld1q_f32(local_x);
float32x4_t r1 = vld1q_f32(local_x + 4);
float32x4_t r2 = vld1q_f32(local_x + 8);
float32x4_t r3 = vld1q_f32(local_x + 12);
r0 = vmulq_n_f32(r0, scale);
r1 = vmulq_n_f32(r1, scale);
r2 = vmulq_n_f32(r2, scale);
......@@ -249,12 +261,12 @@ static void quantize_round_to_nearest(const Tensor *input, const float scale,
int16x8_t q6 = vcombine_s16(d2, d3);
int8x8_t d5 = vmovn_s16(q5);
int8x8_t d6 = vmovn_s16(q6);
vst1_s8(y, d5);
vst1_s8(y + 8, d6);
x += 16;
y += 16;
vst1_s8(local_y, d5);
vst1_s8(local_y + 8, d6);
}
size = remain;
x += (loop << 4);
y += (loop << 4);
#endif
for (size_t i = 0; i < size; ++i) {
y[i] = round(x[i] * scale);
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RESHAPE2_OP
#include "operators/kernel/reshape2_kernel.h"
#include "operators/kernel/central-arm-func/reshape2_arm_func.h"
namespace paddle_mobile {
namespace operators {
// CPU/float specialization of the reshape2 kernel initializer.
// No setup is performed: `param` is not read and the function always
// returns true.
template <>
bool Reshape2Kernel<CPU, float>::Init(Reshape2Param<CPU> *param) {
return true;
}
// CPU/float specialization of the reshape2 kernel entry point.
// Forwards `param` unchanged to Reshape2Compute<float>, which is declared in
// the included reshape2_arm_func.h header.
template <>
void Reshape2Kernel<CPU, float>::Compute(
const Reshape2Param<CPU> &param) const {
Reshape2Compute<float>(param);
}
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -58,6 +58,7 @@ void ElementwiseAddCompute(const ElementwiseAddParam<CPU> &param) {
const float *input_data = input_x->data<float>();
float *output_data = Out->mutable_data<float>();
for (int i = 0; i < batch; ++i) {
#pragma omp parallel for
for (int j = 0; j < channels; ++j) {
size_t offset = (i * channels + j) * elementwise_num;
const float *input = input_data + offset;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RESHAPE2_OP
#pragma once
#include <vector>
#include "operators/kernel/reshape_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
/**
 * Reshape2 forward computation for the CPU backend.
 *
 * The target shape is taken from the optional shape input tensor when it is
 * present (its contents are read as `int`), otherwise from the shape
 * attribute. In both cases it is resolved against the dims of X through
 * ValidateShape. The output then either receives a copy of X
 * (`Inplace() == false`) or shares X's storage (`Inplace() == true`), and is
 * resized to the resolved dims.
 *
 * @tparam P    Element type named by the caller. NOTE(review): the copy path
 *              allocates the output as `float` regardless of P, as before.
 * @param param Operator parameters providing X, the optional shape tensor,
 *              the shape attribute, the inplace flag and the output tensor.
 */
template <typename P>
void Reshape2Compute(const Reshape2Param<CPU> &param) {
  const auto *input_x = param.InputX();
  const auto &input_x_dims = input_x->dims();
  auto *out = param.Out();

  // Resolve the output dims; a runtime shape tensor overrides the attribute.
  framework::DDim out_dims = out->dims();
  const auto *input_shape = param.InputShape();
  if (input_shape) {
    const auto *shape_data = input_shape->data<int>();
    const std::vector<int> shape(shape_data,
                                 shape_data + input_shape->numel());
    out_dims = ValidateShape(shape, input_x_dims);
  } else {
    out_dims = ValidateShape(param.Shape(), input_x_dims);
  }

  // Size the output before touching its storage.
  out->Resize(out_dims);
  if (param.Inplace()) {
    out->ShareDataWith(*input_x);
  } else {
    out->mutable_data<float>();
    framework::TensorCopy(*input_x, out);
  }
  // Re-apply the target dims after the copy/share step, exactly as the
  // original did in each branch (presumably because those calls may carry
  // over X's dims — confirm against Tensor's implementation).
  out->Resize(out_dims);
}
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RESHAPE2_OP
#pragma once
#include <vector>
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
// Kernel declaration for the reshape2 operator.
// DeviceType selects the backend and T the element type; the member
// functions are only declared here and are defined per backend elsewhere
// (e.g. as explicit specializations).
template <typename DeviceType, typename T>
class Reshape2Kernel
: public framework::OpKernelBase<DeviceType, Reshape2Param<DeviceType>> {
public:
// Runs the reshape2 computation described by `param`.
void Compute(const Reshape2Param<DeviceType>& param) const;
// One-time kernel setup; returns whether initialization succeeded.
bool Init(Reshape2Param<DeviceType>* param);
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -1270,6 +1270,49 @@ class ReshapeParam : public OpParam {
};
#endif
#ifdef RESHAPE2_OP
template <typename Dtype>
class Reshape2Param : public OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType;
typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public:
Reshape2Param(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, const Scope &scope) {
input_x_ = InputXFrom<GType>(inputs, scope);
input_shape_ = InputShapeFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope);
output_xshape_ = OutputXShapeFrom<GType>(outputs, scope);
shape_ = GetAttr<vector<int>>("shape", attrs);
if (HasAttr("inplace", attrs)) {
inplace_ = GetAttr<bool>("inplace", attrs);
} else {
inplace_ = false;
}
}
const RType *InputX() const { return input_x_; }
const RType *InputShape() const { return input_shape_; }
RType *Out() const { return out_; }
RType *OutputXShape() const { return output_xshape_; }
const vector<int> &Shape() const { return shape_; }
const bool &Inplace() const { return inplace_; }
private:
RType *input_x_;
RType *input_shape_;
RType *out_;
RType *output_xshape_;
vector<int> shape_;
bool inplace_;
};
#endif
#ifdef SCALE_OP
template <typename Dtype>
class ScaleParam : public OpParam {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RESHAPE2_OP
#include "operators/reshape2_op.h"
#include <vector>
#include "operators/kernel/reshape_kernel.h"
namespace paddle_mobile {
namespace operators {
/**
 * Infers the dims of both reshape2 outputs.
 *
 * Out gets the dims produced by ValidateShape from the "shape" attribute and
 * the dims of X. XShape gets the dims of X prefixed with a leading 0 entry.
 */
template <typename Dtype, typename T>
void Reshape2Op<Dtype, T>::InferShape() const {
  auto &target_shape = this->param_.Shape();
  auto x_dims = this->param_.InputX()->dims();
  this->param_.Out()->Resize(ValidateShape(target_shape, x_dims));

  // Build {0, x_dims[0], x_dims[1], ...}.
  const int rank = static_cast<int>(x_dims.size());
  std::vector<int64_t> xshape_dims;
  xshape_dims.reserve(rank + 1);
  xshape_dims.push_back(0);
  for (int axis = 0; axis < rank; ++axis) {
    xshape_dims.push_back(x_dims[axis]);
  }
  this->param_.OutputXShape()->Resize(framework::make_ddim(xshape_dims));
}
} // namespace operators
} // namespace paddle_mobile
// Register the reshape2 operator under the name "reshape2" for each backend
// that is enabled at build time.
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(reshape2, ops::Reshape2Op);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(reshape2, ops::Reshape2Op);
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RESHAPE2_OP
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/reshape2_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using paddle_mobile::framework::Tensor;
// Operator class for reshape2.
// Binds Reshape2Param (parameters) and Reshape2Kernel (computation) together
// through framework::OperatorWithKernel; the only behavior added here is the
// InferShape override, which is defined out of line.
template <typename DeviceType, typename T>
class Reshape2Op : public framework::OperatorWithKernel<
DeviceType, Reshape2Param<DeviceType>,
operators::Reshape2Kernel<DeviceType, T>> {
public:
// Forwards all arguments unchanged to the OperatorWithKernel base.
Reshape2Op(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs,
const framework::AttributeMap &attrs,
std::shared_ptr<framework::Scope> scope)
: framework::OperatorWithKernel<DeviceType, Reshape2Param<DeviceType>,
operators::Reshape2Kernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {}
// Also expose the base class constructors.
using framework::OperatorWithKernel<
DeviceType, Reshape2Param<DeviceType>,
operators::Reshape2Kernel<DeviceType, T>>::OperatorWithKernel;
// Computes the output dims of the operator from its parameters.
void InferShape() const override;
protected:
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -200,6 +200,10 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE(test-reshape-op operators/test_reshape_op.cpp test_helper.h test_include.h)
target_link_libraries(test-reshape-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-reshape2-op operators/test_reshape2_op.cpp test_helper.h test_include.h)
target_link_libraries(test-reshape2-op paddle-mobile)
# gen test
ADD_EXECUTABLE(test-relu-op operators/test_relu_op.cpp test_helper.h test_include.h)
target_link_libraries(test-relu-op paddle-mobile)
......
......@@ -25,8 +25,8 @@ int main() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
#endif
paddle_mobile.SetThreadNum(1);
bool optimize = false;
paddle_mobile.SetThreadNum(4);
bool optimize = true;
auto time1 = time();
if (paddle_mobile.Load(g_googlenet, optimize)) {
auto time2 = time();
......@@ -35,10 +35,10 @@ int main() {
std::vector<float> output;
std::vector<int64_t> dims{1, 3, 224, 224};
GetInput<float>(g_test_image_1x3x224x224, &input, dims);
// // 预热十次
// for (int i = 0; i < 10; ++i) {
// output = paddle_mobile.Predict(input, dims);
// }
// 预热十次
for (int i = 0; i < 10; ++i) {
output = paddle_mobile.Predict(input, dims);
}
auto time3 = time();
for (int i = 0; i < 10; ++i) {
output = paddle_mobile.Predict(input, dims);
......@@ -47,9 +47,6 @@ int main() {
std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
<< std::endl;
for (int i = 0; i < output.size(); ++i) {
DLOG << "result[" << i << "] = " << output[i];
}
}
return 0;
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "../test_include.h"
#include "operators/reshape2_op.h"
namespace paddle_mobile {
namespace framework {
template <typename Dtype>
class TestReshape2Op {
public:
explicit TestReshape2Op(const Program<Dtype> p) : program_(p) {
if (use_optimize_) {
to_predict_program_ = program_.optimizeProgram;
} else {
to_predict_program_ = program_.originProgram;
}
const std::vector<std::shared_ptr<BlockDesc>> blocks =
to_predict_program_->Blocks();
for (auto block_desc : blocks) {
std::vector<std::shared_ptr<OpDesc>> ops = block_desc->Ops();
for (auto op : ops) {
if (op->Type() == "reshape2") {
DLOG << " attr size: " << op->GetAttrMap().size();
std::unordered_map<std::string, Attribute> attrs = op->GetAttrMap();
for (std::unordered_map<std::string, Attribute>::iterator it =
attrs.begin();
it != attrs.end(); ++it) {
DLOG << " " << it->first << " " << it->second;
}
DLOG << " inputs size: " << op->GetInputs().size();
VariableNameMap inputs = op->GetInputs();
for (VariableNameMap::iterator it = inputs.begin();
it != inputs.end(); ++it) {
DLOG << " " << it->first << " " << it->second;
}
DLOG << " outputs size: " << op->GetOutputs().size();
VariableNameMap outputs = op->GetOutputs();
for (VariableNameMap::iterator it = outputs.begin();
it != outputs.end(); ++it) {
DLOG << " " << it->first << " " << it->second;
}
input_var_name = op->Input("X")[0];
output_var_name = op->Output("Out")[0];
std::shared_ptr<operators::Reshape2Op<Dtype, float>> op_ptr =
std::make_shared<operators::Reshape2Op<Dtype, float>>(
op->Type(), op->GetInputs(), op->GetOutputs(),
op->GetAttrMap(), program_.scope);
ops_of_block_[*block_desc.get()].push_back(op_ptr);
return;
}
}
}
}
std::shared_ptr<Tensor> predict(const Tensor &t) {
auto scope = program_.scope;
Variable *input_feed_value = scope->Var(input_var_name);
auto tensor_input = input_feed_value->GetMutable<LoDTensor>();
tensor_input->ShareDataWith(t);
Variable *output = scope->Var(output_var_name);
auto *output_tensor = output->GetMutable<LoDTensor>();
std::shared_ptr<Tensor> out_tensor = std::make_shared<LoDTensor>();
out_tensor.reset(output_tensor);
predict(t, 0);
return out_tensor;
}
private:
const framework::Program<Dtype> program_;
std::shared_ptr<ProgramDesc> to_predict_program_;
std::map<framework::BlockDesc,
std::vector<std::shared_ptr<OperatorBase<Dtype>>>>
ops_of_block_;
bool use_optimize_ = false;
string input_var_name;
string output_var_name;
void predict(const Tensor &t, int block_id) {
std::shared_ptr<BlockDesc> to_predict_block =
to_predict_program_->Block(block_id);
for (int j = 0; j < ops_of_block_[*to_predict_block.get()].size(); ++j) {
auto op = ops_of_block_[*to_predict_block.get()][j];
op->Run();
}
}
};
template class TestReshape2Op<CPU>;
} // namespace framework
} // namespace paddle_mobile
int main() {
DLOG << "----------**********----------";
DLOG << "begin to run Reshape2 Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_ocr) + "/model",
std::string(g_ocr) + "/params");
paddle_mobile::framework::Tensor input;
SetupTensor<float>(&input, {1, 4, 4}, static_cast<float>(0),
static_cast<float>(1));
auto *input_ptr = input.data<float>();
for (int i = 0; i < 16; ++i) {
*(input_ptr + i) = i;
}
DLOG << "input : ";
for (int i = 0; i < input.numel(); ++i) {
DLOG << " index " << i << " : " << input_ptr[i];
}
paddle_mobile::framework::TestReshape2Op<paddle_mobile::CPU> testReshape2Op(
program);
auto output = testReshape2Op.predict(input);
auto *output_ptr = output->data<float>();
DLOG << "output : ";
for (int i = 0; i < output->numel(); ++i) {
DLOG << " index " << i << " : " << output_ptr[i];
}
return 0;
}
......@@ -201,6 +201,7 @@ if(NOT FOUND_MATCH)
set(PRIORBOX_OP ON)
set(RELU_OP ON)
set(RESHAPE_OP ON)
set(RESHAPE2_OP ON)
set(SIGMOID_OP ON)
set(SOFTMAX_OP ON)
set(TRANSPOSE_OP ON)
......@@ -247,6 +248,7 @@ endif()
# option(PRIORBOX_OP "" ON)
# option(RELU_OP "" ON)
# option(RESHAPE_OP "" ON)
# option(RESHAPE2_OP "" ON)
# option(SIGMOID_OP "" ON)
# option(SOFTMAX_OP "" ON)
# option(TRANSPOSE_OP "" ON)
......@@ -316,6 +318,9 @@ endif()
if (RESHAPE_OP)
add_definitions(-DRESHAPE_OP)
endif()
if (RESHAPE2_OP)
add_definitions(-DRESHAPE2_OP)
endif()
if (SIGMOID_OP)
add_definitions(-DSIGMOID_OP)
endif()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册