提交 b92caeb2 编写于 作者: H hjchen2

Transform kernel in op initialization

上级 00350575
......@@ -16,6 +16,7 @@ limitations under the License. */
#include <map>
#include <string>
#include <utility>
#include <vector>
#include "common/enforce.h"
......@@ -119,10 +120,6 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
virtual void InferShape() const = 0;
// Initializes the kernel of this operator with the operator's parameters.
//
// kernel_.Init(&param_) is passed as the condition argument of
// PADDLE_MOBILE_ENFORCE; the accompanying message is formatted with the
// operator type string (this->type_).
// NOTE(review): what happens when the condition is false depends on how
// PADDLE_MOBILE_ENFORCE is defined in common/enforce.h, which is not visible
// here -- confirm that the macro always evaluates its first argument,
// otherwise the kernel would never be initialized in such builds.
void Init() {
// Disabled debugging aid that logged every entry of inputs_:
// for (auto i : this->inputs_) {
// DLOG << i.first;
// DLOG << i.second;
// }
PADDLE_MOBILE_ENFORCE(kernel_.Init(&param_), " %s kernel init failed",
this->type_.c_str());
}
......
......@@ -135,22 +135,6 @@ class Tensor {
return reinterpret_cast<T *>(mutable_data(typeid(T)));
}
#ifdef PADDLE_MOBILE_DEBUG
// Writes the tensor contents to a text file (debug builds only).
//
// The file receives every element interpreted as T, each followed by a single
// space, then the literal label "形状:" (Chinese for "shape:") and every
// dimension of dims_, each followed by a single space.
//
// T         element type used to read the buffer through data<T>().
// filename  path of the file to create/overwrite.
// NOTE(review): the stream state is never checked, so a file that cannot be
// opened results in a silent no-op.
template <typename T>
inline void dump(std::string filename) const {
  // Fetch the data pointer first, before the file is created.
  const T *values = data<T>();
  std::ofstream sink(filename.c_str());

  int element = 0;
  while (element < numel()) {
    sink << values[element] << " ";
    ++element;
  }

  sink << "形状:";

  int axis = 0;
  while (axis < dims_.size()) {
    sink << dims_[axis] << " ";
    ++axis;
  }
  // The stream is flushed and closed by its destructor on scope exit.
}
#endif
inline void *mutable_data(std::type_index type) {
if (holder_ != nullptr) {
holder_->set_type(type);
......
......@@ -17,17 +17,69 @@ limitations under the License. */
#include "operators/kernel/conv_kernel.h"
#include "operators/kernel/central-arm-func/conv_arm_func.h"
#include <iostream>
namespace paddle_mobile {
namespace operators {
// Chooses the execution mode of the CPU float convolution kernel and stores it
// in param->ExecMode(), so that the choice does not have to be repeated on
// every Compute() call. Always returns true.
//
// Selection order (first match wins):
//   1. input tensor type is int8_t                  -> EXEC_GEMM_INT8
//   2. depthwise 3x3 filter with Strides()[0] == 1  -> EXEC_DEPTHWISE3x3S1P1_FLOAT
//   3. depthwise 3x3 filter                         -> EXEC_DEPTHWISE3x3_FLOAT
//   4. square 3x3 filter, stride 1, dilation 1, at least 16 output channels
//      and Input()->dims()[2] >= 16                 -> EXEC_WINOGRAD3X3_FLOAT
//   5. anything else                                -> EXEC_GEMM_FLOAT
//
// For the Winograd mode the filter is transformed once here and
// param->Filter() is redirected to the transformed tensor.
template <>
bool ConvKernel<CPU, float>::Init(ConvParam<CPU> *param) {
  using Param = ConvParam<CPU>;

  if (param->Input()->type() == typeid(int8_t)) {
    param->ExecMode() = Param::EXEC_GEMM_INT8;
    return true;
  }

  // Both predicates are lambdas so that the operands are still evaluated
  // lazily, left to right, exactly when the corresponding branch is tested.
  const auto is_depthwise3x3 = [param]() {
    return param->Groups() == param->Input()->dims()[1] &&
           param->Input()->dims()[1] == param->Output()->dims()[1] &&
           param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
           param->Filter()->dims()[2] == 3;
  };
  const auto fits_winograd3x3 = [param]() {
    return param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
           param->Strides()[0] == param->Strides()[1] &&
           param->Dilations()[0] == param->Dilations()[1] &&
           param->Filter()->dims()[2] == 3 && param->Strides()[0] == 1 &&
           param->Dilations()[0] == 1 && param->Output()->dims()[1] >= 16 &&
           param->Input()->dims()[2] >= 16;
  };

  if (is_depthwise3x3() && param->Strides()[0] == 1) {
    // NOTE(review): only Strides()[0] is inspected; the paddings are not
    // checked before selecting the "S1P1" mode -- confirm this is intended.
    param->ExecMode() = Param::EXEC_DEPTHWISE3x3S1P1_FLOAT;
  } else if (is_depthwise3x3()) {
    param->ExecMode() = Param::EXEC_DEPTHWISE3x3_FLOAT;
  } else if (fits_winograd3x3()) {
    param->ExecMode() = Param::EXEC_WINOGRAD3X3_FLOAT;
    // Transform the weight up front instead of on every inference.
    // NOTE(review): the tensor is allocated with a raw `new` and nothing in
    // this function releases it or the filter it replaces -- confirm who owns
    // both tensors.
    framework::Tensor *packed_filter = new framework::Tensor;
    operators::math::winograd_transform_weight<8, 3>(*param->Filter(),
                                                     packed_filter);
    param->Filter() = packed_filter;
  } else {
    param->ExecMode() = Param::EXEC_GEMM_FLOAT;
  }
  return true;
}
// Runs the convolution with the implementation selected by Init() and cached
// in param.ExecMode().
//
// The former unconditional ConvCompute<float>(param) call has been removed:
// it re-derived the dispatch from the tensor shapes and executed a complete
// convolution, after which the switch below executed one again. Besides
// doubling the work, on the Winograd path it handed the filter that Init()
// had already transformed to the old dispatcher.
//
// Any mode without a case below (EXEC_INVALID, EXEC_WINOGRAD5X5_FLOAT) is
// reported through PADDLE_MOBILE_THROW_EXCEPTION.
template <>
void ConvKernel<CPU, float>::Compute(const ConvParam<CPU> &param) const {
  switch (param.ExecMode()) {
    case ConvParam<CPU>::EXEC_GEMM_INT8:
      GemmConv<int8_t, int32_t>(param);
      break;
    case ConvParam<CPU>::EXEC_DEPTHWISE3x3S1P1_FLOAT:
      math::DepthwiseConv3x3s1p1(param.Input(), param.Filter(), param.Output(),
                                 nullptr, false);
      break;
    case ConvParam<CPU>::EXEC_DEPTHWISE3x3_FLOAT:
      math::DepthwiseConv3x3(param.Input(), param.Strides(), param.Paddings(),
                             param.Filter(), nullptr, param.Output(), false);
      break;
    case ConvParam<CPU>::EXEC_WINOGRAD3X3_FLOAT:
      // Expects param.Filter() to be the weight transformed in Init().
      WinogradConv3x3<8, 3>(param);
      break;
    case ConvParam<CPU>::EXEC_GEMM_FLOAT:
      GemmConv<float, float>(param);
      break;
    default:
      PADDLE_MOBILE_THROW_EXCEPTION("Invalid convolution execute mode %d",
                                    param.ExecMode());
  }
}
template class ConvKernel<CPU, float>;
......
......@@ -22,14 +22,14 @@ limitations under the License. */
#include "operators/math/math_function.h"
#include "operators/math/pad.h"
#include "operators/math/vol2col.h"
#include "operators/math/winograd/winograd.h"
#include "operators/math/winograd/winograd_transform.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename Itype, typename Otype>
inline void ConvBasic(const ConvParam<CPU> &param) {
inline void GemmConv(const ConvParam<CPU> &param) {
const Tensor *input = param.Input();
Tensor filter = *param.Filter();
Tensor *output = param.Output();
......@@ -117,9 +117,10 @@ inline void ConvBasic(const ConvParam<CPU> &param) {
}
}
inline void BatchConv3x3Winograd(const ConvParam<CPU> &param) {
template <int tile, int kernel>
inline void WinogradConv3x3(const ConvParam<CPU> &param) {
const Tensor *input = param.Input();
Tensor *filter = param.Filter();
const Tensor *filter = param.Filter();
Tensor *output = param.Output();
output->mutable_data<float>();
int batch_size = input->dims()[0];
......@@ -127,51 +128,40 @@ inline void BatchConv3x3Winograd(const ConvParam<CPU> &param) {
const std::vector<int> &paddings = param.Paddings();
math::PadFunctor<CPU, float> pad;
auto winograd_pad = [&](int width, int pad) {
int output_tile = tile - kernel + 1;
// int tiles = (width + pad - kernel) / output_tile + 1;
// return (tiles - 1) * output_tile + tile - width;
int pad_width = (width + 2 * pad - kernel) / output_tile * output_tile;
return pad_width + tile - width;
};
Tensor input_pad;
framework::Tensor transformed_input;
for (int i = 0; i < batch_size; ++i) {
Tensor in_batch = input->Slice(i, i + 1);
Tensor out_batch = output->Slice(i, i + 1);
if (paddings[0] == 0 && paddings[1] == 0) {
input_pad = in_batch;
} else {
int pad_bottom = winograd_pad(in_batch.dims()[2], paddings[0]);
int pad_right = winograd_pad(in_batch.dims()[3], paddings[1]);
if (paddings[0] || paddings[1] || pad_bottom || pad_right) {
framework::DDim pad_shape = in_batch.dims();
pad_shape[2] += 2 * paddings[0];
pad_shape[3] += 2 * paddings[1];
pad_shape[2] += paddings[0] + pad_bottom;
pad_shape[3] += paddings[1] + pad_right;
input_pad.mutable_data<float>(pad_shape);
pad(in_batch, paddings[0], paddings[0], paddings[1], paddings[1],
pad(in_batch, paddings[0], pad_bottom, paddings[1], pad_right,
&input_pad);
}
math::winograd_f6k3(input_pad, *filter, &out_batch);
}
}
template <typename P>
void ConvCompute(const ConvParam<CPU> &param) {
if (param.Input()->type() == typeid(int8_t)) {
ConvBasic<int8_t, int32_t>(param);
} else {
if (param.Groups() == param.Input()->dims()[1] &&
param.Input()->dims()[1] == param.Output()->dims()[1] &&
param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1) {
math::DepthwiseConv3x3s1p1(param.Input(), param.Filter(), param.Output(),
nullptr, false);
} else if (param.Groups() == param.Input()->dims()[1] &&
param.Input()->dims()[1] == param.Output()->dims()[1] &&
param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
param.Filter()->dims()[2] == 3) {
math::DepthwiseConv3x3(param.Input(), param.Strides(), param.Paddings(),
param.Filter(), nullptr, param.Output(), false);
} else if (param.Filter()->dims()[2] == param.Filter()->dims()[3] &&
param.Strides()[0] == param.Strides()[1] &&
param.Dilations()[0] == param.Dilations()[1] &&
param.Filter()->dims()[2] == 3 && param.Strides()[0] == 1 &&
param.Dilations()[0] == 1 && param.Output()->dims()[1] >= 16 &&
param.Output()->dims()[2] >= 16) {
BatchConv3x3Winograd(param);
} else {
ConvBasic<float, float>(param);
input_pad = in_batch;
}
#if __aarch64__
// TODO(hjchen2)
#else
// tile input and transform
math::winograd_transform_input<tile, kernel>(input_pad, &transformed_input);
// caculate output
math::winograd_transform_output<tile, kernel>(transformed_input, *filter,
output);
#endif
}
}
......
......@@ -44,7 +44,7 @@ void DepthwiseConvCompute(const ConvParam<CPU> &param) {
Bias, false);
} else {
ConvBasic<float, float>(param);
GemmConv<float, float>(param);
}
}
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#include "operators/math/winograd/winograd.h"
#include "operators/math/winograd/winograd_transform.h"
namespace paddle_mobile {
namespace operators {
namespace math {
// F(2X2, 3X3)
// Placeholder: the body is empty, so input and weight are ignored and output
// is left untouched.
void winograd_f2k3(const framework::Tensor &input,
const framework::Tensor &weight, framework::Tensor *output) {
}
// F(6X6, 3X3)
// Convolves `input` with `weight` through the <8, 3> Winograd transforms and
// writes the result to `output`. The weight is transformed on every call.
// When __aarch64__ is non-zero nothing is computed yet (see the TODO) and
// `output` is left untouched.
void winograd_f6k3(const framework::Tensor &input,
                   const framework::Tensor &weight, framework::Tensor *output) {
  framework::Tensor input_tiles;
  framework::Tensor weight_tiles;
#if !__aarch64__
  // Step 1: bring the weight into the Winograd domain.
  winograd_transform_weight<8, 3>(weight, &weight_tiles);
  // Step 2: tile the input and transform each tile.
  winograd_transform_input<8, 3>(input, &input_tiles);
  // Step 3: calculate the output from the transformed operands.
  winograd_transform_output<8, 3>(input_tiles, weight_tiles, output);
#else
  // TODO(hjchen2)
#endif
}
// F(4X4, 5X5)
// Placeholder: the body is empty, so input and weight are ignored and output
// is left untouched.
void winograd_f4k5(const framework::Tensor &input,
const framework::Tensor &weight, framework::Tensor *output) {
}
} // namespace math
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#pragma once
#include "framework/tensor.h"
namespace paddle_mobile {
namespace operators {
namespace math {
// Winograd convolution entry points. All three share one signature:
//   input   tensor to convolve (read only)
//   weight  convolution filter (read only)
//   output  tensor that receives the result
// NOTE(review): expected tensor layouts are not stated at this level; check
// the winograd_transform_* helpers before relying on a particular shape.

// F(2X2, 3X3)
// The definition in winograd.cpp is currently an empty stub.
void winograd_f2k3(const framework::Tensor &input,
const framework::Tensor &weight, framework::Tensor *output);
// F(6X6, 3X3)
// Implemented with the <8, 3> weight/input/output transforms; the
// __aarch64__ branch of the definition is still a TODO.
void winograd_f6k3(const framework::Tensor &input,
const framework::Tensor &weight, framework::Tensor *output);
// F(4X4, 5X5)
// The definition in winograd.cpp is currently an empty stub.
void winograd_f4k5(const framework::Tensor &input,
const framework::Tensor &weight, framework::Tensor *output);
} // namespace math
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -40,6 +40,7 @@ void winograd_transform_weight<8, 3>(const framework::Tensor &weight,
* w7 = g2
*/
// weight shape is [out_channel, in_channel, kernel_h, kernel_w]
// package weight into [roundup(out_channel/4), 64, in_channel, 4] tiles
int out_channel = weight.dims()[0];
int in_channel = weight.dims()[1];
// reshape and alloc transformed weight
......@@ -322,12 +323,12 @@ void winograd_transform_input<8, 3>(const framework::Tensor &input,
* x6 = (4 * d2 - 5 * d4 + d6) - (2 * d1 - 2.5 * d3 + 0.5 * d5)
* x7 = (d7 - d1) + (d3 - d5) * 5.25
*/
// pack input to [8 * roundup(h/6), 8 * roundup(w/6), channel] tiles
// package input into [roundup(tiles/8), 64, channel, 8] tiles
int channel = input.dims()[1];
int height = input.dims()[2];
int width = input.dims()[3];
int h_tiles = (height + 3) / 6; // (height + 5 - 2) / 6
int w_tiles = (width + 3) / 6; // (width + 5 - 2) / 6
int h_tiles = (height + 3) / 6; // (height - 8 + 5 + 6) / 6
int w_tiles = (width + 3) / 6; // (width - 8 + 5 + 6) / 6
int tiles = (h_tiles * w_tiles + 7) / 8;
framework::DDim transformed_shape =
framework::make_ddim(std::vector<int>{tiles, 64, channel, 8});
......@@ -335,29 +336,11 @@ void winograd_transform_input<8, 3>(const framework::Tensor &input,
memset(outptr, 0, output->numel() * sizeof(float));
const float *inptr = input.data<float>();
int inter_h = (height - 2) / 6;
int inter_w = (width - 2) / 6;
int remain_h = height - (inter_h * 6);
int remain_w = width - (inter_w * 6);
framework::Tensor input_pad;
if (remain_h > 2 || remain_w > 2) {
inter_h += (remain_h > 2);
inter_w += (remain_w > 2);
height = (inter_h - 1) * 6 + 8;
width = (inter_w - 1) * 6 + 8;
framework::DDim input_shape =
framework::make_ddim(std::vector<int>{1, channel, height, width});
PadFunctor<CPU, float> pad;
inptr = input_pad.mutable_data<float>(input_shape);
pad(input, 0, height - input.dims()[2], 0, width - input.dims()[3],
&input_pad);
}
size_t image_size = height * width;
const float transform_matrix[8] = {5.25f, -5.f, -4.25f, -2.5f,
2.f, -1.25f, 0.5f, 0.25f};
int remain_c_start = channel & 0xFFFC;
#if 0
#if 1
remain_c_start = 0;
#else
#pragma omp parallel for
......@@ -381,14 +364,14 @@ void winograd_transform_input<8, 3>(const framework::Tensor &input,
"vld1.32 {d8-d11}, [%[in1]], %[steps] \n"
"vld1.32 {d12-d15}, [%[in2]], %[steps] \n"
"vld1.32 {d16-d19}, [%[in3]], %[steps] \n"
"vtrn.32 q2, q4 \n" // d0: q2, q2
"vtrn.32 q3, q5 \n" // d1: q4, q3
"vtrn.32 q6, q8 \n" // d2: q6, q4
"vtrn.32 q7, q9 \n" // d3: q8, q5
"vswp.32 d5, d12 \n" // d4: q3, q6
"vswp.32 d9, d16 \n" // d5: q5, q7
"vswp.32 d7, d14 \n" // d6: q7, q8
"vswp.32 d11, d18 \n" // d7: q9, q9
"vtrn.32 q2, q4 \n" // d0: q2
"vtrn.32 q3, q5 \n" // d1: q4
"vtrn.32 q6, q8 \n" // d2: q6
"vtrn.32 q7, q9 \n" // d3: q8
"vswp.32 d5, d12 \n" // d4: q3
"vswp.32 d9, d16 \n" // d5: q5
"vswp.32 d7, d14 \n" // d6: q7
"vswp.32 d11, d18 \n" // d7: q9
"vsub.f32 q10, q2, q7 \n"
"vsub.f32 q11, q3, q6 \n"
......@@ -680,14 +663,14 @@ void winograd_transform_input<8, 3>(const framework::Tensor &input,
"vld1.32 {d8-d11}, [%[in1]], %[steps] \n"
"vld1.32 {d12-d15}, [%[in2]], %[steps] \n"
"vld1.32 {d16-d19}, [%[in3]], %[steps] \n"
"vtrn.32 q2, q4 \n" // d0: q2, q2
"vtrn.32 q3, q5 \n" // d1: q4, q3
"vtrn.32 q6, q8 \n" // d2: q6, q4
"vtrn.32 q7, q9 \n" // d3: q8, q5
"vswp.32 d5, d12 \n" // d4: q3, q6
"vswp.32 d9, d16 \n" // d5: q5, q7
"vswp.32 d7, d14 \n" // d6: q7, q8
"vswp.32 d11, d18 \n" // d7: q9, q9
"vtrn.32 q2, q4 \n" // d0: q2
"vtrn.32 q3, q5 \n" // d1: q4
"vtrn.32 q6, q8 \n" // d2: q6
"vtrn.32 q7, q9 \n" // d3: q8
"vswp.32 d5, d12 \n" // d4: q3
"vswp.32 d9, d16 \n" // d5: q5
"vswp.32 d7, d14 \n" // d6: q7
"vswp.32 d11, d18 \n" // d7: q9
"vsub.f32 q10, q2, q7 \n"
"vsub.f32 q11, q3, q6 \n"
......@@ -749,11 +732,12 @@ void winograd_transform_input<8, 3>(const framework::Tensor &input,
float *ptr0 = d_bt;
float *ptr1 = ptr0 + 32;
int tile_id = h * w_tiles + w;
int block_id = tile_id >> 3;
int pack_id = tile_id & 0x7;
int tile_indics = h * w_tiles + w;
int tile_block = tile_indics >> 3;
int block_indics = tile_indics & 0x7;
// (tiles / 8, 64, channel, 8)
float *out0 = outptr + (block_id * 64 * channel + c) * 8 + pack_id;
float *out0 =
outptr + (tile_block * 64 * channel + c) * 8 + block_indics;
float *out1 = out0 + channel * 8;
float *out2 = out1 + channel * 8;
float *out3 = out2 + channel * 8;
......@@ -771,7 +755,6 @@ void winograd_transform_input<8, 3>(const framework::Tensor &input,
"vld1.32 {d8-d11}, [%[ptr0]]! \n" // q4: d2, q5: d3
"vld1.32 {d12-d15}, [%[ptr1]]! \n" // q6: d4, q7: d5
"vld1.32 {d16-d19}, [%[ptr1]]! \n" // q8: d6, q9: d7
"vtrn.32 q2, q3 \n"
"vtrn.32 q4, q5 \n"
"vtrn.32 q6, q7 \n"
......@@ -918,7 +901,7 @@ void winograd_transform_output<8, 3>(const framework::Tensor &input,
"cmp %[inter_channel], #0 \n"
"ble cmp_remain_%= \n"
"loop_4c_%=: \n"
"loop_2c_%=: \n"
"vld1.32 {d0-d3}, [%[w_ptr]]! \n"
"vld1.32 {d4-d7}, [%[in_ptr]]! \n"
"vmla.f32 q8, q2, d0[0] \n"
......@@ -941,7 +924,7 @@ void winograd_transform_output<8, 3>(const framework::Tensor &input,
"vmla.f32 q15, q5, d3[1] \n"
"subs %[inter_channel], #1 \n"
"bne loop_4c_%= \n"
"bne loop_2c_%= \n"
// cmp remain channel > 0
"cmp_remain_%=: \n"
......
......@@ -379,9 +379,9 @@ class ConvParam : public OpParam {
const RType *Input() const { return input_; }
RType *Filter() const { return filter_; }
RType *&Filter() const { return filter_; }
RType *Output() const { return output_; }
RType *&Output() const { return output_; }
const vector<int> &Strides() const { return strides_; }
......@@ -389,15 +389,28 @@ class ConvParam : public OpParam {
const vector<int> &Dilations() const { return dilations_; }
// Convolution implementation selected for this parameter set. The comments
// describe how the CPU float kernel (ConvKernel<CPU, float>) uses each value.
enum ExecMode {
// Zero value; no case handles it in ConvKernel<CPU, float>::Compute, so it
// ends up in the "Invalid convolution execute mode" branch.
EXEC_INVALID = 0,
// Dispatches to GemmConv<float, float>.
EXEC_GEMM_FLOAT,
// Dispatches to math::DepthwiseConv3x3s1p1.
EXEC_DEPTHWISE3x3S1P1_FLOAT,
// Dispatches to math::DepthwiseConv3x3 (takes strides and paddings).
EXEC_DEPTHWISE3x3_FLOAT,
// Dispatches to WinogradConv3x3<8, 3>; the filter is transformed in Init().
EXEC_WINOGRAD3X3_FLOAT,
// NOTE(review): never selected or handled by the CPU float kernel shown in
// this change -- presumably reserved for a future 5x5 Winograd path.
EXEC_WINOGRAD5X5_FLOAT,
// Dispatches to GemmConv<int8_t, int32_t>; chosen when the input is int8_t.
EXEC_GEMM_INT8,
};
ExecMode &ExecMode() const { return exec_mode_; }
const int &Groups() const { return groups; }
private:
RType *input_;
RType *output_;
RType *filter_;
mutable RType *output_;
mutable RType *filter_;
vector<int> strides_;
vector<int> paddings_;
vector<int> dilations_;
mutable enum ExecMode exec_mode_;
int groups;
};
template <typename Dtype>
......
......@@ -141,7 +141,7 @@ if (NOT FOUND_MATCH)
target_link_libraries(test-googlenet-quali paddle-mobile)
# gen test
ADD_EXECUTABLE(test-conv-op operators/test_cov_op.cpp test_helper.h test_include.h executor_for_test.h)
ADD_EXECUTABLE(test-conv-op operators/test_conv_op.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-conv-op paddle-mobile)
# gen test
......@@ -220,10 +220,6 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE(test-dequantize-op operators/test_dequantize_op.cpp test_helper.h test_include.h)
target_link_libraries(test-dequantize-op paddle-mobile)
# test int8 conv op
ADD_EXECUTABLE(test-int8-conv-op operators/test_int8_conv_op.cpp test_helper.h test_include.h)
target_link_libraries(test-int8-conv-op paddle-mobile)
# gen test log
ADD_EXECUTABLE(test-log common/test_log.cpp)
target_link_libraries(test-log paddle-mobile)
......
......@@ -18,7 +18,7 @@ limitations under the License. */
namespace paddle_mobile {
// Reference convolution for checking results:
// Reference convolution from Caffe for checking results.
// accumulate through explicit loops over input, output, and filters.
template <typename Itype, typename Otype>
void conv2d(const framework::Tensor *input, const framework::Tensor *filter,
......@@ -129,7 +129,7 @@ void conv2d(const framework::Tensor *input, const framework::Tensor *filter,
}
template <typename Itype, typename Otype, int Kernel, int Pad, int Stride>
int TestConvOp() {
int TestConvOp(int in_channels, int in_height, int in_width, int out_channels) {
int kernel_h = Kernel;
int kernel_w = Kernel;
int pad_h = Pad;
......@@ -140,10 +140,10 @@ int TestConvOp() {
int dilation_w = 1;
int batch_size = 1;
int input_c = 3;
int input_h = 100;
int input_w = 100;
int output_c = 10;
int input_c = in_channels;
int input_h = in_height;
int input_w = in_width;
int output_c = out_channels;
framework::DDim input_shape =
framework::make_ddim({batch_size, input_c, input_h, input_w});
framework::DDim filter_shape =
......@@ -158,7 +158,7 @@ int TestConvOp() {
auto input_var = scope.get()->Var("input");
auto input = input_var->template GetMutable<framework::LoDTensor>();
SetupTensor<Itype>(input, input_shape, -20, 20);
SetupTensor<Itype>(input, input_shape, -20.0, 20.0);
auto filter_var = scope.get()->Var("filter");
auto filter = filter_var->template GetMutable<framework::LoDTensor>();
......@@ -174,8 +174,9 @@ int TestConvOp() {
auto *op = new operators::ConvOp<CPU, float>("conv2d", inputs, outputs, attrs,
scope);
// struct timespec ts_begin, ts_end;
op->InferShape();
op->Init();
// struct timespec ts_begin, ts_end;
// warmup
// op->Run();
// clock_gettime(CLOCK_MONOTONIC, &ts_begin);
......@@ -202,7 +203,8 @@ int TestConvOp() {
const Otype *output_data = output->data<Otype>();
Otype *output_cmp_data = output_cmp.data<Otype>();
for (int i = 0; i < output->numel(); ++i) {
PADDLE_MOBILE_ENFORCE(output_data[i] == output_cmp_data[i],
float gap = output_data[i] - output_cmp_data[i];
PADDLE_MOBILE_ENFORCE(std::abs(gap / output_data[i]) < 1e-3,
"output[%d] = %d, output_cmp[%d] = %d", i,
output_data[i], i, output_cmp_data[i]);
}
......@@ -212,68 +214,104 @@ int TestConvOp() {
} // namespace paddle_mobile
int main() {
int main(int argc, char *argv[]) {
if (argc < 5) {
LOG(paddle_mobile::kLOG_INFO)
<< "Usage:\n"
<< " ./test-int8-conv-op in_channels in_height in_width out_channels\n"
<< " params:\n"
<< " -in_channels: int, input image's channels\n"
<< " -in_height: int, input image's height\n"
<< " -in_width: int, input image's width\n"
<< " -out_channels: int, conv output channels\n";
return 1;
}
int in_channels = atoi(argv[1]);
int in_height = atoi(argv[2]);
int in_width = atoi(argv[3]);
int out_channels = atoi(argv[4]);
// kernel = 3, pad = 1, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=1, stride=1";
paddle_mobile::TestConvOp<float, float, 3, 1, 1>(in_channels, in_height,
in_width, out_channels);
// kernel = 7, pad = 0, stride = 2
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=0, stride=2";
paddle_mobile::TestConvOp<int8_t, int32_t, 7, 0, 2>();
paddle_mobile::TestConvOp<int8_t, int32_t, 7, 0, 2>(in_channels, in_height,
in_width, out_channels);
// kernel = 7, pad = 1, stride = 2
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=1, stride=2";
paddle_mobile::TestConvOp<int8_t, int32_t, 7, 1, 2>();
paddle_mobile::TestConvOp<int8_t, int32_t, 7, 1, 2>(in_channels, in_height,
in_width, out_channels);
// kernel = 7, pad = 3, stride = 2
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=3, stride=2";
paddle_mobile::TestConvOp<int8_t, int32_t, 7, 3, 2>();
paddle_mobile::TestConvOp<int8_t, int32_t, 7, 3, 2>(in_channels, in_height,
in_width, out_channels);
// kernel = 7, pad = 0, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=0, stride=1";
paddle_mobile::TestConvOp<int8_t, int32_t, 7, 0, 1>();
paddle_mobile::TestConvOp<int8_t, int32_t, 7, 0, 1>(in_channels, in_height,
in_width, out_channels);
// kernel = 7, pad = 1, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=1, stride=1";
paddle_mobile::TestConvOp<int8_t, int32_t, 7, 1, 1>();
paddle_mobile::TestConvOp<int8_t, int32_t, 7, 1, 1>(in_channels, in_height,
in_width, out_channels);
// kernel = 7, pad = 3, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=3, stride=1";
paddle_mobile::TestConvOp<int8_t, int32_t, 7, 3, 1>();
paddle_mobile::TestConvOp<int8_t, int32_t, 7, 3, 1>(in_channels, in_height,
in_width, out_channels);
// kernel = 7, pad = 5, stride = 3
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=5, stride=3";
paddle_mobile::TestConvOp<int8_t, int32_t, 7, 5, 3>();
paddle_mobile::TestConvOp<int8_t, int32_t, 7, 5, 3>(in_channels, in_height,
in_width, out_channels);
// kernel = 7, pad = 3, stride = 4
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=7, pad=3, stride=4";
paddle_mobile::TestConvOp<int8_t, int32_t, 7, 3, 4>();
LOG(paddle_mobile::kLOG_INFO) << "\n";
paddle_mobile::TestConvOp<int8_t, int32_t, 7, 3, 4>(in_channels, in_height,
in_width, out_channels);
// kernel = 3, pad = 0, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=3, pad=0, stride=1";
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 0, 1>();
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 0, 1>(in_channels, in_height,
in_width, out_channels);
// kernel = 3, pad = 0, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=0, stride=1";
paddle_mobile::TestConvOp<float, float, 3, 0, 1>();
LOG(paddle_mobile::kLOG_INFO) << "\n";
paddle_mobile::TestConvOp<float, float, 3, 0, 1>(in_channels, in_height,
in_width, out_channels);
// kernel = 3, pad = 1, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=3, pad=1, stride=1";
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 1, 1>();
paddle_mobile::TestConvOp<int8_t, int32_t, 3, 1, 1>(in_channels, in_height,
in_width, out_channels);
// kernel = 3, pad = 1, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=3, pad=1, stride=1";
paddle_mobile::TestConvOp<float, float, 3, 1, 1>();
LOG(paddle_mobile::kLOG_INFO) << "\n";
paddle_mobile::TestConvOp<float, float, 3, 1, 1>(in_channels, in_height,
in_width, out_channels);
// kernel = 5, pad = 0, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=0, stride=1";
paddle_mobile::TestConvOp<int8_t, int32_t, 5, 0, 1>();
paddle_mobile::TestConvOp<int8_t, int32_t, 5, 0, 1>(in_channels, in_height,
in_width, out_channels);
// kernel = 5, pad = 0, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=5, pad=0, stride=1";
paddle_mobile::TestConvOp<float, float, 5, 0, 1>();
LOG(paddle_mobile::kLOG_INFO) << "\n";
paddle_mobile::TestConvOp<float, float, 5, 0, 1>(in_channels, in_height,
in_width, out_channels);
// kernel = 5, pad = 2, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "int8, kernel=5, pad=2, stride=1";
paddle_mobile::TestConvOp<int8_t, int32_t, 5, 2, 1>();
paddle_mobile::TestConvOp<int8_t, int32_t, 5, 2, 1>(in_channels, in_height,
in_width, out_channels);
// kernel = 5, pad = 2, stride = 1
LOG(paddle_mobile::kLOG_INFO) << "float, kernel=5, pad=2, stride=1";
paddle_mobile::TestConvOp<float, float, 5, 2, 1>();
paddle_mobile::TestConvOp<float, float, 5, 2, 1>(in_channels, in_height,
in_width, out_channels);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_include.h"
#include "operators/conv_op.h"
int main() {
paddle_mobile::Loader<paddle_mobile::GPU_MALI> loader;
// ../models/image_classification_resnet.inference.model
auto program = loader.Load(g_googlenet);
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail");
Executor4Test<paddle_mobile::GPU_MALI, paddle_mobile::operators::ConvOp<
paddle_mobile::GPU_MALI, float>>
executor(program, "conv2d");
paddle_mobile::framework::Tensor input;
GetInput<float>(g_test_image_1x3x224x224, &input, {1, 3, 224, 224});
// // use SetupTensor if not has local input image .
// SetupTensor<float>(&input, {1, 3, 224, 224}, static_cast<float>(0),
// static_cast<float>(1));
auto out_ddim = paddle_mobile::framework::make_ddim({1, 64, 112, 112});
auto output = executor.Predict(input, "data", "conv2d_0.tmp_0", out_ddim);
auto output_ptr = output->data<float>();
for (int j = 0; j < 20; ++j) {
DLOG << " value of output: " << output_ptr[j];
}
return 0;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册