提交 51f366fb 编写于 作者: L liuqi

Reformat the code.

上级 5232b85e
......@@ -15,30 +15,30 @@ namespace kernels {
template<DeviceType D, typename T>
class DepthwiseConv2dFunctor {
public:
DepthwiseConv2dFunctor(const index_t* input_shape,
const index_t* filter_shape,
const int* strides,
DepthwiseConv2dFunctor(const index_t *input_shape,
const index_t *filter_shape,
const int *strides,
const Padding padding,
const int* dilations) :
const int *dilations) :
strides_(strides),
paddings_(2, 0),
dilations_(dilations) {
CalPaddingSize(input_shape, filter_shape, dilations_, strides_, padding, paddings_.data());
}
DepthwiseConv2dFunctor(const int* strides,
const std::vector<int>& paddings,
const int* dilations) :
DepthwiseConv2dFunctor(const int *strides,
const std::vector<int> &paddings,
const int *dilations) :
strides_(strides),
paddings_(paddings),
dilations_(dilations) {}
void operator()(const T* input, // NCHW
const index_t* input_shape,
const T* filter, // c_out, c_in, kernel_h, kernel_w
const index_t* filter_shape,
const T* bias, // c_out
T* output, // NCHW
const index_t* output_shape) {
void operator()(const T *input, // NCHW
const index_t *input_shape,
const T *filter, // c_out, c_in, kernel_h, kernel_w
const index_t *filter_shape,
const T *bias, // c_out
T *output, // NCHW
const index_t *output_shape) {
MACE_CHECK_NOTNULL(output);
......@@ -80,7 +80,7 @@ class DepthwiseConv2dFunctor {
index_t offset = n * channels * height * width +
c * height * width + h * width + w;
T sum = 0;
const T* filter_ptr = filter + c * kernel_size;
const T *filter_ptr = filter + c * kernel_size;
for (int kh = 0; kh < kernel_h; ++kh) {
for (int kw = 0; kw < kernel_w; ++kw) {
int inh = padded_h_start + h * stride_h + dilation_h * kh;
......@@ -110,19 +110,19 @@ class DepthwiseConv2dFunctor {
}
}
private:
const int* strides_; // [stride_h, stride_w]
const int *strides_; // [stride_h, stride_w]
std::vector<int> paddings_; // [padding_h, padding_w]
const int* dilations_; // [dilation_h, dilation_w]
const int *dilations_; // [dilation_h, dilation_w]
};
template <>
void DepthwiseConv2dFunctor<DeviceType::NEON, float>::operator()(const float* input,
const index_t* input_shape,
const float* filter,
const index_t* filter_shape,
const float* bias,
float* output,
const index_t* output_shape);
template<>
void DepthwiseConv2dFunctor<DeviceType::NEON, float>::operator()(const float *input,
const index_t *input_shape,
const float *filter,
const index_t *filter_shape,
const float *bias,
float *output,
const index_t *output_shape);
} // namespace kernels
} // namespace mace
......
......@@ -8,13 +8,13 @@
namespace mace {
namespace kernels {
void Conv2dNeonK1x1S1(const float* input, // NCHW
const index_t* input_shape,
const float* filter, // c_out, c_in, kernel_h, kernel_w
const index_t* filter_shape,
const float* bias, // c_out
float* output, // NCHW
const index_t* output_shape) {
void Conv2dNeonK1x1S1(const float *input, // NCHW
const index_t *input_shape,
const float *filter, // c_out, c_in, kernel_h, kernel_w
const index_t *filter_shape,
const float *bias, // c_out
float *output, // NCHW
const index_t *output_shape) {
const index_t batch = output_shape[0];
const index_t channels = output_shape[1];
const index_t height = output_shape[2];
......@@ -26,7 +26,7 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
const index_t input_width = input_shape[3];
MACE_CHECK(input_batch == batch && input_height == height &&
input_width == width);
input_width == width);
const index_t total_pixels = height * width;
// Process 4 * 2 = 8 pixels for each innermost loop
......@@ -36,17 +36,17 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
// benchmark omp collapsed(2)
for (index_t n = 0; n < batch; ++n) {
const float* filter_ptr = filter;
const float *filter_ptr = filter;
#pragma omp parallel for
for (index_t c = 0; c < channels; ++c) {
// TODO Will GCC opt these out?
float* channel_output_start =
float *channel_output_start =
output + n * channels * height * width + c * height * width;
const float* input_ptr =
const float *input_ptr =
input + n * input_channels * input_height * input_width;
// Fill with bias
float* output_ptr = channel_output_start;
float *output_ptr = channel_output_start;
for (index_t ptr = 0; ptr < total_pixels; ++ptr) {
output_ptr[ptr] = bias[c]; // TODO can we avoid this?
}
......@@ -54,15 +54,15 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
index_t inc = 0;
// Process 4 input channels in batch
for (; inc + 3 < input_channels; inc += 4) {
float* output_ptr = channel_output_start;
float *output_ptr = channel_output_start;
// The beginning of each input feature map channel
MACE_ASSERT(input_ptr ==
input + n * input_channels * input_height * input_width +
inc * input_height * input_width);
input + n * input_channels * input_height * input_width +
inc * input_height * input_width);
const float* input_ptr1 = input_ptr + total_pixels;
const float* input_ptr2 = input_ptr1 + total_pixels;
const float* input_ptr3 = input_ptr2 + total_pixels;
const float *input_ptr1 = input_ptr + total_pixels;
const float *input_ptr2 = input_ptr1 + total_pixels;
const float *input_ptr3 = input_ptr2 + total_pixels;
// filter is in c_out, c_in, 1, 1 order
MACE_ASSERT(filter_ptr == filter + c * input_channels + inc);
......@@ -140,10 +140,10 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
}
// Process the remaining channels
for (; inc < input_channels; ++inc) {
float* output_ptr = channel_output_start;
float *output_ptr = channel_output_start;
MACE_ASSERT(input_ptr ==
input + n * input_channels * input_height * input_width +
inc * input_height * input_width);
input + n * input_channels * input_height * input_width +
inc * input_height * input_width);
MACE_ASSERT(filter_ptr == filter + c * input_channels + inc);
const float k0 = filter_ptr[0];
......
......@@ -20,19 +20,18 @@ namespace kernels {
int multiplier = filter_shape == nullptr ? 0 : (filter_shape[0] / input_channels); \
int filter_in_channels = filter_shape == nullptr ? input_channels : filter_shape[1]; \
for (int b = 0; b < output_batch; ++b) { \
float* output_ptr_base = output + b * output_channels * output_height * output_width; \
float *output_ptr_base = output + b * output_channels * output_height * output_width; \
for (int oc = 0; oc < output_channels; ++oc) { \
const float* filter_ptr = filter + oc * filter_in_channels * kFilterSize; \
const float* input_ptr = input + b * input_channels * input_height * input_width; \
const float *filter_ptr = filter + oc * filter_in_channels * kFilterSize; \
const float *input_ptr = input + b * input_channels * input_height * input_width; \
if (filter_shape != nullptr) { \
input_ptr += (oc / multiplier) * input_height * input_width; \
} \
float* output_ptr = output_ptr_base + oc * output_height * output_width; \
float *output_ptr = output_ptr_base + oc * output_height * output_width; \
std::fill(output_ptr, output_ptr + output_height * output_width, bias[oc]); \
for (int ic = 0; ic < filter_in_channels; ++ic) { \
float32x4_t n_filter_v[3] = {vld1q_f32(filter_ptr), vld1q_f32(filter_ptr+3), vld1q_f32(filter_ptr+6)};
#define KERNEL_TAIL_CODE \
filter_ptr += kFilterSize; \
input_ptr += input_height * input_width; \
......
......@@ -10,13 +10,13 @@
namespace mace {
namespace kernels {
void Conv2dNeonK5x5S1(const float* input, // NCHW
const index_t* input_shape,
const float* filter, // c_out, c_in, kernel_h, kernel_w
const index_t* filter_shape,
const float* bias, // c_out
float* output, // NCHW
const index_t* output_shape) {
void Conv2dNeonK5x5S1(const float *input, // NCHW
const index_t *input_shape,
const float *filter, // c_out, c_in, kernel_h, kernel_w
const index_t *filter_shape,
const float *bias, // c_out
float *output, // NCHW
const index_t *output_shape) {
const index_t batch = output_shape[0];
const index_t channels = output_shape[1];
const index_t height = output_shape[2];
......@@ -40,9 +40,9 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW
#pragma omp parallel for collapse(2)
for (index_t n = 0; n < batch; ++n) {
for (index_t c = 0; c < channels; ++c) {
float* output_ptr = output + n * output_total_pixels_per_batch +
c * output_total_pixels_per_channel;
const float* input_ptr = input + n * input_total_pixels_per_batch;
float *output_ptr = output + n * output_total_pixels_per_batch +
c * output_total_pixels_per_channel;
const float *input_ptr = input + n * input_total_pixels_per_batch;
// Fill with bias
for (index_t i = 0; i < output_total_pixels_per_channel; ++i) {
......@@ -50,24 +50,24 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW
}
for (index_t inc = 0; inc < input_channels; ++inc) {
float* outptr = output_ptr;
float* outptr2 = outptr + width;
const float* inptr = input_ptr + inc * input_total_pixels_per_channel;
const float* filter_ptr = filter + c * patch_size + inc * 25;
const float* r0 = inptr;
const float* r1 = inptr + input_width;
const float* r2 = inptr + input_width * 2;
const float* r3 = inptr + input_width * 3;
const float* r4 = inptr + input_width * 4;
const float* r5 = inptr + input_width * 5;
const float* k0 = filter_ptr;
const float* k1 = filter_ptr + 5;
const float* k2 = filter_ptr + 10;
const float* k3 = filter_ptr + 15;
const float* k4 = filter_ptr + 20;
float *outptr = output_ptr;
float *outptr2 = outptr + width;
const float *inptr = input_ptr + inc * input_total_pixels_per_channel;
const float *filter_ptr = filter + c * patch_size + inc * 25;
const float *r0 = inptr;
const float *r1 = inptr + input_width;
const float *r2 = inptr + input_width * 2;
const float *r3 = inptr + input_width * 3;
const float *r4 = inptr + input_width * 4;
const float *r5 = inptr + input_width * 5;
const float *k0 = filter_ptr;
const float *k1 = filter_ptr + 5;
const float *k2 = filter_ptr + 10;
const float *k3 = filter_ptr + 15;
const float *k4 = filter_ptr + 20;
float32x4_t _k0123 = vld1q_f32(filter_ptr);
float32x4_t _k4567 = vld1q_f32(filter_ptr + 4);
......
......@@ -25,13 +25,13 @@ extern void Conv2dNeonK3x3S2(const float *input,
const index_t *output_shape);
template<>
void DepthwiseConv2dFunctor<DeviceType::NEON, float>::operator()(const float* input, // NCHW
const index_t* input_shape,
const float* filter, // c_out, c_in, kernel_h, kernel_w
const index_t* filter_shape,
const float* bias, // c_out
float* output, // NCHW
const index_t* output_shape) {
void DepthwiseConv2dFunctor<DeviceType::NEON, float>::operator()(const float *input, // NCHW
const index_t *input_shape,
const float *filter, // c_out, c_in, kernel_h, kernel_w
const index_t *filter_shape,
const float *bias, // c_out
float *output, // NCHW
const index_t *output_shape) {
typedef void (*Conv2dNeonFunction)(
const float *input,
const index_t *input_shape,
......
......@@ -13,17 +13,17 @@
namespace mace {
template <DeviceType D, typename T>
template<DeviceType D, typename T>
class Conv2dOp : public ConvPool2dOpBase<D, T> {
public:
Conv2dOp(const OperatorDef& op_def, Workspace* ws)
: ConvPool2dOpBase<D, T>(op_def, ws){};
Conv2dOp(const OperatorDef &op_def, Workspace *ws)
: ConvPool2dOpBase<D, T>(op_def, ws) {};
bool Run() override {
const Tensor* input = this->Input(INPUT);
const Tensor* filter = this->Input(FILTER);
const Tensor* bias = this->Input(BIAS);
Tensor* output = this->Output(OUTPUT);
const Tensor *input = this->Input(INPUT);
const Tensor *filter = this->Input(FILTER);
const Tensor *bias = this->Input(BIAS);
Tensor *output = this->Output(OUTPUT);
std::vector<index_t> output_shape(4);
std::vector<int> paddings(2);
......
......@@ -14,25 +14,25 @@
namespace mace {
template <DeviceType D, typename T>
template<DeviceType D, typename T>
class DepthwiseConv2dOp : public ConvPool2dOpBase<D, T> {
public:
DepthwiseConv2dOp(const OperatorDef& op_def, Workspace* ws)
DepthwiseConv2dOp(const OperatorDef &op_def, Workspace *ws)
: ConvPool2dOpBase<D, T>(op_def, ws),
functor_(this->Input(INPUT)->shape().data(),
this->Input(FILTER)->shape().data(),
this->strides_.data(), this->padding_, this->dilations_.data()){};
this->strides_.data(), this->padding_, this->dilations_.data()) {};
bool Run() override {
const Tensor* input = this->Input(INPUT);
const Tensor* filter = this->Input(FILTER);
const Tensor* bias = this->Input(BIAS);
Tensor* output = this->Output(OUTPUT);
const Tensor *input = this->Input(INPUT);
const Tensor *filter = this->Input(FILTER);
const Tensor *bias = this->Input(BIAS);
Tensor *output = this->Output(OUTPUT);
// resize filter shape.
std::vector<index_t> filter_shape(filter->shape().begin(), filter->shape().end());
filter_shape[0] *= filter_shape[1];
filter_shape[1] = 1;
filter_shape[1] = 1;
std::vector<index_t> output_shape(4);
this->CalOutputSize(input->shape().data(), filter_shape.data(), output_shape.data());
output->Resize(output_shape);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册