提交 9aff3c14 编写于 作者: L luxuhui

refactor: refactor the delegators for arm

N/A
Signed-off-by: NLuxuhui <luxuhui@xiaomi.com>
上级 fbd0ff09
...@@ -60,6 +60,7 @@ MaceStatus OpDelegatorRegistry::Register(const DelegatorInfo &key, ...@@ -60,6 +60,7 @@ MaceStatus OpDelegatorRegistry::Register(const DelegatorInfo &key,
DelegatorCreator OpDelegatorRegistry::GetCreator( DelegatorCreator OpDelegatorRegistry::GetCreator(
const DelegatorInfo &key) const { const DelegatorInfo &key) const {
if (registry_.count(key) > 0) { if (registry_.count(key) > 0) {
VLOG(3) << "find delegator creator: " << key.ToString();
return registry_.at(key); return registry_.at(key);
} }
......
...@@ -105,6 +105,7 @@ cc_library( ...@@ -105,6 +105,7 @@ cc_library(
name = "arm_neon_kernels", name = "arm_neon_kernels",
srcs = glob( srcs = glob(
[ [
"arm/base/*.cc",
"arm/fp32/*.cc", "arm/fp32/*.cc",
"arm/fp16/gemv.h", "arm/fp16/gemv.h",
], ],
...@@ -121,6 +122,7 @@ cc_library( ...@@ -121,6 +122,7 @@ cc_library(
)), )),
hdrs = glob( hdrs = glob(
[ [
"arm/base/*.h",
"arm/fp32/*.h", "arm/fp32/*.h",
], ],
) + if_quantize_enabled(glob( ) + if_quantize_enabled(glob(
......
...@@ -5,6 +5,9 @@ file(GLOB OPS_REF_Q8_KERNELS_SRCS ...@@ -5,6 +5,9 @@ file(GLOB OPS_REF_Q8_KERNELS_SRCS
ref/q8/*.cc ref/q8/*.cc
) )
file(GLOB OPS_ARM_NEON_BASE_KERNELS_SRCS
arm/base/*.cc
)
file(GLOB OPS_ARM_NEON_FP32_KERNELS_SRCS file(GLOB OPS_ARM_NEON_FP32_KERNELS_SRCS
arm/fp32/*.cc arm/fp32/*.cc
) )
...@@ -32,7 +35,7 @@ if(MACE_ENABLE_QUANTIZE) ...@@ -32,7 +35,7 @@ if(MACE_ENABLE_QUANTIZE)
endif(MACE_ENABLE_QUANTIZE) endif(MACE_ENABLE_QUANTIZE)
if(MACE_ENABLE_NEON) if(MACE_ENABLE_NEON)
set(OPS_SRCS ${OPS_SRCS} ${OPS_ARM_NEON_FP32_KERNELS_SRCS}) set(OPS_SRCS ${OPS_SRCS} ${OPS_ARM_NEON_BASE_KERNELS_SRCS} ${OPS_ARM_NEON_FP32_KERNELS_SRCS})
if(MACE_ENABLE_QUANTIZE) if(MACE_ENABLE_QUANTIZE)
set(OPS_SRCS ${OPS_SRCS} ${OPS_ARM_NEON_Q8_KERNELS_SRCS}) set(OPS_SRCS ${OPS_SRCS} ${OPS_ARM_NEON_Q8_KERNELS_SRCS})
endif(MACE_ENABLE_QUANTIZE) endif(MACE_ENABLE_QUANTIZE)
......
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/activation.h"
namespace mace {
namespace ops {
namespace arm {
// Applies the configured activation to `input` and stores the result in
// `output`.
//
// If `output` is a different tensor it is first resized like `input` (the
// resize is wrapped in MACE_RETURN_IF_ERROR) and mapped through its own
// guard. If both pointers name the same tensor the activation is run in
// place and only the input guard is taken. Returns MACE_SUCCESS after
// DoActivation has been called.
template<typename T>
MaceStatus Activation<T>::Compute(const OpContext *context,
                                  const Tensor *input, Tensor *output) {
  Tensor::MappingGuard input_guard(input);

  if (input == output) {
    // In-place case: no resize and no second mapping guard.
    DoActivation(context, input, output);
    return MaceStatus::MACE_SUCCESS;
  }

  MACE_RETURN_IF_ERROR(output->ResizeLike(input));
  Tensor::MappingGuard output_guard(output);
  DoActivation(context, input, output);
  return MaceStatus::MACE_SUCCESS;
}
// Selects the Activate* routine that matches `type_` and runs it over the
// whole input buffer.
//
// The element count is taken from `input->size()` and the thread pool from
// the CPU runtime of the context's device. Any `type_` value without a case
// below ends up in MACE_NOT_IMPLEMENTED.
template<typename T>
void Activation<T>::DoActivation(const OpContext *context,
                                 const Tensor *input,
                                 Tensor *output) {
  const T *src = input->data<T>();
  T *dst = output->mutable_data<T>();
  const index_t count = input->size();
  utils::ThreadPool &pool = context->device()->cpu_runtime()->thread_pool();

  switch (type_) {
    case RELU:
      ActivateRelu(&pool, src, count, dst);
      break;
    case RELUX:
      ActivateRelux(&pool, src, count, dst);
      break;
    case LEAKYRELU:
      ActivateLeakyRelu(&pool, src, count, dst);
      break;
    case TANH:
      ActivateTanh(&pool, src, count, dst);
      break;
    case SIGMOID:
      ActivateSigmoid(&pool, src, count, dst);
      break;
    case NOOP:
      // Nothing is written to the output buffer for NOOP.
      // NOTE(review): when input != output this leaves `output` resized but
      // unfilled -- confirm callers never reach NOOP out of place.
      break;
    default:
      MACE_NOT_IMPLEMENTED;
  }
}
// Registers Activation<float> in `registry`.
// The delegator is registered with delegator::ActivationParam as its
// parameter type, under the key built by MACE_DELEGATOR_KEY from
// (Activation, DeviceType::CPU, float, ImplType::NEON).
void RegisterActivationDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Activation<float>, delegator::ActivationParam,
MACE_DELEGATOR_KEY(Activation, DeviceType::CPU, float, ImplType::NEON));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_ACTIVATION_H_
#define MACE_OPS_ARM_BASE_ACTIVATION_H_
#include "mace/ops/delegator/activation.h"
namespace mace {
namespace ops {
namespace arm {
// Activation delegator for the arm namespace, templated on the element
// type T. It derives from delegator::Activation and forwards the
// ActivationParam to that base class.
template<typename T>
class Activation : public delegator::Activation {
public:
explicit Activation(const delegator::ActivationParam &param)
: delegator::Activation(param) {}
~Activation() = default;
// Overrides the base-class Compute: takes the op context, an input tensor
// and an output tensor, and returns a MaceStatus.
MaceStatus Compute(const OpContext *context,
const Tensor *input, Tensor *output) override;
private:
// Internal dispatch step taking the same arguments as Compute.
void DoActivation(const OpContext *context,
const Tensor *input, Tensor *output);
// One routine per activation kind. Each receives a thread pool, a read-only
// input buffer, `input_size`, and a writable output buffer. Only the
// declarations live here; the definitions are not part of this header.
void ActivateRelu(utils::ThreadPool *thread_pool, const T *input_data,
const index_t input_size, T *output_data);
void ActivateRelux(utils::ThreadPool *thread_pool, const T *input_data,
const index_t input_size, T *output_data);
void ActivateLeakyRelu(utils::ThreadPool *thread_pool, const T *input_data,
const index_t input_size, T *output_data);
void ActivateTanh(utils::ThreadPool *thread_pool, const T *input_data,
const index_t input_size, T *output_data);
void ActivateSigmoid(utils::ThreadPool *thread_pool, const T *input_data,
const index_t input_size, T *output_data);
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_ACTIVATION_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/bias_add.h"
namespace mace {
namespace ops {
namespace arm {
// Adds `bias` to `input` and stores the result in `output`.
//
// Four cases, depending on whether a bias is supplied and whether the
// operation is in place (input == output):
//   * no bias, out of place : `output` becomes a copy of `input`;
//   * no bias, in place     : nothing to do;
//   * bias, in place        : input and bias are mapped, then AddBias runs;
//   * bias, out of place    : `output` is resized like `input` (wrapped in
//                             MACE_RETURN_IF_ERROR), all three tensors are
//                             mapped, then AddBias runs.
// Returns MACE_SUCCESS when none of the above bails out early.
template<typename T>
MaceStatus BiasAdd<T>::Compute(const OpContext *context, const Tensor *input,
                               const Tensor *bias, Tensor *output) {
  const bool in_place = (input == output);

  if (bias == nullptr) {
    if (!in_place) {
      output->Copy(*input);
    }
    return MaceStatus::MACE_SUCCESS;
  }

  if (in_place) {
    Tensor::MappingGuard input_guard(input);
    Tensor::MappingGuard bias_guard(bias);
    AddBias(context, input, bias, output);
    return MaceStatus::MACE_SUCCESS;
  }

  MACE_RETURN_IF_ERROR(output->ResizeLike(input));
  Tensor::MappingGuard input_guard(input);
  Tensor::MappingGuard bias_guard(bias);
  Tensor::MappingGuard output_guard(output);
  AddBias(context, input, bias, output);
  return MaceStatus::MACE_SUCCESS;
}
// Reads the first four dimensions of `input` as (batch, channels, height,
// width) and forwards the raw buffers to Add1DimBias when `bias` has exactly
// one dimension, or to Add2DimsBias otherwise. The thread pool comes from
// the CPU runtime of the context's device.
template<typename T>
void BiasAdd<T>::AddBias(const OpContext *context, const Tensor *input,
                         const Tensor *bias, mace::Tensor *output) {
  auto src = input->data<T>();
  auto bias_values = bias->data<T>();
  auto dst = output->mutable_data<T>();

  const index_t batch = input->dim(0);
  const index_t channels = input->dim(1);
  const index_t rows = input->dim(2);
  const index_t cols = input->dim(3);
  const index_t image_size = rows * cols;

  utils::ThreadPool &pool = context->device()->cpu_runtime()->thread_pool();

  if (bias->dim_size() != 1) {
    Add2DimsBias(&pool, src, bias_values, dst, batch, channels, image_size);
  } else {
    Add1DimBias(&pool, src, bias_values, dst, batch, channels, image_size);
  }
}
// Registers BiasAdd<float> in `registry`.
// The delegator is registered with DelegatorParam as its parameter type,
// under the key built by MACE_DELEGATOR_KEY from
// (BiasAdd, DeviceType::CPU, float, ImplType::NEON).
void RegisterBiasAddDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, BiasAdd<float>, DelegatorParam,
MACE_DELEGATOR_KEY(BiasAdd, DeviceType::CPU, float, ImplType::NEON));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_BIAS_ADD_H_
#define MACE_OPS_ARM_BASE_BIAS_ADD_H_
#include "mace/ops/delegator/bias_add.h"
namespace mace {
namespace ops {
namespace arm {
// Bias-add delegator for the arm namespace, templated on the element type T.
// It derives from delegator::BiasAdd and forwards the DelegatorParam to
// that base class.
template<typename T>
class BiasAdd : public delegator::BiasAdd {
public:
explicit BiasAdd(const DelegatorParam &param) : delegator::BiasAdd(param) {}
~BiasAdd() = default;
// Overrides the base-class Compute: takes the op context, an input tensor,
// a bias tensor and an output tensor, and returns a MaceStatus.
MaceStatus Compute(const OpContext *context, const Tensor *input,
const Tensor *bias, Tensor *output) override;
private:
// Internal step taking the same tensors as Compute.
void AddBias(const OpContext *context, const Tensor *input,
const Tensor *bias, Tensor *output);
// Two raw-buffer routines with identical parameter lists: thread pool,
// input/bias/output buffers, then batch, channels and image_size. Only the
// declarations live here; the definitions are not part of this header.
void Add1DimBias(utils::ThreadPool *thread_pool, const T *input_data,
const T *bias_data, T *output_data,
const index_t batch, const index_t channels,
const index_t image_size);
void Add2DimsBias(utils::ThreadPool *thread_pool, const T *input_data,
const T *bias_data, T *output_data,
const index_t batch, const index_t channels,
const index_t image_size);
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_BIAS_ADD_H_
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,18 +12,17 @@ ...@@ -12,18 +12,17 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/ops/arm/fp32/conv_2d.h" #include "mace/ops/arm/base/conv_2d.h"
#include <algorithm>
#include <memory> #include <memory>
#include <utility> #include <utility>
#include <algorithm>
#include "mace/utils/memory.h" #include "mace/utils/memory.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
void Conv2dBase::CalOutputShapeAndInputPadSize( void Conv2dBase::CalOutputShapeAndInputPadSize(
const std::vector<index_t> &input_shape, const std::vector<index_t> &input_shape,
...@@ -164,10 +163,10 @@ MaceStatus Conv2dBase::ResizeOutAndPadInOut(const OpContext *context, ...@@ -164,10 +163,10 @@ MaceStatus Conv2dBase::ResizeOutAndPadInOut(const OpContext *context,
auto scratch_buffer = context->device()->scratch_buffer(); auto scratch_buffer = context->device()->scratch_buffer();
const index_t padded_in_size = const index_t padded_in_size =
MACE_EXTRA_BUFFER_PAD_SIZE + (is_in_padded ? PadAlignSize( MACE_EXTRA_BUFFER_PAD_SIZE + (is_in_padded ? PadAlignSize(
sizeof(float) * batch * in_channels * padded_in_height type_size_ * batch * in_channels * padded_in_height
* padded_in_width) : 0); * padded_in_width) : 0);
const index_t padded_out_size = is_out_padded ? PadAlignSize( const index_t padded_out_size = is_out_padded ? PadAlignSize(
sizeof(float) * batch * out_channels * padded_out_height type_size_ * batch * out_channels * padded_out_height
* padded_out_width) : 0; * padded_out_width) : 0;
scratch_buffer->Rewind(); scratch_buffer->Rewind();
...@@ -176,7 +175,7 @@ MaceStatus Conv2dBase::ResizeOutAndPadInOut(const OpContext *context, ...@@ -176,7 +175,7 @@ MaceStatus Conv2dBase::ResizeOutAndPadInOut(const OpContext *context,
std::unique_ptr<Tensor> std::unique_ptr<Tensor>
padded_in = padded_in =
make_unique<Tensor>(scratch_buffer->Scratch(padded_in_size), make_unique<Tensor>(scratch_buffer->Scratch(padded_in_size),
DataType::DT_FLOAT); input->dtype());
padded_in->Resize({batch, in_channels, padded_in_height, padded_in_width}); padded_in->Resize({batch, in_channels, padded_in_height, padded_in_width});
PadInput(*input, in_pad_size[0], in_pad_size[2], padded_in.get()); PadInput(*input, in_pad_size[0], in_pad_size[2], padded_in.get());
*padded_input = std::move(padded_in); *padded_input = std::move(padded_in);
...@@ -185,7 +184,7 @@ MaceStatus Conv2dBase::ResizeOutAndPadInOut(const OpContext *context, ...@@ -185,7 +184,7 @@ MaceStatus Conv2dBase::ResizeOutAndPadInOut(const OpContext *context,
std::unique_ptr<Tensor> std::unique_ptr<Tensor>
padded_out = padded_out =
make_unique<Tensor>(scratch_buffer->Scratch(padded_out_size), make_unique<Tensor>(scratch_buffer->Scratch(padded_out_size),
DataType::DT_FLOAT); output->dtype());
padded_out->Resize({batch, out_channels, padded_out_height, padded_out->Resize({batch, out_channels, padded_out_height,
padded_out_width}); padded_out_width});
*padded_output = std::move(padded_out); *padded_output = std::move(padded_out);
...@@ -206,8 +205,8 @@ void Conv2dBase::PadInput(const Tensor &src, ...@@ -206,8 +205,8 @@ void Conv2dBase::PadInput(const Tensor &src,
const index_t padded_width = dst->dim(3); const index_t padded_width = dst->dim(3);
const int pad_bottom = static_cast<int>(padded_height - height - pad_top); const int pad_bottom = static_cast<int>(padded_height - height - pad_top);
const int pad_right = static_cast<int>(padded_width - width - pad_left); const int pad_right = static_cast<int>(padded_width - width - pad_left);
auto in_data = src.data<float>(); auto in_data = src.data<uint8_t>();
auto padded_in_data = dst->mutable_data<float>(); auto padded_in_data = dst->mutable_data<uint8_t>();
const index_t img_size = height * width; const index_t img_size = height * width;
const index_t padded_img_size = padded_height * padded_width; const index_t padded_img_size = padded_height * padded_width;
...@@ -215,25 +214,26 @@ void Conv2dBase::PadInput(const Tensor &src, ...@@ -215,25 +214,26 @@ void Conv2dBase::PadInput(const Tensor &src,
for (index_t b = 0; b < batch; ++b) { for (index_t b = 0; b < batch; ++b) {
for (index_t c = 0; c < channels; ++c) { for (index_t c = 0; c < channels; ++c) {
const index_t bc = b * channels + c; const index_t bc = b * channels + c;
const float *in_base = in_data + bc * img_size; const uint8_t *in_base = in_data + bc * img_size * type_size_;
float *padded_in_base = padded_in_data + bc * padded_img_size; uint8_t *padded_in_base =
padded_in_data + bc * padded_img_size * type_size_;
memset(padded_in_base, 0, sizeof(float) * pad_top * padded_width); memset(padded_in_base, 0, type_size_ * pad_top * padded_width);
padded_in_base += pad_top * padded_width; padded_in_base += pad_top * padded_width * type_size_;
for (index_t h = 0; h < height; ++h) { for (index_t h = 0; h < height; ++h) {
memset(padded_in_base, memset(padded_in_base,
0, 0,
sizeof(float) * pad_left); type_size_ * pad_left);
memcpy(padded_in_base + pad_left, memcpy(padded_in_base + pad_left * type_size_,
in_base, in_base,
sizeof(float) * width); type_size_ * width);
memset(padded_in_base + pad_left + width, memset(padded_in_base + (pad_left + width) * type_size_,
0, 0,
sizeof(float) * pad_right); type_size_ * pad_right);
in_base += width; in_base += width * type_size_;
padded_in_base += padded_width; padded_in_base += padded_width * type_size_;
} }
memset(padded_in_base, 0, sizeof(float) * pad_bottom * padded_width); memset(padded_in_base, 0, type_size_ * pad_bottom * padded_width);
} }
} }
} }
...@@ -247,8 +247,8 @@ void Conv2dBase::UnPadOutput(const Tensor &src, Tensor *dst) { ...@@ -247,8 +247,8 @@ void Conv2dBase::UnPadOutput(const Tensor &src, Tensor *dst) {
const index_t padded_height = src.dim(2); const index_t padded_height = src.dim(2);
const index_t padded_width = src.dim(3); const index_t padded_width = src.dim(3);
auto padded_out_data = src.data<float>(); auto padded_out_data = src.data<uint8_t>();
auto out_data = dst->mutable_data<float>(); auto out_data = dst->mutable_data<uint8_t>();
const index_t img_size = height * width; const index_t img_size = height * width;
const index_t padded_img_size = padded_height * padded_width; const index_t padded_img_size = padded_height * padded_width;
...@@ -256,21 +256,93 @@ void Conv2dBase::UnPadOutput(const Tensor &src, Tensor *dst) { ...@@ -256,21 +256,93 @@ void Conv2dBase::UnPadOutput(const Tensor &src, Tensor *dst) {
for (index_t b = 0; b < batch; ++b) { for (index_t b = 0; b < batch; ++b) {
for (index_t c = 0; c < channels; ++c) { for (index_t c = 0; c < channels; ++c) {
const index_t bc = (b * channels + c); const index_t bc = (b * channels + c);
float *out_base = out_data + bc * img_size; uint8_t *out_base = out_data + bc * img_size * type_size_;
const float *padded_out_base = padded_out_data + bc * padded_img_size; const uint8_t *padded_out_base =
padded_out_data + bc * padded_img_size * type_size_;
for (index_t h = 0; h < height; ++h) { for (index_t h = 0; h < height; ++h) {
memcpy(out_base, memcpy(out_base, padded_out_base, type_size_ * width);
padded_out_base, out_base += width * type_size_;
sizeof(float) * width); padded_out_base += padded_width * type_size_;
out_base += width;
padded_out_base += padded_width;
} // h } // h
} // c } // c
} // b } // b
} }
} // namespace fp32 ConvComputeParam Conv2dBase::PreWorkAndGetConv2DParam(
const OpContext *context, const Tensor *in_tensor, Tensor *out_tensor) {
// Builds a ConvComputeParam from the shapes of `in_tensor` and `out_tensor`
// plus the thread pool of the context's CPU runtime.
// Shape indices 0..3 are read as batch, channels, height and width. The
// batch count is taken from the input shape only.
// Despite the "PreWork" in the name, nothing visible here resizes or writes
// to either tensor; only their shapes are read.
auto &in_shape = in_tensor->shape();
auto &out_shape = out_tensor->shape();
const index_t batch = in_shape[0];
const index_t in_channels = in_shape[1];
const index_t in_height = in_shape[2];
const index_t in_width = in_shape[3];
const index_t out_channels = out_shape[1];
const index_t out_height = out_shape[2];
const index_t out_width = out_shape[3];
// Element counts of one image plane (height * width) ...
const index_t in_image_size = in_height * in_width;
const index_t out_image_size = out_height * out_width;
// ... and of one batch entry (channels * image plane).
const index_t in_batch_size = in_channels * in_image_size;
const index_t out_batch_size = out_channels * out_image_size;
utils::ThreadPool
&thread_pool = context->device()->cpu_runtime()->thread_pool();
return ConvComputeParam(batch, in_channels, in_height, in_width,
out_channels, out_height, out_width,
in_image_size, out_image_size,
in_batch_size, out_batch_size, &thread_pool);
}
// Prepares `output` for a depthwise convolution and returns the derived
// compute parameters.
//
// Steps, in order:
//   1. CalOutputShapeAndInputPadSize fills the output shape and paddings
//      from the input and filter shapes.
//   2. The output channel count is multiplied by filter_shape[1].
//   3. `output` is resized to that shape (MACE_CHECK fails with
//      "Resize failed." otherwise) and cleared.
//   4. Half of each padding value becomes pad_top / pad_left.
//   5. CalOutputBoundaryWithoutUsingInputPad supplies the four valid-region
//      bounds (h start, h stop, w start, w stop).
// The multiplier is out_channels / in_channels (integer division).
DepthwiseConvComputeParam Conv2dBase::PreWorkAndGetDepthwiseConv2DParam(
    const OpContext *context, const Tensor *input,
    const Tensor *filter, Tensor *output) {
  std::vector<index_t> output_dims(4);
  std::vector<int> pad_sizes(2);
  auto &input_dims = input->shape();
  auto &filter_dims = filter->shape();

  CalOutputShapeAndInputPadSize(input_dims, filter_dims,
                                &output_dims, &pad_sizes);
  output_dims[1] *= filter_dims[1];
  MACE_CHECK(output->Resize(output_dims) == MaceStatus::MACE_SUCCESS,
             "Resize failed.");
  output->Clear();

  const int top_pad = pad_sizes[0] / 2;
  const int left_pad = pad_sizes[1] / 2;

  const index_t n = input_dims[0];
  const index_t in_c = input_dims[1];
  const index_t in_h = input_dims[2];
  const index_t in_w = input_dims[3];
  const index_t out_c = output_dims[1];
  const index_t out_h = output_dims[2];
  const index_t out_w = output_dims[3];

  const index_t in_plane = in_h * in_w;
  const index_t out_plane = out_h * out_w;
  const index_t in_per_batch = in_c * in_plane;
  const index_t out_per_batch = out_c * out_plane;
  const index_t channel_multiplier = out_c / in_c;

  // bounds = {valid_h_start, valid_h_stop, valid_w_start, valid_w_stop}.
  std::vector<index_t> bounds;
  CalOutputBoundaryWithoutUsingInputPad(output_dims, pad_sizes, &bounds);

  utils::ThreadPool &pool = context->device()->cpu_runtime()->thread_pool();

  return DepthwiseConvComputeParam(
      n, in_c, in_h, in_w, out_c, out_h, out_w,
      in_plane, out_plane, in_per_batch, out_per_batch,
      &pool, top_pad, left_pad, channel_multiplier,
      bounds[0], bounds[1], bounds[2], bounds[3]);
}
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
......
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,28 +12,97 @@ ...@@ -12,28 +12,97 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_OPS_ARM_FP32_CONV_2D_H_ #ifndef MACE_OPS_ARM_BASE_CONV_2D_H_
#define MACE_OPS_ARM_FP32_CONV_2D_H_ #define MACE_OPS_ARM_BASE_CONV_2D_H_
#include <vector>
#include <memory> #include <memory>
#include <vector>
#include "mace/core/ops/op_context.h" #include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/ops/delegator/conv_2d.h" #include "mace/ops/arm/base/gemm.h"
#include "mace/ops/arm/fp32/gemm.h"
#include "mace/ops/common/conv_pool_2d_util.h" #include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/conv_2d.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
struct ConvComputeParam {
const index_t batch;
const index_t in_channels;
const index_t in_height;
const index_t in_width;
const index_t out_channels;
const index_t out_height;
const index_t out_width;
const index_t in_image_size;
const index_t out_image_size;
const index_t in_batch_size;
const index_t out_batch_size;
utils::ThreadPool &thread_pool;
ConvComputeParam(const index_t b,
const index_t in_c,
const index_t in_h,
const index_t in_w,
const index_t out_c,
const index_t out_h,
const index_t out_w,
const index_t in_size,
const index_t out_size,
const index_t in_b_size,
const index_t out_b_size,
utils::ThreadPool *thrd_pool)
: batch(b), in_channels(in_c), in_height(in_h), in_width(in_w),
out_channels(out_c), out_height(out_h), out_width(out_w),
in_image_size(in_size), out_image_size(out_size),
in_batch_size(in_b_size), out_batch_size(out_b_size),
thread_pool(*thrd_pool) {}
};
struct DepthwiseConvComputeParam : public ConvComputeParam {
const int pad_top;
const int pad_left;
const index_t multiplier;
const index_t valid_h_start;
const index_t valid_h_stop;
const index_t valid_w_start;
const index_t valid_w_stop;
DepthwiseConvComputeParam(const index_t b,
const index_t in_c,
const index_t in_h,
const index_t in_w,
const index_t out_c,
const index_t out_h,
const index_t out_w,
const index_t in_size,
const index_t out_size,
const index_t in_b_size,
const index_t out_b_size,
utils::ThreadPool *thrd_pool,
const int pad_top_data,
const int pad_left_data,
const index_t multiplier_data,
const index_t valid_height_start,
const index_t valid_height_stop,
const index_t valid_width_start,
const index_t valid_width_stop)
: ConvComputeParam(b, in_c, in_h, in_w, out_c, out_h, out_w,
in_size, out_size, in_b_size, out_b_size, thrd_pool),
pad_top(pad_top_data), pad_left(pad_left_data),
multiplier(multiplier_data),
valid_h_start(valid_height_start), valid_h_stop(valid_height_stop),
valid_w_start(valid_width_start), valid_w_stop(valid_width_stop) {}
};
class Conv2dBase : public delegator::Conv2d { class Conv2dBase : public delegator::Conv2d {
public: public:
explicit Conv2dBase(const delegator::Conv2dParam &param) explicit Conv2dBase(const delegator::Conv2dParam &param, int type_size)
: delegator::Conv2d(param) {} : delegator::Conv2d(param), type_size_(type_size) {}
virtual ~Conv2dBase() = default; virtual ~Conv2dBase() = default;
...@@ -72,11 +141,19 @@ class Conv2dBase : public delegator::Conv2d { ...@@ -72,11 +141,19 @@ class Conv2dBase : public delegator::Conv2d {
const int pad_left, const int pad_left,
Tensor *dst); Tensor *dst);
void UnPadOutput(const Tensor &src, Tensor *dst); void UnPadOutput(const Tensor &src, Tensor *dst);
ConvComputeParam PreWorkAndGetConv2DParam(
const OpContext *context, const Tensor *in_tensor, Tensor *out_tensor);
DepthwiseConvComputeParam PreWorkAndGetDepthwiseConv2DParam(
const OpContext *context, const Tensor *input,
const Tensor *filter, Tensor *output);
private:
int type_size_;
}; };
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_OPS_ARM_FP32_CONV_2D_H_ #endif // MACE_OPS_ARM_BASE_CONV_2D_H_
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,33 +12,16 @@ ...@@ -12,33 +12,16 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/ops/arm/fp32/conv_2d.h" #include "mace/ops/arm/base/conv_2d_1x1.h"
#include "mace/ops/arm/fp32/gemm.h"
#include "mace/ops/delegator/conv_2d.h" #include <vector>
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
class Conv2dK1x1 : public Conv2dBase {
public:
explicit Conv2dK1x1(const delegator::Conv2dParam &param)
: Conv2dBase(param),
gemm_(delegator::GemmParam()) {}
virtual ~Conv2dK1x1() {}
MaceStatus Compute( template<typename T>
const OpContext *context, MaceStatus Conv2dK1x1<T>::Compute(const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
private:
Gemm gemm_;
};
MaceStatus Conv2dK1x1::Compute(const OpContext *context,
const Tensor *input, const Tensor *input,
const Tensor *filter, const Tensor *filter,
Tensor *output) { Tensor *output) {
...@@ -50,13 +33,8 @@ MaceStatus Conv2dK1x1::Compute(const OpContext *context, ...@@ -50,13 +33,8 @@ MaceStatus Conv2dK1x1::Compute(const OpContext *context,
std::vector<index_t> output_shape; std::vector<index_t> output_shape;
std::vector<int> in_pad_size; std::vector<int> in_pad_size;
std::vector<int> out_pad_size; std::vector<int> out_pad_size;
CalOutputShapeAndPadSize(input, CalOutputShapeAndPadSize(input, filter, 1, 1,
filter, &output_shape, &in_pad_size, &out_pad_size);
1,
1,
&output_shape,
&in_pad_size,
&out_pad_size);
MACE_RETURN_IF_ERROR(output->Resize(output_shape)); MACE_RETURN_IF_ERROR(output->Resize(output_shape));
const index_t out_channels = output_shape[1]; const index_t out_channels = output_shape[1];
...@@ -70,16 +48,16 @@ MaceStatus Conv2dK1x1::Compute(const OpContext *context, ...@@ -70,16 +48,16 @@ MaceStatus Conv2dK1x1::Compute(const OpContext *context,
in_height != padded_in_height || in_width != padded_in_width; in_height != padded_in_height || in_width != padded_in_width;
auto scratch_buffer = context->device()->scratch_buffer(); auto scratch_buffer = context->device()->scratch_buffer();
const index_t padded_in_size = is_in_padded ? PadAlignSize( const index_t padded_in_size = is_in_padded ? PadAlignSize(
sizeof(float) * batch * in_channels * padded_in_height sizeof(T) * batch * in_channels * padded_in_height
* padded_in_width) : 0; * padded_in_width) : 0;
const index_t pack_filter_size = const index_t pack_filter_size =
PadAlignSize(sizeof(float) * out_channels * in_channels); PadAlignSize(sizeof(T) * out_channels * in_channels);
const index_t pack_input_size = const index_t pack_input_size =
PadAlignSize( PadAlignSize(
sizeof(float) * in_channels * padded_in_height * padded_in_width); sizeof(T) * in_channels * padded_in_height * padded_in_width);
const index_t pack_output_size = const index_t pack_output_size =
PadAlignSize( PadAlignSize(
sizeof(float) * out_channels * padded_in_height * padded_in_width); sizeof(T) * out_channels * padded_in_height * padded_in_width);
const index_t gemm_pack_size = const index_t gemm_pack_size =
pack_filter_size + pack_input_size + pack_output_size; pack_filter_size + pack_input_size + pack_output_size;
...@@ -115,12 +93,11 @@ MaceStatus Conv2dK1x1::Compute(const OpContext *context, ...@@ -115,12 +93,11 @@ MaceStatus Conv2dK1x1::Compute(const OpContext *context,
void RegisterConv2dK1x1Delegator(OpDelegatorRegistry *registry) { void RegisterConv2dK1x1Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR( MACE_REGISTER_DELEGATOR(
registry, Conv2dK1x1, delegator::Conv2dParam, registry, Conv2dK1x1<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU, MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K1x1)); float, ImplType::NEON, K1x1));
} }
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_CONV_2D_1X1_H_
#define MACE_OPS_ARM_BASE_CONV_2D_1X1_H_
#include "mace/ops/arm/base/conv_2d.h"
#include "mace/ops/arm/base/gemm.h"
namespace mace {
namespace ops {
namespace arm {
// 1x1 convolution delegator templated on the element type T.
//
// The base class Conv2dBase is given sizeof(T) as its element size, and a
// Gemm<T> member is constructed from a default delegator::GemmParam.
// Only the declaration of Compute lives here; its definition is not part of
// this header.
template<typename T>
class Conv2dK1x1 : public Conv2dBase {
 public:
  explicit Conv2dK1x1(const delegator::Conv2dParam &param)
      : Conv2dBase(param, sizeof(T)),
        gemm_(delegator::GemmParam()) {}

  // Conv2dBase declares its destructor virtual, so this one is an override.
  // Spelling it with `override` lets the compiler check that, and
  // `= default` replaces the empty user-written body.
  ~Conv2dK1x1() override = default;

  // Overrides the base-class Compute: takes the op context, the input and
  // filter tensors and the output tensor, and returns a MaceStatus.
  MaceStatus Compute(
      const OpContext *context,
      const Tensor *input,
      const Tensor *filter,
      Tensor *output) override;

 private:
  Gemm<T> gemm_;
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_CONV_2D_1X1_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/conv_2d_1xn.h"
namespace mace {
namespace ops {
namespace arm {
// Registers the float instantiations of the four 1xN / Nx1 Conv2d delegator
// classes with `registry`. Every key is built from the same
// (Conv2d, DeviceType::CPU, float, ImplType::NEON) tuple and differs only in
// the trailing kernel tag, which matches the registered class name.
// NOTE(review): both macros are defined elsewhere; their expansion is not
// visible from this file.
void RegisterConv2dK1xNDelegator(OpDelegatorRegistry *registry) {
// Tag K1x7S1 -> Conv2dK1x7S1<float>.
MACE_REGISTER_DELEGATOR(
registry, Conv2dK1x7S1<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K1x7S1));
// Tag K7x1S1 -> Conv2dK7x1S1<float>.
MACE_REGISTER_DELEGATOR(
registry, Conv2dK7x1S1<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K7x1S1));
// Tag K1x15S1 -> Conv2dK1x15S1<float>.
MACE_REGISTER_DELEGATOR(
registry, Conv2dK1x15S1<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K1x15S1));
// Tag K15x1S1 -> Conv2dK15x1S1<float>.
MACE_REGISTER_DELEGATOR(
registry, Conv2dK15x1S1<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K15x1S1));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,76 +12,66 @@ ...@@ -12,76 +12,66 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_OPS_ARM_FP32_CONV_2D_1XN_H_ #ifndef MACE_OPS_ARM_BASE_CONV_2D_1XN_H_
#define MACE_OPS_ARM_FP32_CONV_2D_1XN_H_ #define MACE_OPS_ARM_BASE_CONV_2D_1XN_H_
#include <vector> #include <vector>
#include "mace/core/ops/op_context.h" #include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/ops/arm/fp32/conv_2d.h" #include "mace/ops/arm/base/conv_2d_mxn.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
class Conv2dK1x7S1 : public Conv2dBase { template<typename T>
class Conv2dK1x7S1 : public Conv2dKMxN<T> {
public: public:
explicit Conv2dK1x7S1(const delegator::Conv2dParam &param) explicit Conv2dK1x7S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {} : Conv2dKMxN<T>(param, 1, 4) {}
virtual ~Conv2dK1x7S1() {} virtual ~Conv2dK1x7S1() {}
MaceStatus Compute( MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *output_data) override;
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
}; };
class Conv2dK7x1S1 : public Conv2dBase { template<typename T>
class Conv2dK7x1S1 : public Conv2dKMxN<T> {
public: public:
explicit Conv2dK7x1S1(const delegator::Conv2dParam &param) explicit Conv2dK7x1S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {} : Conv2dKMxN<T>(param, 4, 1) {}
virtual ~Conv2dK7x1S1() {} virtual ~Conv2dK7x1S1() {}
MaceStatus Compute( MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *output_data) override;
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
}; };
class Conv2dK1x15S1 : public Conv2dBase { template<typename T>
class Conv2dK1x15S1 : public Conv2dKMxN<T> {
public: public:
explicit Conv2dK1x15S1(const delegator::Conv2dParam &param) explicit Conv2dK1x15S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {} : Conv2dKMxN<T>(param, 1, 4) {}
virtual ~Conv2dK1x15S1() {} virtual ~Conv2dK1x15S1() {}
MaceStatus Compute( MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *output_data) override;
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
}; };
class Conv2dK15x1S1 : public Conv2dBase { template<typename T>
class Conv2dK15x1S1 : public Conv2dKMxN<T> {
public: public:
explicit Conv2dK15x1S1(const delegator::Conv2dParam &param) explicit Conv2dK15x1S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {} : Conv2dKMxN<T>(param, 4, 1) {}
virtual ~Conv2dK15x1S1() {} virtual ~Conv2dK15x1S1() {}
MaceStatus Compute( MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *output_data) override;
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
}; };
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_OPS_ARM_FP32_CONV_2D_1XN_H_ #endif // MACE_OPS_ARM_BASE_CONV_2D_1XN_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/conv_2d_3x3.h"
namespace mace {
namespace ops {
namespace arm {
// Registers the float instantiations of the two 3x3 Conv2d delegator classes
// with `registry`. Both keys share (Conv2d, DeviceType::CPU, float,
// ImplType::NEON) and differ only in the trailing tag (K3x3S1 / K3x3S2).
// NOTE(review): both macros are defined elsewhere; their expansion is not
// visible from this file.
void RegisterConv2dK3x3Delegator(OpDelegatorRegistry *registry) {
// Tag K3x3S1 -> Conv2dK3x3S1<float>.
MACE_REGISTER_DELEGATOR(
registry, Conv2dK3x3S1<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S1));
// Tag K3x3S2 -> Conv2dK3x3S2<float>.
MACE_REGISTER_DELEGATOR(
registry, Conv2dK3x3S2<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S2));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,50 +12,44 @@ ...@@ -12,50 +12,44 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_OPS_ARM_FP32_CONV_2D_3X3_H_ #ifndef MACE_OPS_ARM_BASE_CONV_2D_3X3_H_
#define MACE_OPS_ARM_FP32_CONV_2D_3X3_H_ #define MACE_OPS_ARM_BASE_CONV_2D_3X3_H_
#include <vector> #include <vector>
#include "mace/core/ops/op_context.h" #include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/ops/arm/fp32/conv_2d.h" #include "mace/ops/arm/base/conv_2d_mxn.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
class Conv2dK3x3S1 : public Conv2dBase { template<typename T>
class Conv2dK3x3S1 : public Conv2dKMxN<T> {
public: public:
explicit Conv2dK3x3S1(const delegator::Conv2dParam &param) explicit Conv2dK3x3S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {} : Conv2dKMxN<T>(param, 2, 4) {}
virtual ~Conv2dK3x3S1() {} virtual ~Conv2dK3x3S1() {}
MaceStatus Compute( MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *output_data) override;
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
}; };
class Conv2dK3x3S2 : public Conv2dBase { template<typename T>
class Conv2dK3x3S2 : public Conv2dKMxN<T> {
public: public:
explicit Conv2dK3x3S2(const delegator::Conv2dParam &param) explicit Conv2dK3x3S2(const delegator::Conv2dParam &param)
: Conv2dBase(param) {} : Conv2dKMxN<T>(param, 1, 4) {}
virtual ~Conv2dK3x3S2() {} virtual ~Conv2dK3x3S2() {}
MaceStatus Compute( MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *output_data) override;
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
}; };
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_OPS_ARM_FP32_CONV_2D_3X3_H_ #endif // MACE_OPS_ARM_BASE_CONV_2D_3X3_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/conv_2d_5x5.h"
namespace mace {
namespace ops {
namespace arm {
// Registers Conv2dK5x5S1<float> with `registry` under the key
// (Conv2d, DeviceType::CPU, float, ImplType::NEON, K5x5S1).
// NOTE(review): both macros are defined elsewhere; their expansion is not
// visible from this file.
void RegisterConv2dK5x5Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Conv2dK5x5S1<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K5x5S1));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_CONV_2D_5X5_H_
#define MACE_OPS_ARM_BASE_CONV_2D_5X5_H_
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/arm/base/conv_2d_mxn.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
// Conv2d delegator for the K5x5S1 kernel variant, parameterised on the
// element type T. Padding, tensor mapping and un-padding are inherited from
// Conv2dKMxN<T>::Compute(); this class only supplies the kernel itself.
template<typename T>
class Conv2dK5x5S1 : public Conv2dKMxN<T> {
 public:
  // Passes 1 and 4 to the base as its tile_h and tile_w arguments.
  explicit Conv2dK5x5S1(const delegator::Conv2dParam &param)
      : Conv2dKMxN<T>(param, 1, 4) {}

  virtual ~Conv2dK5x5S1() = default;

  // Kernel body invoked by the base class with raw data pointers; it is
  // declared here and defined elsewhere.
  MaceStatus DoCompute(const ConvComputeParam &p,
                       const T *filter,
                       const T *input_data,
                       T *output_data) override;
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_CONV_2D_5X5_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/conv_2d_7x7.h"
namespace mace {
namespace ops {
namespace arm {
// Registers the float instantiations of the three 7x7 Conv2d delegator
// classes with `registry`. All keys share (Conv2d, DeviceType::CPU, float,
// ImplType::NEON) and differ only in the trailing tag
// (K7x7S1 / K7x7S2 / K7x7S3).
// NOTE(review): both macros are defined elsewhere; their expansion is not
// visible from this file.
void RegisterConv2dK7x7Delegator(OpDelegatorRegistry *registry) {
// Tag K7x7S1 -> Conv2dK7x7S1<float>.
MACE_REGISTER_DELEGATOR(
registry, Conv2dK7x7S1<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K7x7S1));
// Tag K7x7S2 -> Conv2dK7x7S2<float>.
MACE_REGISTER_DELEGATOR(
registry, Conv2dK7x7S2<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K7x7S2));
// Tag K7x7S3 -> Conv2dK7x7S3<float>.
MACE_REGISTER_DELEGATOR(
registry, Conv2dK7x7S3<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K7x7S3));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,63 +12,55 @@ ...@@ -12,63 +12,55 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_OPS_ARM_FP32_CONV_2D_7X7_H_ #ifndef MACE_OPS_ARM_BASE_CONV_2D_7X7_H_
#define MACE_OPS_ARM_FP32_CONV_2D_7X7_H_ #define MACE_OPS_ARM_BASE_CONV_2D_7X7_H_
#include <vector> #include <vector>
#include "mace/core/ops/op_context.h" #include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/ops/arm/fp32/conv_2d.h" #include "mace/ops/arm/base/conv_2d_mxn.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
class Conv2dK7x7S1 : public Conv2dBase { template<typename T>
class Conv2dK7x7S1 : public Conv2dKMxN<T> {
public: public:
explicit Conv2dK7x7S1(const delegator::Conv2dParam &param) explicit Conv2dK7x7S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {} : Conv2dKMxN<T>(param, 1, 4) {}
virtual ~Conv2dK7x7S1() {} virtual ~Conv2dK7x7S1() {}
MaceStatus Compute( MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *output_data) override;
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
}; };
class Conv2dK7x7S2 : public Conv2dBase { template<typename T>
class Conv2dK7x7S2 : public Conv2dKMxN<T> {
public: public:
explicit Conv2dK7x7S2(const delegator::Conv2dParam &param) explicit Conv2dK7x7S2(const delegator::Conv2dParam &param)
: Conv2dBase(param) {} : Conv2dKMxN<T>(param, 1, 4) {}
virtual ~Conv2dK7x7S2() {} virtual ~Conv2dK7x7S2() {}
MaceStatus Compute( MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *output_data) override;
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
}; };
class Conv2dK7x7S3 : public Conv2dBase { template<typename T>
class Conv2dK7x7S3 : public Conv2dKMxN<T> {
public: public:
explicit Conv2dK7x7S3(const delegator::Conv2dParam &param) explicit Conv2dK7x7S3(const delegator::Conv2dParam &param)
: Conv2dBase(param) {} : Conv2dKMxN<T>(param, 1, 4) {}
virtual ~Conv2dK7x7S3() {} virtual ~Conv2dK7x7S3() {}
MaceStatus Compute( MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *output_data) override;
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
}; };
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_OPS_ARM_FP32_CONV_2D_7X7_H_ #endif // MACE_OPS_ARM_BASE_CONV_2D_7X7_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/conv_2d_general.h"
#include <memory>
namespace mace {
namespace ops {
namespace arm {
// Generic Conv2d entry point for kernels without a dedicated delegator.
//
// Steps:
//   1. Resize `output` and obtain padded input/output tensors where needed
//      (ResizeOutAndPadInOut, called with the constants 1 and 4 in its two
//      tile-size argument positions).
//   2. Zero the tensor that will be written.
//   3. Run DoCompute() on the raw data pointers.
//   4. Copy the padded result back into `output` (UnPadOutput).
//
// Returns the status reported by DoCompute() when it is not MACE_SUCCESS.
// Previously that status was discarded and success was returned
// unconditionally. On failure `output` is left without the un-padding step.
template<typename T>
MaceStatus Conv2dGeneral<T>::Compute(const OpContext *context,
                                     const Tensor *input,
                                     const Tensor *filter,
                                     Tensor *output) {
  std::unique_ptr<const Tensor> padded_input;
  std::unique_ptr<Tensor> padded_output;
  ResizeOutAndPadInOut(context, input, filter, output, 1, 4,
                       &padded_input, &padded_output);

  // A null padded tensor means the caller's tensor is used directly.
  const Tensor *in_tensor = input;
  if (padded_input != nullptr) {
    in_tensor = padded_input.get();
  }
  Tensor *out_tensor = output;
  if (padded_output != nullptr) {
    out_tensor = padded_output.get();
  }
  out_tensor->Clear();

  // The guards are taken on the caller-provided tensors, as in the original.
  Tensor::MappingGuard in_guard(input);
  Tensor::MappingGuard filter_guard(filter);
  Tensor::MappingGuard out_guard(output);

  const T *filter_data = filter->data<T>();
  const T *input_data = in_tensor->data<T>();
  T *output_data = out_tensor->mutable_data<T>();

  const ConvComputeParam p =
      PreWorkAndGetConv2DParam(context, in_tensor, out_tensor);
  auto &filter_shape = filter->shape();

  // Propagate a kernel failure instead of silently reporting success.
  MaceStatus status =
      DoCompute(p, filter_data, input_data, output_data, filter_shape);
  if (status != MaceStatus::MACE_SUCCESS) {
    return status;
  }

  UnPadOutput(*out_tensor, output);
  return MaceStatus::MACE_SUCCESS;
}
// Registers Conv2dGeneral<float> with `registry` under the untagged key
// (Conv2d, DeviceType::CPU, float, ImplType::NEON). Unlike the kernel-specific
// registrations, this one uses MACE_DELEGATOR_KEY (no trailing tag).
// NOTE(review): both macros are defined elsewhere; their expansion is not
// visible from this file.
void RegisterConv2dGeneralDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Conv2dGeneral<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY(Conv2d, DeviceType::CPU, float, ImplType::NEON));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_CONV_2D_GENERAL_H_
#define MACE_OPS_ARM_BASE_CONV_2D_GENERAL_H_
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/arm/base/conv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
// Conv2d delegator that handles arbitrary filter shapes, parameterised on the
// element type T. sizeof(T) is handed to Conv2dBase as the per-element byte
// size.
template<typename T>
class Conv2dGeneral : public Conv2dBase {
 public:
  explicit Conv2dGeneral(const delegator::Conv2dParam &param)
      : Conv2dBase(param, sizeof(T)) {}

  virtual ~Conv2dGeneral() = default;

  // Public entry point: convolves `input` with `filter` into `output`.
  MaceStatus Compute(const OpContext *context,
                     const Tensor *input,
                     const Tensor *filter,
                     Tensor *output) override;

 protected:
  // Kernel body working on raw data pointers. Non-virtual; `filter_shape`
  // carries the filter dimensions because they are not fixed by the class.
  MaceStatus DoCompute(const ConvComputeParam &p,
                       const T *filter_data,
                       const T *input_data,
                       T *output_data,
                       const std::vector<index_t> &filter_shape);
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_CONV_2D_GENERAL_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_CONV_2D_MXN_H_
#define MACE_OPS_ARM_BASE_CONV_2D_MXN_H_
#include <memory>
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/arm/base/conv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
// Common driver for fixed-size MxN Conv2d kernels, parameterised on the
// element type T.
//
// Compute() performs the work shared by every kernel (output resize, padding,
// tensor mapping, un-padding) and delegates the arithmetic to the pure
// virtual DoCompute(), which each concrete kernel class overrides.
template<typename T>
class Conv2dKMxN : public Conv2dBase {
 public:
  // `tile_h` / `tile_w` are stored and later forwarded to
  // ResizeOutAndPadInOut() as the output tile height and width.
  explicit Conv2dKMxN(const delegator::Conv2dParam &param,
                      const int tile_h, const int tile_w)
      : Conv2dBase(param, sizeof(T)),
        out_tile_h_(tile_h), out_tile_w_(tile_w) {}

  virtual ~Conv2dKMxN() {}

  // Runs the convolution of `input` with `filter` into `output`.
  //
  // Returns the status reported by DoCompute() when it is not MACE_SUCCESS.
  // Previously that status was discarded and success was returned
  // unconditionally. On failure the un-padding step is skipped.
  MaceStatus Compute(const OpContext *context, const Tensor *input,
                     const Tensor *filter, Tensor *output) override {
    std::unique_ptr<const Tensor> padded_input;
    std::unique_ptr<Tensor> padded_output;
    ResizeOutAndPadInOut(context, input, filter, output, out_tile_h_,
                         out_tile_w_, &padded_input, &padded_output);

    // A null padded tensor means the caller's tensor is used directly.
    const Tensor *in_tensor = input;
    if (padded_input != nullptr) {
      in_tensor = padded_input.get();
    }
    Tensor *out_tensor = output;
    if (padded_output != nullptr) {
      out_tensor = padded_output.get();
    }
    out_tensor->Clear();

    // The guards are taken on the caller-provided tensors, as in the original.
    Tensor::MappingGuard in_guard(input);
    Tensor::MappingGuard filter_guard(filter);
    Tensor::MappingGuard out_guard(output);

    const T *filter_data = filter->data<T>();
    const T *input_data = in_tensor->data<T>();
    T *output_data = out_tensor->mutable_data<T>();

    const ConvComputeParam p =
        PreWorkAndGetConv2DParam(context, in_tensor, out_tensor);

    // Propagate a kernel failure instead of silently reporting success.
    MaceStatus status = DoCompute(p, filter_data, input_data, output_data);
    if (status != MaceStatus::MACE_SUCCESS) {
      return status;
    }

    UnPadOutput(*out_tensor, output);
    return MaceStatus::MACE_SUCCESS;
  }

  // Kernel body implemented by each concrete MxN delegator. Receives the
  // precomputed parameters `p` and raw pointers to the filter, the (possibly
  // padded) input and the (possibly padded, already cleared) output.
  virtual MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
                               const T *input_data, T *output_data) = 0;

 private:
  const int out_tile_h_;
  const int out_tile_w_;
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_CONV_2D_MXN_H_
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,17 +12,17 @@ ...@@ -12,17 +12,17 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/arm/base/deconv_2d.h"
#include <utility>
#include <functional> #include <functional>
#include "mace/utils/memory.h" #include <utility>
#include "mace/ops/common/conv_pool_2d_util.h" #include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/utils/memory.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
MaceStatus Deconv2dBase::ResizeOutAndPadOut( MaceStatus Deconv2dBase::ResizeOutAndPadOut(
const OpContext *context, const OpContext *context,
...@@ -67,7 +67,7 @@ MaceStatus Deconv2dBase::ResizeOutAndPadOut( ...@@ -67,7 +67,7 @@ MaceStatus Deconv2dBase::ResizeOutAndPadOut(
std::accumulate(padded_out_shape.begin(), std::accumulate(padded_out_shape.begin(),
padded_out_shape.end(), padded_out_shape.end(),
1, 1,
std::multiplies<index_t>()) * sizeof(float); std::multiplies<index_t>()) * type_size_;
ScratchBuffer *scratch = context->device()->scratch_buffer(); ScratchBuffer *scratch = context->device()->scratch_buffer();
scratch->Rewind(); scratch->Rewind();
index_t scratch_size = PadAlignSize(padded_out_size); index_t scratch_size = PadAlignSize(padded_out_size);
...@@ -75,7 +75,7 @@ MaceStatus Deconv2dBase::ResizeOutAndPadOut( ...@@ -75,7 +75,7 @@ MaceStatus Deconv2dBase::ResizeOutAndPadOut(
std::unique_ptr<Tensor> std::unique_ptr<Tensor>
padded_out padded_out
(make_unique<Tensor>(scratch->Scratch(scratch_size), DT_FLOAT)); (make_unique<Tensor>(scratch->Scratch(scratch_size), output->dtype()));
padded_out->Reshape(padded_out_shape); padded_out->Reshape(padded_out_shape);
*padded_output = std::move(padded_out); *padded_output = std::move(padded_out);
} }
...@@ -97,24 +97,97 @@ void Deconv2dBase::UnPadOutput(const Tensor &src, ...@@ -97,24 +97,97 @@ void Deconv2dBase::UnPadOutput(const Tensor &src,
const index_t padded_height = src.dim(2); const index_t padded_height = src.dim(2);
const index_t padded_width = src.dim(3); const index_t padded_width = src.dim(3);
auto padded_out_data = src.data<float>(); auto padded_out_data = src.data<uint8_t>();
auto out_data = dst->mutable_data<float>(); auto out_data = dst->mutable_data<uint8_t>();
for (index_t i = 0; i < batch; ++i) { for (index_t i = 0; i < batch; ++i) {
for (index_t j = 0; j < channels; ++j) { for (index_t j = 0; j < channels; ++j) {
for (index_t k = 0; k < height; ++k) { for (index_t k = 0; k < height; ++k) {
const float *input_base = const uint8_t *input_base =
padded_out_data + ((i * channels + j) * padded_height padded_out_data + ((i * channels + j) * padded_height
+ (k + pad_h)) * padded_width; + (k + pad_h)) * padded_width * type_size_;
float *output_base = uint8_t *output_base =
out_data + ((i * channels + j) * height + k) * width; out_data + ((i * channels + j) * height + k) * width * type_size_;
memcpy(output_base, input_base + pad_w, width * sizeof(float)); memcpy(output_base,
input_base + pad_w * type_size_,
width * type_size_);
} }
} }
} }
} }
} // namespace fp32 DeconvComputeParam Deconv2dBase::PreWorkAndGetDeconvParam(
const OpContext *context, const Tensor *input, Tensor *out_tensor) {
auto &in_shape = input->shape();
auto &out_shape = out_tensor->shape();
const index_t batch = in_shape[0];
const index_t inch = in_shape[1];
const index_t h = in_shape[2];
const index_t w = in_shape[3];
const index_t outch = out_shape[1];
const index_t outh = out_shape[2];
const index_t outw = out_shape[3];
const index_t out_img_size = outh * outw;
utils::ThreadPool
&thread_pool = context->device()->cpu_runtime()->thread_pool();
return DeconvComputeParam(batch, inch, h, w, outch, outh, outw,
out_img_size, &thread_pool);
}
// Gathers the sizes a depthwise deconvolution kernel needs into a
// DepthwiseDeconvComputeParam.
//
// Dimensions 0..3 of `input` are read as batch, channels, height and width;
// dimensions 2 and 3 of `out_tensor` are read as output height and width.
// The thread pool is taken from the CPU runtime of the context's device.
DepthwiseDeconvComputeParam Deconv2dBase::PreWorkAndGetDepthwiseDeconvParam(
    const OpContext *context, const Tensor *input, Tensor *out_tensor) {
  const auto &src_dims = input->shape();
  const auto &dst_dims = out_tensor->shape();

  const index_t in_h = src_dims[2];
  const index_t in_w = src_dims[3];
  const index_t out_h = dst_dims[2];
  const index_t out_w = dst_dims[3];

  utils::ThreadPool *pool =
      &context->device()->cpu_runtime()->thread_pool();

  return DepthwiseDeconvComputeParam(
      src_dims[0],    // batch
      src_dims[1],    // channels
      in_h, in_w,
      in_h * in_w,    // elements per input image plane
      out_h, out_w,
      out_h * out_w,  // elements per output image plane
      pool);
}
// Gathers the sizes a grouped deconvolution kernel needs into a
// GroupDeconvComputeParam.
//
// Dimensions 0..3 of `input` are read as batch, channels, height and width;
// dimensions 1..3 of `out_tensor` as output channels, height and width.
// Per-group channel counts are the integer quotients of the channel counts
// by group_. The thread pool is taken from the CPU runtime of the context's
// device.
GroupDeconvComputeParam Deconv2dBase::PreWorkAndGetGroupDeconvParam(
    const OpContext *context, const Tensor *input, Tensor *out_tensor) {
  const auto &src_dims = input->shape();
  const auto &dst_dims = out_tensor->shape();

  const index_t in_ch = src_dims[1];
  const index_t in_h = src_dims[2];
  const index_t in_w = src_dims[3];
  const index_t out_ch = dst_dims[1];
  const index_t out_h = dst_dims[2];
  const index_t out_w = dst_dims[3];

  // Channels handled by a single group on each side.
  const index_t in_ch_per_group = in_ch / group_;
  const index_t out_ch_per_group = out_ch / group_;

  utils::ThreadPool *pool =
      &context->device()->cpu_runtime()->thread_pool();

  return GroupDeconvComputeParam(
      src_dims[0],    // batch
      in_ch, in_h, in_w,
      out_ch, out_h, out_w,
      in_h * in_w,    // elements per input image plane
      out_h * out_w,  // elements per output image plane
      in_ch_per_group, out_ch_per_group,
      pool);
}
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_DECONV_2D_H_
#define MACE_OPS_ARM_BASE_DECONV_2D_H_
#include <memory>
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/ops/arm/base/gemm.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/deconv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
// Immutable bundle of the sizes used by a deconvolution kernel, plus a
// reference to the thread pool it should run on. All size fields are const
// and are set once by the constructor.
struct DeconvComputeParam {
  const index_t batch;
  const index_t in_channels;
  const index_t in_height;
  const index_t in_width;
  const index_t out_channels;
  const index_t out_height;
  const index_t out_width;
  const index_t out_img_size;

  // Bound to the pool passed to the constructor; not owned.
  utils::ThreadPool &thread_pool;

  // `thrd_pool` is dereferenced unconditionally, so it must not be null.
  DeconvComputeParam(const index_t b,
                     const index_t in_c,
                     const index_t in_h,
                     const index_t in_w,
                     const index_t out_c,
                     const index_t out_h,
                     const index_t out_w,
                     const index_t out_size,
                     utils::ThreadPool *thrd_pool)
      : batch(b),
        in_channels(in_c),
        in_height(in_h),
        in_width(in_w),
        out_channels(out_c),
        out_height(out_h),
        out_width(out_w),
        out_img_size(out_size),
        thread_pool(*thrd_pool) {}
};
// Immutable bundle of the sizes used by a depthwise deconvolution kernel,
// plus a reference to the thread pool it should run on. There is no separate
// output-channel field; the input image-plane size is carried instead.
struct DepthwiseDeconvComputeParam {
  const index_t batch;
  const index_t in_channels;
  const index_t in_height;
  const index_t in_width;
  const index_t in_img_size;
  const index_t out_height;
  const index_t out_width;
  const index_t out_img_size;

  // Bound to the pool passed to the constructor; not owned.
  utils::ThreadPool &thread_pool;

  // `thrd_pool` is dereferenced unconditionally, so it must not be null.
  DepthwiseDeconvComputeParam(const index_t b, const index_t in_c,
                              const index_t in_h, const index_t in_w,
                              const index_t in_size,
                              const index_t out_h, const index_t out_w,
                              const index_t out_size,
                              utils::ThreadPool *thrd_pool)
      : batch(b), in_channels(in_c), in_height(in_h), in_width(in_w),
        in_img_size(in_size),
        out_height(out_h), out_width(out_w), out_img_size(out_size),
        thread_pool(*thrd_pool) {}
};
// Immutable bundle of the sizes used by a grouped deconvolution kernel, plus
// a reference to the thread pool it should run on. In addition to the full
// input/output sizes it carries the per-group channel counts.
struct GroupDeconvComputeParam {
  const index_t batch;
  const index_t in_channels;
  const index_t in_height;
  const index_t in_width;
  const index_t out_channels;
  const index_t out_height;
  const index_t out_width;
  const index_t in_img_size;
  const index_t out_img_size;
  const index_t inch_g;   // input channels per group
  const index_t outch_g;  // output channels per group

  // Bound to the pool passed to the constructor; not owned.
  utils::ThreadPool &thread_pool;

  // `thrd_pool` is dereferenced unconditionally, so it must not be null.
  GroupDeconvComputeParam(const index_t in_b, const index_t in_ch,
                          const index_t in_h, const index_t in_w,
                          const index_t out_ch,
                          const index_t out_h, const index_t out_w,
                          const index_t in_size, const index_t out_size,
                          const index_t in_ch_g, const index_t out_ch_g,
                          utils::ThreadPool *thrd_pool)
      : batch(in_b), in_channels(in_ch), in_height(in_h), in_width(in_w),
        out_channels(out_ch), out_height(out_h), out_width(out_w),
        in_img_size(in_size), out_img_size(out_size),
        inch_g(in_ch_g), outch_g(out_ch_g),
        thread_pool(*thrd_pool) {}
};
class Deconv2dBase : public delegator::Deconv2d {
public:
explicit Deconv2dBase(const delegator::Deconv2dParam &param, int type_size)
: delegator::Deconv2d(param),
group_(param.group_), type_size_(type_size) {}
virtual ~Deconv2dBase() = default;
protected:
MaceStatus ResizeOutAndPadOut(const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output,
std::vector<int> *out_pad_size,
std::unique_ptr<Tensor> *padded_output);
void UnPadOutput(const Tensor &src,
const std::vector<int> &out_pad_size,
Tensor *dst);
DeconvComputeParam PreWorkAndGetDeconvParam(
const OpContext *context, const Tensor *input, Tensor *out_tensor);
DepthwiseDeconvComputeParam PreWorkAndGetDepthwiseDeconvParam(
const OpContext *context, const Tensor *input, Tensor *out_tensor);
GroupDeconvComputeParam PreWorkAndGetGroupDeconvParam(
const OpContext *context, const Tensor *input, Tensor *out_tensor);
protected:
index_t group_;
private:
int type_size_;
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_DECONV_2D_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/deconv_2d_2x2.h"
namespace mace {
namespace ops {
namespace arm {
// Registers the float instantiations of Deconv2dK2x2S1 and Deconv2dK2x2S2 with
// `registry`, each under a Deconv2d / CPU / float / NEON key carrying the
// matching tag (K2x2S1, K2x2S2).
// NOTE(review): the tags presumably stand for a 2x2 kernel with stride 1 and
// stride 2; confirm against the code that looks these keys up.
void RegisterDeconv2dK2x2Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Deconv2dK2x2S1<float>, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, DeviceType::CPU,
float, ImplType::NEON, K2x2S1));
MACE_REGISTER_DELEGATOR(
registry, Deconv2dK2x2S2<float>, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, DeviceType::CPU,
float, ImplType::NEON, K2x2S2));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_OPS_ARM_FP32_DECONV_2D_2X2_H_ #ifndef MACE_OPS_ARM_BASE_DECONV_2D_2X2_H_
#define MACE_OPS_ARM_FP32_DECONV_2D_2X2_H_ #define MACE_OPS_ARM_BASE_DECONV_2D_2X2_H_
#include <vector> #include <vector>
#include <memory> #include <memory>
...@@ -21,46 +21,38 @@ ...@@ -21,46 +21,38 @@
#include "mace/core/ops/op_context.h" #include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/arm/base/deconv_2d_mxn.h"
#include "mace/ops/common/conv_pool_2d_util.h" #include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
class Deconv2dK2x2S1 : public Deconv2dBase { template<typename T>
class Deconv2dK2x2S1 : public Deconv2dKMxN<T> {
public: public:
explicit Deconv2dK2x2S1(const delegator::Deconv2dParam &param) explicit Deconv2dK2x2S1(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {} : Deconv2dKMxN<T>(param) {}
virtual ~Deconv2dK2x2S1() {} virtual ~Deconv2dK2x2S1() {}
MaceStatus Compute( MaceStatus DoCompute(const DeconvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *padded_out_data) override;
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
}; };
class Deconv2dK2x2S2 : public Deconv2dBase { template<typename T>
class Deconv2dK2x2S2 : public Deconv2dKMxN<T> {
public: public:
explicit Deconv2dK2x2S2(const delegator::Deconv2dParam &param) explicit Deconv2dK2x2S2(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {} : Deconv2dKMxN<T>(param) {}
virtual ~Deconv2dK2x2S2() {} virtual ~Deconv2dK2x2S2() {}
MaceStatus Compute( MaceStatus DoCompute(const DeconvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *padded_out_data) override;
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
}; };
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_OPS_ARM_FP32_DECONV_2D_2X2_H_ #endif // MACE_OPS_ARM_BASE_DECONV_2D_2X2_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/deconv_2d_3x3.h"
namespace mace {
namespace ops {
namespace arm {
// Registers the float instantiations of Deconv2dK3x3S1 and Deconv2dK3x3S2 with
// `registry`, each under a Deconv2d / CPU / float / NEON key carrying the
// matching tag (K3x3S1, K3x3S2).
// NOTE(review): the tags presumably stand for a 3x3 kernel with stride 1 and
// stride 2; confirm against the code that looks these keys up.
void RegisterDeconv2dK3x3Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Deconv2dK3x3S1<float>, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S1));
MACE_REGISTER_DELEGATOR(
registry, Deconv2dK3x3S2<float>, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S2));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_OPS_ARM_FP32_DECONV_2D_3X3_H_ #ifndef MACE_OPS_ARM_BASE_DECONV_2D_3X3_H_
#define MACE_OPS_ARM_FP32_DECONV_2D_3X3_H_ #define MACE_OPS_ARM_BASE_DECONV_2D_3X3_H_
#include <vector> #include <vector>
#include <memory> #include <memory>
...@@ -21,46 +21,38 @@ ...@@ -21,46 +21,38 @@
#include "mace/core/ops/op_context.h" #include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/arm/base/deconv_2d_mxn.h"
#include "mace/ops/common/conv_pool_2d_util.h" #include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
class Deconv2dK3x3S1 : public Deconv2dBase { template<typename T>
class Deconv2dK3x3S1 : public Deconv2dKMxN<T> {
public: public:
explicit Deconv2dK3x3S1(const delegator::Deconv2dParam &param) explicit Deconv2dK3x3S1(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {} : Deconv2dKMxN<T>(param) {}
virtual ~Deconv2dK3x3S1() {} virtual ~Deconv2dK3x3S1() {}
MaceStatus Compute( MaceStatus DoCompute(const DeconvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *padded_out_data) override;
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
}; };
class Deconv2dK3x3S2 : public Deconv2dBase { template<typename T>
class Deconv2dK3x3S2 : public Deconv2dKMxN<T> {
public: public:
explicit Deconv2dK3x3S2(const delegator::Deconv2dParam &param) explicit Deconv2dK3x3S2(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {} : Deconv2dKMxN<T>(param) {}
virtual ~Deconv2dK3x3S2() {} virtual ~Deconv2dK3x3S2() {}
MaceStatus Compute( MaceStatus DoCompute(const DeconvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *padded_out_data) override;
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
}; };
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_OPS_ARM_FP32_DECONV_2D_3X3_H_ #endif // MACE_OPS_ARM_BASE_DECONV_2D_3X3_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/deconv_2d_4x4.h"
namespace mace {
namespace ops {
namespace arm {
// Registers the float instantiations of Deconv2dK4x4S1 and Deconv2dK4x4S2 with
// `registry`, each under a Deconv2d / CPU / float / NEON key carrying the
// matching tag (K4x4S1, K4x4S2).
// NOTE(review): the tags presumably stand for a 4x4 kernel with stride 1 and
// stride 2; confirm against the code that looks these keys up.
void RegisterDeconv2dK4x4Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Deconv2dK4x4S1<float>, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, DeviceType::CPU,
float, ImplType::NEON, K4x4S1));
MACE_REGISTER_DELEGATOR(
registry, Deconv2dK4x4S2<float>, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, DeviceType::CPU,
float, ImplType::NEON, K4x4S2));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,55 +12,47 @@ ...@@ -12,55 +12,47 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_OPS_ARM_FP32_DECONV_2D_4X4_H_ #ifndef MACE_OPS_ARM_BASE_DECONV_2D_4X4_H_
#define MACE_OPS_ARM_FP32_DECONV_2D_4X4_H_ #define MACE_OPS_ARM_BASE_DECONV_2D_4X4_H_
#include <vector>
#include <memory> #include <memory>
#include <vector>
#include "mace/core/ops/op_context.h" #include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/arm/base/deconv_2d_mxn.h"
#include "mace/ops/common/conv_pool_2d_util.h" #include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
class Deconv2dK4x4S1 : public Deconv2dBase { template<typename T>
class Deconv2dK4x4S1 : public Deconv2dKMxN<T> {
public: public:
explicit Deconv2dK4x4S1(const delegator::Deconv2dParam &param) explicit Deconv2dK4x4S1(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {} : Deconv2dKMxN<T>(param) {}
virtual ~Deconv2dK4x4S1() {} virtual ~Deconv2dK4x4S1() {}
MaceStatus Compute( MaceStatus DoCompute(const DeconvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *padded_out_data) override;
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
}; };
class Deconv2dK4x4S2 : public Deconv2dBase { template<typename T>
class Deconv2dK4x4S2 : public Deconv2dKMxN<T> {
public: public:
explicit Deconv2dK4x4S2(const delegator::Deconv2dParam &param) explicit Deconv2dK4x4S2(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {} : Deconv2dKMxN<T>(param) {}
virtual ~Deconv2dK4x4S2() {} virtual ~Deconv2dK4x4S2() {}
MaceStatus Compute( MaceStatus DoCompute(const DeconvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *padded_out_data) override;
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
}; };
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_OPS_ARM_FP32_DECONV_2D_4X4_H_ #endif // MACE_OPS_ARM_BASE_DECONV_2D_4X4_H_
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,30 +12,17 @@ ...@@ -12,30 +12,17 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/arm/base/deconv_2d_general.h"
// TODO(liutuo): optimize it #include <memory>
#include <vector>
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
class Deconv2dGeneral : public Deconv2dBase { template<typename T>
public: MaceStatus Deconv2dGeneral<T>::Compute(const OpContext *context,
explicit Deconv2dGeneral(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~Deconv2dGeneral() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
};
MaceStatus Deconv2dGeneral::Compute(const OpContext *context,
const Tensor *input, const Tensor *input,
const Tensor *filter, const Tensor *filter,
const Tensor *output_shape, const Tensor *output_shape,
...@@ -60,9 +47,9 @@ MaceStatus Deconv2dGeneral::Compute(const OpContext *context, ...@@ -60,9 +47,9 @@ MaceStatus Deconv2dGeneral::Compute(const OpContext *context,
Tensor::MappingGuard filter_mapper(filter); Tensor::MappingGuard filter_mapper(filter);
Tensor::MappingGuard output_mapper(output); Tensor::MappingGuard output_mapper(output);
auto input_data = input->data<float>(); auto input_data = input->data<T>();
auto filter_data = filter->data<float>(); auto filter_data = filter->data<T>();
auto padded_out_data = out_tensor->mutable_data<float>(); auto padded_out_data = out_tensor->mutable_data<T>();
auto &in_shape = input->shape(); auto &in_shape = input->shape();
auto &out_shape = out_tensor->shape(); auto &out_shape = out_tensor->shape();
...@@ -95,7 +82,7 @@ MaceStatus Deconv2dGeneral::Compute(const OpContext *context, ...@@ -95,7 +82,7 @@ MaceStatus Deconv2dGeneral::Compute(const OpContext *context,
index_t start1, index_t end1, index_t step1) { index_t start1, index_t end1, index_t step1) {
for (index_t b = start0; b < end0; b += step0) { for (index_t b = start0; b < end0; b += step0) {
for (index_t oc = start1; oc < end1; oc += step1) { for (index_t oc = start1; oc < end1; oc += step1) {
float *out_base = T *out_base =
padded_out_data + (b * out_channels + oc) * out_img_size; padded_out_data + (b * out_channels + oc) * out_img_size;
for (index_t i = 0; i < in_height; ++i) { for (index_t i = 0; i < in_height; ++i) {
for (index_t j = 0; j < in_width; ++j) { for (index_t j = 0; j < in_width; ++j) {
...@@ -104,7 +91,7 @@ MaceStatus Deconv2dGeneral::Compute(const OpContext *context, ...@@ -104,7 +91,7 @@ MaceStatus Deconv2dGeneral::Compute(const OpContext *context,
for (int ic = 0; ic < in_channels; ++ic) { for (int ic = 0; ic < in_channels; ++ic) {
const index_t input_idx = const index_t input_idx =
(b * in_channels + ic) * in_img_size + i * in_width + j; (b * in_channels + ic) * in_img_size + i * in_width + j;
const float val = input_data[input_idx]; const T val = input_data[input_idx];
const index_t kernel_offset = const index_t kernel_offset =
(oc * in_channels + ic) * kernel_size; (oc * in_channels + ic) * kernel_size;
for (int k = 0; k < kernel_size; ++k) { for (int k = 0; k < kernel_size; ++k) {
...@@ -126,11 +113,10 @@ MaceStatus Deconv2dGeneral::Compute(const OpContext *context, ...@@ -126,11 +113,10 @@ MaceStatus Deconv2dGeneral::Compute(const OpContext *context,
void RegisterDeconv2dGeneralDelegator(OpDelegatorRegistry *registry) { void RegisterDeconv2dGeneralDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR( MACE_REGISTER_DELEGATOR(
registry, Deconv2dGeneral, delegator::Deconv2dParam, registry, Deconv2dGeneral<float>, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY(Deconv2d, DeviceType::CPU, float, ImplType::NEON)); MACE_DELEGATOR_KEY(Deconv2d, DeviceType::CPU, float, ImplType::NEON));
} }
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
......
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_DECONV_2D_GENERAL_H_
#define MACE_OPS_ARM_BASE_DECONV_2D_GENERAL_H_
#include "mace/ops/arm/base/deconv_2d.h"
// TODO(liutuo): optimize it
namespace mace {
namespace ops {
namespace arm {
// Deconvolution delegator templated on the element type T.
//
// Only the interface is declared here: the constructor forwards the delegator
// parameters together with sizeof(T) to Deconv2dBase, and Compute() overrides
// the delegator entry point. The body of Compute() is defined out of line.
// NOTE(review): the name suggests this is the fallback used when no
// kernel-specific delegator applies; confirm at the registration site.
template<typename T>
class Deconv2dGeneral : public Deconv2dBase {
 public:
  explicit Deconv2dGeneral(const delegator::Deconv2dParam &param)
      : Deconv2dBase(param, sizeof(T)) {}

  virtual ~Deconv2dGeneral() = default;

  // Delegator entry point taking the op context, the input and filter
  // tensors, the requested output shape and the tensor receiving the result.
  MaceStatus Compute(const OpContext *context, const Tensor *input,
                     const Tensor *filter, const Tensor *output_shape,
                     Tensor *output) override;
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_DECONV_2D_GENERAL_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_DECONV_2D_MXN_H_
#define MACE_OPS_ARM_BASE_DECONV_2D_MXN_H_
#include <memory>
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/arm/base/deconv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
// Driver shared by the fixed-kernel Deconv2d delegators that derive from it.
//
// Compute() owns the kernel-independent steps (output preparation, clearing,
// tensor mapping, parameter construction, un-padding) and hands the raw
// buffers to DoCompute(), which every subclass implements. T is the element
// type; its size is forwarded to Deconv2dBase.
template<typename T>
class Deconv2dKMxN : public Deconv2dBase {
 public:
  explicit Deconv2dKMxN(const delegator::Deconv2dParam &param)
      : Deconv2dBase(param, sizeof(T)) {}
  virtual ~Deconv2dKMxN() {}

  // Runs the deconvolution of `input` with `filter` and stores the result in
  // `output`.
  //
  // Returns the failing status as soon as output preparation or the kernel
  // reports an error, and MACE_SUCCESS otherwise.
  MaceStatus Compute(const OpContext *context,
                     const Tensor *input,
                     const Tensor *filter,
                     const Tensor *output_shape,
                     Tensor *output) {
    std::unique_ptr<Tensor> padded_out;
    std::vector<int> out_pad_size;
    // Stop here if the output could not be prepared instead of computing into
    // a tensor in an unknown state.
    MaceStatus status = ResizeOutAndPadOut(
        context, input, filter, output_shape,
        output, &out_pad_size, &padded_out);
    if (status != MaceStatus::MACE_SUCCESS) {
      return status;
    }

    // Compute into the padded tensor when ResizeOutAndPadOut() produced one,
    // otherwise directly into the caller's output.
    Tensor *out_tensor = output;
    if (padded_out != nullptr) {
      out_tensor = padded_out.get();
    }
    out_tensor->Clear();

    Tensor::MappingGuard input_mapper(input);
    Tensor::MappingGuard filter_mapper(filter);
    // NOTE(review): the guard covers `output` while the kernel writes through
    // `out_tensor`; confirm this is intended when a padded tensor is in use.
    Tensor::MappingGuard output_mapper(output);
    const T *input_data = input->data<T>();
    const T *filter_data = filter->data<T>();
    T *padded_out_data = out_tensor->mutable_data<T>();

    const DeconvComputeParam p =
        PreWorkAndGetDeconvParam(context, input, out_tensor);

    // A kernel failure must reach the caller rather than being reported as
    // success.
    status = DoCompute(p, filter_data, input_data, padded_out_data);
    if (status != MaceStatus::MACE_SUCCESS) {
      return status;
    }

    UnPadOutput(*out_tensor, out_pad_size, output);
    return MaceStatus::MACE_SUCCESS;
  }

  // Kernel-specific computation supplied by subclasses. Receives the
  // precomputed parameters plus the raw filter, input and (possibly padded)
  // output buffers obtained in Compute().
  virtual MaceStatus DoCompute(const DeconvComputeParam &p, const T *filter,
                               const T *input_data, T *padded_out_data) = 0;
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_DECONV_2D_MXN_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/depthwise_conv_2d_3x3.h"
namespace mace {
namespace ops {
namespace arm {
// Registers the float instantiations of DepthwiseConv2dK3x3S1 and
// DepthwiseConv2dK3x3S2 with `registry`, each under a DepthwiseConv2d / CPU /
// float / NEON key carrying the matching tag (K3x3S1, K3x3S2).
// NOTE(review): the tags presumably stand for a 3x3 kernel with stride 1 and
// stride 2; confirm against the code that looks these keys up.
void RegisterDepthwiseConv2dK3x3Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, DepthwiseConv2dK3x3S1<float>, delegator::DepthwiseConv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S1));
MACE_REGISTER_DELEGATOR(
registry, DepthwiseConv2dK3x3S2<float>, delegator::DepthwiseConv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S2));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,51 +12,47 @@ ...@@ -12,51 +12,47 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_OPS_ARM_FP32_DEPTHWISE_CONV_2D_3X3_H_ #ifndef MACE_OPS_ARM_BASE_DEPTHWISE_CONV_2D_3X3_H_
#define MACE_OPS_ARM_FP32_DEPTHWISE_CONV_2D_3X3_H_ #define MACE_OPS_ARM_BASE_DEPTHWISE_CONV_2D_3X3_H_
#include <vector> #include <vector>
#include "mace/core/ops/op_context.h" #include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/ops/arm/fp32/conv_2d.h" #include "mace/ops/arm/base/depthwise_conv_2d_mxn.h"
#include "mace/ops/delegator/depthwise_conv_2d.h" #include "mace/ops/delegator/depthwise_conv_2d.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
class DepthwiseConv2dK3x3S1 : public Conv2dBase { template<typename T>
class DepthwiseConv2dK3x3S1 : public DepthwiseConv2dKMxN<T> {
public: public:
explicit DepthwiseConv2dK3x3S1(const delegator::DepthwiseConv2dParam &param) explicit DepthwiseConv2dK3x3S1(const delegator::DepthwiseConv2dParam &param)
: Conv2dBase(param) {} : DepthwiseConv2dKMxN<T>(param) {}
virtual ~DepthwiseConv2dK3x3S1() {} virtual ~DepthwiseConv2dK3x3S1() {}
MaceStatus Compute( MaceStatus DoCompute(
const OpContext *context, const DepthwiseConvComputeParam &p, const T *filter,
const Tensor *input, const T *input_data, T *output_data) override;
const Tensor *filter,
Tensor *output) override;
}; };
class DepthwiseConv2dK3x3S2 : public Conv2dBase { template<typename T>
class DepthwiseConv2dK3x3S2 : public DepthwiseConv2dKMxN<T> {
public: public:
explicit DepthwiseConv2dK3x3S2(const delegator::DepthwiseConv2dParam &param) explicit DepthwiseConv2dK3x3S2(const delegator::DepthwiseConv2dParam &param)
: Conv2dBase(param) {} : DepthwiseConv2dKMxN<T>(param) {}
virtual ~DepthwiseConv2dK3x3S2() {} virtual ~DepthwiseConv2dK3x3S2() {}
MaceStatus Compute( MaceStatus DoCompute(
const OpContext *context, const DepthwiseConvComputeParam &p, const T *filter,
const Tensor *input, const T *input_data, T *output_data) override;
const Tensor *filter,
Tensor *output) override;
}; };
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_OPS_ARM_FP32_DEPTHWISE_CONV_2D_3X3_H_ #endif // MACE_OPS_ARM_BASE_DEPTHWISE_CONV_2D_3X3_H_
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,51 +12,53 @@ ...@@ -12,51 +12,53 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_OPS_ARM_FP32_DECONV_2D_H_ #ifndef MACE_OPS_ARM_BASE_DEPTHWISE_CONV_2D_MXN_H_
#define MACE_OPS_ARM_FP32_DECONV_2D_H_ #define MACE_OPS_ARM_BASE_DEPTHWISE_CONV_2D_MXN_H_
#include <vector> #include <vector>
#include <memory>
#include "mace/core/ops/op_context.h" #include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/types.h" #include "mace/ops/arm/base/conv_2d.h"
#include "mace/ops/arm/fp32/gemm.h" #include "mace/ops/delegator/depthwise_conv_2d.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/deconv_2d.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
class Deconv2dBase : public delegator::Deconv2d { template<typename T>
class DepthwiseConv2dKMxN : public Conv2dBase {
public: public:
explicit Deconv2dBase(const delegator::Deconv2dParam &param) explicit DepthwiseConv2dKMxN(const delegator::DepthwiseConv2dParam &param)
: delegator::Deconv2d(param), : Conv2dBase(param, sizeof(T)) {}
group_(param.group_) {} virtual ~DepthwiseConv2dKMxN() {}
virtual ~Deconv2dBase() = default; MaceStatus Compute(const OpContext *context, const Tensor *input,
const Tensor *filter, Tensor *output) {
DepthwiseConvComputeParam p =
PreWorkAndGetDepthwiseConv2DParam(context, input, filter, output);
Tensor::MappingGuard in_guard(input);
Tensor::MappingGuard filter_guard(filter);
Tensor::MappingGuard out_guard(output);
const T *filter_data = filter->data<T>();
const T *input_data = input->data<T>();
T *output_data = output->mutable_data<T>();
DoCompute(p, filter_data, input_data, output_data);
return MaceStatus::MACE_SUCCESS;
}
protected: protected:
MaceStatus ResizeOutAndPadOut(const OpContext *context, virtual MaceStatus DoCompute(
const Tensor *input, const DepthwiseConvComputeParam &p, const T *filter,
const Tensor *filter, const T *input_data, T *output_data) = 0;
const Tensor *output_shape,
Tensor *output,
std::vector<int> *out_pad_size,
std::unique_ptr<Tensor> *padded_output);
void UnPadOutput(const Tensor &src,
const std::vector<int> &out_pad_size,
Tensor *dst);
index_t group_;
}; };
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_OPS_ARM_FP32_DECONV_2D_H_ #endif // MACE_OPS_ARM_BASE_DEPTHWISE_CONV_2D_MXN_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/depthwise_deconv_2d_3x3.h"
namespace mace {
namespace ops {
namespace arm {
// Registers the float instantiations of DepthwiseDeconv2dK3x3S1 and
// DepthwiseDeconv2dK3x3S2 with `registry`, each under a DepthwiseDeconv2d /
// CPU / float / NEON key carrying the matching tag (K3x3S1, K3x3S2).
// NOTE(review): the tags presumably stand for a 3x3 kernel with stride 1 and
// stride 2; confirm against the code that looks these keys up.
void RegisterDepthwiseDeconv2dK3x3Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, DepthwiseDeconv2dK3x3S1<float>,
delegator::DepthwiseDeconv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S1));
MACE_REGISTER_DELEGATOR(
registry, DepthwiseDeconv2dK3x3S2<float>,
delegator::DepthwiseDeconv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S2));
}
// Registers the float instantiations of GroupDeconv2dK3x3S1 and
// GroupDeconv2dK3x3S2 with `registry`, each under a GroupDeconv2d / CPU /
// float / NEON key carrying the matching tag (K3x3S1, K3x3S2).
// NOTE(review): the tags presumably stand for a 3x3 kernel with stride 1 and
// stride 2; confirm against the code that looks these keys up.
void RegisterGroupDeconv2dK3x3Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, GroupDeconv2dK3x3S1<float>, delegator::GroupDeconv2dParam,
MACE_DELEGATOR_KEY_EX(GroupDeconv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S1));
MACE_REGISTER_DELEGATOR(
registry, GroupDeconv2dK3x3S2<float>, delegator::GroupDeconv2dParam,
MACE_DELEGATOR_KEY_EX(GroupDeconv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S2));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_3X3_H_ #ifndef MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_3X3_H_
#define MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_3X3_H_ #define MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_3X3_H_
#include <vector> #include <vector>
#include <memory> #include <memory>
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include "mace/core/ops/op_context.h" #include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/arm/base/depthwise_deconv_2d_mxn.h"
#include "mace/ops/common/conv_pool_2d_util.h" #include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/depthwise_deconv_2d.h" #include "mace/ops/delegator/depthwise_deconv_2d.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
...@@ -29,70 +29,56 @@ ...@@ -29,70 +29,56 @@
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
class DepthwiseDeconv2dK3x3S1 : public Deconv2dBase { template<typename T>
class DepthwiseDeconv2dK3x3S1 : public DepthwiseDeconv2dKMxN<T> {
public: public:
explicit DepthwiseDeconv2dK3x3S1( explicit DepthwiseDeconv2dK3x3S1(
const delegator::DepthwiseDeconv2dParam &param) const delegator::DepthwiseDeconv2dParam &param)
: Deconv2dBase(param) {} : DepthwiseDeconv2dKMxN<T>(param) {}
virtual ~DepthwiseDeconv2dK3x3S1() {} virtual ~DepthwiseDeconv2dK3x3S1() {}
MaceStatus Compute( MaceStatus DoCompute(const DepthwiseDeconvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *padded_out_data) override;
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
}; };
class DepthwiseDeconv2dK3x3S2 : public Deconv2dBase { template<typename T>
class DepthwiseDeconv2dK3x3S2 : public DepthwiseDeconv2dKMxN<T> {
public: public:
explicit DepthwiseDeconv2dK3x3S2( explicit DepthwiseDeconv2dK3x3S2(
const delegator::DepthwiseDeconv2dParam &param) const delegator::DepthwiseDeconv2dParam &param)
: Deconv2dBase(param) {} : DepthwiseDeconv2dKMxN<T>(param) {}
virtual ~DepthwiseDeconv2dK3x3S2() {} virtual ~DepthwiseDeconv2dK3x3S2() {}
MaceStatus Compute( MaceStatus DoCompute(const DepthwiseDeconvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *padded_out_data) override;
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
}; };
class GroupDeconv2dK3x3S1 : public Deconv2dBase { template<typename T>
class GroupDeconv2dK3x3S1 : public GroupDeconv2dKMxN<T> {
public: public:
explicit GroupDeconv2dK3x3S1( explicit GroupDeconv2dK3x3S1(
const delegator::GroupDeconv2dParam &param) const delegator::GroupDeconv2dParam &param)
: Deconv2dBase(param) {} : GroupDeconv2dKMxN<T>(param) {}
virtual ~GroupDeconv2dK3x3S1() {} virtual ~GroupDeconv2dK3x3S1() {}
MaceStatus Compute( MaceStatus DoCompute(const GroupDeconvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *padded_out_data) override;
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
}; };
class GroupDeconv2dK3x3S2 : public Deconv2dBase { template<typename T>
class GroupDeconv2dK3x3S2 : public GroupDeconv2dKMxN<T> {
public: public:
explicit GroupDeconv2dK3x3S2(const delegator::GroupDeconv2dParam &param) explicit GroupDeconv2dK3x3S2(const delegator::GroupDeconv2dParam &param)
: Deconv2dBase(param) {} : GroupDeconv2dKMxN<T>(param) {}
virtual ~GroupDeconv2dK3x3S2() {} virtual ~GroupDeconv2dK3x3S2() {}
MaceStatus Compute( MaceStatus DoCompute(const GroupDeconvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *padded_out_data) override;
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
}; };
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_3X3_H_ #endif // MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_3X3_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/depthwise_deconv_2d_4x4.h"
namespace mace {
namespace ops {
namespace arm {
// Registers the 4x4-kernel depthwise deconvolution delegators with `registry`.
// Two entries are added, one per class (DepthwiseDeconv2dK4x4S1 and
// DepthwiseDeconv2dK4x4S2). Both are keyed on DepthwiseDeconv2d / CPU / float /
// NEON and told apart by the trailing tag (K4x4S1 vs. K4x4S2).
// Only the <float> instantiations are registered here.
void RegisterDepthwiseDeconv2dK4x4Delegator(OpDelegatorRegistry *registry) {
// K4x4S1 variant.
MACE_REGISTER_DELEGATOR(
registry, DepthwiseDeconv2dK4x4S1<float>,
delegator::DepthwiseDeconv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, DeviceType::CPU,
float, ImplType::NEON, K4x4S1));
// K4x4S2 variant.
MACE_REGISTER_DELEGATOR(
registry, DepthwiseDeconv2dK4x4S2<float>,
delegator::DepthwiseDeconv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, DeviceType::CPU,
float, ImplType::NEON, K4x4S2));
}
// Registers the 4x4-kernel group deconvolution delegators with `registry`.
// Two entries are added, one per class (GroupDeconv2dK4x4S1 and
// GroupDeconv2dK4x4S2). Both are keyed on GroupDeconv2d / CPU / float / NEON
// and told apart by the trailing tag (K4x4S1 vs. K4x4S2).
// Only the <float> instantiations are registered here.
void RegisterGroupDeconv2dK4x4Delegator(OpDelegatorRegistry *registry) {
// K4x4S1 variant.
MACE_REGISTER_DELEGATOR(
registry, GroupDeconv2dK4x4S1<float>, delegator::GroupDeconv2dParam,
MACE_DELEGATOR_KEY_EX(GroupDeconv2d, DeviceType::CPU,
float, ImplType::NEON, K4x4S1));
// K4x4S2 variant.
MACE_REGISTER_DELEGATOR(
registry, GroupDeconv2dK4x4S2<float>, delegator::GroupDeconv2dParam,
MACE_DELEGATOR_KEY_EX(GroupDeconv2d, DeviceType::CPU,
float, ImplType::NEON, K4x4S2));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_4X4_H_ #ifndef MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_4X4_H_
#define MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_4X4_H_ #define MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_4X4_H_
#include <vector> #include <vector>
#include <memory> #include <memory>
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include "mace/core/ops/op_context.h" #include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/arm/base/depthwise_deconv_2d_mxn.h"
#include "mace/ops/common/conv_pool_2d_util.h" #include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/depthwise_deconv_2d.h" #include "mace/ops/delegator/depthwise_deconv_2d.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
...@@ -29,69 +29,55 @@ ...@@ -29,69 +29,55 @@
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
class DepthwiseDeconv2dK4x4S1 : public Deconv2dBase { template<typename T>
class DepthwiseDeconv2dK4x4S1 : public DepthwiseDeconv2dKMxN<T> {
public: public:
explicit DepthwiseDeconv2dK4x4S1( explicit DepthwiseDeconv2dK4x4S1(
const delegator::DepthwiseDeconv2dParam &param) const delegator::DepthwiseDeconv2dParam &param)
: Deconv2dBase(param) {} : DepthwiseDeconv2dKMxN<T>(param) {}
virtual ~DepthwiseDeconv2dK4x4S1() {} virtual ~DepthwiseDeconv2dK4x4S1() {}
MaceStatus Compute( MaceStatus DoCompute(const DepthwiseDeconvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *padded_out_data) override;
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
}; };
class DepthwiseDeconv2dK4x4S2 : public Deconv2dBase { template<typename T>
class DepthwiseDeconv2dK4x4S2 : public DepthwiseDeconv2dKMxN<T> {
public: public:
explicit DepthwiseDeconv2dK4x4S2( explicit DepthwiseDeconv2dK4x4S2(
const delegator::DepthwiseDeconv2dParam &param) const delegator::DepthwiseDeconv2dParam &param)
: Deconv2dBase(param) {} : DepthwiseDeconv2dKMxN<T>(param) {}
virtual ~DepthwiseDeconv2dK4x4S2() {} virtual ~DepthwiseDeconv2dK4x4S2() {}
MaceStatus Compute( MaceStatus DoCompute(const DepthwiseDeconvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *padded_out_data) override;
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
}; };
class GroupDeconv2dK4x4S1 : public Deconv2dBase { template<typename T>
class GroupDeconv2dK4x4S1 : public GroupDeconv2dKMxN<T> {
public: public:
explicit GroupDeconv2dK4x4S1(const delegator::GroupDeconv2dParam &param) explicit GroupDeconv2dK4x4S1(const delegator::GroupDeconv2dParam &param)
: Deconv2dBase(param) {} : GroupDeconv2dKMxN<T>(param) {}
virtual ~GroupDeconv2dK4x4S1() {} virtual ~GroupDeconv2dK4x4S1() {}
MaceStatus Compute( MaceStatus DoCompute(const GroupDeconvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *padded_out_data) override;
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
}; };
class GroupDeconv2dK4x4S2 : public Deconv2dBase { template<typename T>
class GroupDeconv2dK4x4S2 : public GroupDeconv2dKMxN<T> {
public: public:
explicit GroupDeconv2dK4x4S2(const delegator::GroupDeconv2dParam &param) explicit GroupDeconv2dK4x4S2(const delegator::GroupDeconv2dParam &param)
: Deconv2dBase(param) {} : GroupDeconv2dKMxN<T>(param) {}
virtual ~GroupDeconv2dK4x4S2() {} virtual ~GroupDeconv2dK4x4S2() {}
MaceStatus Compute( MaceStatus DoCompute(const GroupDeconvComputeParam &p, const T *filter,
const OpContext *context, const T *input_data, T *padded_out_data) override;
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
}; };
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_4X4_H_ #endif // MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_4X4_H_
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,14 +12,14 @@ ...@@ -12,14 +12,14 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/ops/arm/fp32/depthwise_deconv_2d_general.h" #include "mace/ops/arm/base/depthwise_deconv_2d_general.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
MaceStatus DepthwiseDeconv2dGeneral::Compute(const OpContext *context, template<typename T>
MaceStatus DepthwiseDeconv2dGeneral<T>::Compute(const OpContext *context,
const Tensor *input, const Tensor *input,
const Tensor *filter, const Tensor *filter,
const Tensor *output_shape, const Tensor *output_shape,
...@@ -46,9 +46,9 @@ MaceStatus DepthwiseDeconv2dGeneral::Compute(const OpContext *context, ...@@ -46,9 +46,9 @@ MaceStatus DepthwiseDeconv2dGeneral::Compute(const OpContext *context,
Tensor::MappingGuard filter_mapper(filter); Tensor::MappingGuard filter_mapper(filter);
Tensor::MappingGuard output_mapper(output); Tensor::MappingGuard output_mapper(output);
auto input_data = input->data<float>(); const T *input_data = input->data<T>();
auto filter_data = filter->data<float>(); const T *filter_data = filter->data<T>();
auto padded_out_data = out_tensor->mutable_data<float>(); T *padded_out_data = out_tensor->mutable_data<T>();
auto &in_shape = input->shape(); auto &in_shape = input->shape();
auto &out_shape = out_tensor->shape(); auto &out_shape = out_tensor->shape();
...@@ -79,7 +79,7 @@ MaceStatus DepthwiseDeconv2dGeneral::Compute(const OpContext *context, ...@@ -79,7 +79,7 @@ MaceStatus DepthwiseDeconv2dGeneral::Compute(const OpContext *context,
index_t start1, index_t end1, index_t step1) { index_t start1, index_t end1, index_t step1) {
for (index_t b = start0; b < end0; b += step0) { for (index_t b = start0; b < end0; b += step0) {
for (index_t c = start1; c < end1; c += step1) { for (index_t c = start1; c < end1; c += step1) {
float *out_base = T *out_base =
padded_out_data + (b * channels + c) * out_img_size; padded_out_data + (b * channels + c) * out_img_size;
for (index_t i = 0; i < in_height; ++i) { for (index_t i = 0; i < in_height; ++i) {
for (index_t j = 0; j < in_width; ++j) { for (index_t j = 0; j < in_width; ++j) {
...@@ -105,7 +105,8 @@ MaceStatus DepthwiseDeconv2dGeneral::Compute(const OpContext *context, ...@@ -105,7 +105,8 @@ MaceStatus DepthwiseDeconv2dGeneral::Compute(const OpContext *context,
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
MaceStatus GroupDeconv2dGeneral::Compute(const OpContext *context, template<typename T>
MaceStatus GroupDeconv2dGeneral<T>::Compute(const OpContext *context,
const Tensor *input, const Tensor *input,
const Tensor *filter, const Tensor *filter,
const Tensor *output_shape, const Tensor *output_shape,
...@@ -131,9 +132,9 @@ MaceStatus GroupDeconv2dGeneral::Compute(const OpContext *context, ...@@ -131,9 +132,9 @@ MaceStatus GroupDeconv2dGeneral::Compute(const OpContext *context,
Tensor::MappingGuard filter_mapper(filter); Tensor::MappingGuard filter_mapper(filter);
Tensor::MappingGuard output_mapper(output); Tensor::MappingGuard output_mapper(output);
auto input_data = input->data<float>(); const T *input_data = input->data<T>();
auto filter_data = filter->data<float>(); const T *filter_data = filter->data<T>();
auto padded_out_data = out_tensor->mutable_data<float>(); T *padded_out_data = out_tensor->mutable_data<T>();
auto &in_shape = input->shape(); auto &in_shape = input->shape();
auto &out_shape = out_tensor->shape(); auto &out_shape = out_tensor->shape();
...@@ -209,19 +210,19 @@ MaceStatus GroupDeconv2dGeneral::Compute(const OpContext *context, ...@@ -209,19 +210,19 @@ MaceStatus GroupDeconv2dGeneral::Compute(const OpContext *context,
void RegisterDepthwiseDeconv2dGeneralDelegator(OpDelegatorRegistry *registry) { void RegisterDepthwiseDeconv2dGeneralDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR( MACE_REGISTER_DELEGATOR(
registry, DepthwiseDeconv2dGeneral, delegator::DepthwiseDeconv2dParam, registry, DepthwiseDeconv2dGeneral<float>,
delegator::DepthwiseDeconv2dParam,
MACE_DELEGATOR_KEY(DepthwiseDeconv2d, DeviceType::CPU, MACE_DELEGATOR_KEY(DepthwiseDeconv2d, DeviceType::CPU,
float, ImplType::NEON)); float, ImplType::NEON));
} }
void RegisterGroupDeconv2dGeneralDelegator(OpDelegatorRegistry *registry) { void RegisterGroupDeconv2dGeneralDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR( MACE_REGISTER_DELEGATOR(
registry, GroupDeconv2dGeneral, delegator::GroupDeconv2dParam, registry, GroupDeconv2dGeneral<float>, delegator::GroupDeconv2dParam,
MACE_DELEGATOR_KEY(GroupDeconv2d, DeviceType::CPU, MACE_DELEGATOR_KEY(GroupDeconv2d, DeviceType::CPU,
float, ImplType::NEON)); float, ImplType::NEON));
} }
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_GENERAL_H_ #ifndef MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_GENERAL_H_
#define MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_GENERAL_H_ #define MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_GENERAL_H_
#include <vector> #include <vector>
#include <memory> #include <memory>
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include "mace/core/ops/op_context.h" #include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/ops/arm/fp32/deconv_2d.h" #include "mace/ops/arm/base/deconv_2d.h"
#include "mace/ops/common/conv_pool_2d_util.h" #include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/depthwise_deconv_2d.h" #include "mace/ops/delegator/depthwise_deconv_2d.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
...@@ -29,13 +29,13 @@ ...@@ -29,13 +29,13 @@
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
template<typename T>
class DepthwiseDeconv2dGeneral : public Deconv2dBase { class DepthwiseDeconv2dGeneral : public Deconv2dBase {
public: public:
explicit DepthwiseDeconv2dGeneral( explicit DepthwiseDeconv2dGeneral(
const delegator::DepthwiseDeconv2dParam &param) const delegator::DepthwiseDeconv2dParam &param)
: Deconv2dBase(param) {} : Deconv2dBase(param, sizeof(T)) {}
virtual ~DepthwiseDeconv2dGeneral() {} virtual ~DepthwiseDeconv2dGeneral() {}
MaceStatus Compute( MaceStatus Compute(
...@@ -46,10 +46,11 @@ class DepthwiseDeconv2dGeneral : public Deconv2dBase { ...@@ -46,10 +46,11 @@ class DepthwiseDeconv2dGeneral : public Deconv2dBase {
Tensor *output) override; Tensor *output) override;
}; };
template<typename T>
class GroupDeconv2dGeneral : public Deconv2dBase { class GroupDeconv2dGeneral : public Deconv2dBase {
public: public:
explicit GroupDeconv2dGeneral(const delegator::GroupDeconv2dParam &param) explicit GroupDeconv2dGeneral(const delegator::GroupDeconv2dParam &param)
: Deconv2dBase(param) {} : Deconv2dBase(param, sizeof(T)) {}
virtual ~GroupDeconv2dGeneral() {} virtual ~GroupDeconv2dGeneral() {}
MaceStatus Compute( MaceStatus Compute(
...@@ -60,9 +61,8 @@ class GroupDeconv2dGeneral : public Deconv2dBase { ...@@ -60,9 +61,8 @@ class GroupDeconv2dGeneral : public Deconv2dBase {
Tensor *output) override; Tensor *output) override;
}; };
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_GENERAL_H_ #endif // MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_GENERAL_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_MXN_H_
#define MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_MXN_H_
#include <vector>
#include <memory>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/ops/arm/base/deconv_2d.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/depthwise_deconv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
// Common driver for the fixed-kernel-size depthwise deconvolution delegators.
//
// Compute() performs the steps that are independent of the kernel size:
// resizing the output (and an optional padded scratch output), clearing the
// tensor that will be written, fetching typed data pointers, building the
// DepthwiseDeconvComputeParam, and finally un-padding into `output`.
// The kernel itself is supplied by subclasses through DoCompute().
//
// T is the element type used to read `input`/`filter` and to write the
// output; sizeof(T) is forwarded to Deconv2dBase.
template<typename T>
class DepthwiseDeconv2dKMxN : public Deconv2dBase {
 public:
  explicit DepthwiseDeconv2dKMxN(
      const delegator::DepthwiseDeconv2dParam &param)
      : Deconv2dBase(param, sizeof(T)) {}
  virtual ~DepthwiseDeconv2dKMxN() {}

  // Runs the depthwise deconvolution.
  //
  // Returns the status reported by DoCompute() if it is not MACE_SUCCESS,
  // otherwise MACE_SUCCESS after the result has been un-padded into `output`.
  MaceStatus Compute(
      const OpContext *context, const Tensor *input, const Tensor *filter,
      const Tensor *output_shape, Tensor *output) override {
    std::unique_ptr<Tensor> padded_out;
    std::vector<int> out_pad_size;
    // The group count is taken from dimension 1 of the input
    // (presumably the channel dimension of an NCHW tensor - TODO confirm).
    group_ = input->dim(1);
    // NOTE(review): any status returned by ResizeOutAndPadOut is not checked
    // here; its signature is not visible from this header.
    ResizeOutAndPadOut(context,
                       input,
                       filter,
                       output_shape,
                       output,
                       &out_pad_size,
                       &padded_out);

    // Write into the padded scratch tensor when one was created, otherwise
    // straight into the caller's output.
    Tensor *out_tensor = output;
    if (padded_out != nullptr) {
      out_tensor = padded_out.get();
    }
    out_tensor->Clear();

    Tensor::MappingGuard input_mapper(input);
    Tensor::MappingGuard filter_mapper(filter);
    Tensor::MappingGuard output_mapper(output);

    // Use the template element type rather than a hard-coded float so the
    // accessors agree with the pointer types for every instantiation.
    const T *input_data = input->data<T>();
    const T *filter_data = filter->data<T>();
    T *padded_out_data = out_tensor->mutable_data<T>();

    DepthwiseDeconvComputeParam p =
        PreWorkAndGetDepthwiseDeconvParam(context, input, out_tensor);

    // Propagate a kernel failure instead of unconditionally reporting success.
    const MaceStatus status =
        DoCompute(p, filter_data, input_data, padded_out_data);
    if (status != MaceStatus::MACE_SUCCESS) {
      return status;
    }

    UnPadOutput(*out_tensor, out_pad_size, output);
    return MaceStatus::MACE_SUCCESS;
  }

  // Kernel-size specific computation implemented by subclasses.
  // `padded_out_data` points at the tensor cleared by Compute().
  virtual MaceStatus DoCompute(
      const DepthwiseDeconvComputeParam &p, const T *filter,
      const T *input_data, T *padded_out_data) = 0;
};
// Common driver for the fixed-kernel-size group deconvolution delegators.
//
// Compute() performs the steps that are independent of the kernel size:
// resizing the output (and an optional padded scratch output), clearing the
// tensor that will be written, fetching typed data pointers, building the
// GroupDeconvComputeParam, and finally un-padding into `output`.
// The kernel itself is supplied by subclasses through DoCompute().
//
// T is the element type used to read `input`/`filter` and to write the
// output; sizeof(T) is forwarded to Deconv2dBase.
template<typename T>
class GroupDeconv2dKMxN : public Deconv2dBase {
 public:
  // NOTE(review): the parameter type is delegator::DepthwiseDeconv2dParam
  // although the group subclasses pass a delegator::GroupDeconv2dParam;
  // kept unchanged here - confirm the two types are interchangeable.
  explicit GroupDeconv2dKMxN(
      const delegator::DepthwiseDeconv2dParam &param)
      : Deconv2dBase(param, sizeof(T)) {}
  virtual ~GroupDeconv2dKMxN() {}

  // Runs the group deconvolution.
  //
  // Returns the status reported by DoCompute() if it is not MACE_SUCCESS,
  // otherwise MACE_SUCCESS after the result has been un-padded into `output`.
  MaceStatus Compute(
      const OpContext *context, const Tensor *input, const Tensor *filter,
      const Tensor *output_shape, Tensor *output) override {
    std::unique_ptr<Tensor> padded_out;
    std::vector<int> out_pad_size;
    // NOTE(review): any status returned by ResizeOutAndPadOut is not checked
    // here; its signature is not visible from this header.
    ResizeOutAndPadOut(context,
                       input,
                       filter,
                       output_shape,
                       output,
                       &out_pad_size,
                       &padded_out);

    // Write into the padded scratch tensor when one was created, otherwise
    // straight into the caller's output.
    Tensor *out_tensor = output;
    if (padded_out != nullptr) {
      out_tensor = padded_out.get();
    }
    out_tensor->Clear();

    Tensor::MappingGuard input_mapper(input);
    Tensor::MappingGuard filter_mapper(filter);
    Tensor::MappingGuard output_mapper(output);

    // Use the template element type rather than a hard-coded float so the
    // pointers match DoCompute()'s `const T *` / `T *` parameters for every
    // instantiation.
    const T *input_data = input->data<T>();
    const T *filter_data = filter->data<T>();
    T *padded_out_data = out_tensor->mutable_data<T>();

    GroupDeconvComputeParam p =
        PreWorkAndGetGroupDeconvParam(context, input, out_tensor);

    // Propagate a kernel failure instead of unconditionally reporting success.
    const MaceStatus status =
        DoCompute(p, filter_data, input_data, padded_out_data);
    if (status != MaceStatus::MACE_SUCCESS) {
      return status;
    }

    UnPadOutput(*out_tensor, out_pad_size, output);
    return MaceStatus::MACE_SUCCESS;
  }

  // Kernel-size specific computation implemented by subclasses.
  // `padded_out_data` points at the tensor cleared by Compute().
  virtual MaceStatus DoCompute(
      const GroupDeconvComputeParam &p, const T *filter,
      const T *input_data, T *padded_out_data) = 0;
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_MXN_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/gemm.h"
namespace mace {
namespace ops {
namespace arm {
// Registers the Gemm<float> delegator with `registry`, constructed from
// delegator::GemmParam and keyed on Gemm / CPU / float / NEON.
// Only the float instantiation is registered here.
void RegisterGemmDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Gemm<float>, delegator::GemmParam,
MACE_DELEGATOR_KEY(Gemm, DeviceType::CPU, float, ImplType::NEON));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_OPS_ARM_FP32_GEMM_H_ #ifndef MACE_OPS_ARM_BASE_GEMM_H_
#define MACE_OPS_ARM_FP32_GEMM_H_ #define MACE_OPS_ARM_BASE_GEMM_H_
#include "mace/core/ops/op_context.h" #include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
...@@ -28,8 +28,10 @@ ...@@ -28,8 +28,10 @@
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
enum { kNoCache, kCacheLhs, kCacheRhs };
template<typename T>
class Gemm : public delegator::Gemm { class Gemm : public delegator::Gemm {
public: public:
explicit Gemm(const delegator::GemmParam &param) explicit Gemm(const delegator::GemmParam &param)
...@@ -68,26 +70,49 @@ class Gemm : public delegator::Gemm { ...@@ -68,26 +70,49 @@ class Gemm : public delegator::Gemm {
const bool transpose_out, const bool transpose_out,
const bool lhs_batched, const bool lhs_batched,
const bool rhs_batched, const bool rhs_batched,
Tensor *output) override; Tensor *output) override {
index_t rows = transpose_lhs ? lhs_cols : lhs_rows;
index_t depth = transpose_lhs ? lhs_rows : lhs_cols;
index_t cols = transpose_rhs ? rhs_rows : rhs_cols;
index_t depth2 = transpose_rhs ? rhs_cols : rhs_rows;
MACE_CHECK(depth == depth2,
"Matrices that multiply have inconsistent depth dim: ",
depth,
" vs. ",
depth2);
return Compute(context,
lhs,
rhs,
batch,
rows,
cols,
depth,
transpose_lhs ? ColMajor : RowMajor,
transpose_rhs ? ColMajor : RowMajor,
transpose_out ? ColMajor : RowMajor,
lhs_batched,
rhs_batched,
output);
}
private: protected:
void ComputeBlock(const float *packed_lhs_data, void ComputeBlock(const T *packed_lhs_data,
const float *packed_rhs_data, const T *packed_rhs_data,
const index_t depth_padded, const index_t depth_padded,
float *packed_output_data); T *packed_output_data);
void PackLhs(const MatrixMap<const float> &lhs, void PackLhs(const MatrixMap<const T> &lhs,
float *packed_lhs); T *packed_lhs);
void PackRhs(const MatrixMap<const float> &rhs, void PackRhs(const MatrixMap<const T> &rhs,
float *packed_rhs); T *packed_rhs);
void UnpackOutput(const float *packed_output,
MatrixMap<float> *output);
void UnpackOutput(const T *packed_output,
MatrixMap<T> *output);
template<int RowBlockSize, int ColBlockSize> template<int RowBlockSize, int ColBlockSize>
void Unpack(const float *packed_output, void Unpack(const T *packed_output,
MatrixMap<float> *output) { MatrixMap<T> *output) {
const index_t rows = output->rows(); const index_t rows = output->rows();
const index_t cols = output->cols(); const index_t cols = output->cols();
for (index_t r = 0; r < rows; ++r) { for (index_t r = 0; r < rows; ++r) {
...@@ -98,9 +123,9 @@ class Gemm : public delegator::Gemm { ...@@ -98,9 +123,9 @@ class Gemm : public delegator::Gemm {
} }
template<int WidthBlockSize, int DepthBlockSize> template<int WidthBlockSize, int DepthBlockSize>
void Pack(const MatrixMap<const float> &matrix, void Pack(const MatrixMap<const T> &matrix,
MatrixMajor dst_major, MatrixMajor dst_major,
float *packed_matrix) { T *packed_matrix) {
const index_t rows = matrix.rows(); const index_t rows = matrix.rows();
const index_t cols = matrix.cols(); const index_t cols = matrix.cols();
index_t depth = cols; index_t depth = cols;
...@@ -109,7 +134,7 @@ class Gemm : public delegator::Gemm { ...@@ -109,7 +134,7 @@ class Gemm : public delegator::Gemm {
depth = rows; depth = rows;
} }
const index_t depth_padded = RoundUp(depth, static_cast<index_t>(4)); const index_t depth_padded = RoundUp(depth, static_cast<index_t>(4));
memset(packed_matrix, 0, sizeof(float) * WidthBlockSize * depth_padded); memset(packed_matrix, 0, sizeof(T) * WidthBlockSize * depth_padded);
if (dst_major == ColMajor) { if (dst_major == ColMajor) {
for (index_t c = 0; c < cols; ++c) { for (index_t c = 0; c < cols; ++c) {
for (index_t r = 0; r < rows; ++r) { for (index_t r = 0; r < rows; ++r) {
...@@ -125,31 +150,14 @@ class Gemm : public delegator::Gemm { ...@@ -125,31 +150,14 @@ class Gemm : public delegator::Gemm {
} }
} }
private:
Buffer pack_cache_; Buffer pack_cache_;
bool should_cache_pack_; bool should_cache_pack_;
int cached_; int cached_;
}; };
template<>
void Gemm::Pack<4, 4>(const MatrixMap<const float> &matrix,
MatrixMajor dst_major,
float *packed_matrix);
template<>
void Gemm::Pack<8, 4>(const MatrixMap<const float> &matrix,
MatrixMajor dst_major,
float *packed_matrix);
template<>
void Gemm::Unpack<4, 8>(const float *packed_output, MatrixMap<float> *output);
template<>
void Gemm::Unpack<8, 8>(const float *packed_output, MatrixMap<float> *output);
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_OPS_ARM_FP32_GEMM_H_ #endif // MACE_OPS_ARM_BASE_GEMM_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/gemv.h"
namespace mace {
namespace ops {
namespace arm {
// Registers the Gemv<float> delegator with `registry`, constructed from the
// generic DelegatorParam and keyed on Gemv / CPU / float / NEON.
// Only the float instantiation is registered here.
void RegisterGemvDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Gemv<float>, DelegatorParam,
MACE_DELEGATOR_KEY(Gemv, DeviceType::CPU, float, ImplType::NEON));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved. // Copyright 2020 The MACE Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef MACE_OPS_ARM_FP32_GEMV_H_ #ifndef MACE_OPS_ARM_BASE_GEMV_H_
#define MACE_OPS_ARM_FP32_GEMV_H_ #define MACE_OPS_ARM_BASE_GEMV_H_
#include "mace/core/ops/op_context.h" #include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
...@@ -23,8 +23,8 @@ ...@@ -23,8 +23,8 @@
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
template<typename T>
class Gemv : public delegator::Gemv { class Gemv : public delegator::Gemv {
public: public:
explicit Gemv(const DelegatorParam &param) : delegator::Gemv(param) {} explicit Gemv(const DelegatorParam &param) : delegator::Gemv(param) {}
...@@ -43,9 +43,8 @@ class Gemv : public delegator::Gemv { ...@@ -43,9 +43,8 @@ class Gemv : public delegator::Gemv {
Tensor *output) override; Tensor *output) override;
}; };
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_OPS_ARM_FP32_GEMV_H_ #endif // MACE_OPS_ARM_BASE_GEMV_H_
...@@ -12,60 +12,24 @@ ...@@ -12,60 +12,24 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/ops/delegator/activation.h"
#include <arm_neon.h> #include <arm_neon.h>
#include <algorithm> #include <algorithm>
#include "mace/ops/arm/base/activation.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
class Activation : public delegator::Activation {
public:
explicit Activation(const delegator::ActivationParam &param)
: delegator::Activation(param) {}
~Activation() = default;
MaceStatus Compute(const OpContext *context,
const Tensor *input, Tensor *output) override;
private:
void DoActivation(const OpContext *context,
const Tensor *input, Tensor *output);
};
MaceStatus Activation::Compute(const OpContext *context,
const Tensor *input, Tensor *output) {
Tensor::MappingGuard input_guard(input);
if (input != output) {
MACE_RETURN_IF_ERROR(output->ResizeLike(input));
Tensor::MappingGuard output_guard(output);
DoActivation(context, input, output);
} else {
DoActivation(context, input, output);
}
return MaceStatus::MACE_SUCCESS;
}
void Activation::DoActivation(const OpContext *context,
const Tensor *input,
Tensor *output) {
auto input_data = input->data<float>();
auto output_data = output->mutable_data<float>();
const index_t size = input->size();
utils::ThreadPool &thread_pool =
context->device()->cpu_runtime()->thread_pool();
switch (type_) { template<>
case RELU: { void Activation<float>::ActivateRelu(utils::ThreadPool *thread_pool,
const float *input_data,
const index_t input_size,
float *output_data) {
const float32x4_t vzero = vdupq_n_f32(0.f); const float32x4_t vzero = vdupq_n_f32(0.f);
const index_t block_count = size / 4; const index_t block_count = input_size / 4;
thread_pool.Compute1D( thread_pool->Compute1D(
[=](index_t start, index_t end, index_t step) { [=](index_t start, index_t end, index_t step) {
auto input_ptr = input_data + start * 4; auto input_ptr = input_data + start * 4;
auto output_ptr = output_data + start * 4; auto output_ptr = output_data + start * 4;
...@@ -82,19 +46,21 @@ void Activation::DoActivation(const OpContext *context, ...@@ -82,19 +46,21 @@ void Activation::DoActivation(const OpContext *context,
0, block_count, 1); 0, block_count, 1);
// remain // remain
for (index_t i = block_count * 4; i < size; ++i) { for (index_t i = block_count * 4; i < input_size; ++i) {
output_data[i] = std::max(0.f, input_data[i]); output_data[i] = std::max(0.f, input_data[i]);
} }
}
break; template<>
} void Activation<float>::ActivateRelux(utils::ThreadPool *thread_pool,
const float *input_data,
case RELUX: { const index_t input_size,
float *output_data) {
const float32x4_t vzero = vdupq_n_f32(0.f); const float32x4_t vzero = vdupq_n_f32(0.f);
const float32x4_t vlimit = vdupq_n_f32(limit_); const float32x4_t vlimit = vdupq_n_f32(limit_);
const index_t block_count = size / 4; const index_t block_count = input_size / 4;
thread_pool.Compute1D( thread_pool->Compute1D(
[=](index_t start, index_t end, index_t step) { [=](index_t start, index_t end, index_t step) {
auto input_ptr = input_data + start * 4; auto input_ptr = input_data + start * 4;
auto output_ptr = output_data + start * 4; auto output_ptr = output_data + start * 4;
...@@ -112,19 +78,21 @@ void Activation::DoActivation(const OpContext *context, ...@@ -112,19 +78,21 @@ void Activation::DoActivation(const OpContext *context,
0, block_count, 1); 0, block_count, 1);
// remain // remain
for (index_t i = block_count * 4; i < size; ++i) { for (index_t i = block_count * 4; i < input_size; ++i) {
output_data[i] = std::max(0.f, std::min(limit_, input_data[i])); output_data[i] = std::max(0.f, std::min(limit_, input_data[i]));
} }
}
break; template<>
} void Activation<float>::ActivateLeakyRelu(utils::ThreadPool *thread_pool,
const float *input_data,
case LEAKYRELU: { const index_t input_size,
float *output_data) {
const float32x4_t vzero = vdupq_n_f32(0.f); const float32x4_t vzero = vdupq_n_f32(0.f);
const float32x4_t valpha = vdupq_n_f32(leakyrelu_coefficient_); const float32x4_t valpha = vdupq_n_f32(leakyrelu_coefficient_);
const index_t block_count = size / 4; const index_t block_count = input_size / 4;
thread_pool.Compute1D( thread_pool->Compute1D(
[=](index_t start, index_t end, index_t step) { [=](index_t start, index_t end, index_t step) {
auto input_ptr = input_data + start * 4; auto input_ptr = input_data + start * 4;
auto output_ptr = output_data + start * 4; auto output_ptr = output_data + start * 4;
...@@ -143,55 +111,40 @@ void Activation::DoActivation(const OpContext *context, ...@@ -143,55 +111,40 @@ void Activation::DoActivation(const OpContext *context,
0, block_count, 1); 0, block_count, 1);
// remain // remain
for (index_t i = block_count * 4; i < size; ++i) { for (index_t i = block_count * 4; i < input_size; ++i) {
output_data[i] = std::max(input_data[i], 0.f) + output_data[i] = std::max(input_data[i], 0.f) +
std::min(input_data[i], 0.f) * leakyrelu_coefficient_; std::min(input_data[i], 0.f) * leakyrelu_coefficient_;
} }
}
break; template<>
} void Activation<float>::ActivateTanh(utils::ThreadPool *thread_pool,
const float *input_data,
case TANH: { const index_t input_size,
thread_pool.Compute1D( float *output_data) {
thread_pool->Compute1D(
[=](index_t start, index_t end, index_t step) { [=](index_t start, index_t end, index_t step) {
for (index_t i = start; i < end; i += step) { for (index_t i = start; i < end; i += step) {
output_data[i] = std::tanh(input_data[i]); output_data[i] = std::tanh(input_data[i]);
} }
}, },
0, size, 1); 0, input_size, 1);
}
break;
}
case SIGMOID: { template<>
thread_pool.Compute1D( void Activation<float>::ActivateSigmoid(utils::ThreadPool *thread_pool,
const float *input_data,
const index_t input_size,
float *output_data) {
thread_pool->Compute1D(
[=](index_t start, index_t end, index_t step) { [=](index_t start, index_t end, index_t step) {
for (index_t i = start; i < end; i += step) { for (index_t i = start; i < end; i += step) {
output_data[i] = 1 / (1 + std::exp(-(input_data[i]))); output_data[i] = 1 / (1 + std::exp(-(input_data[i])));
} }
}, },
0, size, 1); 0, input_size, 1);
break;
}
case NOOP: {
break;
}
default: {
MACE_NOT_IMPLEMENTED;
}
}
}
void RegisterActivationDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Activation, delegator::ActivationParam,
MACE_DELEGATOR_KEY(Activation, DeviceType::CPU, float, ImplType::NEON));
} }
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
...@@ -13,69 +13,21 @@ ...@@ -13,69 +13,21 @@
// limitations under the License. // limitations under the License.
#include <arm_neon.h> #include <arm_neon.h>
#include "mace/ops/delegator/bias_add.h"
#include "mace/ops/arm/base/bias_add.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
class BiasAdd : public delegator::BiasAdd {
public:
explicit BiasAdd(const DelegatorParam &param) : delegator::BiasAdd(param) {}
~BiasAdd() = default;
MaceStatus Compute(const OpContext *context, const Tensor *input,
const Tensor *bias, Tensor *output) override;
private:
void AddBias(const OpContext *context, const Tensor *input,
const Tensor *bias, Tensor *output);
};
MaceStatus BiasAdd::Compute(const OpContext *context,
const Tensor *input,
const Tensor *bias,
Tensor *output) {
Tensor::MappingGuard input_guard(input);
Tensor::MappingGuard bias_guard(bias);
if (input != output) {
MACE_RETURN_IF_ERROR(output->ResizeLike(input));
if (bias == nullptr) {
output->Copy(*input);
} else {
Tensor::MappingGuard output_guard(output);
AddBias(context, input, bias, output);
}
} else {
if (bias != nullptr) {
AddBias(context, input, bias, output);
}
}
return MaceStatus::MACE_SUCCESS;
}
void BiasAdd::AddBias(const OpContext *context, template<>
const Tensor *input, void BiasAdd<float>::Add1DimBias(
const Tensor *bias, utils::ThreadPool *thread_pool, const float *input_data,
mace::Tensor *output) { const float *bias_data, float *output_data, const index_t batch,
auto input_data = input->data<float>(); const index_t channels, const index_t image_size) {
auto bias_data = bias->data<float>();
auto output_data = output->mutable_data<float>();
const index_t batch = input->dim(0);
const index_t channels = input->dim(1);
const index_t height = output->dim(2);
const index_t width = output->dim(3);
const index_t image_size = height * width;
const index_t block_count = image_size / 4; const index_t block_count = image_size / 4;
const index_t remain = image_size % 4; const index_t remain = image_size % 4;
thread_pool->Compute2D([=](index_t start0, index_t end0, index_t step0,
utils::ThreadPool
&thread_pool = context->device()->cpu_runtime()->thread_pool();
if (bias->dim_size() == 1) {
thread_pool.Compute2D([=](index_t start0, index_t end0, index_t step0,
index_t start1, index_t end1, index_t step1) { index_t start1, index_t end1, index_t step1) {
for (index_t b = start0; b < end0; b += step0) { for (index_t b = start0; b < end0; b += step0) {
const index_t b_offset = b * channels; const index_t b_offset = b * channels;
...@@ -100,8 +52,16 @@ void BiasAdd::AddBias(const OpContext *context, ...@@ -100,8 +52,16 @@ void BiasAdd::AddBias(const OpContext *context,
} }
} }
}, 0, batch, 1, 0, channels, 1); }, 0, batch, 1, 0, channels, 1);
} else { }
thread_pool.Compute2D([=](index_t start0, index_t end0, index_t step0,
template<>
void BiasAdd<float>::Add2DimsBias(
utils::ThreadPool *thread_pool, const float *input_data,
const float *bias_data, float *output_data, const index_t batch,
const index_t channels, const index_t image_size) {
const index_t block_count = image_size / 4;
const index_t remain = image_size % 4;
thread_pool->Compute2D([=](index_t start0, index_t end0, index_t step0,
index_t start1, index_t end1, index_t step1) { index_t start1, index_t end1, index_t step1) {
for (index_t b = start0; b < end0; b += step0) { for (index_t b = start0; b < end0; b += step0) {
const index_t b_offset = b * channels; const index_t b_offset = b * channels;
...@@ -126,16 +86,8 @@ void BiasAdd::AddBias(const OpContext *context, ...@@ -126,16 +86,8 @@ void BiasAdd::AddBias(const OpContext *context,
} }
} }
}, 0, batch, 1, 0, channels, 1); }, 0, batch, 1, 0, channels, 1);
}
}
void RegisterBiasAddDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, BiasAdd, DelegatorParam,
MACE_DELEGATOR_KEY(BiasAdd, DeviceType::CPU, float, ImplType::NEON));
} }
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
...@@ -21,7 +21,6 @@ ...@@ -21,7 +21,6 @@
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace arm { namespace arm {
namespace fp32 {
inline float32x4_t neon_vfma_lane_0(float32x4_t a, inline float32x4_t neon_vfma_lane_0(float32x4_t a,
float32x4_t b, float32x4_t b,
...@@ -63,7 +62,6 @@ inline float32x4_t neon_vfma_lane_3(float32x4_t a, ...@@ -63,7 +62,6 @@ inline float32x4_t neon_vfma_lane_3(float32x4_t a,
#endif #endif
} }
} // namespace fp32
} // namespace arm } // namespace arm
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
......
此差异已折叠。
此差异已折叠。
...@@ -18,8 +18,8 @@ ...@@ -18,8 +18,8 @@
#include "mace/ops/common/conv_pool_2d_util.h" #include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/conv_2d.h" #include "mace/ops/delegator/conv_2d.h"
#include "mace/utils/memory.h"
#include "mace/utils/math.h" #include "mace/utils/math.h"
#include "mace/utils/memory.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
......
...@@ -20,8 +20,8 @@ ...@@ -20,8 +20,8 @@
#include "mace/core/ops/op_context.h" #include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/ops/arm/fp32/conv_2d.h" #include "mace/ops/arm/base/conv_2d.h"
#include "mace/ops/arm/fp32/gemm.h" #include "mace/ops/arm/base/gemm.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
namespace mace { namespace mace {
...@@ -32,7 +32,7 @@ namespace fp32 { ...@@ -32,7 +32,7 @@ namespace fp32 {
class Conv2dK3x3Winograd : public Conv2dBase { class Conv2dK3x3Winograd : public Conv2dBase {
public: public:
explicit Conv2dK3x3Winograd(const delegator::Conv2dParam &param) explicit Conv2dK3x3Winograd(const delegator::Conv2dParam &param)
: Conv2dBase(param), : Conv2dBase(param, sizeof(float)),
gemm_(delegator::GemmParam()), gemm_(delegator::GemmParam()),
transformed_filter_(nullptr), transformed_filter_(nullptr),
out_tile_size_(0) {} out_tile_size_(0) {}
...@@ -94,7 +94,7 @@ class Conv2dK3x3Winograd : public Conv2dBase { ...@@ -94,7 +94,7 @@ class Conv2dK3x3Winograd : public Conv2dBase {
index_t tile_count, index_t tile_count,
float *output); float *output);
Gemm gemm_; Gemm<float> gemm_;
std::unique_ptr<Tensor> transformed_filter_; std::unique_ptr<Tensor> transformed_filter_;
index_t out_tile_size_; index_t out_tile_size_;
}; };
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册