提交 9aff3c14 编写于 作者: L luxuhui

refactor: refactor the delegators for arm

N/A
Signed-off-by: NLuxuhui <luxuhui@xiaomi.com>
上级 fbd0ff09
......@@ -60,6 +60,7 @@ MaceStatus OpDelegatorRegistry::Register(const DelegatorInfo &key,
DelegatorCreator OpDelegatorRegistry::GetCreator(
const DelegatorInfo &key) const {
if (registry_.count(key) > 0) {
VLOG(3) << "find delegator creator: " << key.ToString();
return registry_.at(key);
}
......
......@@ -105,6 +105,7 @@ cc_library(
name = "arm_neon_kernels",
srcs = glob(
[
"arm/base/*.cc",
"arm/fp32/*.cc",
"arm/fp16/gemv.h",
],
......@@ -121,6 +122,7 @@ cc_library(
)),
hdrs = glob(
[
"arm/base/*.h",
"arm/fp32/*.h",
],
) + if_quantize_enabled(glob(
......
......@@ -5,6 +5,9 @@ file(GLOB OPS_REF_Q8_KERNELS_SRCS
ref/q8/*.cc
)
file(GLOB OPS_ARM_NEON_BASE_KERNELS_SRCS
arm/base/*.cc
)
file(GLOB OPS_ARM_NEON_FP32_KERNELS_SRCS
arm/fp32/*.cc
)
......@@ -32,7 +35,7 @@ if(MACE_ENABLE_QUANTIZE)
endif(MACE_ENABLE_QUANTIZE)
if(MACE_ENABLE_NEON)
set(OPS_SRCS ${OPS_SRCS} ${OPS_ARM_NEON_FP32_KERNELS_SRCS})
set(OPS_SRCS ${OPS_SRCS} ${OPS_ARM_NEON_BASE_KERNELS_SRCS} ${OPS_ARM_NEON_FP32_KERNELS_SRCS})
if(MACE_ENABLE_QUANTIZE)
set(OPS_SRCS ${OPS_SRCS} ${OPS_ARM_NEON_Q8_KERNELS_SRCS})
endif(MACE_ENABLE_QUANTIZE)
......
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/activation.h"
namespace mace {
namespace ops {
namespace arm {
template<typename T>
MaceStatus Activation<T>::Compute(const OpContext *context,
const Tensor *input, Tensor *output) {
Tensor::MappingGuard input_guard(input);
if (input != output) {
MACE_RETURN_IF_ERROR(output->ResizeLike(input));
Tensor::MappingGuard output_guard(output);
DoActivation(context, input, output);
} else {
DoActivation(context, input, output);
}
return MaceStatus::MACE_SUCCESS;
}
template<typename T>
void Activation<T>::DoActivation(const OpContext *context,
const Tensor *input,
Tensor *output) {
const T *input_data = input->data<T>();
T *output_data = output->mutable_data<T>();
const index_t size = input->size();
utils::ThreadPool &thread_pool =
context->device()->cpu_runtime()->thread_pool();
switch (type_) {
case RELU: {
ActivateRelu(&thread_pool, input_data, size, output_data);
break;
}
case RELUX: {
ActivateRelux(&thread_pool, input_data, size, output_data);
break;
}
case LEAKYRELU: {
ActivateLeakyRelu(&thread_pool, input_data, size, output_data);
break;
}
case TANH: {
ActivateTanh(&thread_pool, input_data, size, output_data);
break;
}
case SIGMOID: {
ActivateSigmoid(&thread_pool, input_data, size, output_data);
break;
}
case NOOP: {
break;
}
default: {
MACE_NOT_IMPLEMENTED;
}
}
}
void RegisterActivationDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Activation<float>, delegator::ActivationParam,
MACE_DELEGATOR_KEY(Activation, DeviceType::CPU, float, ImplType::NEON));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_ACTIVATION_H_
#define MACE_OPS_ARM_BASE_ACTIVATION_H_
#include "mace/ops/delegator/activation.h"
namespace mace {
namespace ops {
namespace arm {
template<typename T>
class Activation : public delegator::Activation {
public:
explicit Activation(const delegator::ActivationParam &param)
: delegator::Activation(param) {}
~Activation() = default;
MaceStatus Compute(const OpContext *context,
const Tensor *input, Tensor *output) override;
private:
void DoActivation(const OpContext *context,
const Tensor *input, Tensor *output);
void ActivateRelu(utils::ThreadPool *thread_pool, const T *input_data,
const index_t input_size, T *output_data);
void ActivateRelux(utils::ThreadPool *thread_pool, const T *input_data,
const index_t input_size, T *output_data);
void ActivateLeakyRelu(utils::ThreadPool *thread_pool, const T *input_data,
const index_t input_size, T *output_data);
void ActivateTanh(utils::ThreadPool *thread_pool, const T *input_data,
const index_t input_size, T *output_data);
void ActivateSigmoid(utils::ThreadPool *thread_pool, const T *input_data,
const index_t input_size, T *output_data);
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_ACTIVATION_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/bias_add.h"
namespace mace {
namespace ops {
namespace arm {
template<typename T>
MaceStatus BiasAdd<T>::Compute(const OpContext *context, const Tensor *input,
const Tensor *bias, Tensor *output) {
if (input != output) {
if (bias == nullptr) {
output->Copy(*input);
} else {
MACE_RETURN_IF_ERROR(output->ResizeLike(input));
Tensor::MappingGuard input_guard(input);
Tensor::MappingGuard bias_guard(bias);
Tensor::MappingGuard output_guard(output);
AddBias(context, input, bias, output);
}
} else {
if (bias != nullptr) {
Tensor::MappingGuard input_guard(input);
Tensor::MappingGuard bias_guard(bias);
AddBias(context, input, bias, output);
}
}
return MaceStatus::MACE_SUCCESS;
}
template<typename T>
void BiasAdd<T>::AddBias(const OpContext *context, const Tensor *input,
const Tensor *bias, mace::Tensor *output) {
auto input_data = input->data<T>();
auto bias_data = bias->data<T>();
auto output_data = output->mutable_data<T>();
const index_t batch = input->dim(0);
const index_t channels = input->dim(1);
const index_t height = input->dim(2);
const index_t width = input->dim(3);
const index_t image_size = height * width;
utils::ThreadPool
&thread_pool = context->device()->cpu_runtime()->thread_pool();
if (bias->dim_size() == 1) {
Add1DimBias(&thread_pool, input_data, bias_data,
output_data, batch, channels, image_size);
} else {
Add2DimsBias(&thread_pool, input_data, bias_data,
output_data, batch, channels, image_size);
}
}
void RegisterBiasAddDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, BiasAdd<float>, DelegatorParam,
MACE_DELEGATOR_KEY(BiasAdd, DeviceType::CPU, float, ImplType::NEON));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_BIAS_ADD_H_
#define MACE_OPS_ARM_BASE_BIAS_ADD_H_
#include "mace/ops/delegator/bias_add.h"
namespace mace {
namespace ops {
namespace arm {
template<typename T>
class BiasAdd : public delegator::BiasAdd {
public:
explicit BiasAdd(const DelegatorParam &param) : delegator::BiasAdd(param) {}
~BiasAdd() = default;
MaceStatus Compute(const OpContext *context, const Tensor *input,
const Tensor *bias, Tensor *output) override;
private:
void AddBias(const OpContext *context, const Tensor *input,
const Tensor *bias, Tensor *output);
void Add1DimBias(utils::ThreadPool *thread_pool, const T *input_data,
const T *bias_data, T *output_data,
const index_t batch, const index_t channels,
const index_t image_size);
void Add2DimsBias(utils::ThreadPool *thread_pool, const T *input_data,
const T *bias_data, T *output_data,
const index_t batch, const index_t channels,
const index_t image_size);
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_BIAS_ADD_H_
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,18 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/ops/arm/base/conv_2d.h"
#include <algorithm>
#include <memory>
#include <utility>
#include <algorithm>
#include "mace/utils/memory.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
void Conv2dBase::CalOutputShapeAndInputPadSize(
const std::vector<index_t> &input_shape,
......@@ -164,10 +163,10 @@ MaceStatus Conv2dBase::ResizeOutAndPadInOut(const OpContext *context,
auto scratch_buffer = context->device()->scratch_buffer();
const index_t padded_in_size =
MACE_EXTRA_BUFFER_PAD_SIZE + (is_in_padded ? PadAlignSize(
sizeof(float) * batch * in_channels * padded_in_height
type_size_ * batch * in_channels * padded_in_height
* padded_in_width) : 0);
const index_t padded_out_size = is_out_padded ? PadAlignSize(
sizeof(float) * batch * out_channels * padded_out_height
type_size_ * batch * out_channels * padded_out_height
* padded_out_width) : 0;
scratch_buffer->Rewind();
......@@ -176,7 +175,7 @@ MaceStatus Conv2dBase::ResizeOutAndPadInOut(const OpContext *context,
std::unique_ptr<Tensor>
padded_in =
make_unique<Tensor>(scratch_buffer->Scratch(padded_in_size),
DataType::DT_FLOAT);
input->dtype());
padded_in->Resize({batch, in_channels, padded_in_height, padded_in_width});
PadInput(*input, in_pad_size[0], in_pad_size[2], padded_in.get());
*padded_input = std::move(padded_in);
......@@ -185,7 +184,7 @@ MaceStatus Conv2dBase::ResizeOutAndPadInOut(const OpContext *context,
std::unique_ptr<Tensor>
padded_out =
make_unique<Tensor>(scratch_buffer->Scratch(padded_out_size),
DataType::DT_FLOAT);
output->dtype());
padded_out->Resize({batch, out_channels, padded_out_height,
padded_out_width});
*padded_output = std::move(padded_out);
......@@ -206,8 +205,8 @@ void Conv2dBase::PadInput(const Tensor &src,
const index_t padded_width = dst->dim(3);
const int pad_bottom = static_cast<int>(padded_height - height - pad_top);
const int pad_right = static_cast<int>(padded_width - width - pad_left);
auto in_data = src.data<float>();
auto padded_in_data = dst->mutable_data<float>();
auto in_data = src.data<uint8_t>();
auto padded_in_data = dst->mutable_data<uint8_t>();
const index_t img_size = height * width;
const index_t padded_img_size = padded_height * padded_width;
......@@ -215,25 +214,26 @@ void Conv2dBase::PadInput(const Tensor &src,
for (index_t b = 0; b < batch; ++b) {
for (index_t c = 0; c < channels; ++c) {
const index_t bc = b * channels + c;
const float *in_base = in_data + bc * img_size;
float *padded_in_base = padded_in_data + bc * padded_img_size;
const uint8_t *in_base = in_data + bc * img_size * type_size_;
uint8_t *padded_in_base =
padded_in_data + bc * padded_img_size * type_size_;
memset(padded_in_base, 0, sizeof(float) * pad_top * padded_width);
padded_in_base += pad_top * padded_width;
memset(padded_in_base, 0, type_size_ * pad_top * padded_width);
padded_in_base += pad_top * padded_width * type_size_;
for (index_t h = 0; h < height; ++h) {
memset(padded_in_base,
0,
sizeof(float) * pad_left);
memcpy(padded_in_base + pad_left,
type_size_ * pad_left);
memcpy(padded_in_base + pad_left * type_size_,
in_base,
sizeof(float) * width);
memset(padded_in_base + pad_left + width,
type_size_ * width);
memset(padded_in_base + (pad_left + width) * type_size_,
0,
sizeof(float) * pad_right);
in_base += width;
padded_in_base += padded_width;
type_size_ * pad_right);
in_base += width * type_size_;
padded_in_base += padded_width * type_size_;
}
memset(padded_in_base, 0, sizeof(float) * pad_bottom * padded_width);
memset(padded_in_base, 0, type_size_ * pad_bottom * padded_width);
}
}
}
......@@ -247,8 +247,8 @@ void Conv2dBase::UnPadOutput(const Tensor &src, Tensor *dst) {
const index_t padded_height = src.dim(2);
const index_t padded_width = src.dim(3);
auto padded_out_data = src.data<float>();
auto out_data = dst->mutable_data<float>();
auto padded_out_data = src.data<uint8_t>();
auto out_data = dst->mutable_data<uint8_t>();
const index_t img_size = height * width;
const index_t padded_img_size = padded_height * padded_width;
......@@ -256,21 +256,93 @@ void Conv2dBase::UnPadOutput(const Tensor &src, Tensor *dst) {
for (index_t b = 0; b < batch; ++b) {
for (index_t c = 0; c < channels; ++c) {
const index_t bc = (b * channels + c);
float *out_base = out_data + bc * img_size;
const float *padded_out_base = padded_out_data + bc * padded_img_size;
uint8_t *out_base = out_data + bc * img_size * type_size_;
const uint8_t *padded_out_base =
padded_out_data + bc * padded_img_size * type_size_;
for (index_t h = 0; h < height; ++h) {
memcpy(out_base,
padded_out_base,
sizeof(float) * width);
out_base += width;
padded_out_base += padded_width;
memcpy(out_base, padded_out_base, type_size_ * width);
out_base += width * type_size_;
padded_out_base += padded_width * type_size_;
} // h
} // c
} // b
}
} // namespace fp32
ConvComputeParam Conv2dBase::PreWorkAndGetConv2DParam(
const OpContext *context, const Tensor *in_tensor, Tensor *out_tensor) {
auto &in_shape = in_tensor->shape();
auto &out_shape = out_tensor->shape();
const index_t batch = in_shape[0];
const index_t in_channels = in_shape[1];
const index_t in_height = in_shape[2];
const index_t in_width = in_shape[3];
const index_t out_channels = out_shape[1];
const index_t out_height = out_shape[2];
const index_t out_width = out_shape[3];
const index_t in_image_size = in_height * in_width;
const index_t out_image_size = out_height * out_width;
const index_t in_batch_size = in_channels * in_image_size;
const index_t out_batch_size = out_channels * out_image_size;
utils::ThreadPool
&thread_pool = context->device()->cpu_runtime()->thread_pool();
return ConvComputeParam(batch, in_channels, in_height, in_width,
out_channels, out_height, out_width,
in_image_size, out_image_size,
in_batch_size, out_batch_size, &thread_pool);
}
DepthwiseConvComputeParam Conv2dBase::PreWorkAndGetDepthwiseConv2DParam(
const OpContext *context, const Tensor *input,
const Tensor *filter, Tensor *output) {
std::vector<index_t> out_shape(4);
std::vector<int> paddings(2);
auto &in_shape = input->shape();
auto &filter_shape = filter->shape();
CalOutputShapeAndInputPadSize(in_shape, filter_shape, &out_shape, &paddings);
out_shape[1] *= filter_shape[1];
MACE_CHECK(output->Resize(out_shape) == MaceStatus::MACE_SUCCESS,
"Resize failed.");
output->Clear();
const int pad_top = paddings[0] / 2;
const int pad_left = paddings[1] / 2;
const index_t batch = in_shape[0];
const index_t in_channels = in_shape[1];
const index_t in_height = in_shape[2];
const index_t in_width = in_shape[3];
const index_t out_channels = out_shape[1];
const index_t out_height = out_shape[2];
const index_t out_width = out_shape[3];
const index_t in_image_size = in_height * in_width;
const index_t out_image_size = out_height * out_width;
const index_t in_batch_size = in_channels * in_image_size;
const index_t out_batch_size = out_channels * out_image_size;
const index_t multiplier = out_channels / in_channels;
std::vector<index_t> out_bounds;
CalOutputBoundaryWithoutUsingInputPad(out_shape, paddings, &out_bounds);
const index_t valid_h_start = out_bounds[0];
const index_t valid_h_stop = out_bounds[1];
const index_t valid_w_start = out_bounds[2];
const index_t valid_w_stop = out_bounds[3];
utils::ThreadPool
&thread_pool = context->device()->cpu_runtime()->thread_pool();
return DepthwiseConvComputeParam(
batch, in_channels, in_height, in_width, out_channels, out_height,
out_width, in_image_size, out_image_size, in_batch_size, out_batch_size,
&thread_pool, pad_top, pad_left, multiplier, valid_h_start, valid_h_stop,
valid_w_start, valid_w_stop);
}
} // namespace arm
} // namespace ops
} // namespace mace
......
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,28 +12,97 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_CONV_2D_H_
#define MACE_OPS_ARM_FP32_CONV_2D_H_
#ifndef MACE_OPS_ARM_BASE_CONV_2D_H_
#define MACE_OPS_ARM_BASE_CONV_2D_H_
#include <vector>
#include <memory>
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/delegator/conv_2d.h"
#include "mace/ops/arm/fp32/gemm.h"
#include "mace/ops/arm/base/gemm.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/conv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
struct ConvComputeParam {
const index_t batch;
const index_t in_channels;
const index_t in_height;
const index_t in_width;
const index_t out_channels;
const index_t out_height;
const index_t out_width;
const index_t in_image_size;
const index_t out_image_size;
const index_t in_batch_size;
const index_t out_batch_size;
utils::ThreadPool &thread_pool;
ConvComputeParam(const index_t b,
const index_t in_c,
const index_t in_h,
const index_t in_w,
const index_t out_c,
const index_t out_h,
const index_t out_w,
const index_t in_size,
const index_t out_size,
const index_t in_b_size,
const index_t out_b_size,
utils::ThreadPool *thrd_pool)
: batch(b), in_channels(in_c), in_height(in_h), in_width(in_w),
out_channels(out_c), out_height(out_h), out_width(out_w),
in_image_size(in_size), out_image_size(out_size),
in_batch_size(in_b_size), out_batch_size(out_b_size),
thread_pool(*thrd_pool) {}
};
struct DepthwiseConvComputeParam : public ConvComputeParam {
const int pad_top;
const int pad_left;
const index_t multiplier;
const index_t valid_h_start;
const index_t valid_h_stop;
const index_t valid_w_start;
const index_t valid_w_stop;
DepthwiseConvComputeParam(const index_t b,
const index_t in_c,
const index_t in_h,
const index_t in_w,
const index_t out_c,
const index_t out_h,
const index_t out_w,
const index_t in_size,
const index_t out_size,
const index_t in_b_size,
const index_t out_b_size,
utils::ThreadPool *thrd_pool,
const int pad_top_data,
const int pad_left_data,
const index_t multiplier_data,
const index_t valid_height_start,
const index_t valid_height_stop,
const index_t valid_width_start,
const index_t valid_width_stop)
: ConvComputeParam(b, in_c, in_h, in_w, out_c, out_h, out_w,
in_size, out_size, in_b_size, out_b_size, thrd_pool),
pad_top(pad_top_data), pad_left(pad_left_data),
multiplier(multiplier_data),
valid_h_start(valid_height_start), valid_h_stop(valid_height_stop),
valid_w_start(valid_width_start), valid_w_stop(valid_width_stop) {}
};
class Conv2dBase : public delegator::Conv2d {
public:
explicit Conv2dBase(const delegator::Conv2dParam &param)
: delegator::Conv2d(param) {}
explicit Conv2dBase(const delegator::Conv2dParam &param, int type_size)
: delegator::Conv2d(param), type_size_(type_size) {}
virtual ~Conv2dBase() = default;
......@@ -72,11 +141,19 @@ class Conv2dBase : public delegator::Conv2d {
const int pad_left,
Tensor *dst);
void UnPadOutput(const Tensor &src, Tensor *dst);
ConvComputeParam PreWorkAndGetConv2DParam(
const OpContext *context, const Tensor *in_tensor, Tensor *out_tensor);
DepthwiseConvComputeParam PreWorkAndGetDepthwiseConv2DParam(
const OpContext *context, const Tensor *input,
const Tensor *filter, Tensor *output);
private:
int type_size_;
};
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_CONV_2D_H_
#endif // MACE_OPS_ARM_BASE_CONV_2D_H_
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,36 +12,19 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/ops/arm/fp32/gemm.h"
#include "mace/ops/delegator/conv_2d.h"
#include "mace/ops/arm/base/conv_2d_1x1.h"
#include <vector>
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Conv2dK1x1 : public Conv2dBase {
public:
explicit Conv2dK1x1(const delegator::Conv2dParam &param)
: Conv2dBase(param),
gemm_(delegator::GemmParam()) {}
virtual ~Conv2dK1x1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
private:
Gemm gemm_;
};
MaceStatus Conv2dK1x1::Compute(const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) {
template<typename T>
MaceStatus Conv2dK1x1<T>::Compute(const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) {
index_t batch = input->dim(0);
index_t in_height = input->dim(2);
index_t in_width = input->dim(3);
......@@ -50,13 +33,8 @@ MaceStatus Conv2dK1x1::Compute(const OpContext *context,
std::vector<index_t> output_shape;
std::vector<int> in_pad_size;
std::vector<int> out_pad_size;
CalOutputShapeAndPadSize(input,
filter,
1,
1,
&output_shape,
&in_pad_size,
&out_pad_size);
CalOutputShapeAndPadSize(input, filter, 1, 1,
&output_shape, &in_pad_size, &out_pad_size);
MACE_RETURN_IF_ERROR(output->Resize(output_shape));
const index_t out_channels = output_shape[1];
......@@ -70,16 +48,16 @@ MaceStatus Conv2dK1x1::Compute(const OpContext *context,
in_height != padded_in_height || in_width != padded_in_width;
auto scratch_buffer = context->device()->scratch_buffer();
const index_t padded_in_size = is_in_padded ? PadAlignSize(
sizeof(float) * batch * in_channels * padded_in_height
sizeof(T) * batch * in_channels * padded_in_height
* padded_in_width) : 0;
const index_t pack_filter_size =
PadAlignSize(sizeof(float) * out_channels * in_channels);
PadAlignSize(sizeof(T) * out_channels * in_channels);
const index_t pack_input_size =
PadAlignSize(
sizeof(float) * in_channels * padded_in_height * padded_in_width);
sizeof(T) * in_channels * padded_in_height * padded_in_width);
const index_t pack_output_size =
PadAlignSize(
sizeof(float) * out_channels * padded_in_height * padded_in_width);
sizeof(T) * out_channels * padded_in_height * padded_in_width);
const index_t gemm_pack_size =
pack_filter_size + pack_input_size + pack_output_size;
......@@ -115,12 +93,11 @@ MaceStatus Conv2dK1x1::Compute(const OpContext *context,
void RegisterConv2dK1x1Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Conv2dK1x1, delegator::Conv2dParam,
registry, Conv2dK1x1<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K1x1));
}
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_CONV_2D_1X1_H_
#define MACE_OPS_ARM_BASE_CONV_2D_1X1_H_
#include "mace/ops/arm/base/conv_2d.h"
#include "mace/ops/arm/base/gemm.h"
namespace mace {
namespace ops {
namespace arm {
template<typename T>
class Conv2dK1x1 : public Conv2dBase {
public:
explicit Conv2dK1x1(const delegator::Conv2dParam &param)
: Conv2dBase(param, sizeof(T)),
gemm_(delegator::GemmParam()) {}
virtual ~Conv2dK1x1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
private:
Gemm<T> gemm_;
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_CONV_2D_1X1_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/conv_2d_1xn.h"
namespace mace {
namespace ops {
namespace arm {
void RegisterConv2dK1xNDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Conv2dK1x7S1<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K1x7S1));
MACE_REGISTER_DELEGATOR(
registry, Conv2dK7x1S1<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K7x1S1));
MACE_REGISTER_DELEGATOR(
registry, Conv2dK1x15S1<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K1x15S1));
MACE_REGISTER_DELEGATOR(
registry, Conv2dK15x1S1<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K15x1S1));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,76 +12,66 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_CONV_2D_1XN_H_
#define MACE_OPS_ARM_FP32_CONV_2D_1XN_H_
#ifndef MACE_OPS_ARM_BASE_CONV_2D_1XN_H_
#define MACE_OPS_ARM_BASE_CONV_2D_1XN_H_
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/ops/arm/base/conv_2d_mxn.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Conv2dK1x7S1 : public Conv2dBase {
template<typename T>
class Conv2dK1x7S1 : public Conv2dKMxN<T> {
public:
explicit Conv2dK1x7S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
: Conv2dKMxN<T>(param, 1, 4) {}
virtual ~Conv2dK1x7S1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const T *input_data, T *output_data) override;
};
class Conv2dK7x1S1 : public Conv2dBase {
template<typename T>
class Conv2dK7x1S1 : public Conv2dKMxN<T> {
public:
explicit Conv2dK7x1S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
: Conv2dKMxN<T>(param, 4, 1) {}
virtual ~Conv2dK7x1S1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const T *input_data, T *output_data) override;
};
class Conv2dK1x15S1 : public Conv2dBase {
template<typename T>
class Conv2dK1x15S1 : public Conv2dKMxN<T> {
public:
explicit Conv2dK1x15S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
: Conv2dKMxN<T>(param, 1, 4) {}
virtual ~Conv2dK1x15S1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const T *input_data, T *output_data) override;
};
class Conv2dK15x1S1 : public Conv2dBase {
template<typename T>
class Conv2dK15x1S1 : public Conv2dKMxN<T> {
public:
explicit Conv2dK15x1S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
: Conv2dKMxN<T>(param, 4, 1) {}
virtual ~Conv2dK15x1S1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const T *input_data, T *output_data) override;
};
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_CONV_2D_1XN_H_
#endif // MACE_OPS_ARM_BASE_CONV_2D_1XN_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/conv_2d_3x3.h"
namespace mace {
namespace ops {
namespace arm {
void RegisterConv2dK3x3Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Conv2dK3x3S1<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S1));
MACE_REGISTER_DELEGATOR(
registry, Conv2dK3x3S2<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S2));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,50 +12,44 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_CONV_2D_3X3_H_
#define MACE_OPS_ARM_FP32_CONV_2D_3X3_H_
#ifndef MACE_OPS_ARM_BASE_CONV_2D_3X3_H_
#define MACE_OPS_ARM_BASE_CONV_2D_3X3_H_
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/ops/arm/base/conv_2d_mxn.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Conv2dK3x3S1 : public Conv2dBase {
template<typename T>
class Conv2dK3x3S1 : public Conv2dKMxN<T> {
public:
explicit Conv2dK3x3S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
: Conv2dKMxN<T>(param, 2, 4) {}
virtual ~Conv2dK3x3S1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const T *input_data, T *output_data) override;
};
class Conv2dK3x3S2 : public Conv2dBase {
template<typename T>
class Conv2dK3x3S2 : public Conv2dKMxN<T> {
public:
explicit Conv2dK3x3S2(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
: Conv2dKMxN<T>(param, 1, 4) {}
virtual ~Conv2dK3x3S2() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const T *input_data, T *output_data) override;
};
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_CONV_2D_3X3_H_
#endif // MACE_OPS_ARM_BASE_CONV_2D_3X3_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/conv_2d_5x5.h"
namespace mace {
namespace ops {
namespace arm {
void RegisterConv2dK5x5Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Conv2dK5x5S1<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K5x5S1));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_CONV_2D_5X5_H_
#define MACE_OPS_ARM_BASE_CONV_2D_5X5_H_
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/arm/base/conv_2d_mxn.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
template<typename T>
class Conv2dK5x5S1 : public Conv2dKMxN<T> {
public:
explicit Conv2dK5x5S1(const delegator::Conv2dParam &param)
: Conv2dKMxN<T>(param, 1, 4) {}
virtual ~Conv2dK5x5S1() {}
MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const T *input_data, T *output_data) override;
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_CONV_2D_5X5_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/conv_2d_7x7.h"
namespace mace {
namespace ops {
namespace arm {
void RegisterConv2dK7x7Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Conv2dK7x7S1<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K7x7S1));
MACE_REGISTER_DELEGATOR(
registry, Conv2dK7x7S2<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K7x7S2));
MACE_REGISTER_DELEGATOR(
registry, Conv2dK7x7S3<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K7x7S3));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,63 +12,55 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_CONV_2D_7X7_H_
#define MACE_OPS_ARM_FP32_CONV_2D_7X7_H_
#ifndef MACE_OPS_ARM_BASE_CONV_2D_7X7_H_
#define MACE_OPS_ARM_BASE_CONV_2D_7X7_H_
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/ops/arm/base/conv_2d_mxn.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Conv2dK7x7S1 : public Conv2dBase {
template<typename T>
class Conv2dK7x7S1 : public Conv2dKMxN<T> {
public:
explicit Conv2dK7x7S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
: Conv2dKMxN<T>(param, 1, 4) {}
virtual ~Conv2dK7x7S1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const T *input_data, T *output_data) override;
};
class Conv2dK7x7S2 : public Conv2dBase {
template<typename T>
class Conv2dK7x7S2 : public Conv2dKMxN<T> {
public:
explicit Conv2dK7x7S2(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
: Conv2dKMxN<T>(param, 1, 4) {}
virtual ~Conv2dK7x7S2() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const T *input_data, T *output_data) override;
};
class Conv2dK7x7S3 : public Conv2dBase {
template<typename T>
class Conv2dK7x7S3 : public Conv2dKMxN<T> {
public:
explicit Conv2dK7x7S3(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
: Conv2dKMxN<T>(param, 1, 4) {}
virtual ~Conv2dK7x7S3() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const T *input_data, T *output_data) override;
};
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_CONV_2D_7X7_H_
#endif // MACE_OPS_ARM_BASE_CONV_2D_7X7_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/conv_2d_general.h"
#include <memory>
namespace mace {
namespace ops {
namespace arm {
template<typename T>
MaceStatus Conv2dGeneral<T>::Compute(const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) {
std::unique_ptr<const Tensor> padded_input;
std::unique_ptr<Tensor> padded_output;
ResizeOutAndPadInOut(context, input, filter, output, 1, 4,
&padded_input, &padded_output);
const Tensor *in_tensor = input;
if (padded_input != nullptr) {
in_tensor = padded_input.get();
}
Tensor *out_tensor = output;
if (padded_output != nullptr) {
out_tensor = padded_output.get();
}
out_tensor->Clear();
Tensor::MappingGuard in_guard(input);
Tensor::MappingGuard filter_guard(filter);
Tensor::MappingGuard out_guard(output);
const T *filter_data = filter->data<T>();
const T *input_data = in_tensor->data<T>();
T *output_data = out_tensor->mutable_data<T>();
const ConvComputeParam p =
PreWorkAndGetConv2DParam(context, in_tensor, out_tensor);
auto &filter_shape = filter->shape();
DoCompute(p, filter_data, input_data, output_data, filter_shape);
UnPadOutput(*out_tensor, output);
return MaceStatus::MACE_SUCCESS;
}
void RegisterConv2dGeneralDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Conv2dGeneral<float>, delegator::Conv2dParam,
MACE_DELEGATOR_KEY(Conv2d, DeviceType::CPU, float, ImplType::NEON));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_CONV_2D_GENERAL_H_
#define MACE_OPS_ARM_BASE_CONV_2D_GENERAL_H_
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/arm/base/conv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
template<typename T>
class Conv2dGeneral : public Conv2dBase {
public:
explicit Conv2dGeneral(const delegator::Conv2dParam &param)
: Conv2dBase(param, sizeof(T)) {}
virtual ~Conv2dGeneral() {}
MaceStatus Compute(const OpContext *context, const Tensor *input,
const Tensor *filter, Tensor *output) override;
protected:
MaceStatus DoCompute(
const ConvComputeParam &p, const T *filter_data,
const T *input_data, T *output_data,
const std::vector<index_t> &filter_shape);
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_CONV_2D_GENERAL_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_CONV_2D_MXN_H_
#define MACE_OPS_ARM_BASE_CONV_2D_MXN_H_
#include <memory>
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/arm/base/conv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
template<typename T>
class Conv2dKMxN : public Conv2dBase {
public:
explicit Conv2dKMxN(const delegator::Conv2dParam &param,
const int tile_h, const int tile_w)
: Conv2dBase(param, sizeof(T)),
out_tile_h_(tile_h), out_tile_w_(tile_w) {}
virtual ~Conv2dKMxN() {}
MaceStatus Compute(const OpContext *context, const Tensor *input,
const Tensor *filter, Tensor *output) override {
std::unique_ptr<const Tensor> padded_input;
std::unique_ptr<Tensor> padded_output;
ResizeOutAndPadInOut(context, input, filter, output, out_tile_h_,
out_tile_w_, &padded_input, &padded_output);
const Tensor *in_tensor = input;
if (padded_input != nullptr) {
in_tensor = padded_input.get();
}
Tensor *out_tensor = output;
if (padded_output != nullptr) {
out_tensor = padded_output.get();
}
out_tensor->Clear();
Tensor::MappingGuard in_guard(input);
Tensor::MappingGuard filter_guard(filter);
Tensor::MappingGuard out_guard(output);
const T *filter_data = filter->data<T>();
const T *input_data = in_tensor->data<T>();
T *output_data = out_tensor->mutable_data<T>();
const ConvComputeParam p =
PreWorkAndGetConv2DParam(context, in_tensor, out_tensor);
DoCompute(p, filter_data, input_data, output_data);
UnPadOutput(*out_tensor, output);
return MaceStatus::MACE_SUCCESS;
}
virtual MaceStatus DoCompute(const ConvComputeParam &p, const T *filter,
const T *input_data, T *output_data) = 0;
private:
const int out_tile_h_;
const int out_tile_w_;
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_CONV_2D_MXN_H_
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,17 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/fp32/deconv_2d.h"
#include "mace/ops/arm/base/deconv_2d.h"
#include <utility>
#include <functional>
#include "mace/utils/memory.h"
#include <utility>
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/utils/memory.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
MaceStatus Deconv2dBase::ResizeOutAndPadOut(
const OpContext *context,
......@@ -67,7 +67,7 @@ MaceStatus Deconv2dBase::ResizeOutAndPadOut(
std::accumulate(padded_out_shape.begin(),
padded_out_shape.end(),
1,
std::multiplies<index_t>()) * sizeof(float);
std::multiplies<index_t>()) * type_size_;
ScratchBuffer *scratch = context->device()->scratch_buffer();
scratch->Rewind();
index_t scratch_size = PadAlignSize(padded_out_size);
......@@ -75,7 +75,7 @@ MaceStatus Deconv2dBase::ResizeOutAndPadOut(
std::unique_ptr<Tensor>
padded_out
(make_unique<Tensor>(scratch->Scratch(scratch_size), DT_FLOAT));
(make_unique<Tensor>(scratch->Scratch(scratch_size), output->dtype()));
padded_out->Reshape(padded_out_shape);
*padded_output = std::move(padded_out);
}
......@@ -97,24 +97,97 @@ void Deconv2dBase::UnPadOutput(const Tensor &src,
const index_t padded_height = src.dim(2);
const index_t padded_width = src.dim(3);
auto padded_out_data = src.data<float>();
auto out_data = dst->mutable_data<float>();
auto padded_out_data = src.data<uint8_t>();
auto out_data = dst->mutable_data<uint8_t>();
for (index_t i = 0; i < batch; ++i) {
for (index_t j = 0; j < channels; ++j) {
for (index_t k = 0; k < height; ++k) {
const float *input_base =
const uint8_t *input_base =
padded_out_data + ((i * channels + j) * padded_height
+ (k + pad_h)) * padded_width;
float *output_base =
out_data + ((i * channels + j) * height + k) * width;
memcpy(output_base, input_base + pad_w, width * sizeof(float));
+ (k + pad_h)) * padded_width * type_size_;
uint8_t *output_base =
out_data + ((i * channels + j) * height + k) * width * type_size_;
memcpy(output_base,
input_base + pad_w * type_size_,
width * type_size_);
}
}
}
}
} // namespace fp32
DeconvComputeParam Deconv2dBase::PreWorkAndGetDeconvParam(
const OpContext *context, const Tensor *input, Tensor *out_tensor) {
auto &in_shape = input->shape();
auto &out_shape = out_tensor->shape();
const index_t batch = in_shape[0];
const index_t inch = in_shape[1];
const index_t h = in_shape[2];
const index_t w = in_shape[3];
const index_t outch = out_shape[1];
const index_t outh = out_shape[2];
const index_t outw = out_shape[3];
const index_t out_img_size = outh * outw;
utils::ThreadPool
&thread_pool = context->device()->cpu_runtime()->thread_pool();
return DeconvComputeParam(batch, inch, h, w, outch, outh, outw,
out_img_size, &thread_pool);
}
DepthwiseDeconvComputeParam Deconv2dBase::PreWorkAndGetDepthwiseDeconvParam(
const OpContext *context, const Tensor *input, Tensor *out_tensor) {
auto &in_shape = input->shape();
auto &out_shape = out_tensor->shape();
const index_t batch = in_shape[0];
const index_t channels = in_shape[1];
const index_t h = in_shape[2];
const index_t w = in_shape[3];
const index_t in_img_size = h * w;
const index_t outh = out_shape[2];
const index_t outw = out_shape[3];
const index_t out_img_size = outh * outw;
utils::ThreadPool
&thread_pool = context->device()->cpu_runtime()->thread_pool();
return DepthwiseDeconvComputeParam(batch, channels, h, w, in_img_size,
outh, outw, out_img_size, &thread_pool);
}
GroupDeconvComputeParam Deconv2dBase::PreWorkAndGetGroupDeconvParam(
const OpContext *context, const Tensor *input, Tensor *out_tensor) {
auto &in_shape = input->shape();
auto &out_shape = out_tensor->shape();
const index_t batch = in_shape[0];
const index_t inch = in_shape[1];
const index_t h = in_shape[2];
const index_t w = in_shape[3];
const index_t outch = out_shape[1];
const index_t outh = out_shape[2];
const index_t outw = out_shape[3];
const index_t in_img_size = h * w;
const index_t out_img_size = outh * outw;
const index_t inch_g = inch / group_;
const index_t outch_g = outch / group_;
utils::ThreadPool
&thread_pool = context->device()->cpu_runtime()->thread_pool();
return GroupDeconvComputeParam(batch, inch, h, w, outch, outh, outw,
in_img_size, out_img_size, inch_g,
outch_g, &thread_pool);
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_DECONV_2D_H_
#define MACE_OPS_ARM_BASE_DECONV_2D_H_
#include <memory>
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/ops/arm/base/gemm.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/deconv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
struct DeconvComputeParam {
const index_t batch;
const index_t in_channels;
const index_t in_height;
const index_t in_width;
const index_t out_channels;
const index_t out_height;
const index_t out_width;
const index_t out_img_size;
utils::ThreadPool &thread_pool;
DeconvComputeParam(const index_t b,
const index_t in_c,
const index_t in_h,
const index_t in_w,
const index_t out_c,
const index_t out_h,
const index_t out_w,
const index_t out_size,
utils::ThreadPool *thrd_pool)
: batch(b), in_channels(in_c), in_height(in_h), in_width(in_w),
out_channels(out_c), out_height(out_h), out_width(out_w),
out_img_size(out_size), thread_pool(*thrd_pool) {}
};
struct DepthwiseDeconvComputeParam {
const index_t batch;
const index_t in_channels;
const index_t in_height;
const index_t in_width;
const index_t in_img_size;
const index_t out_height;
const index_t out_width;
const index_t out_img_size;
utils::ThreadPool &thread_pool;
DepthwiseDeconvComputeParam(const index_t b,
const index_t in_c,
const index_t in_h,
const index_t in_w,
const index_t in_size,
const index_t out_h,
const index_t out_w,
const index_t out_size,
utils::ThreadPool *thrd_pool)
: batch(b),
in_channels(in_c),
in_height(in_h),
in_width(in_w),
in_img_size(in_size),
out_height(out_h),
out_width(out_w),
out_img_size(out_size),
thread_pool(*thrd_pool) {}
};
struct GroupDeconvComputeParam {
const index_t batch;
const index_t in_channels;
const index_t in_height;
const index_t in_width;
const index_t out_channels;
const index_t out_height;
const index_t out_width;
const index_t in_img_size;
const index_t out_img_size;
const index_t inch_g;
const index_t outch_g;
utils::ThreadPool &thread_pool;
GroupDeconvComputeParam(const index_t in_b,
const index_t in_ch,
const index_t in_h,
const index_t in_w,
const index_t out_ch,
const index_t out_h,
const index_t out_w,
const index_t in_size,
const index_t out_size,
const index_t in_ch_g,
const index_t out_ch_g,
utils::ThreadPool *thrd_pool)
: batch(in_b),
in_channels(in_ch),
in_height(in_h),
in_width(in_w),
out_channels(out_ch),
out_height(out_h),
out_width(out_w),
in_img_size(in_size),
out_img_size(out_size),
inch_g(in_ch_g),
outch_g(out_ch_g),
thread_pool(*thrd_pool) {}
};
class Deconv2dBase : public delegator::Deconv2d {
public:
explicit Deconv2dBase(const delegator::Deconv2dParam &param, int type_size)
: delegator::Deconv2d(param),
group_(param.group_), type_size_(type_size) {}
virtual ~Deconv2dBase() = default;
protected:
MaceStatus ResizeOutAndPadOut(const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output,
std::vector<int> *out_pad_size,
std::unique_ptr<Tensor> *padded_output);
void UnPadOutput(const Tensor &src,
const std::vector<int> &out_pad_size,
Tensor *dst);
DeconvComputeParam PreWorkAndGetDeconvParam(
const OpContext *context, const Tensor *input, Tensor *out_tensor);
DepthwiseDeconvComputeParam PreWorkAndGetDepthwiseDeconvParam(
const OpContext *context, const Tensor *input, Tensor *out_tensor);
GroupDeconvComputeParam PreWorkAndGetGroupDeconvParam(
const OpContext *context, const Tensor *input, Tensor *out_tensor);
protected:
index_t group_;
private:
int type_size_;
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_DECONV_2D_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/deconv_2d_2x2.h"
namespace mace {
namespace ops {
namespace arm {
void RegisterDeconv2dK2x2Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Deconv2dK2x2S1<float>, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, DeviceType::CPU,
float, ImplType::NEON, K2x2S1));
MACE_REGISTER_DELEGATOR(
registry, Deconv2dK2x2S2<float>, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, DeviceType::CPU,
float, ImplType::NEON, K2x2S2));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_DECONV_2D_2X2_H_
#define MACE_OPS_ARM_FP32_DECONV_2D_2X2_H_
#ifndef MACE_OPS_ARM_BASE_DECONV_2D_2X2_H_
#define MACE_OPS_ARM_BASE_DECONV_2D_2X2_H_
#include <vector>
#include <memory>
......@@ -21,46 +21,38 @@
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/ops/arm/fp32/deconv_2d.h"
#include "mace/ops/arm/base/deconv_2d_mxn.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Deconv2dK2x2S1 : public Deconv2dBase {
template<typename T>
class Deconv2dK2x2S1 : public Deconv2dKMxN<T> {
public:
explicit Deconv2dK2x2S1(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {}
: Deconv2dKMxN<T>(param) {}
virtual ~Deconv2dK2x2S1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
MaceStatus DoCompute(const DeconvComputeParam &p, const T *filter,
const T *input_data, T *padded_out_data) override;
};
class Deconv2dK2x2S2 : public Deconv2dBase {
template<typename T>
class Deconv2dK2x2S2 : public Deconv2dKMxN<T> {
public:
explicit Deconv2dK2x2S2(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {}
: Deconv2dKMxN<T>(param) {}
virtual ~Deconv2dK2x2S2() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
MaceStatus DoCompute(const DeconvComputeParam &p, const T *filter,
const T *input_data, T *padded_out_data) override;
};
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_DECONV_2D_2X2_H_
#endif // MACE_OPS_ARM_BASE_DECONV_2D_2X2_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/deconv_2d_3x3.h"
namespace mace {
namespace ops {
namespace arm {
void RegisterDeconv2dK3x3Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Deconv2dK3x3S1<float>, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S1));
MACE_REGISTER_DELEGATOR(
registry, Deconv2dK3x3S2<float>, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S2));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_DECONV_2D_3X3_H_
#define MACE_OPS_ARM_FP32_DECONV_2D_3X3_H_
#ifndef MACE_OPS_ARM_BASE_DECONV_2D_3X3_H_
#define MACE_OPS_ARM_BASE_DECONV_2D_3X3_H_
#include <vector>
#include <memory>
......@@ -21,46 +21,38 @@
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/ops/arm/fp32/deconv_2d.h"
#include "mace/ops/arm/base/deconv_2d_mxn.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Deconv2dK3x3S1 : public Deconv2dBase {
template<typename T>
class Deconv2dK3x3S1 : public Deconv2dKMxN<T> {
public:
explicit Deconv2dK3x3S1(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {}
: Deconv2dKMxN<T>(param) {}
virtual ~Deconv2dK3x3S1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
MaceStatus DoCompute(const DeconvComputeParam &p, const T *filter,
const T *input_data, T *padded_out_data) override;
};
class Deconv2dK3x3S2 : public Deconv2dBase {
template<typename T>
class Deconv2dK3x3S2 : public Deconv2dKMxN<T> {
public:
explicit Deconv2dK3x3S2(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {}
: Deconv2dKMxN<T>(param) {}
virtual ~Deconv2dK3x3S2() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
MaceStatus DoCompute(const DeconvComputeParam &p, const T *filter,
const T *input_data, T *padded_out_data) override;
};
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_DECONV_2D_3X3_H_
#endif // MACE_OPS_ARM_BASE_DECONV_2D_3X3_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/deconv_2d_4x4.h"
namespace mace {
namespace ops {
namespace arm {
void RegisterDeconv2dK4x4Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Deconv2dK4x4S1<float>, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, DeviceType::CPU,
float, ImplType::NEON, K4x4S1));
MACE_REGISTER_DELEGATOR(
registry, Deconv2dK4x4S2<float>, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY_EX(Deconv2d, DeviceType::CPU,
float, ImplType::NEON, K4x4S2));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,55 +12,47 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_DECONV_2D_4X4_H_
#define MACE_OPS_ARM_FP32_DECONV_2D_4X4_H_
#ifndef MACE_OPS_ARM_BASE_DECONV_2D_4X4_H_
#define MACE_OPS_ARM_BASE_DECONV_2D_4X4_H_
#include <vector>
#include <memory>
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/ops/arm/fp32/deconv_2d.h"
#include "mace/ops/arm/base/deconv_2d_mxn.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Deconv2dK4x4S1 : public Deconv2dBase {
template<typename T>
class Deconv2dK4x4S1 : public Deconv2dKMxN<T> {
public:
explicit Deconv2dK4x4S1(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {}
: Deconv2dKMxN<T>(param) {}
virtual ~Deconv2dK4x4S1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
MaceStatus DoCompute(const DeconvComputeParam &p, const T *filter,
const T *input_data, T *padded_out_data) override;
};
class Deconv2dK4x4S2 : public Deconv2dBase {
template<typename T>
class Deconv2dK4x4S2 : public Deconv2dKMxN<T> {
public:
explicit Deconv2dK4x4S2(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {}
: Deconv2dKMxN<T>(param) {}
virtual ~Deconv2dK4x4S2() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
MaceStatus DoCompute(const DeconvComputeParam &p, const T *filter,
const T *input_data, T *padded_out_data) override;
};
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_DECONV_2D_4X4_H_
#endif // MACE_OPS_ARM_BASE_DECONV_2D_4X4_H_
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,34 +12,21 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/fp32/deconv_2d.h"
#include "mace/ops/arm/base/deconv_2d_general.h"
// TODO(liutuo): optimize it
#include <memory>
#include <vector>
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Deconv2dGeneral : public Deconv2dBase {
public:
explicit Deconv2dGeneral(const delegator::Deconv2dParam &param)
: Deconv2dBase(param) {}
virtual ~Deconv2dGeneral() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
};
MaceStatus Deconv2dGeneral::Compute(const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) {
template<typename T>
MaceStatus Deconv2dGeneral<T>::Compute(const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) {
std::unique_ptr<Tensor> padded_out;
std::vector<int> out_pad_size;
ResizeOutAndPadOut(context,
......@@ -60,9 +47,9 @@ MaceStatus Deconv2dGeneral::Compute(const OpContext *context,
Tensor::MappingGuard filter_mapper(filter);
Tensor::MappingGuard output_mapper(output);
auto input_data = input->data<float>();
auto filter_data = filter->data<float>();
auto padded_out_data = out_tensor->mutable_data<float>();
auto input_data = input->data<T>();
auto filter_data = filter->data<T>();
auto padded_out_data = out_tensor->mutable_data<T>();
auto &in_shape = input->shape();
auto &out_shape = out_tensor->shape();
......@@ -95,7 +82,7 @@ MaceStatus Deconv2dGeneral::Compute(const OpContext *context,
index_t start1, index_t end1, index_t step1) {
for (index_t b = start0; b < end0; b += step0) {
for (index_t oc = start1; oc < end1; oc += step1) {
float *out_base =
T *out_base =
padded_out_data + (b * out_channels + oc) * out_img_size;
for (index_t i = 0; i < in_height; ++i) {
for (index_t j = 0; j < in_width; ++j) {
......@@ -104,7 +91,7 @@ MaceStatus Deconv2dGeneral::Compute(const OpContext *context,
for (int ic = 0; ic < in_channels; ++ic) {
const index_t input_idx =
(b * in_channels + ic) * in_img_size + i * in_width + j;
const float val = input_data[input_idx];
const T val = input_data[input_idx];
const index_t kernel_offset =
(oc * in_channels + ic) * kernel_size;
for (int k = 0; k < kernel_size; ++k) {
......@@ -126,11 +113,10 @@ MaceStatus Deconv2dGeneral::Compute(const OpContext *context,
void RegisterDeconv2dGeneralDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Deconv2dGeneral, delegator::Deconv2dParam,
registry, Deconv2dGeneral<float>, delegator::Deconv2dParam,
MACE_DELEGATOR_KEY(Deconv2d, DeviceType::CPU, float, ImplType::NEON));
}
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
......
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_DECONV_2D_GENERAL_H_
#define MACE_OPS_ARM_BASE_DECONV_2D_GENERAL_H_
#include "mace/ops/arm/base/deconv_2d.h"
// TODO(liutuo): optimize it
namespace mace {
namespace ops {
namespace arm {
template<typename T>
class Deconv2dGeneral : public Deconv2dBase {
public:
explicit Deconv2dGeneral(const delegator::Deconv2dParam &param)
: Deconv2dBase(param, sizeof(T)) {}
virtual ~Deconv2dGeneral() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_DECONV_2D_GENERAL_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_DECONV_2D_MXN_H_
#define MACE_OPS_ARM_BASE_DECONV_2D_MXN_H_
#include <memory>
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/arm/base/deconv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
template<typename T>
class Deconv2dKMxN : public Deconv2dBase {
public:
explicit Deconv2dKMxN(const delegator::Deconv2dParam &param)
: Deconv2dBase(param, sizeof(T)) {}
virtual ~Deconv2dKMxN() {}
MaceStatus Compute(const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) {
std::unique_ptr<Tensor> padded_out;
std::vector<int> out_pad_size;
ResizeOutAndPadOut(context, input, filter, output_shape,
output, &out_pad_size, &padded_out);
Tensor *out_tensor = output;
if (padded_out != nullptr) {
out_tensor = padded_out.get();
}
out_tensor->Clear();
Tensor::MappingGuard input_mapper(input);
Tensor::MappingGuard filter_mapper(filter);
Tensor::MappingGuard output_mapper(output);
const T *input_data = input->data<T>();
const T *filter_data = filter->data<T>();
T *padded_out_data = out_tensor->mutable_data<T>();
const DeconvComputeParam p =
PreWorkAndGetDeconvParam(context, input, out_tensor);
DoCompute(p, filter_data, input_data, padded_out_data);
UnPadOutput(*out_tensor, out_pad_size, output);
return MaceStatus::MACE_SUCCESS;
}
virtual MaceStatus DoCompute(const DeconvComputeParam &p, const T *filter,
const T *input_data, T *padded_out_data) = 0;
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_DECONV_2D_MXN_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/depthwise_conv_2d_3x3.h"
namespace mace {
namespace ops {
namespace arm {
void RegisterDepthwiseConv2dK3x3Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, DepthwiseConv2dK3x3S1<float>, delegator::DepthwiseConv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S1));
MACE_REGISTER_DELEGATOR(
registry, DepthwiseConv2dK3x3S2<float>, delegator::DepthwiseConv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseConv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S2));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,51 +12,47 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_DEPTHWISE_CONV_2D_3X3_H_
#define MACE_OPS_ARM_FP32_DEPTHWISE_CONV_2D_3X3_H_
#ifndef MACE_OPS_ARM_BASE_DEPTHWISE_CONV_2D_3X3_H_
#define MACE_OPS_ARM_BASE_DEPTHWISE_CONV_2D_3X3_H_
#include <vector>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/ops/arm/base/depthwise_conv_2d_mxn.h"
#include "mace/ops/delegator/depthwise_conv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class DepthwiseConv2dK3x3S1 : public Conv2dBase {
template<typename T>
class DepthwiseConv2dK3x3S1 : public DepthwiseConv2dKMxN<T> {
public:
explicit DepthwiseConv2dK3x3S1(const delegator::DepthwiseConv2dParam &param)
: Conv2dBase(param) {}
: DepthwiseConv2dKMxN<T>(param) {}
virtual ~DepthwiseConv2dK3x3S1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
MaceStatus DoCompute(
const DepthwiseConvComputeParam &p, const T *filter,
const T *input_data, T *output_data) override;
};
class DepthwiseConv2dK3x3S2 : public Conv2dBase {
template<typename T>
class DepthwiseConv2dK3x3S2 : public DepthwiseConv2dKMxN<T> {
public:
explicit DepthwiseConv2dK3x3S2(const delegator::DepthwiseConv2dParam &param)
: Conv2dBase(param) {}
: DepthwiseConv2dKMxN<T>(param) {}
virtual ~DepthwiseConv2dK3x3S2() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
MaceStatus DoCompute(
const DepthwiseConvComputeParam &p, const T *filter,
const T *input_data, T *output_data) override;
};
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_DEPTHWISE_CONV_2D_3X3_H_
#endif // MACE_OPS_ARM_BASE_DEPTHWISE_CONV_2D_3X3_H_
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,51 +12,53 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_DECONV_2D_H_
#define MACE_OPS_ARM_FP32_DECONV_2D_H_
#ifndef MACE_OPS_ARM_BASE_DEPTHWISE_CONV_2D_MXN_H_
#define MACE_OPS_ARM_BASE_DEPTHWISE_CONV_2D_MXN_H_
#include <vector>
#include <memory>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/ops/arm/fp32/gemm.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/deconv_2d.h"
#include "mace/ops/arm/base/conv_2d.h"
#include "mace/ops/delegator/depthwise_conv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Deconv2dBase : public delegator::Deconv2d {
template<typename T>
class DepthwiseConv2dKMxN : public Conv2dBase {
public:
explicit Deconv2dBase(const delegator::Deconv2dParam &param)
: delegator::Deconv2d(param),
group_(param.group_) {}
explicit DepthwiseConv2dKMxN(const delegator::DepthwiseConv2dParam &param)
: Conv2dBase(param, sizeof(T)) {}
virtual ~DepthwiseConv2dKMxN() {}
virtual ~Deconv2dBase() = default;
MaceStatus Compute(const OpContext *context, const Tensor *input,
const Tensor *filter, Tensor *output) {
DepthwiseConvComputeParam p =
PreWorkAndGetDepthwiseConv2DParam(context, input, filter, output);
Tensor::MappingGuard in_guard(input);
Tensor::MappingGuard filter_guard(filter);
Tensor::MappingGuard out_guard(output);
const T *filter_data = filter->data<T>();
const T *input_data = input->data<T>();
T *output_data = output->mutable_data<T>();
DoCompute(p, filter_data, input_data, output_data);
return MaceStatus::MACE_SUCCESS;
}
protected:
MaceStatus ResizeOutAndPadOut(const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output,
std::vector<int> *out_pad_size,
std::unique_ptr<Tensor> *padded_output);
void UnPadOutput(const Tensor &src,
const std::vector<int> &out_pad_size,
Tensor *dst);
index_t group_;
virtual MaceStatus DoCompute(
const DepthwiseConvComputeParam &p, const T *filter,
const T *input_data, T *output_data) = 0;
};
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_DECONV_2D_H_
#endif // MACE_OPS_ARM_BASE_DEPTHWISE_CONV_2D_MXN_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/depthwise_deconv_2d_3x3.h"
namespace mace {
namespace ops {
namespace arm {
void RegisterDepthwiseDeconv2dK3x3Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, DepthwiseDeconv2dK3x3S1<float>,
delegator::DepthwiseDeconv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S1));
MACE_REGISTER_DELEGATOR(
registry, DepthwiseDeconv2dK3x3S2<float>,
delegator::DepthwiseDeconv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S2));
}
void RegisterGroupDeconv2dK3x3Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, GroupDeconv2dK3x3S1<float>, delegator::GroupDeconv2dParam,
MACE_DELEGATOR_KEY_EX(GroupDeconv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S1));
MACE_REGISTER_DELEGATOR(
registry, GroupDeconv2dK3x3S2<float>, delegator::GroupDeconv2dParam,
MACE_DELEGATOR_KEY_EX(GroupDeconv2d, DeviceType::CPU,
float, ImplType::NEON, K3x3S2));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_3X3_H_
#define MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_3X3_H_
#ifndef MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_3X3_H_
#define MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_3X3_H_
#include <vector>
#include <memory>
......@@ -21,7 +21,7 @@
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/ops/arm/fp32/deconv_2d.h"
#include "mace/ops/arm/base/depthwise_deconv_2d_mxn.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/depthwise_deconv_2d.h"
#include "mace/public/mace.h"
......@@ -29,70 +29,56 @@
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class DepthwiseDeconv2dK3x3S1 : public Deconv2dBase {
template<typename T>
class DepthwiseDeconv2dK3x3S1 : public DepthwiseDeconv2dKMxN<T> {
public:
explicit DepthwiseDeconv2dK3x3S1(
const delegator::DepthwiseDeconv2dParam &param)
: Deconv2dBase(param) {}
: DepthwiseDeconv2dKMxN<T>(param) {}
virtual ~DepthwiseDeconv2dK3x3S1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
MaceStatus DoCompute(const DepthwiseDeconvComputeParam &p, const T *filter,
const T *input_data, T *padded_out_data) override;
};
class DepthwiseDeconv2dK3x3S2 : public Deconv2dBase {
template<typename T>
class DepthwiseDeconv2dK3x3S2 : public DepthwiseDeconv2dKMxN<T> {
public:
explicit DepthwiseDeconv2dK3x3S2(
const delegator::DepthwiseDeconv2dParam &param)
: Deconv2dBase(param) {}
: DepthwiseDeconv2dKMxN<T>(param) {}
virtual ~DepthwiseDeconv2dK3x3S2() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
MaceStatus DoCompute(const DepthwiseDeconvComputeParam &p, const T *filter,
const T *input_data, T *padded_out_data) override;
};
class GroupDeconv2dK3x3S1 : public Deconv2dBase {
template<typename T>
class GroupDeconv2dK3x3S1 : public GroupDeconv2dKMxN<T> {
public:
explicit GroupDeconv2dK3x3S1(
const delegator::GroupDeconv2dParam &param)
: Deconv2dBase(param) {}
: GroupDeconv2dKMxN<T>(param) {}
virtual ~GroupDeconv2dK3x3S1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
MaceStatus DoCompute(const GroupDeconvComputeParam &p, const T *filter,
const T *input_data, T *padded_out_data) override;
};
class GroupDeconv2dK3x3S2 : public Deconv2dBase {
template<typename T>
class GroupDeconv2dK3x3S2 : public GroupDeconv2dKMxN<T> {
public:
explicit GroupDeconv2dK3x3S2(const delegator::GroupDeconv2dParam &param)
: Deconv2dBase(param) {}
: GroupDeconv2dKMxN<T>(param) {}
virtual ~GroupDeconv2dK3x3S2() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
MaceStatus DoCompute(const GroupDeconvComputeParam &p, const T *filter,
const T *input_data, T *padded_out_data) override;
};
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_3X3_H_
#endif // MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_3X3_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/depthwise_deconv_2d_4x4.h"
namespace mace {
namespace ops {
namespace arm {
void RegisterDepthwiseDeconv2dK4x4Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, DepthwiseDeconv2dK4x4S1<float>,
delegator::DepthwiseDeconv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, DeviceType::CPU,
float, ImplType::NEON, K4x4S1));
MACE_REGISTER_DELEGATOR(
registry, DepthwiseDeconv2dK4x4S2<float>,
delegator::DepthwiseDeconv2dParam,
MACE_DELEGATOR_KEY_EX(DepthwiseDeconv2d, DeviceType::CPU,
float, ImplType::NEON, K4x4S2));
}
void RegisterGroupDeconv2dK4x4Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, GroupDeconv2dK4x4S1<float>, delegator::GroupDeconv2dParam,
MACE_DELEGATOR_KEY_EX(GroupDeconv2d, DeviceType::CPU,
float, ImplType::NEON, K4x4S1));
MACE_REGISTER_DELEGATOR(
registry, GroupDeconv2dK4x4S2<float>, delegator::GroupDeconv2dParam,
MACE_DELEGATOR_KEY_EX(GroupDeconv2d, DeviceType::CPU,
float, ImplType::NEON, K4x4S2));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_4X4_H_
#define MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_4X4_H_
#ifndef MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_4X4_H_
#define MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_4X4_H_
#include <vector>
#include <memory>
......@@ -21,7 +21,7 @@
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/ops/arm/fp32/deconv_2d.h"
#include "mace/ops/arm/base/depthwise_deconv_2d_mxn.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/depthwise_deconv_2d.h"
#include "mace/public/mace.h"
......@@ -29,69 +29,55 @@
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class DepthwiseDeconv2dK4x4S1 : public Deconv2dBase {
template<typename T>
class DepthwiseDeconv2dK4x4S1 : public DepthwiseDeconv2dKMxN<T> {
public:
explicit DepthwiseDeconv2dK4x4S1(
const delegator::DepthwiseDeconv2dParam &param)
: Deconv2dBase(param) {}
: DepthwiseDeconv2dKMxN<T>(param) {}
virtual ~DepthwiseDeconv2dK4x4S1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
MaceStatus DoCompute(const DepthwiseDeconvComputeParam &p, const T *filter,
const T *input_data, T *padded_out_data) override;
};
class DepthwiseDeconv2dK4x4S2 : public Deconv2dBase {
template<typename T>
class DepthwiseDeconv2dK4x4S2 : public DepthwiseDeconv2dKMxN<T> {
public:
explicit DepthwiseDeconv2dK4x4S2(
const delegator::DepthwiseDeconv2dParam &param)
: Deconv2dBase(param) {}
: DepthwiseDeconv2dKMxN<T>(param) {}
virtual ~DepthwiseDeconv2dK4x4S2() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
MaceStatus DoCompute(const DepthwiseDeconvComputeParam &p, const T *filter,
const T *input_data, T *padded_out_data) override;
};
class GroupDeconv2dK4x4S1 : public Deconv2dBase {
template<typename T>
class GroupDeconv2dK4x4S1 : public GroupDeconv2dKMxN<T> {
public:
explicit GroupDeconv2dK4x4S1(const delegator::GroupDeconv2dParam &param)
: Deconv2dBase(param) {}
: GroupDeconv2dKMxN<T>(param) {}
virtual ~GroupDeconv2dK4x4S1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
MaceStatus DoCompute(const GroupDeconvComputeParam &p, const T *filter,
const T *input_data, T *padded_out_data) override;
};
class GroupDeconv2dK4x4S2 : public Deconv2dBase {
template<typename T>
class GroupDeconv2dK4x4S2 : public GroupDeconv2dKMxN<T> {
public:
explicit GroupDeconv2dK4x4S2(const delegator::GroupDeconv2dParam &param)
: Deconv2dBase(param) {}
: GroupDeconv2dKMxN<T>(param) {}
virtual ~GroupDeconv2dK4x4S2() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) override;
MaceStatus DoCompute(const GroupDeconvComputeParam &p, const T *filter,
const T *input_data, T *padded_out_data) override;
};
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_4X4_H_
#endif // MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_4X4_H_
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,18 +12,18 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/fp32/depthwise_deconv_2d_general.h"
#include "mace/ops/arm/base/depthwise_deconv_2d_general.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
MaceStatus DepthwiseDeconv2dGeneral::Compute(const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) {
template<typename T>
MaceStatus DepthwiseDeconv2dGeneral<T>::Compute(const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) {
std::unique_ptr<Tensor> padded_out;
std::vector<int> out_pad_size;
group_ = input->dim(1);
......@@ -46,9 +46,9 @@ MaceStatus DepthwiseDeconv2dGeneral::Compute(const OpContext *context,
Tensor::MappingGuard filter_mapper(filter);
Tensor::MappingGuard output_mapper(output);
auto input_data = input->data<float>();
auto filter_data = filter->data<float>();
auto padded_out_data = out_tensor->mutable_data<float>();
const T *input_data = input->data<T>();
const T *filter_data = filter->data<T>();
T *padded_out_data = out_tensor->mutable_data<T>();
auto &in_shape = input->shape();
auto &out_shape = out_tensor->shape();
......@@ -79,7 +79,7 @@ MaceStatus DepthwiseDeconv2dGeneral::Compute(const OpContext *context,
index_t start1, index_t end1, index_t step1) {
for (index_t b = start0; b < end0; b += step0) {
for (index_t c = start1; c < end1; c += step1) {
float *out_base =
T *out_base =
padded_out_data + (b * channels + c) * out_img_size;
for (index_t i = 0; i < in_height; ++i) {
for (index_t j = 0; j < in_width; ++j) {
......@@ -105,11 +105,12 @@ MaceStatus DepthwiseDeconv2dGeneral::Compute(const OpContext *context,
return MaceStatus::MACE_SUCCESS;
}
MaceStatus GroupDeconv2dGeneral::Compute(const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) {
template<typename T>
MaceStatus GroupDeconv2dGeneral<T>::Compute(const OpContext *context,
const Tensor *input,
const Tensor *filter,
const Tensor *output_shape,
Tensor *output) {
std::unique_ptr<Tensor> padded_out;
std::vector<int> out_pad_size;
ResizeOutAndPadOut(context,
......@@ -131,9 +132,9 @@ MaceStatus GroupDeconv2dGeneral::Compute(const OpContext *context,
Tensor::MappingGuard filter_mapper(filter);
Tensor::MappingGuard output_mapper(output);
auto input_data = input->data<float>();
auto filter_data = filter->data<float>();
auto padded_out_data = out_tensor->mutable_data<float>();
const T *input_data = input->data<T>();
const T *filter_data = filter->data<T>();
T *padded_out_data = out_tensor->mutable_data<T>();
auto &in_shape = input->shape();
auto &out_shape = out_tensor->shape();
......@@ -209,19 +210,19 @@ MaceStatus GroupDeconv2dGeneral::Compute(const OpContext *context,
void RegisterDepthwiseDeconv2dGeneralDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, DepthwiseDeconv2dGeneral, delegator::DepthwiseDeconv2dParam,
registry, DepthwiseDeconv2dGeneral<float>,
delegator::DepthwiseDeconv2dParam,
MACE_DELEGATOR_KEY(DepthwiseDeconv2d, DeviceType::CPU,
float, ImplType::NEON));
}
void RegisterGroupDeconv2dGeneralDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, GroupDeconv2dGeneral, delegator::GroupDeconv2dParam,
registry, GroupDeconv2dGeneral<float>, delegator::GroupDeconv2dParam,
MACE_DELEGATOR_KEY(GroupDeconv2d, DeviceType::CPU,
float, ImplType::NEON));
}
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_GENERAL_H_
#define MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_GENERAL_H_
#ifndef MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_GENERAL_H_
#define MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_GENERAL_H_
#include <vector>
#include <memory>
......@@ -21,7 +21,7 @@
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/ops/arm/fp32/deconv_2d.h"
#include "mace/ops/arm/base/deconv_2d.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/depthwise_deconv_2d.h"
#include "mace/public/mace.h"
......@@ -29,13 +29,13 @@
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
template<typename T>
class DepthwiseDeconv2dGeneral : public Deconv2dBase {
public:
explicit DepthwiseDeconv2dGeneral(
const delegator::DepthwiseDeconv2dParam &param)
: Deconv2dBase(param) {}
: Deconv2dBase(param, sizeof(T)) {}
virtual ~DepthwiseDeconv2dGeneral() {}
MaceStatus Compute(
......@@ -46,10 +46,11 @@ class DepthwiseDeconv2dGeneral : public Deconv2dBase {
Tensor *output) override;
};
template<typename T>
class GroupDeconv2dGeneral : public Deconv2dBase {
public:
explicit GroupDeconv2dGeneral(const delegator::GroupDeconv2dParam &param)
: Deconv2dBase(param) {}
: Deconv2dBase(param, sizeof(T)) {}
virtual ~GroupDeconv2dGeneral() {}
MaceStatus Compute(
......@@ -60,9 +61,8 @@ class GroupDeconv2dGeneral : public Deconv2dBase {
Tensor *output) override;
};
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_DEPTHWISE_DECONV_2D_GENERAL_H_
#endif // MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_GENERAL_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_MXN_H_
#define MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_MXN_H_
#include <vector>
#include <memory>
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/core/types.h"
#include "mace/ops/arm/base/deconv_2d.h"
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/depthwise_deconv_2d.h"
#include "mace/public/mace.h"
namespace mace {
namespace ops {
namespace arm {
template<typename T>
class DepthwiseDeconv2dKMxN : public Deconv2dBase {
public:
explicit DepthwiseDeconv2dKMxN(
const delegator::DepthwiseDeconv2dParam &param)
: Deconv2dBase(param, sizeof(T)) {}
virtual ~DepthwiseDeconv2dKMxN() {}
MaceStatus Compute(
const OpContext *context, const Tensor *input, const Tensor *filter,
const Tensor *output_shape, Tensor *output) override {
std::unique_ptr<Tensor> padded_out;
std::vector<int> out_pad_size;
group_ = input->dim(1);
ResizeOutAndPadOut(context,
input,
filter,
output_shape,
output,
&out_pad_size,
&padded_out);
Tensor *out_tensor = output;
if (padded_out != nullptr) {
out_tensor = padded_out.get();
}
out_tensor->Clear();
Tensor::MappingGuard input_mapper(input);
Tensor::MappingGuard filter_mapper(filter);
Tensor::MappingGuard output_mapper(output);
const T *input_data = input->data<float>();
const T *filter_data = filter->data<float>();
T *padded_out_data = out_tensor->mutable_data<float>();
DepthwiseDeconvComputeParam p =
PreWorkAndGetDepthwiseDeconvParam(context, input, out_tensor);
DoCompute(p, filter_data, input_data, padded_out_data);
UnPadOutput(*out_tensor, out_pad_size, output);
return MaceStatus::MACE_SUCCESS;
}
virtual MaceStatus DoCompute(
const DepthwiseDeconvComputeParam &p, const T *filter,
const T *input_data, T *padded_out_data) = 0;
};
template<typename T>
class GroupDeconv2dKMxN : public Deconv2dBase {
public:
explicit GroupDeconv2dKMxN(
const delegator::DepthwiseDeconv2dParam &param)
: Deconv2dBase(param, sizeof(T)) {}
virtual ~GroupDeconv2dKMxN() {}
MaceStatus Compute(
const OpContext *context, const Tensor *input, const Tensor *filter,
const Tensor *output_shape, Tensor *output) override {
std::unique_ptr<Tensor> padded_out;
std::vector<int> out_pad_size;
ResizeOutAndPadOut(context,
input,
filter,
output_shape,
output,
&out_pad_size,
&padded_out);
Tensor *out_tensor = output;
if (padded_out != nullptr) {
out_tensor = padded_out.get();
}
out_tensor->Clear();
Tensor::MappingGuard input_mapper(input);
Tensor::MappingGuard filter_mapper(filter);
Tensor::MappingGuard output_mapper(output);
auto input_data = input->data<float>();
auto filter_data = filter->data<float>();
auto padded_out_data = out_tensor->mutable_data<float>();
GroupDeconvComputeParam p =
PreWorkAndGetGroupDeconvParam(context, input, out_tensor);
DoCompute(p, filter_data, input_data, padded_out_data);
UnPadOutput(*out_tensor, out_pad_size, output);
return MaceStatus::MACE_SUCCESS;
}
virtual MaceStatus DoCompute(
const GroupDeconvComputeParam &p, const T *filter,
const T *input_data, T *padded_out_data) = 0;
};
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_BASE_DEPTHWISE_DECONV_2D_MXN_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/gemm.h"
namespace mace {
namespace ops {
namespace arm {
void RegisterGemmDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Gemm<float>, delegator::GemmParam,
MACE_DELEGATOR_KEY(Gemm, DeviceType::CPU, float, ImplType::NEON));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_GEMM_H_
#define MACE_OPS_ARM_FP32_GEMM_H_
#ifndef MACE_OPS_ARM_BASE_GEMM_H_
#define MACE_OPS_ARM_BASE_GEMM_H_
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
......@@ -28,8 +28,10 @@
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
enum { kNoCache, kCacheLhs, kCacheRhs };
template<typename T>
class Gemm : public delegator::Gemm {
public:
explicit Gemm(const delegator::GemmParam &param)
......@@ -68,26 +70,49 @@ class Gemm : public delegator::Gemm {
const bool transpose_out,
const bool lhs_batched,
const bool rhs_batched,
Tensor *output) override;
Tensor *output) override {
index_t rows = transpose_lhs ? lhs_cols : lhs_rows;
index_t depth = transpose_lhs ? lhs_rows : lhs_cols;
index_t cols = transpose_rhs ? rhs_rows : rhs_cols;
index_t depth2 = transpose_rhs ? rhs_cols : rhs_rows;
MACE_CHECK(depth == depth2,
"Matrices that multiply have inconsistent depth dim: ",
depth,
" vs. ",
depth2);
return Compute(context,
lhs,
rhs,
batch,
rows,
cols,
depth,
transpose_lhs ? ColMajor : RowMajor,
transpose_rhs ? ColMajor : RowMajor,
transpose_out ? ColMajor : RowMajor,
lhs_batched,
rhs_batched,
output);
}
private:
void ComputeBlock(const float *packed_lhs_data,
const float *packed_rhs_data,
protected:
void ComputeBlock(const T *packed_lhs_data,
const T *packed_rhs_data,
const index_t depth_padded,
float *packed_output_data);
void PackLhs(const MatrixMap<const float> &lhs,
float *packed_lhs);
T *packed_output_data);
void PackRhs(const MatrixMap<const float> &rhs,
float *packed_rhs);
void PackLhs(const MatrixMap<const T> &lhs,
T *packed_lhs);
void UnpackOutput(const float *packed_output,
MatrixMap<float> *output);
void PackRhs(const MatrixMap<const T> &rhs,
T *packed_rhs);
void UnpackOutput(const T *packed_output,
MatrixMap<T> *output);
template<int RowBlockSize, int ColBlockSize>
void Unpack(const float *packed_output,
MatrixMap<float> *output) {
void Unpack(const T *packed_output,
MatrixMap<T> *output) {
const index_t rows = output->rows();
const index_t cols = output->cols();
for (index_t r = 0; r < rows; ++r) {
......@@ -98,9 +123,9 @@ class Gemm : public delegator::Gemm {
}
template<int WidthBlockSize, int DepthBlockSize>
void Pack(const MatrixMap<const float> &matrix,
void Pack(const MatrixMap<const T> &matrix,
MatrixMajor dst_major,
float *packed_matrix) {
T *packed_matrix) {
const index_t rows = matrix.rows();
const index_t cols = matrix.cols();
index_t depth = cols;
......@@ -109,7 +134,7 @@ class Gemm : public delegator::Gemm {
depth = rows;
}
const index_t depth_padded = RoundUp(depth, static_cast<index_t>(4));
memset(packed_matrix, 0, sizeof(float) * WidthBlockSize * depth_padded);
memset(packed_matrix, 0, sizeof(T) * WidthBlockSize * depth_padded);
if (dst_major == ColMajor) {
for (index_t c = 0; c < cols; ++c) {
for (index_t r = 0; r < rows; ++r) {
......@@ -125,31 +150,14 @@ class Gemm : public delegator::Gemm {
}
}
private:
Buffer pack_cache_;
bool should_cache_pack_;
int cached_;
};
template<>
void Gemm::Pack<4, 4>(const MatrixMap<const float> &matrix,
MatrixMajor dst_major,
float *packed_matrix);
template<>
void Gemm::Pack<8, 4>(const MatrixMap<const float> &matrix,
MatrixMajor dst_major,
float *packed_matrix);
template<>
void Gemm::Unpack<4, 8>(const float *packed_output, MatrixMap<float> *output);
template<>
void Gemm::Unpack<8, 8>(const float *packed_output, MatrixMap<float> *output);
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_GEMM_H_
#endif // MACE_OPS_ARM_BASE_GEMM_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/base/gemv.h"
namespace mace {
namespace ops {
namespace arm {
void RegisterGemvDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Gemv<float>, DelegatorParam,
MACE_DELEGATOR_KEY(Gemv, DeviceType::CPU, float, ImplType::NEON));
}
} // namespace arm
} // namespace ops
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_ARM_FP32_GEMV_H_
#define MACE_OPS_ARM_FP32_GEMV_H_
#ifndef MACE_OPS_ARM_BASE_GEMV_H_
#define MACE_OPS_ARM_BASE_GEMV_H_
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
......@@ -23,8 +23,8 @@
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
template<typename T>
class Gemv : public delegator::Gemv {
public:
explicit Gemv(const DelegatorParam &param) : delegator::Gemv(param) {}
......@@ -43,9 +43,8 @@ class Gemv : public delegator::Gemv {
Tensor *output) override;
};
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
#endif // MACE_OPS_ARM_FP32_GEMV_H_
#endif // MACE_OPS_ARM_BASE_GEMV_H_
......@@ -12,186 +12,139 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/delegator/activation.h"
#include <arm_neon.h>
#include <algorithm>
#include "mace/ops/arm/base/activation.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Activation : public delegator::Activation {
public:
explicit Activation(const delegator::ActivationParam &param)
: delegator::Activation(param) {}
~Activation() = default;
MaceStatus Compute(const OpContext *context,
const Tensor *input, Tensor *output) override;
private:
void DoActivation(const OpContext *context,
const Tensor *input, Tensor *output);
};
MaceStatus Activation::Compute(const OpContext *context,
const Tensor *input, Tensor *output) {
Tensor::MappingGuard input_guard(input);
if (input != output) {
MACE_RETURN_IF_ERROR(output->ResizeLike(input));
Tensor::MappingGuard output_guard(output);
DoActivation(context, input, output);
} else {
DoActivation(context, input, output);
template<>
void Activation<float>::ActivateRelu(utils::ThreadPool *thread_pool,
const float *input_data,
const index_t input_size,
float *output_data) {
const float32x4_t vzero = vdupq_n_f32(0.f);
const index_t block_count = input_size / 4;
thread_pool->Compute1D(
[=](index_t start, index_t end, index_t step) {
auto input_ptr = input_data + start * 4;
auto output_ptr = output_data + start * 4;
for (index_t i = start; i < end; i += step) {
float32x4_t v = vld1q_f32(input_ptr);
v = vmaxq_f32(v, vzero);
vst1q_f32(output_ptr, v);
input_ptr += 4;
output_ptr += 4;
}
},
0, block_count, 1);
// remain
for (index_t i = block_count * 4; i < input_size; ++i) {
output_data[i] = std::max(0.f, input_data[i]);
}
}
return MaceStatus::MACE_SUCCESS;
template<>
void Activation<float>::ActivateRelux(utils::ThreadPool *thread_pool,
const float *input_data,
const index_t input_size,
float *output_data) {
const float32x4_t vzero = vdupq_n_f32(0.f);
const float32x4_t vlimit = vdupq_n_f32(limit_);
const index_t block_count = input_size / 4;
thread_pool->Compute1D(
[=](index_t start, index_t end, index_t step) {
auto input_ptr = input_data + start * 4;
auto output_ptr = output_data + start * 4;
for (index_t i = start; i < end; i += step) {
float32x4_t v = vld1q_f32(input_ptr);
v = vmaxq_f32(v, vzero);
v = vminq_f32(v, vlimit);
vst1q_f32(output_ptr, v);
input_ptr += 4;
output_ptr += 4;
}
},
0, block_count, 1);
// remain
for (index_t i = block_count * 4; i < input_size; ++i) {
output_data[i] = std::max(0.f, std::min(limit_, input_data[i]));
}
}
void Activation::DoActivation(const OpContext *context,
const Tensor *input,
Tensor *output) {
auto input_data = input->data<float>();
auto output_data = output->mutable_data<float>();
const index_t size = input->size();
utils::ThreadPool &thread_pool =
context->device()->cpu_runtime()->thread_pool();
switch (type_) {
case RELU: {
const float32x4_t vzero = vdupq_n_f32(0.f);
const index_t block_count = size / 4;
thread_pool.Compute1D(
[=](index_t start, index_t end, index_t step) {
auto input_ptr = input_data + start * 4;
auto output_ptr = output_data + start * 4;
for (index_t i = start; i < end; i += step) {
float32x4_t v = vld1q_f32(input_ptr);
v = vmaxq_f32(v, vzero);
vst1q_f32(output_ptr, v);
input_ptr += 4;
output_ptr += 4;
}
},
0, block_count, 1);
// remain
for (index_t i = block_count * 4; i < size; ++i) {
output_data[i] = std::max(0.f, input_data[i]);
}
break;
}
case RELUX: {
const float32x4_t vzero = vdupq_n_f32(0.f);
const float32x4_t vlimit = vdupq_n_f32(limit_);
const index_t block_count = size / 4;
thread_pool.Compute1D(
[=](index_t start, index_t end, index_t step) {
auto input_ptr = input_data + start * 4;
auto output_ptr = output_data + start * 4;
for (index_t i = start; i < end; i += step) {
float32x4_t v = vld1q_f32(input_ptr);
v = vmaxq_f32(v, vzero);
v = vminq_f32(v, vlimit);
vst1q_f32(output_ptr, v);
input_ptr += 4;
output_ptr += 4;
}
},
0, block_count, 1);
// remain
for (index_t i = block_count * 4; i < size; ++i) {
output_data[i] = std::max(0.f, std::min(limit_, input_data[i]));
}
break;
}
case LEAKYRELU: {
const float32x4_t vzero = vdupq_n_f32(0.f);
const float32x4_t valpha = vdupq_n_f32(leakyrelu_coefficient_);
const index_t block_count = size / 4;
thread_pool.Compute1D(
[=](index_t start, index_t end, index_t step) {
auto input_ptr = input_data + start * 4;
auto output_ptr = output_data + start * 4;
for (index_t i = start; i < end; i += step) {
float32x4_t v = vld1q_f32(input_ptr);
float32x4_t u = vminq_f32(v, vzero);
v = vmaxq_f32(v, vzero);
v = vmlaq_f32(v, valpha, u);
vst1q_f32(output_ptr, v);
input_ptr += 4;
output_ptr += 4;
}
},
0, block_count, 1);
// remain
for (index_t i = block_count * 4; i < size; ++i) {
output_data[i] = std::max(input_data[i], 0.f) +
std::min(input_data[i], 0.f) * leakyrelu_coefficient_;
}
break;
}
case TANH: {
thread_pool.Compute1D(
[=](index_t start, index_t end, index_t step) {
for (index_t i = start; i < end; i += step) {
output_data[i] = std::tanh(input_data[i]);
}
},
0, size, 1);
break;
}
case SIGMOID: {
thread_pool.Compute1D(
[=](index_t start, index_t end, index_t step) {
for (index_t i = start; i < end; i += step) {
output_data[i] = 1 / (1 + std::exp(-(input_data[i])));
}
},
0, size, 1);
break;
}
case NOOP: {
break;
}
default: {
MACE_NOT_IMPLEMENTED;
}
template<>
void Activation<float>::ActivateLeakyRelu(utils::ThreadPool *thread_pool,
const float *input_data,
const index_t input_size,
float *output_data) {
const float32x4_t vzero = vdupq_n_f32(0.f);
const float32x4_t valpha = vdupq_n_f32(leakyrelu_coefficient_);
const index_t block_count = input_size / 4;
thread_pool->Compute1D(
[=](index_t start, index_t end, index_t step) {
auto input_ptr = input_data + start * 4;
auto output_ptr = output_data + start * 4;
for (index_t i = start; i < end; i += step) {
float32x4_t v = vld1q_f32(input_ptr);
float32x4_t u = vminq_f32(v, vzero);
v = vmaxq_f32(v, vzero);
v = vmlaq_f32(v, valpha, u);
vst1q_f32(output_ptr, v);
input_ptr += 4;
output_ptr += 4;
}
},
0, block_count, 1);
// remain
for (index_t i = block_count * 4; i < input_size; ++i) {
output_data[i] = std::max(input_data[i], 0.f) +
std::min(input_data[i], 0.f) * leakyrelu_coefficient_;
}
}
void RegisterActivationDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Activation, delegator::ActivationParam,
MACE_DELEGATOR_KEY(Activation, DeviceType::CPU, float, ImplType::NEON));
template<>
void Activation<float>::ActivateTanh(utils::ThreadPool *thread_pool,
const float *input_data,
const index_t input_size,
float *output_data) {
thread_pool->Compute1D(
[=](index_t start, index_t end, index_t step) {
for (index_t i = start; i < end; i += step) {
output_data[i] = std::tanh(input_data[i]);
}
},
0, input_size, 1);
}
template<>
void Activation<float>::ActivateSigmoid(utils::ThreadPool *thread_pool,
const float *input_data,
const index_t input_size,
float *output_data) {
thread_pool->Compute1D(
[=](index_t start, index_t end, index_t step) {
for (index_t i = start; i < end; i += step) {
output_data[i] = 1 / (1 + std::exp(-(input_data[i])));
}
},
0, input_size, 1);
}
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
......@@ -13,129 +13,81 @@
// limitations under the License.
#include <arm_neon.h>
#include "mace/ops/delegator/bias_add.h"
#include "mace/ops/arm/base/bias_add.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class BiasAdd : public delegator::BiasAdd {
public:
explicit BiasAdd(const DelegatorParam &param) : delegator::BiasAdd(param) {}
~BiasAdd() = default;
MaceStatus Compute(const OpContext *context, const Tensor *input,
const Tensor *bias, Tensor *output) override;
private:
void AddBias(const OpContext *context, const Tensor *input,
const Tensor *bias, Tensor *output);
};
MaceStatus BiasAdd::Compute(const OpContext *context,
const Tensor *input,
const Tensor *bias,
Tensor *output) {
Tensor::MappingGuard input_guard(input);
Tensor::MappingGuard bias_guard(bias);
if (input != output) {
MACE_RETURN_IF_ERROR(output->ResizeLike(input));
if (bias == nullptr) {
output->Copy(*input);
} else {
Tensor::MappingGuard output_guard(output);
AddBias(context, input, bias, output);
}
} else {
if (bias != nullptr) {
AddBias(context, input, bias, output);
template<>
void BiasAdd<float>::Add1DimBias(
utils::ThreadPool *thread_pool, const float *input_data,
const float *bias_data, float *output_data, const index_t batch,
const index_t channels, const index_t image_size) {
const index_t block_count = image_size / 4;
const index_t remain = image_size % 4;
thread_pool->Compute2D([=](index_t start0, index_t end0, index_t step0,
index_t start1, index_t end1, index_t step1) {
for (index_t b = start0; b < end0; b += step0) {
const index_t b_offset = b * channels;
for (index_t c = start1; c < end1; c += step1) {
const index_t offset = (b_offset + c) * image_size;
auto input_ptr = input_data + offset;
auto output_ptr = output_data + offset;
const float bias = bias_data[c];
float32x4_t vbias = vdupq_n_f32(bias);
for (index_t i = 0; i < block_count; ++i) {
float32x4_t v = vld1q_f32(input_ptr);
v = vaddq_f32(v, vbias);
vst1q_f32(output_ptr, v);
input_ptr += 4;
output_ptr += 4;
}
for (index_t i = 0; i < remain; ++i) {
(*output_ptr++) = (*input_ptr++) + bias;
}
}
}
}
return MaceStatus::MACE_SUCCESS;
}, 0, batch, 1, 0, channels, 1);
}
void BiasAdd::AddBias(const OpContext *context,
const Tensor *input,
const Tensor *bias,
mace::Tensor *output) {
auto input_data = input->data<float>();
auto bias_data = bias->data<float>();
auto output_data = output->mutable_data<float>();
const index_t batch = input->dim(0);
const index_t channels = input->dim(1);
const index_t height = output->dim(2);
const index_t width = output->dim(3);
const index_t image_size = height * width;
template<>
void BiasAdd<float>::Add2DimsBias(
utils::ThreadPool *thread_pool, const float *input_data,
const float *bias_data, float *output_data, const index_t batch,
const index_t channels, const index_t image_size) {
const index_t block_count = image_size / 4;
const index_t remain = image_size % 4;
utils::ThreadPool
&thread_pool = context->device()->cpu_runtime()->thread_pool();
if (bias->dim_size() == 1) {
thread_pool.Compute2D([=](index_t start0, index_t end0, index_t step0,
index_t start1, index_t end1, index_t step1) {
for (index_t b = start0; b < end0; b += step0) {
const index_t b_offset = b * channels;
for (index_t c = start1; c < end1; c += step1) {
const index_t offset = (b_offset + c) * image_size;
auto input_ptr = input_data + offset;
auto output_ptr = output_data + offset;
const float bias = bias_data[c];
float32x4_t vbias = vdupq_n_f32(bias);
for (index_t i = 0; i < block_count; ++i) {
float32x4_t v = vld1q_f32(input_ptr);
v = vaddq_f32(v, vbias);
vst1q_f32(output_ptr, v);
input_ptr += 4;
output_ptr += 4;
}
for (index_t i = 0; i < remain; ++i) {
(*output_ptr++) = (*input_ptr++) + bias;
}
thread_pool->Compute2D([=](index_t start0, index_t end0, index_t step0,
index_t start1, index_t end1, index_t step1) {
for (index_t b = start0; b < end0; b += step0) {
const index_t b_offset = b * channels;
for (index_t c = start1; c < end1; c += step1) {
const index_t offset = (b_offset + c) * image_size;
auto input_ptr = input_data + offset;
auto output_ptr = output_data + offset;
const float bias = bias_data[b * channels + c];
float32x4_t vbias = vdupq_n_f32(bias);
for (index_t i = 0; i < block_count; ++i) {
float32x4_t v = vld1q_f32(input_ptr);
v = vaddq_f32(v, vbias);
vst1q_f32(output_ptr, v);
input_ptr += 4;
output_ptr += 4;
}
}
}, 0, batch, 1, 0, channels, 1);
} else {
thread_pool.Compute2D([=](index_t start0, index_t end0, index_t step0,
index_t start1, index_t end1, index_t step1) {
for (index_t b = start0; b < end0; b += step0) {
const index_t b_offset = b * channels;
for (index_t c = start1; c < end1; c += step1) {
const index_t offset = (b_offset + c) * image_size;
auto input_ptr = input_data + offset;
auto output_ptr = output_data + offset;
const float bias = bias_data[b * channels + c];
float32x4_t vbias = vdupq_n_f32(bias);
for (index_t i = 0; i < block_count; ++i) {
float32x4_t v = vld1q_f32(input_ptr);
v = vaddq_f32(v, vbias);
vst1q_f32(output_ptr, v);
input_ptr += 4;
output_ptr += 4;
}
for (index_t i = 0; i < remain; ++i) {
(*output_ptr++) = (*input_ptr++) + bias;
}
for (index_t i = 0; i < remain; ++i) {
(*output_ptr++) = (*input_ptr++) + bias;
}
}
}, 0, batch, 1, 0, channels, 1);
}
}
void RegisterBiasAddDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, BiasAdd, DelegatorParam,
MACE_DELEGATOR_KEY(BiasAdd, DeviceType::CPU, float, ImplType::NEON));
}
}, 0, batch, 1, 0, channels, 1);
}
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
......@@ -21,7 +21,6 @@
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
inline float32x4_t neon_vfma_lane_0(float32x4_t a,
float32x4_t b,
......@@ -63,7 +62,6 @@ inline float32x4_t neon_vfma_lane_3(float32x4_t a,
#endif
}
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
......
此差异已折叠。
此差异已折叠。
......@@ -18,8 +18,8 @@
#include "mace/ops/common/conv_pool_2d_util.h"
#include "mace/ops/delegator/conv_2d.h"
#include "mace/utils/memory.h"
#include "mace/utils/math.h"
#include "mace/utils/memory.h"
namespace mace {
namespace ops {
......
......@@ -20,8 +20,8 @@
#include "mace/core/ops/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/ops/arm/fp32/gemm.h"
#include "mace/ops/arm/base/conv_2d.h"
#include "mace/ops/arm/base/gemm.h"
#include "mace/public/mace.h"
namespace mace {
......@@ -32,7 +32,7 @@ namespace fp32 {
class Conv2dK3x3Winograd : public Conv2dBase {
public:
explicit Conv2dK3x3Winograd(const delegator::Conv2dParam &param)
: Conv2dBase(param),
: Conv2dBase(param, sizeof(float)),
gemm_(delegator::GemmParam()),
transformed_filter_(nullptr),
out_tile_size_(0) {}
......@@ -94,7 +94,7 @@ class Conv2dK3x3Winograd : public Conv2dBase {
index_t tile_count,
float *output);
Gemm gemm_;
Gemm<float> gemm_;
std::unique_ptr<Tensor> transformed_filter_;
index_t out_tile_size_;
};
......
......@@ -15,26 +15,12 @@
#include <arm_neon.h>
#include <memory>
#include "mace/ops/arm/fp32/conv_2d.h"
#include "mace/ops/arm/base/conv_2d_5x5.h"
#include "mace/ops/delegator/conv_2d.h"
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
class Conv2dK5x5S1 : public Conv2dBase {
public:
explicit Conv2dK5x5S1(const delegator::Conv2dParam &param)
: Conv2dBase(param) {}
virtual ~Conv2dK5x5S1() {}
MaceStatus Compute(
const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) override;
};
#define MACE_Conv2dNeonK5x5SnLoadCalc4 \
/* load filter (4 outch x 1 height x 4 width) */ \
......@@ -91,89 +77,43 @@ class Conv2dK5x5S1 : public Conv2dBase {
vo0 = vmlaq_lane_f32(vo0, vi3, vget_high_f32(vf00), 1); \
vo0 = vmlaq_lane_f32(vo0, vi4, vf01, 1);
MaceStatus Conv2dK5x5S1::Compute(const OpContext *context,
const Tensor *input,
const Tensor *filter,
Tensor *output) {
std::unique_ptr<const Tensor> padded_input;
std::unique_ptr<Tensor> padded_output;
ResizeOutAndPadInOut(context,
input,
filter,
output,
1,
4,
&padded_input,
&padded_output);
const Tensor *in_tensor = input;
if (padded_input != nullptr) {
in_tensor = padded_input.get();
}
Tensor *out_tensor = output;
if (padded_output != nullptr) {
out_tensor = padded_output.get();
}
out_tensor->Clear();
Tensor::MappingGuard in_guard(input);
Tensor::MappingGuard filter_guard(filter);
Tensor::MappingGuard out_guard(output);
auto filter_data = filter->data<float>();
auto input_data = in_tensor->data<float>();
auto output_data = out_tensor->mutable_data<float>();
auto &in_shape = in_tensor->shape();
auto &out_shape = out_tensor->shape();
const index_t batch = in_shape[0];
const index_t in_channels = in_shape[1];
const index_t in_height = in_shape[2];
const index_t in_width = in_shape[3];
const index_t out_channels = out_shape[1];
const index_t out_height = out_shape[2];
const index_t out_width = out_shape[3];
const index_t in_image_size = in_height * in_width;
const index_t out_image_size = out_height * out_width;
const index_t in_batch_size = in_channels * in_image_size;
const index_t out_batch_size = out_channels * out_image_size;
utils::ThreadPool
&thread_pool = context->device()->cpu_runtime()->thread_pool();
thread_pool.Compute2D([=](index_t start0, index_t end0, index_t step0,
index_t start1, index_t end1, index_t step1) {
template<>
MaceStatus Conv2dK5x5S1<float>::DoCompute(
const ConvComputeParam &p, const float *filter_data,
const float *input_data, float *output_data) {
p.thread_pool.Compute2D([=](index_t start0, index_t end0, index_t step0,
index_t start1, index_t end1, index_t step1) {
for (index_t b = start0; b < end0; b += step0) {
for (index_t m = start1; m < end1; m += step1) {
if (m + 3 < out_channels) {
if (m + 3 < p.out_channels) {
float *out_ptr0_base =
output_data + b * out_batch_size + m * out_image_size;
output_data + b * p.out_batch_size + m * p.out_image_size;
float *out_ptr1_base =
output_data + b * out_batch_size + (m + 1) * out_image_size;
output_data + b * p.out_batch_size + (m + 1) * p.out_image_size;
float *out_ptr2_base =
output_data + b * out_batch_size + (m + 2) * out_image_size;
output_data + b * p.out_batch_size + (m + 2) * p.out_image_size;
float *out_ptr3_base =
output_data + b * out_batch_size + (m + 3) * out_image_size;
output_data + b * p.out_batch_size + (m + 3) * p.out_image_size;
for (index_t c = 0; c < in_channels; ++c) {
for (index_t c = 0; c < p.in_channels; ++c) {
const float *in_ptr_base =
input_data + b * in_batch_size + c * in_image_size;
input_data + b * p.in_batch_size + c * p.in_image_size;
const float
*filter_ptr0 = filter_data + m * in_channels * 25 + c * 25;
*filter_ptr0 = filter_data + m * p.in_channels * 25 + c * 25;
const float *filter_ptr1 =
filter_data + (m + 1) * in_channels * 25 + c * 25;
filter_data + (m + 1) * p.in_channels * 25 + c * 25;
const float *filter_ptr2 =
filter_data + (m + 2) * in_channels * 25 + c * 25;
filter_data + (m + 2) * p.in_channels * 25 + c * 25;
const float *filter_ptr3 =
filter_data + (m + 3) * in_channels * 25 + c * 25;
for (index_t h = 0; h < out_height; ++h) {
for (index_t w = 0; w + 3 < out_width; w += 4) {
filter_data + (m + 3) * p.in_channels * 25 + c * 25;
for (index_t h = 0; h < p.out_height; ++h) {
for (index_t w = 0; w + 3 < p.out_width; w += 4) {
// input offset
index_t in_offset = h * in_width + w;
index_t in_offset = h * p.in_width + w;
// output (4 outch x 1 height x 4 width): vo_outch_height
float32x4_t vo0, vo1, vo2, vo3;
// load output
index_t out_offset = h * out_width + w;
index_t out_offset = h * p.out_width + w;
vo0 = vld1q_f32(out_ptr0_base + out_offset);
vo1 = vld1q_f32(out_ptr1_base + out_offset);
vo2 = vld1q_f32(out_ptr2_base + out_offset);
......@@ -190,7 +130,7 @@ MaceStatus Conv2dK5x5S1::Compute(const OpContext *context,
MACE_Conv2dNeonK5x5SnLoadCalc4;
in_offset += in_width;
in_offset += p.in_width;
filter_ptr0 += 5;
filter_ptr1 += 5;
filter_ptr2 += 5;
......@@ -210,22 +150,22 @@ MaceStatus Conv2dK5x5S1::Compute(const OpContext *context,
} // h
} // c
} else {
for (index_t mm = m; mm < out_channels; ++mm) {
for (index_t mm = m; mm < p.out_channels; ++mm) {
float *out_ptr0_base =
output_data + b * out_batch_size + mm * out_image_size;
for (index_t c = 0; c < in_channels; ++c) {
output_data + b * p.out_batch_size + mm * p.out_image_size;
for (index_t c = 0; c < p.in_channels; ++c) {
const float *in_ptr_base =
input_data + b * in_batch_size + c * in_image_size;
input_data + b * p.in_batch_size + c * p.in_image_size;
const float
*filter_ptr0 = filter_data + mm * in_channels * 25 + c * 25;
for (index_t h = 0; h < out_height; ++h) {
for (index_t w = 0; w + 3 < out_width; w += 4) {
*filter_ptr0 = filter_data + mm * p.in_channels * 25 + c * 25;
for (index_t h = 0; h < p.out_height; ++h) {
for (index_t w = 0; w + 3 < p.out_width; w += 4) {
// input offset
index_t in_offset = h * in_width + w;
index_t in_offset = h * p.in_width + w;
// output (1 outch x 1 height x 4 width): vo_outch_height
float32x4_t vo0;
// load output
index_t out_offset = h * out_width + w;
index_t out_offset = h * p.out_width + w;
vo0 = vld1q_f32(out_ptr0_base + out_offset);
for (index_t r = 0; r < 5; ++r) {
// input (3 slide)
......@@ -239,7 +179,7 @@ MaceStatus Conv2dK5x5S1::Compute(const OpContext *context,
MACE_Conv2dNeonK5x5SnLoadCalc1;
in_offset += in_width;
in_offset += p.in_width;
filter_ptr0 += 5;
} // r
......@@ -252,20 +192,11 @@ MaceStatus Conv2dK5x5S1::Compute(const OpContext *context,
} // if
} // m
} // b
}, 0, batch, 1, 0, out_channels, 4);
}, 0, p.batch, 1, 0, p.out_channels, 4);
UnPadOutput(*out_tensor, output);
return MaceStatus::MACE_SUCCESS;
}
void RegisterConv2dK5x5Delegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Conv2dK5x5S1, delegator::Conv2dParam,
MACE_DELEGATOR_KEY_EX(Conv2d, DeviceType::CPU,
float, ImplType::NEON, K5x5S1));
}
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -12,12 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/arm/fp32/gemv.h"
#include <arm_neon.h>
#include <algorithm>
#include "mace/ops/arm/base/gemv.h"
#include "mace/utils/math.h"
#if !defined(__aarch64__)
......@@ -34,18 +32,18 @@ float vaddvq_f32(float32x4_t v) {
namespace mace {
namespace ops {
namespace arm {
namespace fp32 {
MaceStatus Gemv::Compute(const OpContext *context,
const Tensor *lhs,
const Tensor *rhs,
const Tensor *bias,
const index_t batch,
const index_t lhs_height,
const index_t lhs_width,
const bool lhs_batched,
const bool rhs_batched,
Tensor *output) {
template<>
MaceStatus Gemv<float>::Compute(const OpContext *context,
const Tensor *lhs,
const Tensor *rhs,
const Tensor *bias,
const index_t batch,
const index_t lhs_height,
const index_t lhs_width,
const bool lhs_batched,
const bool rhs_batched,
Tensor *output) {
MACE_UNUSED(context);
MACE_CHECK(output->size() == batch * lhs_height,
......@@ -378,13 +376,6 @@ MaceStatus Gemv::Compute(const OpContext *context,
#undef vaddvq_f32
#endif
void RegisterGemvDelegator(OpDelegatorRegistry *registry) {
MACE_REGISTER_DELEGATOR(
registry, Gemv, DelegatorParam,
MACE_DELEGATOR_KEY(Gemv, DeviceType::CPU, float, ImplType::NEON));
}
} // namespace fp32
} // namespace arm
} // namespace ops
} // namespace mace
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册