Commit 694fe556 authored by Liangliang He

Merge branch 'master' into 'master'

Add relu neon

See merge request !6
mace/kernels/BUILD

# Description:
# Mace neon kernels.
#
package(
    default_visibility = ["//visibility:public"],
)

licenses(["notice"])  # Apache 2.0

load("//mace:mace.bzl", "if_android")

cc_library(
    name = "kernels",
    srcs = glob(["*.cc"]) + if_android(glob(["neon/*.cc"])),
    hdrs = glob(["*.h"]) + if_android(glob(["neon/*.h"])),
    deps = [
        "//mace/core:core",
    ],
    copts = ['-std=c++11'],
)
mace/kernels/neon/relu_neon.cc

//
// Copyright (c) 2017 XiaoMi All rights reserved.
//

#include <algorithm>

#include <arm_neon.h>

#include "mace/kernels/neon/relu_neon.h"

namespace mace {
namespace kernels {

void NeonReluFuntion_float(const Tensor *input_tensor,
                           Tensor *output_tensor) {
  int64 size = input_tensor->size();
  output_tensor->ResizeLike(input_tensor);
  const float *input = input_tensor->data<float>();
  float *output = output_tensor->mutable_data<float>();

  float32x4_t _zero = vdupq_n_f32(0.f);
  // Vectorized path: process four floats per iteration.
  int64 i = 0;
  for (; i + 3 < size; i += 4) {
    float32x4_t _inp = vld1q_f32(input + i);
    float32x4_t _outp = vmaxq_f32(_inp, _zero);
    vst1q_f32(output + i, _outp);
  }
  // Scalar tail for element counts that are not a multiple of four.
  for (; i < size; ++i) {
    output[i] = std::max(input[i], 0.f);
  }
}

} // namespace kernels
} // namespace mace
\ No newline at end of file
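For reference, the same vectorized ReLU pattern can be tried outside the Mace Tensor API. The sketch below is not part of this merge request; the neon_relu helper, the sample values, and the compile command are illustrative assumptions, and it builds only on an ARM toolchain that provides arm_neon.h.

// Standalone sketch (illustrative, not a Mace API): NEON ReLU over a raw
// float buffer, four lanes per iteration, with a scalar tail loop.
// Example build on an ARM target: g++ -std=c++11 -O2 relu_sketch.cc
#include <arm_neon.h>
#include <algorithm>
#include <cstdio>
#include <vector>

void neon_relu(const float *input, float *output, size_t size) {
  float32x4_t zero = vdupq_n_f32(0.f);
  size_t i = 0;
  for (; i + 3 < size; i += 4) {
    float32x4_t in = vld1q_f32(input + i);       // load 4 floats
    vst1q_f32(output + i, vmaxq_f32(in, zero));  // max(x, 0) per lane
  }
  for (; i < size; ++i) {                        // leftover elements
    output[i] = std::max(input[i], 0.f);
  }
}

int main() {
  std::vector<float> x = {-2.f, -1.f, 0.f, 1.f, 2.f, 3.f, -4.f};
  std::vector<float> y(x.size());
  neon_relu(x.data(), y.data(), x.size());
  for (float v : y) std::printf("%g ", v);  // expected: 0 0 0 1 2 3 0
  std::printf("\n");
  return 0;
}

The four-element stride matches the 128-bit float32x4_t registers used by vmaxq_f32; the scalar tail keeps the result correct when the element count is not a multiple of four.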
mace/kernels/neon/relu_neon.h

//
// Copyright (c) 2017 XiaoMi All rights reserved.
//

#ifndef MACE_KERNELS_RELU_NEON_H_
#define MACE_KERNELS_RELU_NEON_H_

#include "mace/core/tensor.h"

namespace mace {
namespace kernels {

void NeonReluFuntion_float(const Tensor *input_tensor,
                           Tensor *output_tensor);

} // namespace kernels
} // namespace mace

#endif // MACE_KERNELS_RELU_NEON_H_
mace/kernels/relu.h

//
// Copyright (c) 2017 XiaoMi All rights reserved.
//

#ifndef MACE_KERNELS_RELU_H_
#define MACE_KERNELS_RELU_H_

#include <algorithm>

#include "mace/core/tensor.h"

namespace mace {
namespace kernels {

template <typename T>
void ReluFuntion(const Tensor *input_tensor, Tensor *output_tensor) {
  int64 size = input_tensor->size();
  output_tensor->ResizeLike(input_tensor);
  const T *input = input_tensor->data<T>();
  T *output = output_tensor->mutable_data<T>();
  for (int64 i = 0; i < size; ++i) {
    output[i] = std::max(input[i], static_cast<T>(0));
  }
}

} // namespace kernels
} // namespace mace

#endif // MACE_KERNELS_RELU_H_
\ No newline at end of file
mace/ops/BUILD

@@ -17,6 +17,7 @@ cc_library(
     deps = [
         "//mace/proto:cc_proto",
         "//mace/core:core",
+        "//mace/kernels:kernels",
     ],
     copts = ['-std=c++11'],
     alwayslink = 1,
mace/ops/relu.cc

@@ -4,42 +4,32 @@
 #include "mace/ops/relu.h"
 #include "mace/proto/mace.pb.h"
 #include "mace/kernels/relu.h"
+#if __ARM_NEON
+#include "mace/kernels/neon/relu_neon.h"
+#endif  // __ARM_NEON

 namespace mace {

 template <>
 bool ReluOp<DeviceType::CPU, float>::Run() {
-  const Tensor* X = Input(0);
-  Tensor* Y = Output(0);
-  Y->ResizeLike(X);
-  const float* Xdata = X->data<float>();
-  float* Ydata = Y->mutable_data<float>();
-  for (int i = 0; i < X->size(); ++i) {
-    Ydata[i] = std::max(Xdata[i], 0.f);
-    VLOG(0) << i << ": " << Xdata[i] << " " << Ydata[i];
-  }
+  const Tensor* input_tensor = Input(0);
+  Tensor* output_tensor = Output(0);
+  kernels::ReluFuntion<float>(input_tensor, output_tensor);
   return true;
 }

+REGISTER_CPU_OPERATOR(Relu, ReluOp<DeviceType::CPU, float>);
+#if __ARM_NEON
 template <>
 bool ReluOp<DeviceType::NEON, float>::Run() {
-  const Tensor* X = Input(0);
-  Tensor* Y = Output(0);
-  Y->ResizeLike(X);
-  const float* Xdata = X->data<float>();
-  float* Ydata = Y->mutable_data<float>();
-  for (int i = 0; i < X->size(); ++i) {
-    Ydata[i] = std::max(Xdata[i], 0.f);
-    VLOG(0) << i << ": " << Xdata[i] << " " << Ydata[i];
-  }
+  const Tensor* input_tensor = Input(0);
+  Tensor* output_tensor = Output(0);
+  kernels::NeonReluFuntion_float(input_tensor, output_tensor);
   return true;
 }

-REGISTER_CPU_OPERATOR(Relu, ReluOp<DeviceType::CPU, float>);
 REGISTER_NEON_OPERATOR(Relu, ReluOp<DeviceType::NEON, float>);
+#endif  // __ARM_NEON

 } // namespace mace