提交 aef11884 编写于 作者: L Liangliang He

Merge branch 'master' into 'master'

AddN op; Refactor op default template impl.

See merge request !11
...@@ -27,10 +27,11 @@ int main() { ...@@ -27,10 +27,11 @@ int main() {
arg_1->set_f(1.5); arg_1->set_f(1.5);
OperatorDef op_def_2; OperatorDef op_def_2;
op_def_2.add_input("Output0");
op_def_2.add_input("Output1"); op_def_2.add_input("Output1");
op_def_2.add_output("Output2"); op_def_2.add_output("Output2");
op_def_2.set_name("ReluTest2"); op_def_2.set_name("AddNTest");
op_def_2.set_type("Relu"); op_def_2.set_type("AddN");
auto arg_2 = op_def_2.add_arg(); auto arg_2 = op_def_2.add_arg();
arg_2->set_name("arg0"); arg_2->set_name("arg0");
arg_2->set_f(2.5); arg_2->set_f(2.5);
......
...@@ -18,10 +18,10 @@ void AddNFuntion(const vector<const Tensor*>& input_tensor, Tensor *output_tenso ...@@ -18,10 +18,10 @@ void AddNFuntion(const vector<const Tensor*>& input_tensor, Tensor *output_tenso
int64 size = input_tensor[0]->size(); int64 size = input_tensor[0]->size();
vector<const T*> inputs(n); vector<const T*> inputs(n);
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
inputs[i] = input_tensor[i]->data<float>(); inputs[i] = input_tensor[i]->data<T>();
} }
output_tensor->ResizeLike(input_tensor[0]); output_tensor->ResizeLike(input_tensor[0]);
float* output = output_tensor->mutable_data<T>(); T* output = output_tensor->mutable_data<T>();
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
for (int64 j = 0; j < size; ++j) { for (int64 j = 0; j < size; ++j) {
......
...@@ -14,8 +14,8 @@ template<typename T> ...@@ -14,8 +14,8 @@ template<typename T>
void ReluFuntion(const Tensor *input_tensor, Tensor *output_tensor) { void ReluFuntion(const Tensor *input_tensor, Tensor *output_tensor) {
int64 size = input_tensor->size(); int64 size = input_tensor->size();
output_tensor->ResizeLike(input_tensor); output_tensor->ResizeLike(input_tensor);
const float *input = input_tensor->data<float>(); const T *input = input_tensor->data<T>();
float *output = output_tensor->mutable_data<float>(); T *output = output_tensor->mutable_data<T>();
for (int64 i = 0; i < size; ++i) { for (int64 i = 0; i < size; ++i) {
output[i] = std::max(input[i], static_cast<T>(0)); output[i] = std::max(input[i], static_cast<T>(0));
......
...@@ -12,7 +12,7 @@ load("//mace:mace.bzl", "if_android") ...@@ -12,7 +12,7 @@ load("//mace:mace.bzl", "if_android")
cc_library( cc_library(
name = "ops", name = "ops",
srcs = ["relu.cc"], srcs = glob(["*.cc"]),
hdrs = glob(["*.h"]), hdrs = glob(["*.h"]),
deps = [ deps = [
"//mace/proto:cc_proto", "//mace/proto:cc_proto",
...@@ -23,19 +23,4 @@ cc_library( ...@@ -23,19 +23,4 @@ cc_library(
alwayslink = 1, alwayslink = 1,
) )
cc_test(
name = "relu_test",
srcs = ["relu_test.cc",],
deps = [
"@gtest//:gtest_main",
":ops",
],
copts = ['-std=c++11'],
linkopts = if_android([
"-pie",
"-llog",
"-latomic",
]),
linkstatic = 1,
)
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/ops/addn.h"
#include "mace/proto/mace.pb.h"
#if __ARM_NEON
#include "mace/kernels/neon/addn_neon.h"
#endif // __ARM_NEON
namespace mace {
// Register the generic CPU implementation of AddN (element-wise sum of the
// op's input tensors); the default Run() lives in AddNOp (see mace/ops/addn.h).
REGISTER_CPU_OPERATOR(AddN, AddNOp<DeviceType::CPU, float>);
#if __ARM_NEON
// NEON build only: full specialization that bypasses the templated default
// Run() and dispatches to the hand-written NEON kernel instead.
template <>
bool AddNOp<DeviceType::NEON, float>::Run() {
  Tensor* output_tensor = Output(0);
  // NOTE(review): "Funtion" is a typo for "Function", but the symbol is
  // declared in mace/kernels, so it cannot be renamed from this file alone.
  kernels::NeonAddNFuntion_float(Inputs(), output_tensor);
  return true;
}
REGISTER_NEON_OPERATOR(AddN, AddNOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
} // namespace mace
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_OPS_ADDN_H_
#define MACE_OPS_ADDN_H_

#include "mace/core/operator.h"
#include "mace/kernels/addn.h"

namespace mace {

// Device-generic AddN operator: forwards all of its inputs to the reference
// AddN kernel, which writes the element-wise sum into output 0. Device- or
// type-specific builds may fully specialize Run() elsewhere (e.g. NEON).
template <DeviceType D, class T>
class AddNOp : public Operator<D, T> {
 public:
  AddNOp(const OperatorDef &operator_def, Workspace *ws)
      : Operator<D, T>(operator_def, ws) {}

  // Runs the reference kernel; always reports success.
  bool Run() override {
    Tensor *result = this->Output(0);
    kernels::AddNFuntion<T>(this->Inputs(), result);
    return true;
  }
};

} // namespace mace

#endif // MACE_OPS_ADDN_H_
...@@ -4,23 +4,14 @@ ...@@ -4,23 +4,14 @@
#include "mace/ops/relu.h" #include "mace/ops/relu.h"
#include "mace/proto/mace.pb.h" #include "mace/proto/mace.pb.h"
#include "mace/kernels/relu.h"
#if __ARM_NEON #if __ARM_NEON
#include "mace/kernels/neon/relu_neon.h" #include "mace/kernels/neon/relu_neon.h"
#endif // __ARM_NEON #endif // __ARM_NEON
namespace mace { namespace mace {
template <>
bool ReluOp<DeviceType::CPU, float>::Run() {
const Tensor* input_tensor = Input(0);
Tensor* output_tensor = Output(0);
kernels::ReluFuntion<float>(input_tensor, output_tensor);
return true;
}
REGISTER_CPU_OPERATOR(Relu, ReluOp<DeviceType::CPU, float>); REGISTER_CPU_OPERATOR(Relu, ReluOp<DeviceType::CPU, float>);
#if __ARM_NEON #if __ARM_NEON
template <> template <>
bool ReluOp<DeviceType::NEON, float>::Run() { bool ReluOp<DeviceType::NEON, float>::Run() {
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#define MACE_OPS_RELU_H_ #define MACE_OPS_RELU_H_
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/kernels/relu.h"
namespace mace { namespace mace {
...@@ -14,7 +15,12 @@ class ReluOp : public Operator<D, T> { ...@@ -14,7 +15,12 @@ class ReluOp : public Operator<D, T> {
public: public:
ReluOp(const OperatorDef &operator_def, Workspace *ws) ReluOp(const OperatorDef &operator_def, Workspace *ws)
: Operator<D, T>(operator_def, ws) {} : Operator<D, T>(operator_def, ws) {}
bool Run() override; bool Run() override {
const Tensor* input_tensor = this->Input(0);
Tensor* output_tensor = this->Output(0);
kernels::ReluFuntion<T>(input_tensor, output_tensor);
return true;
}
}; };
} // namespace mace } // namespace mace
......
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "gtest/gtest.h"
#include "mace/core/operator.h"
#include "mace/core/net.h"
using namespace mace;
// Smoke test for the Relu operator: builds a one-op NetDef, feeds a 2x3
// float tensor, then exercises both the Net path and the standalone
// Operator path.
// NOTE(review): the test never reads "Output0" back to verify that
// output[i] == max(input[i], 0); it only checks that the net runs and that
// the float argument round-trips. TODO: fetch the output tensor from the
// workspace and assert the actual values.
TEST(ReluTest, Relu) {
  // Log every operator registered for CPU so a failed type lookup below is
  // easy to diagnose.
  OperatorRegistry* registry = gDeviceTypeRegistry()->at(DeviceType::CPU);
  vector<string> registry_keys = registry->Keys();
  for (auto& key: registry_keys) {
    VLOG(0) << "registry_op: " << key;
  }
  // Construct graph: a single Relu op with one scalar float argument.
  OperatorDef op_def;
  op_def.add_input("Input0");
  op_def.add_output("Output0");
  op_def.set_name("ReluTest");
  op_def.set_type("Relu");
  auto arg = op_def.add_arg();
  arg->set_name("arg0");
  arg->set_f(1.5);
  // Wrap the single op in a NetDef.
  NetDef net_def;
  net_def.set_name("NetTest");
  net_def.add_op()->CopyFrom(op_def);
  VLOG(0) << net_def.DebugString();
  // Create workspace and input tensor: 2x3 floats valued -3..2 so the
  // input spans both negative and non-negative values.
  Workspace ws;
  Tensor* input = ws.CreateTensor("Input0", cpu_allocator(), DataType::DT_FLOAT);
  input->Resize({2,3});
  float* input_data = input->mutable_data<float>();
  for (int i = 0; i < 6; ++i) {
    input_data[i] = i-3;
  }
  // Create Net & run (exercises graph execution end to end).
  auto net = CreateNet(net_def, &ws, DeviceType::CPU);
  net->Run();
  // Create Op directly & check that the float argument was plumbed through.
  auto op = CreateOperator(op_def, &ws, DeviceType::CPU);
  ASSERT_FLOAT_EQ(1.5f, op->GetSingleArgument<float>("arg0", 1.0f));
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册