提交 966b882a 编写于 作者: 吴承辉

Merge branch 'types' into 'master'

Update data types and linking opts

See merge request !20
...@@ -7,6 +7,8 @@ package( ...@@ -7,6 +7,8 @@ package(
licenses(["notice"]) # Apache 2.0 licenses(["notice"]) # Apache 2.0
load("//mace:mace.bzl", "if_android")
cc_library( cc_library(
name = "core", name = "core",
srcs = glob([ srcs = glob([
...@@ -19,6 +21,10 @@ cc_library( ...@@ -19,6 +21,10 @@ cc_library(
deps = [ deps = [
"//mace/proto:cc_proto", "//mace/proto:cc_proto",
], ],
linkopts = if_android([
"-llog",
"-pie",
]),
) )
# Main program for tests # Main program for tests
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
#include "mace/core/integral_types.h"
#include "mace/core/logging.h" #include "mace/core/logging.h"
using std::set; using std::set;
...@@ -21,7 +20,7 @@ using std::string; ...@@ -21,7 +20,7 @@ using std::string;
using std::unique_ptr; using std::unique_ptr;
using std::vector; using std::vector;
typedef int64 TIndex; typedef int64_t index_t;
// Disable the copy and assignment operator for a class. // Disable the copy and assignment operator for a class.
#ifndef DISABLE_COPY_AND_ASSIGN #ifndef DISABLE_COPY_AND_ASSIGN
......
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_INTEGRAL_TYPES_H_
#define MACE_CORE_INTEGRAL_TYPES_H_
typedef int8_t int8;
typedef int16_t int16;
typedef int32_t int32;
typedef int64_t int64;
typedef uint8_t uint8;
typedef uint16_t uint16;
typedef uint32_t uint32;
typedef uint64_t uint64;
#endif // MACE_CORE_INTEGRAL_TYPES_H_
...@@ -69,18 +69,18 @@ void LogMessage::GenerateLogMessage() { ...@@ -69,18 +69,18 @@ void LogMessage::GenerateLogMessage() {
namespace { namespace {
// Parse log level (int64) from environment variable (char*) // Parse log level (int64_t) from environment variable (char*)
int64 LogLevelStrToInt(const char* tf_env_var_val) { int64_t LogLevelStrToInt(const char* mace_env_var_val) {
if (tf_env_var_val == nullptr) { if (mace_env_var_val == nullptr) {
return 0; return 0;
} }
// Ideally we would use env_var / safe_strto64, but it is // Ideally we would use env_var / safe_strto64, but it is
// hard to use here without pulling in a lot of dependencies, // hard to use here without pulling in a lot of dependencies,
// so we use std::istringstream instead // so we use std::istringstream instead
string min_log_level(tf_env_var_val); string min_log_level(mace_env_var_val);
std::istringstream ss(min_log_level); std::istringstream ss(min_log_level);
int64 level; int64_t level;
if (!(ss >> level)) { if (!(ss >> level)) {
// Invalid vlog level setting, set level to default (0) // Invalid vlog level setting, set level to default (0)
level = 0; level = 0;
...@@ -89,26 +89,26 @@ int64 LogLevelStrToInt(const char* tf_env_var_val) { ...@@ -89,26 +89,26 @@ int64 LogLevelStrToInt(const char* tf_env_var_val) {
return level; return level;
} }
int64 MinLogLevelFromEnv() { int64_t MinLogLevelFromEnv() {
const char* tf_env_var_val = getenv("MACE_CPP_MIN_LOG_LEVEL"); const char* mace_env_var_val = getenv("MACE_CPP_MIN_LOG_LEVEL");
return LogLevelStrToInt(tf_env_var_val); return LogLevelStrToInt(mace_env_var_val);
} }
int64 MinVLogLevelFromEnv() { int64_t MinVLogLevelFromEnv() {
const char* tf_env_var_val = getenv("MACE_CPP_MIN_VLOG_LEVEL"); const char* mace_env_var_val = getenv("MACE_CPP_MIN_VLOG_LEVEL");
return LogLevelStrToInt(tf_env_var_val); return LogLevelStrToInt(mace_env_var_val);
} }
} // namespace } // namespace
LogMessage::~LogMessage() { LogMessage::~LogMessage() {
// Read the min log level once during the first call to logging. // Read the min log level once during the first call to logging.
static int64 min_log_level = MinLogLevelFromEnv(); static int64_t min_log_level = MinLogLevelFromEnv();
if (severity_ >= min_log_level) GenerateLogMessage(); if (severity_ >= min_log_level) GenerateLogMessage();
} }
int64 LogMessage::MinVLogLevel() { int64_t LogMessage::MinVLogLevel() {
static int64 min_vlog_level = MinVLogLevelFromEnv(); static int64_t min_vlog_level = MinVLogLevelFromEnv();
return min_vlog_level; return min_vlog_level;
} }
......
...@@ -9,8 +9,6 @@ ...@@ -9,8 +9,6 @@
#include <limits> #include <limits>
#include <string> #include <string>
#include "mace/core/integral_types.h"
#undef ERROR #undef ERROR
namespace mace { namespace mace {
...@@ -62,7 +60,7 @@ class LogMessage : public std::basic_ostringstream<char> { ...@@ -62,7 +60,7 @@ class LogMessage : public std::basic_ostringstream<char> {
// Returns the minimum log level for VLOG statements. // Returns the minimum log level for VLOG statements.
// E.g., if MinVLogLevel() is 2, then VLOG(2) statements will produce output, // E.g., if MinVLogLevel() is 2, then VLOG(2) statements will produce output,
// but VLOG(3) will not. Defaults to 0. // but VLOG(3) will not. Defaults to 0.
static int64 MinVLogLevel(); static int64_t MinVLogLevel();
protected: protected:
void GenerateLogMessage(); void GenerateLogMessage();
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
namespace mace { namespace mace {
std::map<int32, OperatorRegistry*>* gDeviceTypeRegistry() { std::map<int32_t, OperatorRegistry*>* gDeviceTypeRegistry() {
static std::map<int32, OperatorRegistry*> g_device_type_registry; static std::map<int32_t, OperatorRegistry*> g_device_type_registry;
return &g_device_type_registry; return &g_device_type_registry;
} }
......
...@@ -44,7 +44,7 @@ class OperatorBase { ...@@ -44,7 +44,7 @@ class OperatorBase {
*operator_def_, name, default_value); *operator_def_, name, default_value);
} }
inline const Tensor *Input(TIndex idx) { inline const Tensor *Input(index_t idx) {
MACE_CHECK(idx < inputs_.size()); MACE_CHECK(idx < inputs_.size());
return inputs_[idx]; return inputs_[idx];
} }
......
...@@ -17,8 +17,8 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto, ...@@ -17,8 +17,8 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
DeviceType type) { DeviceType type) {
unique_ptr<Tensor> tensor(new Tensor(GetDeviceAllocator(type), unique_ptr<Tensor> tensor(new Tensor(GetDeviceAllocator(type),
proto.data_type())); proto.data_type()));
vector<TIndex> dims; vector<index_t> dims;
for (const TIndex d : proto.dims()) { for (const index_t d : proto.dims()) {
dims.push_back(d); dims.push_back(d);
} }
tensor->Resize(dims); tensor->Resize(dims);
...@@ -33,31 +33,31 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto, ...@@ -33,31 +33,31 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
proto.double_data().size()); proto.double_data().size());
break; break;
case DT_INT32: case DT_INT32:
tensor->template Copy<int32>(proto.int32_data().data(), tensor->template Copy<int32_t>(proto.int32_data().data(),
proto.int32_data().size()); proto.int32_data().size());
break; break;
case DT_UINT8: case DT_UINT8:
tensor->CopyWithCast<int32, uint8>(proto.int32_data().data(), tensor->CopyWithCast<int32_t, uint8_t>(proto.int32_data().data(),
proto.int32_data().size()); proto.int32_data().size());
break; break;
case DT_INT16: case DT_INT16:
tensor->CopyWithCast<int32, int16>(proto.int32_data().data(), tensor->CopyWithCast<int32_t, int16_t>(proto.int32_data().data(),
proto.int32_data().size()); proto.int32_data().size());
break; break;
case DT_INT8: case DT_INT8:
tensor->CopyWithCast<int32, int8>(proto.int32_data().data(), tensor->CopyWithCast<int32_t, int8_t>(proto.int32_data().data(),
proto.int32_data().size()); proto.int32_data().size());
break; break;
case DT_INT64: case DT_INT64:
tensor->Copy<int64>(proto.int64_data().data(), tensor->Copy<int64_t>(proto.int64_data().data(),
proto.int64_data().size()); proto.int64_data().size());
break; break;
case DT_UINT16: case DT_UINT16:
tensor->CopyWithCast<int32, uint16>(proto.int32_data().data(), tensor->CopyWithCast<int32_t, uint16_t>(proto.int32_data().data(),
proto.int32_data().size()); proto.int32_data().size());
break; break;
case DT_BOOL: case DT_BOOL:
tensor->CopyWithCast<int32, bool>(proto.int32_data().data(), tensor->CopyWithCast<int32_t, bool>(proto.int32_data().data(),
proto.int32_data().size()); proto.int32_data().size());
break; break;
case DT_STRING: { case DT_STRING: {
......
...@@ -25,13 +25,13 @@ namespace mace { ...@@ -25,13 +25,13 @@ namespace mace {
switch (TYPE_ENUM) { \ switch (TYPE_ENUM) { \
CASE(float, SINGLE_ARG(STMTS)) \ CASE(float, SINGLE_ARG(STMTS)) \
CASE(double, SINGLE_ARG(STMTS)) \ CASE(double, SINGLE_ARG(STMTS)) \
CASE(int32, SINGLE_ARG(STMTS)) \ CASE(int32_t, SINGLE_ARG(STMTS)) \
CASE(uint8, SINGLE_ARG(STMTS)) \ CASE(uint8_t, SINGLE_ARG(STMTS)) \
CASE(uint16, SINGLE_ARG(STMTS)) \ CASE(uint16_t, SINGLE_ARG(STMTS)) \
CASE(int16, SINGLE_ARG(STMTS)) \ CASE(int16_t, SINGLE_ARG(STMTS)) \
CASE(int8, SINGLE_ARG(STMTS)) \ CASE(int8_t, SINGLE_ARG(STMTS)) \
CASE(string, SINGLE_ARG(STMTS)) \ CASE(string, SINGLE_ARG(STMTS)) \
CASE(int64, SINGLE_ARG(STMTS)) \ CASE(int64_t, SINGLE_ARG(STMTS)) \
CASE(bool, SINGLE_ARG(STMTS)) \ CASE(bool, SINGLE_ARG(STMTS)) \
case DT_INVALID: \ case DT_INVALID: \
INVALID; \ INVALID; \
...@@ -64,17 +64,17 @@ class Tensor { ...@@ -64,17 +64,17 @@ class Tensor {
inline DataType dtype() const { return dtype_; } inline DataType dtype() const { return dtype_; }
inline const vector<TIndex>& shape() const { return shape_; } inline const vector<index_t>& shape() const { return shape_; }
inline TIndex dim_size() const { return shape_.size(); } inline index_t dim_size() const { return shape_.size(); }
inline TIndex dim(TIndex index) const { inline index_t dim(index_t index) const {
MACE_CHECK(index < shape_.size(), "Exceeding ndim limit"); MACE_CHECK(index < shape_.size(), "Exceeding ndim limit");
MACE_CHECK(index >= 0, "Cannot have negative dimension index"); MACE_CHECK(index >= 0, "Cannot have negative dimension index");
return shape_[index]; return shape_[index];
} }
inline TIndex size() const { return size_; } inline index_t size() const { return size_; }
inline const void* raw_data() const { inline const void* raw_data() const {
MACE_CHECK(data_.get() || size_ == 0); MACE_CHECK(data_.get() || size_ == 0);
...@@ -108,9 +108,9 @@ class Tensor { ...@@ -108,9 +108,9 @@ class Tensor {
return static_cast<T*>(raw_mutable_data()); return static_cast<T*>(raw_mutable_data());
} }
inline void Resize(const vector<TIndex>& shape) { inline void Resize(const vector<index_t>& shape) {
shape_ = shape; shape_ = shape;
TIndex size = NumElements(); index_t size = NumElements();
if (size_ != size) { if (size_ != size) {
size_ = size; size_ = size;
data_.reset(); data_.reset();
...@@ -126,14 +126,14 @@ class Tensor { ...@@ -126,14 +126,14 @@ class Tensor {
} }
template <typename T> template <typename T>
inline void Copy(const T* src, TIndex size) { inline void Copy(const T* src, index_t size) {
MACE_CHECK(size == size_, "copy src and dst with different size."); MACE_CHECK(size == size_, "copy src and dst with different size.");
CopyBytes(static_cast<const void*>(src), sizeof(T) * size); CopyBytes(static_cast<const void*>(src), sizeof(T) * size);
} }
template <typename SrcType, typename DstType> template <typename SrcType, typename DstType>
inline void CopyWithCast(const SrcType* src, size_t size) { inline void CopyWithCast(const SrcType* src, size_t size) {
MACE_CHECK(static_cast<TIndex>(size) == size_, "copy src and dst with different size."); MACE_CHECK(static_cast<index_t>(size) == size_, "copy src and dst with different size.");
unique_ptr<DstType[]> buffer(new DstType[size]); unique_ptr<DstType[]> buffer(new DstType[size]);
for (size_t i = 0; i < size; ++i) { for (size_t i = 0; i < size; ++i) {
buffer[i] = static_cast<DstType>(src[i]); buffer[i] = static_cast<DstType>(src[i]);
...@@ -161,15 +161,15 @@ class Tensor { ...@@ -161,15 +161,15 @@ class Tensor {
} }
private: private:
inline int64 NumElements() const { inline int64_t NumElements() const {
return std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<int64>()); return std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<int64_t>());
} }
Allocator* alloc_; Allocator* alloc_;
TIndex size_; index_t size_;
DataType dtype_; DataType dtype_;
std::shared_ptr<void> data_; std::shared_ptr<void> data_;
vector<TIndex> shape_; vector<index_t> shape_;
}; };
} // namespace tensor } // namespace tensor
......
...@@ -16,10 +16,10 @@ namespace mace { ...@@ -16,10 +16,10 @@ namespace mace {
namespace testing { namespace testing {
inline int64 NowMicros() { inline int64_t NowMicros() {
struct timeval tv; struct timeval tv;
gettimeofday(&tv, nullptr); gettimeofday(&tv, nullptr);
return static_cast<int64>(tv.tv_sec) * 1000000 + tv.tv_usec; return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
} }
} // namespace testing } // namespace testing
......
...@@ -16,10 +16,10 @@ namespace testing { ...@@ -16,10 +16,10 @@ namespace testing {
static std::vector<Benchmark*>* all_benchmarks = nullptr; static std::vector<Benchmark*>* all_benchmarks = nullptr;
static std::string label; static std::string label;
static int64 bytes_processed; static int64_t bytes_processed;
static int64 items_processed; static int64_t items_processed;
static int64 accum_time = 0; static int64_t accum_time = 0;
static int64 start_time = 0; static int64_t start_time = 0;
Benchmark::Benchmark(const char* name, void (*fn)(int)) Benchmark::Benchmark(const char* name, void (*fn)(int))
: name_(name), num_args_(0), fn0_(fn) { : name_(name), num_args_(0), fn0_(fn) {
...@@ -112,10 +112,10 @@ void Benchmark::Register() { ...@@ -112,10 +112,10 @@ void Benchmark::Register() {
} }
void Benchmark::Run(int arg1, int arg2, int* run_count, double* run_seconds) { void Benchmark::Run(int arg1, int arg2, int* run_count, double* run_seconds) {
static const int64 kMinIters = 100; static const int64_t kMinIters = 100;
static const int64 kMaxIters = 1000000000; static const int64_t kMaxIters = 1000000000;
static const double kMinTime = 0.5; static const double kMinTime = 0.5;
int64 iters = kMinIters; int64_t iters = kMinIters;
while (true) { while (true) {
accum_time = 0; accum_time = 0;
start_time = NowMicros(); start_time = NowMicros();
...@@ -142,13 +142,13 @@ void Benchmark::Run(int arg1, int arg2, int* run_count, double* run_seconds) { ...@@ -142,13 +142,13 @@ void Benchmark::Run(int arg1, int arg2, int* run_count, double* run_seconds) {
double multiplier = 1.4 * kMinTime / std::max(seconds, 1e-9); double multiplier = 1.4 * kMinTime / std::max(seconds, 1e-9);
multiplier = std::min(10.0, multiplier); multiplier = std::min(10.0, multiplier);
if (multiplier <= 1.0) multiplier *= 2.0; if (multiplier <= 1.0) multiplier *= 2.0;
iters = std::max<int64>(multiplier * iters, iters + 1); iters = std::max<int64_t>(multiplier * iters, iters + 1);
iters = std::min(iters, kMaxIters); iters = std::min(iters, kMaxIters);
} }
} }
void BytesProcessed(int64 n) { bytes_processed = n; } void BytesProcessed(int64_t n) { bytes_processed = n; }
void ItemsProcessed(int64 n) { items_processed = n; } void ItemsProcessed(int64_t n) { items_processed = n; }
void StartTiming() { void StartTiming() {
if (start_time == 0) start_time = NowMicros(); if (start_time == 0) start_time = NowMicros();
} }
......
...@@ -42,8 +42,8 @@ class Benchmark { ...@@ -42,8 +42,8 @@ class Benchmark {
}; };
void RunBenchmarks(); void RunBenchmarks();
void BytesProcessed(int64); void BytesProcessed(int64_t);
void ItemsProcessed(int64); void ItemsProcessed(int64_t);
void StartTiming(); void StartTiming();
void StopTiming(); void StopTiming();
......
...@@ -42,16 +42,16 @@ struct EnumToDataType {}; // Specializations below ...@@ -42,16 +42,16 @@ struct EnumToDataType {}; // Specializations below
MATCH_TYPE_AND_ENUM(float, DT_FLOAT); MATCH_TYPE_AND_ENUM(float, DT_FLOAT);
MATCH_TYPE_AND_ENUM(double, DT_DOUBLE); MATCH_TYPE_AND_ENUM(double, DT_DOUBLE);
MATCH_TYPE_AND_ENUM(int32, DT_INT32); MATCH_TYPE_AND_ENUM(int32_t, DT_INT32);
MATCH_TYPE_AND_ENUM(uint16, DT_UINT16); MATCH_TYPE_AND_ENUM(uint16_t, DT_UINT16);
MATCH_TYPE_AND_ENUM(uint8, DT_UINT8); MATCH_TYPE_AND_ENUM(uint8_t, DT_UINT8);
MATCH_TYPE_AND_ENUM(int16, DT_INT16); MATCH_TYPE_AND_ENUM(int16_t, DT_INT16);
MATCH_TYPE_AND_ENUM(int8, DT_INT8); MATCH_TYPE_AND_ENUM(int8_t, DT_INT8);
MATCH_TYPE_AND_ENUM(string, DT_STRING); MATCH_TYPE_AND_ENUM(string, DT_STRING);
MATCH_TYPE_AND_ENUM(int64, DT_INT64); MATCH_TYPE_AND_ENUM(int64_t, DT_INT64);
MATCH_TYPE_AND_ENUM(bool, DT_BOOL); MATCH_TYPE_AND_ENUM(bool, DT_BOOL);
static const int32 kint32max = ((int32)0x7FFFFFFF); static const int32_t kint32_tmax = ((int32_t)0x7FFFFFFF);
} // namespace mace } // namespace mace
......
...@@ -7,10 +7,6 @@ cc_binary( ...@@ -7,10 +7,6 @@ cc_binary(
"helloworld.cc", "helloworld.cc",
], ],
copts = ["-std=c++11"], copts = ["-std=c++11"],
linkopts = if_android([
"-pie",
"-llog",
]),
deps = [ deps = [
"//mace/core", "//mace/core",
"//mace/ops", "//mace/ops",
...@@ -21,10 +17,6 @@ cc_test( ...@@ -21,10 +17,6 @@ cc_test(
name = "benchmark_example", name = "benchmark_example",
srcs = ["benchmark_example.cc"], srcs = ["benchmark_example.cc"],
copts = ["-std=c++11"], copts = ["-std=c++11"],
linkopts = if_android([
"-pie",
"-llog",
]),
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
"//mace/core", "//mace/core",
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
static void foo(int iters) { static void foo(int iters) {
static const int N = 32; static const int N = 32;
const int64 tot = static_cast<int64>(iters) * N; const int64_t tot = static_cast<int64_t>(iters) * N;
mace::testing::ItemsProcessed(tot); mace::testing::ItemsProcessed(tot);
mace::testing::BytesProcessed(tot * (sizeof(float))); mace::testing::BytesProcessed(tot * (sizeof(float)));
...@@ -26,7 +26,7 @@ BENCHMARK(foo); ...@@ -26,7 +26,7 @@ BENCHMARK(foo);
static void bar(int iters, int n) { static void bar(int iters, int n) {
const int64 tot = static_cast<int64>(iters) * n; const int64_t tot = static_cast<int64_t>(iters) * n;
mace::testing::ItemsProcessed(tot); mace::testing::ItemsProcessed(tot);
mace::testing::BytesProcessed(tot * (sizeof(float))); mace::testing::BytesProcessed(tot * (sizeof(float)));
......
...@@ -18,6 +18,9 @@ cc_library( ...@@ -18,6 +18,9 @@ cc_library(
"//mace/core:core", "//mace/core:core",
], ],
copts = ['-std=c++11'], copts = ['-std=c++11'],
linkopts = ["-fopenmp"] + if_android([
"-lm",
]),
) )
cc_test( cc_test(
...@@ -29,10 +32,8 @@ cc_test( ...@@ -29,10 +32,8 @@ cc_test(
"//mace/core:core", "//mace/core:core",
], ],
copts = ['-std=c++11'], copts = ['-std=c++11'],
linkopts = ["-fopenmp"] + if_android([ linkopts = if_android([
"-pie", "-pie",
"-llog",
"-lm",
]), ]),
linkstatic = 1, linkstatic = 1,
testonly = 1, testonly = 1,
...@@ -47,11 +48,6 @@ cc_test( ...@@ -47,11 +48,6 @@ cc_test(
"//mace/core:test_benchmark_main", "//mace/core:test_benchmark_main",
], ],
copts = ['-std=c++11'], copts = ['-std=c++11'],
linkopts = ["-fopenmp"] + if_android([
"-pie",
"-llog",
"-lm",
]),
linkstatic = 1, linkstatic = 1,
testonly = 1, testonly = 1,
) )
...@@ -15,7 +15,7 @@ void AddNFuntion(const vector<const Tensor*>& input_tensor, Tensor *output_tenso ...@@ -15,7 +15,7 @@ void AddNFuntion(const vector<const Tensor*>& input_tensor, Tensor *output_tenso
int n = input_tensor.size(); int n = input_tensor.size();
MACE_CHECK(n > 1); MACE_CHECK(n > 1);
MACE_CHECK_NOTNULL(input_tensor[0]); MACE_CHECK_NOTNULL(input_tensor[0]);
int64 size = input_tensor[0]->size(); int64_t size = input_tensor[0]->size();
vector<const T*> inputs(n); vector<const T*> inputs(n);
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
inputs[i] = input_tensor[i]->data<T>(); inputs[i] = input_tensor[i]->data<T>();
...@@ -24,7 +24,7 @@ void AddNFuntion(const vector<const Tensor*>& input_tensor, Tensor *output_tenso ...@@ -24,7 +24,7 @@ void AddNFuntion(const vector<const Tensor*>& input_tensor, Tensor *output_tenso
T* output = output_tensor->mutable_data<T>(); T* output = output_tensor->mutable_data<T>();
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
for (int64 j = 0; j < size; ++j) { for (int64_t j = 0; j < size; ++j) {
output[j] += inputs[i][j]; output[j] += inputs[i][j];
} }
} }
......
...@@ -30,9 +30,9 @@ struct BatchNormFunctor : public BatchNormFunctorBase<D, T> { ...@@ -30,9 +30,9 @@ struct BatchNormFunctor : public BatchNormFunctorBase<D, T> {
const T* offset, const T* offset,
const T* mean, const T* mean,
const T* var, const T* var,
const TIndex n, const index_t n,
const TIndex channel, const index_t channel,
const TIndex sample_size, const index_t sample_size,
T* output) { T* output) {
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 . // Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is // The calculation formula for inference is
...@@ -42,15 +42,15 @@ struct BatchNormFunctor : public BatchNormFunctorBase<D, T> { ...@@ -42,15 +42,15 @@ struct BatchNormFunctor : public BatchNormFunctorBase<D, T> {
// new_offset = \offset - mean * common_val; // new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset; // Y = new_scale * X + new_offset;
T new_scale, new_offset; T new_scale, new_offset;
for (TIndex c = 0; c < channel; ++c) { for (index_t c = 0; c < channel; ++c) {
new_scale = scale[c] / std::sqrt(var[c] + this->variance_epsilon_); new_scale = scale[c] / std::sqrt(var[c] + this->variance_epsilon_);
new_offset = offset[c] - mean[c] * new_scale; new_offset = offset[c] - mean[c] * new_scale;
TIndex pos = c * sample_size; index_t pos = c * sample_size;
for (TIndex i = 0; i < n; ++i) { for (index_t i = 0; i < n; ++i) {
const T* input_sample_ptr = input + pos; const T* input_sample_ptr = input + pos;
T* output_sample_ptr = output + pos; T* output_sample_ptr = output + pos;
for (TIndex j = 0; j < sample_size; ++j) { for (index_t j = 0; j < sample_size; ++j) {
output_sample_ptr[j] = new_scale * input_sample_ptr[j] + new_offset; output_sample_ptr[j] = new_scale * input_sample_ptr[j] + new_offset;
} }
pos += channel * sample_size; pos += channel * sample_size;
......
...@@ -11,7 +11,7 @@ using namespace mace; ...@@ -11,7 +11,7 @@ using namespace mace;
using namespace mace::kernels; using namespace mace::kernels;
static void AddNBenchmark(int iters, int n, int type) { static void AddNBenchmark(int iters, int n, int type) {
const int64 tot = static_cast<int64>(iters) * n * 3; const int64_t tot = static_cast<int64_t>(iters) * n * 3;
mace::testing::ItemsProcessed(tot); mace::testing::ItemsProcessed(tot);
mace::testing::BytesProcessed(tot * (sizeof(float))); mace::testing::BytesProcessed(tot * (sizeof(float)));
...@@ -35,7 +35,7 @@ static void AddNBenchmark(int iters, int n, int type) { ...@@ -35,7 +35,7 @@ static void AddNBenchmark(int iters, int n, int type) {
float *input3 = input_tensor3.mutable_data<float>(); float *input3 = input_tensor3.mutable_data<float>();
float *output = output_tensor.mutable_data<float>(); float *output = output_tensor.mutable_data<float>();
for (int64 i = 0; i < n; ++i) { for (int64_t i = 0; i < n; ++i) {
input1[i] = nd(gen); input1[i] = nd(gen);
input2[i] = nd(gen); input2[i] = nd(gen);
input3[i] = nd(gen); input3[i] = nd(gen);
......
...@@ -11,7 +11,7 @@ using namespace mace; ...@@ -11,7 +11,7 @@ using namespace mace;
using namespace mace::kernels; using namespace mace::kernels;
static void ReluBenchmark(int iters, int n, int type) { static void ReluBenchmark(int iters, int n, int type) {
const int64 tot = static_cast<int64>(iters) * n; const int64_t tot = static_cast<int64_t>(iters) * n;
mace::testing::ItemsProcessed(tot); mace::testing::ItemsProcessed(tot);
mace::testing::BytesProcessed(tot * (sizeof(float))); mace::testing::BytesProcessed(tot * (sizeof(float)));
...@@ -25,7 +25,7 @@ static void ReluBenchmark(int iters, int n, int type) { ...@@ -25,7 +25,7 @@ static void ReluBenchmark(int iters, int n, int type) {
output_tensor.ResizeLike(input_tensor); output_tensor.ResizeLike(input_tensor);
float *input = input_tensor.mutable_data<float>(); float *input = input_tensor.mutable_data<float>();
float *output = output_tensor.mutable_data<float>(); float *output = output_tensor.mutable_data<float>();
for (int64 i = 0; i < n; ++i) { for (int64_t i = 0; i < n; ++i) {
input[i] = nd(gen); input[i] = nd(gen);
} }
......
...@@ -14,7 +14,7 @@ void NeonAddNFuntion_float(const vector<const Tensor *> &input_tensor, ...@@ -14,7 +14,7 @@ void NeonAddNFuntion_float(const vector<const Tensor *> &input_tensor,
int n = input_tensor.size(); int n = input_tensor.size();
MACE_CHECK(n > 1); MACE_CHECK(n > 1);
MACE_CHECK_NOTNULL(input_tensor[0]); MACE_CHECK_NOTNULL(input_tensor[0]);
int64 size = input_tensor[0]->size(); int64_t size = input_tensor[0]->size();
output_tensor->ResizeLike(input_tensor[0]); output_tensor->ResizeLike(input_tensor[0]);
float *output = output_tensor->mutable_data<float>(); float *output = output_tensor->mutable_data<float>();
vector<const float *> inputs(n); vector<const float *> inputs(n);
...@@ -22,19 +22,19 @@ void NeonAddNFuntion_float(const vector<const Tensor *> &input_tensor, ...@@ -22,19 +22,19 @@ void NeonAddNFuntion_float(const vector<const Tensor *> &input_tensor,
inputs[i] = input_tensor[i]->data<float>(); inputs[i] = input_tensor[i]->data<float>();
} }
int64 cost = size * n; int64_t cost = size * n;
int64 groups = 1; int64_t groups = 1;
if (cost > kCostPerGroup) { if (cost > kCostPerGroup) {
groups = cost / kCostPerGroup; groups = cost / kCostPerGroup;
} }
int64 element_per_group = size / groups; int64_t element_per_group = size / groups;
#pragma omp parallel for num_threads(1) // no significant performance improvement #pragma omp parallel for num_threads(1) // no significant performance improvement
for (int64 i = 0; i < size; i += element_per_group) { for (int64_t i = 0; i < size; i += element_per_group) {
int64 count = std::min(element_per_group, size - i); int64_t count = std::min(element_per_group, size - i);
int nn = count >> 2; int nn = count >> 2;
int remain = count - (nn << 2); int remain = count - (nn << 2);
for (int64 j = 0; j < n; ++j) { for (int64_t j = 0; j < n; ++j) {
const float *inptr = inputs[j] + i; const float *inptr = inputs[j] + i;
float *outptr = output + i; float *outptr = output + i;
for (int k = 0; k < nn; ++k) { for (int k = 0; k < nn; ++k) {
......
...@@ -34,18 +34,18 @@ struct BatchNormFunctor<DeviceType::NEON, T> : public BatchNormFunctorBase<Devic ...@@ -34,18 +34,18 @@ struct BatchNormFunctor<DeviceType::NEON, T> : public BatchNormFunctorBase<Devic
T new_scale, new_offset; T new_scale, new_offset;
int count = sample_size >> 2; int count = sample_size >> 2;
int remain_count = sample_size - count; int remain_count = sample_size - count;
for (TIndex c = 0; c < channel; ++c) { for (index_t c = 0; c < channel; ++c) {
new_scale = scale[c] / std::sqrt(var[c] + this->variance_epsilon_); new_scale = scale[c] / std::sqrt(var[c] + this->variance_epsilon_);
new_offset = offset[c] - mean[c] * new_scale; new_offset = offset[c] - mean[c] * new_scale;
TIndex pos = c * sample_size; index_t pos = c * sample_size;
float32x4_t new_scale_f = vdupq_n_f32(new_scale); float32x4_t new_scale_f = vdupq_n_f32(new_scale);
float32x4_t new_offset_f = vdupq_n_f32(new_offset); float32x4_t new_offset_f = vdupq_n_f32(new_offset);
for (TIndex i = 0; i < n; ++i) { for (index_t i = 0; i < n; ++i) {
const float* input_sample_ptr = input + pos; const float* input_sample_ptr = input + pos;
float* output_sample_ptr = output + pos; float* output_sample_ptr = output + pos;
for(TIndex j = 0; j < count; ++j) { for(index_t j = 0; j < count; ++j) {
float32x4_t input_f = vld1q_f32(input_sample_ptr); float32x4_t input_f = vld1q_f32(input_sample_ptr);
float32x4_t output_f = new_offset_f; float32x4_t output_f = new_offset_f;
output_f = vfmaq_f32(output_f, input_f, new_scale_f); output_f = vfmaq_f32(output_f, input_f, new_scale_f);
...@@ -53,7 +53,7 @@ struct BatchNormFunctor<DeviceType::NEON, T> : public BatchNormFunctorBase<Devic ...@@ -53,7 +53,7 @@ struct BatchNormFunctor<DeviceType::NEON, T> : public BatchNormFunctorBase<Devic
input_sample_ptr += 4; input_sample_ptr += 4;
output_sample_ptr += 4; output_sample_ptr += 4;
} }
for(TIndex j = 0; j < remain_count; ++j) { for(index_t j = 0; j < remain_count; ++j) {
*output_sample_ptr = new_scale * *input_sample_ptr + new_offset; *output_sample_ptr = new_scale * *input_sample_ptr + new_offset;
++output_sample_ptr; ++output_sample_ptr;
++input_sample_ptr; ++input_sample_ptr;
......
...@@ -10,14 +10,14 @@ namespace kernels { ...@@ -10,14 +10,14 @@ namespace kernels {
void NeonReluFuntion_float(const Tensor *input_tensor, void NeonReluFuntion_float(const Tensor *input_tensor,
Tensor *output_tensor) { Tensor *output_tensor) {
int64 size = input_tensor->size(); int64_t size = input_tensor->size();
output_tensor->ResizeLike(input_tensor); output_tensor->ResizeLike(input_tensor);
const float *input = input_tensor->data<float>(); const float *input = input_tensor->data<float>();
float *output = output_tensor->mutable_data<float>(); float *output = output_tensor->mutable_data<float>();
#pragma omp parallel for num_threads(1) // no significant performance improvement #pragma omp parallel for num_threads(1) // no significant performance improvement
for (int64 i = 0; i < size; i += kCostPerGroup) { for (int64_t i = 0; i < size; i += kCostPerGroup) {
int64 count = std::min(static_cast<int64>(kCostPerGroup), size - i); int64_t count = std::min(static_cast<int64_t>(kCostPerGroup), size - i);
int nn = count >> 2; int nn = count >> 2;
int remain = count - (nn << 2); int remain = count - (nn << 2);
const float *inptr = input + i; const float *inptr = input + i;
......
...@@ -12,12 +12,12 @@ namespace kernels { ...@@ -12,12 +12,12 @@ namespace kernels {
template<typename T> template<typename T>
void ReluFuntion(const Tensor *input_tensor, Tensor *output_tensor) { void ReluFuntion(const Tensor *input_tensor, Tensor *output_tensor) {
int64 size = input_tensor->size(); int64_t size = input_tensor->size();
output_tensor->ResizeLike(input_tensor); output_tensor->ResizeLike(input_tensor);
const T *input = input_tensor->data<T>(); const T *input = input_tensor->data<T>();
T *output = output_tensor->mutable_data<T>(); T *output = output_tensor->mutable_data<T>();
for (int64 i = 0; i < size; ++i) { for (int64_t i = 0; i < size; ++i) {
output[i] = std::max(input[i], static_cast<T>(0)); output[i] = std::max(input[i], static_cast<T>(0));
} }
} }
......
...@@ -15,7 +15,7 @@ TEST(NeonTest, AddN) { ...@@ -15,7 +15,7 @@ TEST(NeonTest, AddN) {
std::mt19937 gen(rd()); std::mt19937 gen(rd());
std::normal_distribution<float> nd(0, 1); std::normal_distribution<float> nd(0, 1);
int64 count = 100000; int64_t count = 100000;
Tensor input_tensor1(cpu_allocator(), DataType::DT_FLOAT); Tensor input_tensor1(cpu_allocator(), DataType::DT_FLOAT);
input_tensor1.Resize({100, 1000}); input_tensor1.Resize({100, 1000});
Tensor input_tensor2(cpu_allocator(), DataType::DT_FLOAT); Tensor input_tensor2(cpu_allocator(), DataType::DT_FLOAT);
...@@ -37,7 +37,7 @@ TEST(NeonTest, AddN) { ...@@ -37,7 +37,7 @@ TEST(NeonTest, AddN) {
float *output = output_tensor.mutable_data<float>(); float *output = output_tensor.mutable_data<float>();
float *output_neon = output_tensor_neon.mutable_data<float>(); float *output_neon = output_tensor_neon.mutable_data<float>();
for (int64 i = 0; i < count; ++i) { for (int64_t i = 0; i < count; ++i) {
input1[i] = nd(gen); input1[i] = nd(gen);
input2[i] = nd(gen); input2[i] = nd(gen);
input3[i] = nd(gen); input3[i] = nd(gen);
...@@ -48,7 +48,7 @@ TEST(NeonTest, AddN) { ...@@ -48,7 +48,7 @@ TEST(NeonTest, AddN) {
ASSERT_EQ(count, output_tensor.size()); ASSERT_EQ(count, output_tensor.size());
ASSERT_EQ(count, output_tensor_neon.size()); ASSERT_EQ(count, output_tensor_neon.size());
for (int64 i = 0; i < count; ++i) { for (int64_t i = 0; i < count; ++i) {
ASSERT_FLOAT_EQ(output[i], output_neon[i]); ASSERT_FLOAT_EQ(output[i], output_neon[i]);
} }
} }
......
...@@ -15,7 +15,7 @@ TEST(NeonTest, Relu) { ...@@ -15,7 +15,7 @@ TEST(NeonTest, Relu) {
std::mt19937 gen(rd()); std::mt19937 gen(rd());
std::normal_distribution<float> nd(0, 1); std::normal_distribution<float> nd(0, 1);
int64 count = 100000; int64_t count = 100000;
Tensor input_tensor(cpu_allocator(), DataType::DT_FLOAT); Tensor input_tensor(cpu_allocator(), DataType::DT_FLOAT);
input_tensor.Resize({100, 1000}); input_tensor.Resize({100, 1000});
Tensor output_tensor(cpu_allocator(), DataType::DT_FLOAT); Tensor output_tensor(cpu_allocator(), DataType::DT_FLOAT);
...@@ -27,7 +27,7 @@ TEST(NeonTest, Relu) { ...@@ -27,7 +27,7 @@ TEST(NeonTest, Relu) {
float *output = output_tensor.mutable_data<float>(); float *output = output_tensor.mutable_data<float>();
float *output_neon = output_tensor_neon.mutable_data<float>(); float *output_neon = output_tensor_neon.mutable_data<float>();
for (int64 i = 0; i < count; ++i) { for (int64_t i = 0; i < count; ++i) {
input[i] = nd(gen); input[i] = nd(gen);
} }
...@@ -36,7 +36,7 @@ TEST(NeonTest, Relu) { ...@@ -36,7 +36,7 @@ TEST(NeonTest, Relu) {
ASSERT_EQ(count, output_tensor.size()); ASSERT_EQ(count, output_tensor.size());
ASSERT_EQ(count, output_tensor_neon.size()); ASSERT_EQ(count, output_tensor_neon.size());
for (int64 i = 0; i < count; ++i) { for (int64_t i = 0; i < count; ++i) {
ASSERT_FLOAT_EQ(output[i], output_neon[i]); ASSERT_FLOAT_EQ(output[i], output_neon[i]);
} }
} }
......
...@@ -33,9 +33,9 @@ class BatchNormOp : public Operator<D, T> { ...@@ -33,9 +33,9 @@ class BatchNormOp : public Operator<D, T> {
Tensor* output = this->Output(0); Tensor* output = this->Output(0);
output->ResizeLike(input); output->ResizeLike(input);
const TIndex n = input->dim(0); const index_t n = input->dim(0);
const TIndex channel = input->dim(1); const index_t channel = input->dim(1);
const TIndex sample_size = input->dim(2) * input->dim(3); const index_t sample_size = input->dim(2) * input->dim(3);
const float* input_ptr = input->data<float>(); const float* input_ptr = input->data<float>();
const float* scale_ptr = scale->data<float>(); const float* scale_ptr = scale->data<float>();
......
...@@ -43,7 +43,7 @@ class OpsTestBase : public ::testing::Test { ...@@ -43,7 +43,7 @@ class OpsTestBase : public ::testing::Test {
} }
public: public:
template <typename T> template <typename T>
void AddInputFromArray(const char* name, const std::vector<TIndex>& shape, const std::vector<T>& data) { void AddInputFromArray(const char* name, const std::vector<index_t>& shape, const std::vector<T>& data) {
Tensor* input = ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v()); Tensor* input = ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
input->Resize(shape); input->Resize(shape);
float* input_data = input->mutable_data<float>(); float* input_data = input->mutable_data<float>();
...@@ -70,7 +70,7 @@ class OpsTestBase : public ::testing::Test { ...@@ -70,7 +70,7 @@ class OpsTestBase : public ::testing::Test {
}; };
template <typename T> template <typename T>
Tensor CreateTensor(const std::vector<TIndex>& shape, const std::vector<T>& data) { Tensor CreateTensor(const std::vector<index_t>& shape, const std::vector<T>& data) {
Tensor res(cpu_allocator(), DataTypeToEnum<T>::v()); Tensor res(cpu_allocator(), DataTypeToEnum<T>::v());
res.Resize(shape); res.Resize(shape);
float* input_data = res.mutable_data<float>(); float* input_data = res.mutable_data<float>();
...@@ -90,7 +90,7 @@ inline std::string ShapeToString(const Tensor& x) { ...@@ -90,7 +90,7 @@ inline std::string ShapeToString(const Tensor& x) {
std::stringstream stream; std::stringstream stream;
for (int i = 0; i < x.dim_size(); i++) { for (int i = 0; i < x.dim_size(); i++) {
if (i > 0) stream<<","; if (i > 0) stream<<",";
int64 dim = x.dim(i); int64_t dim = x.dim(i);
if (dim < 0) { if (dim < 0) {
stream<<"?"; stream<<"?";
} else { } else {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册