diff --git a/mace/core/BUILD b/mace/core/BUILD
index 218fd1bd2354a9936d243764c9cdb113389ba817..7f974ba4e89aa14085fde004eb8da1413d4de124 100644
--- a/mace/core/BUILD
+++ b/mace/core/BUILD
@@ -7,6 +7,8 @@ package(
 
 licenses(["notice"])  # Apache 2.0
 
+load("//mace:mace.bzl", "if_android")
+
 cc_library(
     name = "core",
     srcs = glob([
@@ -19,6 +21,10 @@ cc_library(
     deps = [
         "//mace/proto:cc_proto",
     ],
+    linkopts = if_android([
+        "-llog",
+        "-pie",
+    ]),
 )
 
 # Main program for tests
diff --git a/mace/core/common.h b/mace/core/common.h
index c2c2931660275634f894948927a3c1dd7e909204..e5e07225ab1b21165fe7b3b7f2ca809824a7e740 100644
--- a/mace/core/common.h
+++ b/mace/core/common.h
@@ -12,7 +12,6 @@
 #include
 #include
-#include "mace/core/integral_types.h"
 #include "mace/core/logging.h"
 
 using std::set;
@@ -21,7 +20,7 @@ using std::string;
 using std::unique_ptr;
 using std::vector;
 
-typedef int64 TIndex;
+typedef int64_t index_t;
 
 // Disable the copy and assignment operator for a class.
 #ifndef DISABLE_COPY_AND_ASSIGN
diff --git a/mace/core/integral_types.h b/mace/core/integral_types.h
deleted file mode 100644
index 72298201ef68403a93dbdb4d41087ad0f669e7a7..0000000000000000000000000000000000000000
--- a/mace/core/integral_types.h
+++ /dev/null
@@ -1,19 +0,0 @@
-//
-// Copyright (c) 2017 XiaoMi All rights reserved.
-//
-
-
-#ifndef MACE_CORE_INTEGRAL_TYPES_H_
-#define MACE_CORE_INTEGRAL_TYPES_H_
-
-typedef int8_t int8;
-typedef int16_t int16;
-typedef int32_t int32;
-typedef int64_t int64;
-
-typedef uint8_t uint8;
-typedef uint16_t uint16;
-typedef uint32_t uint32;
-typedef uint64_t uint64;
-
-#endif  // MACE_CORE_INTEGRAL_TYPES_H_
diff --git a/mace/core/logging.cc b/mace/core/logging.cc
index 5e0982d58e5d38fa1117b9d35ba2bec8a55dc092..f01d0980241187b2fcc2acb829e3c4b79f30b8d4 100644
--- a/mace/core/logging.cc
+++ b/mace/core/logging.cc
@@ -69,18 +69,18 @@ void LogMessage::GenerateLogMessage() {
 
 namespace {
 
-// Parse log level (int64) from environment variable (char*)
-int64 LogLevelStrToInt(const char* tf_env_var_val) {
-  if (tf_env_var_val == nullptr) {
+// Parse log level (int64_t) from environment variable (char*)
+int64_t LogLevelStrToInt(const char* mace_env_var_val) {
+  if (mace_env_var_val == nullptr) {
     return 0;
   }
 
   // Ideally we would use env_var / safe_strto64, but it is
   // hard to use here without pulling in a lot of dependencies,
   // so we use std:istringstream instead
-  string min_log_level(tf_env_var_val);
+  string min_log_level(mace_env_var_val);
   std::istringstream ss(min_log_level);
-  int64 level;
+  int64_t level;
   if (!(ss >> level)) {
     // Invalid vlog level setting, set level to default (0)
     level = 0;
@@ -89,26 +89,26 @@ int64 LogLevelStrToInt(const char* tf_env_var_val) {
   return level;
 }
 
-int64 MinLogLevelFromEnv() {
-  const char* tf_env_var_val = getenv("MACE_CPP_MIN_LOG_LEVEL");
-  return LogLevelStrToInt(tf_env_var_val);
+int64_t MinLogLevelFromEnv() {
+  const char* mace_env_var_val = getenv("MACE_CPP_MIN_LOG_LEVEL");
+  return LogLevelStrToInt(mace_env_var_val);
 }
 
-int64 MinVLogLevelFromEnv() {
-  const char* tf_env_var_val = getenv("MACE_CPP_MIN_VLOG_LEVEL");
-  return LogLevelStrToInt(tf_env_var_val);
+int64_t MinVLogLevelFromEnv() {
+  const char* mace_env_var_val = getenv("MACE_CPP_MIN_VLOG_LEVEL");
+  return LogLevelStrToInt(mace_env_var_val);
 }
 
 }  // namespace
 
 LogMessage::~LogMessage() {
   // Read the min log level once during the first call to logging.
-  static int64 min_log_level = MinLogLevelFromEnv();
+  static int64_t min_log_level = MinLogLevelFromEnv();
   if (severity_ >= min_log_level) GenerateLogMessage();
 }
 
-int64 LogMessage::MinVLogLevel() {
-  static int64 min_vlog_level = MinVLogLevelFromEnv();
+int64_t LogMessage::MinVLogLevel() {
+  static int64_t min_vlog_level = MinVLogLevelFromEnv();
   return min_vlog_level;
 }
diff --git a/mace/core/logging.h b/mace/core/logging.h
index c613a87d640618d689d6aadf04782f40a8172011..0787af3383d91074ae60c214f096923b8fc891d9 100644
--- a/mace/core/logging.h
+++ b/mace/core/logging.h
@@ -9,8 +9,6 @@
 #include
 #include
 
-#include "mace/core/integral_types.h"
-
 #undef ERROR
 
 namespace mace {
@@ -62,7 +60,7 @@ class LogMessage : public std::basic_ostringstream<char> {
   // Returns the minimum log level for VLOG statements.
   // E.g., if MinVLogLevel() is 2, then VLOG(2) statements will produce output,
   // but VLOG(3) will not. Defaults to 0.
-  static int64 MinVLogLevel();
+  static int64_t MinVLogLevel();
 
  protected:
   void GenerateLogMessage();
diff --git a/mace/core/operator.cc b/mace/core/operator.cc
index 2e5086ac222a70503bf655ff9d92557369beccb4..a755577b65b7d3c5c80dd1da50b6dd4d256bccf8 100644
--- a/mace/core/operator.cc
+++ b/mace/core/operator.cc
@@ -6,8 +6,8 @@
 
 namespace mace {
 
-std::map<int32, OperatorRegistry*>* gDeviceTypeRegistry() {
-  static std::map<int32, OperatorRegistry*> g_device_type_registry;
+std::map<int32_t, OperatorRegistry*>* gDeviceTypeRegistry() {
+  static std::map<int32_t, OperatorRegistry*> g_device_type_registry;
   return &g_device_type_registry;
 }
 
diff --git a/mace/core/operator.h b/mace/core/operator.h
index 970404f6ed00da9b743cc2a1a7eb6559cd6a6a09..df488691bac3fd8de6aea9c98a7175f80b50f41e 100644
--- a/mace/core/operator.h
+++ b/mace/core/operator.h
@@ -44,7 +44,7 @@ class OperatorBase {
         *operator_def_, name, default_value);
   }
 
-  inline const Tensor *Input(TIndex idx) {
+  inline const Tensor *Input(index_t idx) {
     MACE_CHECK(idx < inputs_.size());
     return inputs_[idx];
   }
diff --git a/mace/core/serializer.cc b/mace/core/serializer.cc
index 310e76299db02eb8dae9fa2032f65a5cccd1c6e2..3e80e545b2a0aa23eb26906f588c9713beba046e 100644
--- a/mace/core/serializer.cc
+++ b/mace/core/serializer.cc
@@ -17,8 +17,8 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
                                            DeviceType type) {
   unique_ptr<Tensor> tensor(new Tensor(GetDeviceAllocator(type), proto.data_type()));
-  vector<TIndex> dims;
-  for (const TIndex d : proto.dims()) {
+  vector<index_t> dims;
+  for (const index_t d : proto.dims()) {
     dims.push_back(d);
   }
   tensor->Resize(dims);
@@ -33,31 +33,31 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
                      proto.double_data().size());
       break;
     case DT_INT32:
-      tensor->template Copy<int32>(proto.int32_data().data(),
+      tensor->template Copy<int32_t>(proto.int32_data().data(),
                      proto.int32_data().size());
       break;
     case DT_UINT8:
-      tensor->CopyWithCast<int32, uint8>(proto.int32_data().data(),
+      tensor->CopyWithCast<int32_t, uint8_t>(proto.int32_data().data(),
                      proto.int32_data().size());
       break;
    case DT_INT16:
-      tensor->CopyWithCast<int32, int16>(proto.int32_data().data(),
+      tensor->CopyWithCast<int32_t, int16_t>(proto.int32_data().data(),
                      proto.int32_data().size());
       break;
    case DT_INT8:
-      tensor->CopyWithCast<int32, int8>(proto.int32_data().data(),
+      tensor->CopyWithCast<int32_t, int8_t>(proto.int32_data().data(),
                      proto.int32_data().size());
       break;
    case DT_INT64:
-      tensor->Copy<int64>(proto.int64_data().data(),
+      tensor->Copy<int64_t>(proto.int64_data().data(),
                      proto.int64_data().size());
       break;
    case DT_UINT16:
-      tensor->CopyWithCast<int32, uint16>(proto.int32_data().data(),
+      tensor->CopyWithCast<int32_t, uint16_t>(proto.int32_data().data(),
                      proto.int32_data().size());
       break;
    case DT_BOOL:
-      tensor->CopyWithCast<int32, bool>(proto.int32_data().data(),
+      tensor->CopyWithCast<int32_t, bool>(proto.int32_data().data(),
                     proto.int32_data().size());
       break;
     case DT_STRING: {
diff --git a/mace/core/tensor.h b/mace/core/tensor.h
index 77a44615aca803f22504340569a349da7835c648..1af32d3f2338f344332375d9cb67cbe23a4f119d 100644
--- a/mace/core/tensor.h
+++ b/mace/core/tensor.h
@@ -25,13 +25,13 @@ namespace mace {
   switch (TYPE_ENUM) {                  \
     CASE(float, SINGLE_ARG(STMTS))      \
     CASE(double, SINGLE_ARG(STMTS))     \
-    CASE(int32, SINGLE_ARG(STMTS))      \
-    CASE(uint8, SINGLE_ARG(STMTS))      \
-    CASE(uint16, SINGLE_ARG(STMTS))     \
-    CASE(int16, SINGLE_ARG(STMTS))      \
-    CASE(int8, SINGLE_ARG(STMTS))       \
+    CASE(int32_t, SINGLE_ARG(STMTS))    \
+    CASE(uint8_t, SINGLE_ARG(STMTS))    \
+    CASE(uint16_t, SINGLE_ARG(STMTS))   \
+    CASE(int16_t, SINGLE_ARG(STMTS))    \
+    CASE(int8_t, SINGLE_ARG(STMTS))     \
     CASE(string, SINGLE_ARG(STMTS))     \
-    CASE(int64, SINGLE_ARG(STMTS))      \
+    CASE(int64_t, SINGLE_ARG(STMTS))    \
     CASE(bool, SINGLE_ARG(STMTS))       \
     case DT_INVALID:                    \
       INVALID;                          \
@@ -64,17 +64,17 @@ class Tensor {
 
   inline DataType dtype() const { return dtype_; }
 
-  inline const vector<TIndex>& shape() const { return shape_; }
+  inline const vector<index_t>& shape() const { return shape_; }
 
-  inline TIndex dim_size() const { return shape_.size(); }
+  inline index_t dim_size() const { return shape_.size(); }
 
-  inline TIndex dim(TIndex index) const {
+  inline index_t dim(index_t index) const {
     MACE_CHECK(index < shape_.size(), "Exceeding ndim limit");
     MACE_CHECK(index >= 0, "Cannot have negative dimension index");
     return shape_[index];
   }
 
-  inline TIndex size() const { return size_; }
+  inline index_t size() const { return size_; }
 
   inline const void* raw_data() const {
     MACE_CHECK(data_.get() || size_ == 0);
@@ -108,9 +108,9 @@ class Tensor {
     return static_cast<T*>(raw_mutable_data());
   }
 
-  inline void Resize(const vector<TIndex>& shape) {
+  inline void Resize(const vector<index_t>& shape) {
     shape_ = shape;
-    TIndex size = NumElements();
+    index_t size = NumElements();
     if (size_ != size) {
       size_ = size;
       data_.reset();
@@ -126,14 +126,14 @@ class Tensor {
   }
 
   template <typename T>
-  inline void Copy(const T* src, TIndex size) {
+  inline void Copy(const T* src, index_t size) {
     MACE_CHECK(size == size_, "copy src and dst with different size.");
     CopyBytes(static_cast<const void*>(src), sizeof(T) * size);
   }
 
   template <typename SrcType, typename DstType>
   inline void CopyWithCast(const SrcType* src, size_t size) {
-    MACE_CHECK(static_cast<TIndex>(size) == size_, "copy src and dst with different size.");
+    MACE_CHECK(static_cast<index_t>(size) == size_, "copy src and dst with different size.");
     unique_ptr<DstType[]> buffer(new DstType[size]);
     for (size_t i = 0; i < size; ++i) {
       buffer[i] = static_cast<DstType>(src[i]);
@@ -161,15 +161,15 @@ class Tensor {
   }
 
  private:
-  inline int64 NumElements() const {
-    return std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<int64>());
+  inline int64_t NumElements() const {
+    return std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<int64_t>());
   }
 
   Allocator* alloc_;
-  TIndex size_;
+  index_t size_;
   DataType dtype_;
   std::shared_ptr<void> data_;
-  vector<TIndex> shape_;
+  vector<index_t> shape_;
 };
 
 }  // namespace tensor
diff --git a/mace/core/testing/env_time.h b/mace/core/testing/env_time.h
index 6be189a658ab489fdf59fcc4f666c71574ad468b..f07783c1f66e4551886276e30796001ae1fc1a52 100644
--- a/mace/core/testing/env_time.h
+++ b/mace/core/testing/env_time.h
@@ -16,10 +16,10 @@ namespace mace {
 
 namespace testing {
 
-inline int64 NowMicros() {
+inline int64_t NowMicros() {
   struct timeval tv;
   gettimeofday(&tv, nullptr);
-  return static_cast<int64>(tv.tv_sec) * 1000000 + tv.tv_usec;
+  return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
 }
 
 }  // namespace testing
diff --git a/mace/core/testing/test_benchmark.cc b/mace/core/testing/test_benchmark.cc
index 1eb976ec998833ffcf436dcf4744566d460f168f..885a9a63f70956428008291f29dc293245c7d37a 100644
--- a/mace/core/testing/test_benchmark.cc
+++ b/mace/core/testing/test_benchmark.cc
@@ -16,10 +16,10 @@ namespace testing {
 
 static std::vector<Benchmark*>* all_benchmarks = nullptr;
 static std::string label;
-static int64 bytes_processed;
-static int64 items_processed;
-static int64 accum_time = 0;
-static int64 start_time = 0;
+static int64_t bytes_processed;
+static int64_t items_processed;
+static int64_t accum_time = 0;
+static int64_t start_time = 0;
 
 Benchmark::Benchmark(const char* name, void (*fn)(int))
     : name_(name), num_args_(0), fn0_(fn) {
@@ -112,10 +112,10 @@ void Benchmark::Register() {
 }
 
 void Benchmark::Run(int arg1, int arg2, int* run_count, double* run_seconds) {
-  static const int64 kMinIters = 100;
-  static const int64 kMaxIters = 1000000000;
+  static const int64_t kMinIters = 100;
+  static const int64_t kMaxIters = 1000000000;
   static const double kMinTime = 0.5;
-  int64 iters = kMinIters;
+  int64_t iters = kMinIters;
   while (true) {
     accum_time = 0;
     start_time = NowMicros();
@@ -142,13 +142,13 @@ void Benchmark::Run(int arg1, int arg2, int* run_count, double* run_seconds) {
     double multiplier = 1.4 * kMinTime / std::max(seconds, 1e-9);
     multiplier = std::min(10.0, multiplier);
     if (multiplier <= 1.0) multiplier *= 2.0;
-    iters = std::max<int64>(multiplier * iters, iters + 1);
+    iters = std::max<int64_t>(multiplier * iters, iters + 1);
     iters = std::min(iters, kMaxIters);
   }
 }
 
-void BytesProcessed(int64 n) { bytes_processed = n; }
-void ItemsProcessed(int64 n) { items_processed = n; }
+void BytesProcessed(int64_t n) { bytes_processed = n; }
+void ItemsProcessed(int64_t n) { items_processed = n; }
 void StartTiming() {
   if (start_time == 0) start_time = NowMicros();
 }
diff --git a/mace/core/testing/test_benchmark.h b/mace/core/testing/test_benchmark.h
index 44a352f54df4cae609b3955eb5343c9b78d34126..5800f5edb0912899b09fc95ebebb8a741e2a48e1 100644
--- a/mace/core/testing/test_benchmark.h
+++ b/mace/core/testing/test_benchmark.h
@@ -42,8 +42,8 @@ class Benchmark {
 };
 
 void RunBenchmarks();
-void BytesProcessed(int64);
-void ItemsProcessed(int64);
+void BytesProcessed(int64_t);
+void ItemsProcessed(int64_t);
 void StartTiming();
 void StopTiming();
 
diff --git a/mace/core/types.h b/mace/core/types.h
index 161be5a7103a9c8c69be3932b9997e7dbee51124..b174993d024587875d6b597cdcb7a19f9d79d154 100644
--- a/mace/core/types.h
+++ b/mace/core/types.h
@@ -42,16 +42,16 @@ struct EnumToDataType {};  // Specializations below
 MATCH_TYPE_AND_ENUM(float, DT_FLOAT);
 MATCH_TYPE_AND_ENUM(double, DT_DOUBLE);
-MATCH_TYPE_AND_ENUM(int32, DT_INT32);
-MATCH_TYPE_AND_ENUM(uint16, DT_UINT16);
-MATCH_TYPE_AND_ENUM(uint8, DT_UINT8);
-MATCH_TYPE_AND_ENUM(int16, DT_INT16);
-MATCH_TYPE_AND_ENUM(int8, DT_INT8);
+MATCH_TYPE_AND_ENUM(int32_t, DT_INT32);
+MATCH_TYPE_AND_ENUM(uint16_t, DT_UINT16);
+MATCH_TYPE_AND_ENUM(uint8_t, DT_UINT8);
+MATCH_TYPE_AND_ENUM(int16_t, DT_INT16);
+MATCH_TYPE_AND_ENUM(int8_t, DT_INT8);
 MATCH_TYPE_AND_ENUM(string, DT_STRING);
-MATCH_TYPE_AND_ENUM(int64, DT_INT64);
+MATCH_TYPE_AND_ENUM(int64_t, DT_INT64);
 MATCH_TYPE_AND_ENUM(bool, DT_BOOL);
 
-static const int32 kint32max = ((int32)0x7FFFFFFF);
+static const int32_t kint32_tmax = ((int32_t)0x7FFFFFFF);
 
 }  // namespace mace
 
diff --git a/mace/examples/BUILD b/mace/examples/BUILD
index 4f4a7794e0cb00f7d8312299dd7572afd74e68d6..82915d74e05e02c0bbddc04163d7c4e53f12f22b 100644
--- a/mace/examples/BUILD
+++ b/mace/examples/BUILD
@@ -7,10 +7,6 @@ cc_binary(
         "helloworld.cc",
     ],
     copts = ["-std=c++11"],
-    linkopts = if_android([
-        "-pie",
-        "-llog",
-    ]),
     deps = [
         "//mace/core",
         "//mace/ops",
@@ -21,10 +17,6 @@ cc_test(
    name = "benchmark_example",
    srcs = ["benchmark_example.cc"],
    copts = ["-std=c++11"],
-    linkopts = if_android([
-        "-pie",
-        "-llog",
-    ]),
    linkstatic = 1,
    deps = [
        "//mace/core",
diff --git a/mace/examples/benchmark_example.cc b/mace/examples/benchmark_example.cc
index 106c6c3c4ccdf7dcc091a6c9f9bbc8c0c15d2611..50e5184b0bf384d81932466584cdbc688db40a21 100644
--- a/mace/examples/benchmark_example.cc
+++ b/mace/examples/benchmark_example.cc
@@ -6,7 +6,7 @@
 
 static void foo(int iters) {
   static const int N = 32;
-  const int64 tot = static_cast<int64>(iters) * N;
+  const int64_t tot = static_cast<int64_t>(iters) * N;
   mace::testing::ItemsProcessed(tot);
   mace::testing::BytesProcessed(tot * (sizeof(float)));
 
@@ -26,7 +26,7 @@ BENCHMARK(foo);
 
 static void bar(int iters, int n) {
-  const int64 tot = static_cast<int64>(iters) * n;
+  const int64_t tot = static_cast<int64_t>(iters) * n;
   mace::testing::ItemsProcessed(tot);
   mace::testing::BytesProcessed(tot * (sizeof(float)));
 
diff --git a/mace/kernels/BUILD b/mace/kernels/BUILD
index de8293e35421ac29031db5281d162de3999efb78..098e80a949d457406730e0f0146b45c03b75faee 100644
--- a/mace/kernels/BUILD
+++ b/mace/kernels/BUILD
@@ -18,6 +18,9 @@ cc_library(
         "//mace/core:core",
     ],
     copts = ['-std=c++11'],
+    linkopts = ["-fopenmp"] + if_android([
+        "-lm",
+    ]),
 )
 
 cc_test(
@@ -29,11 +32,9 @@ cc_test(
         "//mace/core:core",
     ],
     copts = ['-std=c++11'],
-    linkopts = ["-fopenmp"] + if_android([
+    linkopts = if_android([
         "-pie",
-        "-llog",
-        "-lm",
-    ]),
+    ]),
     linkstatic = 1,
     testonly = 1,
 )
@@ -47,11 +48,6 @@ cc_test(
         "//mace/core:test_benchmark_main",
     ],
     copts = ['-std=c++11'],
-    linkopts = ["-fopenmp"] + if_android([
-        "-pie",
-        "-llog",
-        "-lm",
-    ]),
     linkstatic = 1,
     testonly = 1,
 )
diff --git a/mace/kernels/addn.h b/mace/kernels/addn.h
index 3f79ac69b468c623acbe2cdb6d9179bbe3906bda..30648eb8a15186198ec8b2c9fb98c04695bf4366 100644
--- a/mace/kernels/addn.h
+++ b/mace/kernels/addn.h
@@ -15,7 +15,7 @@ void AddNFuntion(const vector<const Tensor*>& input_tensor, Tensor *output_tenso
   int n = input_tensor.size();
   MACE_CHECK(n > 1);
   MACE_CHECK_NOTNULL(input_tensor[0]);
-  int64 size = input_tensor[0]->size();
+  int64_t size = input_tensor[0]->size();
   vector<const T*> inputs(n);
   for (int i = 0; i < n; ++i) {
     inputs[i] = input_tensor[i]->data<T>();
@@ -24,7 +24,7 @@
   T* output = output_tensor->mutable_data<T>();
 
   for (int i = 0; i < n; ++i) {
-    for (int64 j = 0; j < size; ++j) {
+    for (int64_t j = 0; j < size; ++j) {
       output[j] += inputs[i][j];
     }
   }
diff --git a/mace/kernels/batch_norm.h b/mace/kernels/batch_norm.h
index d2899d762c6274d0de3cf821bd66b5a983c60e8d..84ca48d4a76bc477258ce0d9ec152d5f313709a9 100644
--- a/mace/kernels/batch_norm.h
+++ b/mace/kernels/batch_norm.h
@@ -30,9 +30,9 @@ struct BatchNormFunctor : public BatchNormFunctorBase<T> {
                   const T* offset,
                   const T* mean,
                   const T* var,
-                  const TIndex n,
-                  const TIndex channel,
-                  const TIndex sample_size,
+                  const index_t n,
+                  const index_t channel,
+                  const index_t sample_size,
                   T* output) {
     // Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
     // The calculation formula for inference is
@@ -42,15 +42,15 @@ struct BatchNormFunctor : public BatchNormFunctorBase<T> {
     // new_offset = \offset - mean * common_val;
     // Y = new_scale * X + new_offset;
     T new_scale, new_offset;
-    for (TIndex c = 0; c < channel; ++c) {
+    for (index_t c = 0; c < channel; ++c) {
       new_scale = scale[c] / std::sqrt(var[c] + this->variance_epsilon_);
       new_offset = offset[c] - mean[c] * new_scale;
-      TIndex pos = c * sample_size;
+      index_t pos = c * sample_size;
 
-      for (TIndex i = 0; i < n; ++i) {
+      for (index_t i = 0; i < n; ++i) {
         const T* input_sample_ptr = input + pos;
         T* output_sample_ptr = output + pos;
-        for (TIndex j = 0; j < sample_size; ++j) {
+        for (index_t j = 0; j < sample_size; ++j) {
           output_sample_ptr[j] = new_scale * input_sample_ptr[j] + new_offset;
         }
         pos += channel * sample_size;
diff --git a/mace/kernels/benchmark/addn_benchmark.cc b/mace/kernels/benchmark/addn_benchmark.cc
index f63fed77b11847f3aacca8291c333699b0bd840a..4cec0270dbc6e1d9f55eb0db404965d9d1f1088e 100644
--- a/mace/kernels/benchmark/addn_benchmark.cc
+++ b/mace/kernels/benchmark/addn_benchmark.cc
@@ -11,7 +11,7 @@
 using namespace mace;
 using namespace mace::kernels;
 
 static void AddNBenchmark(int iters, int n, int type) {
-  const int64 tot = static_cast<int64>(iters) * n * 3;
+  const int64_t tot = static_cast<int64_t>(iters) * n * 3;
   mace::testing::ItemsProcessed(tot);
   mace::testing::BytesProcessed(tot * (sizeof(float)));
 
@@ -35,7 +35,7 @@ static void AddNBenchmark(int iters, int n, int type) {
   float *input3 = input_tensor3.mutable_data<float>();
   float *output = output_tensor.mutable_data<float>();
 
-  for (int64 i = 0; i < n; ++i) {
+  for (int64_t i = 0; i < n; ++i) {
     input1[i] = nd(gen);
     input2[i] = nd(gen);
     input3[i] = nd(gen);
diff --git a/mace/kernels/benchmark/relu_benchmark.cc b/mace/kernels/benchmark/relu_benchmark.cc
index 9276cadc737bba60a0fac81893dd5aa797d3f6a9..86858681ca29518f6ed98e46f58794d82c984057 100644
--- a/mace/kernels/benchmark/relu_benchmark.cc
+++ b/mace/kernels/benchmark/relu_benchmark.cc
@@ -11,7 +11,7 @@
 using namespace mace;
 using namespace mace::kernels;
 
 static void ReluBenchmark(int iters, int n, int type) {
-  const int64 tot = static_cast<int64>(iters) * n;
+  const int64_t tot = static_cast<int64_t>(iters) * n;
   mace::testing::ItemsProcessed(tot);
   mace::testing::BytesProcessed(tot * (sizeof(float)));
 
@@ -25,7 +25,7 @@ static void ReluBenchmark(int iters, int n, int type) {
   output_tensor.ResizeLike(input_tensor);
   float *input = input_tensor.mutable_data<float>();
   float *output = output_tensor.mutable_data<float>();
-  for (int64 i = 0; i < n; ++i) {
+  for (int64_t i = 0; i < n; ++i) {
     input[i] = nd(gen);
   }
 
diff --git a/mace/kernels/neon/addn_neon.cc b/mace/kernels/neon/addn_neon.cc
index 3baab3c3b3dadb8570e0f7b4830fd9c14c1799fa..ad6f06e8df7c17dc189316a20be3be5586a212e6 100644
--- a/mace/kernels/neon/addn_neon.cc
+++ b/mace/kernels/neon/addn_neon.cc
@@ -14,7 +14,7 @@ void NeonAddNFuntion_float(const vector<const Tensor *> &input_tensor,
   int n = input_tensor.size();
   MACE_CHECK(n > 1);
   MACE_CHECK_NOTNULL(input_tensor[0]);
-  int64 size = input_tensor[0]->size();
+  int64_t size = input_tensor[0]->size();
   output_tensor->ResizeLike(input_tensor[0]);
   float *output = output_tensor->mutable_data<float>();
   vector<const float *> inputs(n);
@@ -22,19 +22,19 @@ void NeonAddNFuntion_float(const vector<const Tensor *> &input_tensor,
     inputs[i] = input_tensor[i]->data<float>();
   }
 
-  int64 cost = size * n;
-  int64 groups = 1;
+  int64_t cost = size * n;
+  int64_t groups = 1;
   if (cost > kCostPerGroup) {
     groups = cost / kCostPerGroup;
   }
-  int64 element_per_group = size / groups;
+  int64_t element_per_group = size / groups;
 
 #pragma omp parallel for num_threads(1)  // no significant performance improve
-  for (int64 i = 0; i < size; i += element_per_group) {
-    int64 count = std::min(element_per_group, size - i);
+  for (int64_t i = 0; i < size; i += element_per_group) {
+    int64_t count = std::min(element_per_group, size - i);
     int nn = count >> 2;
     int remain = count - (nn << 2);
-    for (int64 j = 0; j < n; ++j) {
+    for (int64_t j = 0; j < n; ++j) {
       const float *inptr = inputs[j] + i;
       float *outptr = output + i;
       for (int k = 0; k < nn; ++k) {
diff --git a/mace/kernels/neon/batch_norm_neon.cc b/mace/kernels/neon/batch_norm_neon.cc
index 9db63f68bd178330f5cfd94dc46b991ffea46a61..a306fdbc804e0c5995846fa89dd5bb681d31e1ed 100644
--- a/mace/kernels/neon/batch_norm_neon.cc
+++ b/mace/kernels/neon/batch_norm_neon.cc
@@ -34,18 +34,18 @@ struct BatchNormFunctor : public BatchNormFunctorBase
     int count = sample_size >> 2;
     int remain_count = sample_size - count;
-    for (TIndex c = 0; c < channel; ++c) {
+    for (index_t c = 0; c < channel; ++c) {
       new_scale = scale[c] / std::sqrt(var[c] + this->variance_epsilon_);
       new_offset = offset[c] - mean[c] * new_scale;
-      TIndex pos = c * sample_size;
+      index_t pos = c * sample_size;
 
       float32x4_t new_scale_f = vdupq_n_f32(new_scale);
       float32x4_t new_offset_f = vdupq_n_f32(new_offset);
-      for (TIndex i = 0; i < n; ++i) {
+      for (index_t i = 0; i < n; ++i) {
         const float* input_sample_ptr = input + pos;
         float* output_sample_ptr = output + pos;
 
-        for(TIndex j = 0; j < count; ++j) {
+        for(index_t j = 0; j < count; ++j) {
          float32x4_t input_f = vld1q_f32(input_sample_ptr);
          float32x4_t output_f = new_offset_f;
          output_f = vfmaq_f32(output_f, input_f, new_scale_f);
@@ -53,7 +53,7 @@ struct BatchNormFunctor : public BatchNormFunctorBase
-  int64 size = input_tensor->size();
+  int64_t size = input_tensor->size();
   output_tensor->ResizeLike(input_tensor);
   const float *input = input_tensor->data<float>();
   float *output = output_tensor->mutable_data<float>();
 #pragma omp parallel for num_threads(1)  // no significant performance improve
-  for (int64 i = 0; i < size; i += kCostPerGroup) {
-    int64 count = std::min(static_cast<int64>(kCostPerGroup), size - i);
+  for (int64_t i = 0; i < size; i += kCostPerGroup) {
+    int64_t count = std::min(static_cast<int64_t>(kCostPerGroup), size - i);
     int nn = count >> 2;
     int remain = count - (nn << 2);
     const float *inptr = input + i;
diff --git a/mace/kernels/relu.h b/mace/kernels/relu.h
index 086f762b41e85c3ff7042086ba1b56d3607d30c2..d0de2f0b061524537479c9082ca250fba47e6c29 100644
--- a/mace/kernels/relu.h
+++ b/mace/kernels/relu.h
@@ -12,12 +12,12 @@ namespace kernels {
 
 template <typename T>
 void ReluFuntion(const Tensor *input_tensor, Tensor *output_tensor) {
-  int64 size = input_tensor->size();
+  int64_t size = input_tensor->size();
   output_tensor->ResizeLike(input_tensor);
   const T *input = input_tensor->data<T>();
   T *output = output_tensor->mutable_data<T>();
 
-  for (int64 i = 0; i < size; ++i) {
+  for (int64_t i = 0; i < size; ++i) {
     output[i] = std::max(input[i], static_cast<T>(0));
   }
 }
diff --git a/mace/kernels/test/addn_neon_test.cc b/mace/kernels/test/addn_neon_test.cc
index 8d1ca924b9b3ef8fecf96301007afa593cd54600..521fe9129b64a1e8f646c2124b3f56de32af677a 100644
--- a/mace/kernels/test/addn_neon_test.cc
+++ b/mace/kernels/test/addn_neon_test.cc
@@ -15,7 +15,7 @@ TEST(NeonTest, AddN) {
   std::mt19937 gen(rd());
   std::normal_distribution<float> nd(0, 1);
 
-  int64 count = 100000;
+  int64_t count = 100000;
   Tensor input_tensor1(cpu_allocator(), DataType::DT_FLOAT);
   input_tensor1.Resize({100, 1000});
   Tensor input_tensor2(cpu_allocator(), DataType::DT_FLOAT);
@@ -37,7 +37,7 @@ TEST(NeonTest, AddN) {
   float *output = output_tensor.mutable_data<float>();
   float *output_neon = output_tensor_neon.mutable_data<float>();
 
-  for (int64 i = 0; i < count; ++i) {
+  for (int64_t i = 0; i < count; ++i) {
     input1[i] = nd(gen);
     input2[i] = nd(gen);
     input3[i] = nd(gen);
@@ -48,7 +48,7 @@ TEST(NeonTest, AddN) {
   ASSERT_EQ(count, output_tensor.size());
   ASSERT_EQ(count, output_tensor_neon.size());
 
-  for (int64 i = 0; i < count; ++i) {
+  for (int64_t i = 0; i < count; ++i) {
     ASSERT_FLOAT_EQ(output[i], output_neon[i]);
   }
 }
diff --git a/mace/kernels/test/relu_neon_test.cc b/mace/kernels/test/relu_neon_test.cc
index 40c1bc62d68a94820ac99d1140203c24dd412235..a16dc2692501017a494d25d5af9dab73be8c44db 100644
--- a/mace/kernels/test/relu_neon_test.cc
+++ b/mace/kernels/test/relu_neon_test.cc
@@ -15,7 +15,7 @@ TEST(NeonTest, Relu) {
   std::mt19937 gen(rd());
   std::normal_distribution<float> nd(0, 1);
 
-  int64 count = 100000;
+  int64_t count = 100000;
   Tensor input_tensor(cpu_allocator(), DataType::DT_FLOAT);
   input_tensor.Resize({100, 1000});
   Tensor output_tensor(cpu_allocator(), DataType::DT_FLOAT);
@@ -27,7 +27,7 @@ TEST(NeonTest, Relu) {
   float *output = output_tensor.mutable_data<float>();
   float *output_neon = output_tensor_neon.mutable_data<float>();
 
-  for (int64 i = 0; i < count; ++i) {
+  for (int64_t i = 0; i < count; ++i) {
     input[i] = nd(gen);
   }
 
@@ -36,7 +36,7 @@ TEST(NeonTest, Relu) {
   ASSERT_EQ(count, output_tensor.size());
   ASSERT_EQ(count, output_tensor_neon.size());
 
-  for (int64 i = 0; i < count; ++i) {
+  for (int64_t i = 0; i < count; ++i) {
     ASSERT_FLOAT_EQ(output[i], output_neon[i]);
   }
 }
diff --git a/mace/ops/batch_norm.h b/mace/ops/batch_norm.h
index 8a3c01b44354cbe991b41fe08d98b2302b1f4099..59c227c865b519b81c7e6d818a052336acd2e570 100644
--- a/mace/ops/batch_norm.h
+++ b/mace/ops/batch_norm.h
@@ -33,9 +33,9 @@ class BatchNormOp : public Operator<D, T> {
     Tensor* output = this->Output(0);
     output->ResizeLike(input);
 
-    const TIndex n = input->dim(0);
-    const TIndex channel = input->dim(1);
-    const TIndex sample_size = input->dim(2) * input->dim(3);
+    const index_t n = input->dim(0);
+    const index_t channel = input->dim(1);
+    const index_t sample_size = input->dim(2) * input->dim(3);
 
     const float* input_ptr = input->data<float>();
     const float* scale_ptr = scale->data<float>();
diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h
index 61085f7dd0ed090fb248db3f76037199d7538c78..0e96943c60085014bc01c65323882ebc0480249e 100644
--- a/mace/ops/ops_test_util.h
+++ b/mace/ops/ops_test_util.h
@@ -43,7 +43,7 @@ class OpsTestBase : public ::testing::Test {
   }
 
  public:
  template <typename T>
-  void AddInputFromArray(const char* name, const std::vector<TIndex>& shape, const std::vector<T>& data) {
+  void AddInputFromArray(const char* name, const std::vector<index_t>& shape, const std::vector<T>& data) {
    Tensor* input = ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
    input->Resize(shape);
    float* input_data = input->mutable_data<float>();
@@ -70,7 +70,7 @@ class OpsTestBase : public ::testing::Test {
 };
 
 template <typename T>
-Tensor CreateTensor(const std::vector<TIndex>& shape, const std::vector<T>& data) {
+Tensor CreateTensor(const std::vector<index_t>& shape, const std::vector<T>& data) {
   Tensor res(cpu_allocator(), DataTypeToEnum<T>::v());
   res.Resize(shape);
   float* input_data = res.mutable_data<float>();
@@ -90,7 +90,7 @@ inline std::string ShapeToString(const Tensor& x) {
   std::stringstream stream;
   for (int i = 0; i < x.dim_size(); i++) {
     if (i > 0) stream<<",";
-    int64 dim = x.dim(i);
+    int64_t dim = x.dim(i);
     if (dim < 0) {
       stream<<"?";
     } else {