Commit a3850281 authored by Liangliang He

Update data types and linking opts

Parent 68a335f1
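
In short, this commit replaces the project-local integer aliases (int8/int16/int32/int64 and the TIndex index type) with the standard fixed-width types from <cstdint> plus a single index_t alias, and moves the Android-specific link options (-llog, -pie) into //mace/core so downstream targets stop repeating them. A minimal sketch of the resulting type usage, based only on the hunks below; the checked_dim helper is illustrative, not part of the diff:

#include <cstdint>
#include <vector>

// After this commit the only project-level alias is the tensor index type;
// all other integers use the standard fixed-width names (int32_t, int64_t, ...).
typedef int64_t index_t;

// Illustrative helper mirroring how Tensor::dim() indexes its shape vector.
inline index_t checked_dim(const std::vector<index_t>& shape, index_t i) {
  return shape.at(static_cast<size_t>(i));  // at() throws if i is out of range
}
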
......@@ -7,6 +7,8 @@ package(
licenses(["notice"]) # Apache 2.0
load("//mace:mace.bzl", "if_android")
cc_library(
name = "core",
srcs = glob([
......@@ -19,6 +21,10 @@ cc_library(
deps = [
"//mace/proto:cc_proto",
],
linkopts = if_android([
"-llog",
"-pie",
]),
)
# Main program for tests
......
......@@ -12,7 +12,6 @@
#include <vector>
#include <algorithm>
#include "mace/core/integral_types.h"
#include "mace/core/logging.h"
using std::set;
......@@ -21,7 +20,7 @@ using std::string;
using std::unique_ptr;
using std::vector;
typedef int64 TIndex;
typedef int64_t index_t;
// Disable the copy and assignment operator for a class.
#ifndef DISABLE_COPY_AND_ASSIGN
......
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_INTEGRAL_TYPES_H_
#define MACE_CORE_INTEGRAL_TYPES_H_
typedef int8_t int8;
typedef int16_t int16;
typedef int32_t int32;
typedef int64_t int64;
typedef uint8_t uint8;
typedef uint16_t uint16;
typedef uint32_t uint32;
typedef uint64_t uint64;
#endif // MACE_CORE_INTEGRAL_TYPES_H_
......@@ -69,18 +69,18 @@ void LogMessage::GenerateLogMessage() {
namespace {
// Parse log level (int64) from environment variable (char*)
int64 LogLevelStrToInt(const char* tf_env_var_val) {
if (tf_env_var_val == nullptr) {
// Parse log level (int64_t) from environment variable (char*)
int64_t LogLevelStrToInt(const char* mace_env_var_val) {
if (mace_env_var_val == nullptr) {
return 0;
}
// Ideally we would use env_var / safe_strto64, but it is
// hard to use here without pulling in a lot of dependencies,
// so we use std::istringstream instead
string min_log_level(tf_env_var_val);
string min_log_level(mace_env_var_val);
std::istringstream ss(min_log_level);
int64 level;
int64_t level;
if (!(ss >> level)) {
// Invalid vlog level setting, set level to default (0)
level = 0;
......@@ -89,26 +89,26 @@ int64 LogLevelStrToInt(const char* tf_env_var_val) {
return level;
}
int64 MinLogLevelFromEnv() {
const char* tf_env_var_val = getenv("MACE_CPP_MIN_LOG_LEVEL");
return LogLevelStrToInt(tf_env_var_val);
int64_t MinLogLevelFromEnv() {
const char* mace_env_var_val = getenv("MACE_CPP_MIN_LOG_LEVEL");
return LogLevelStrToInt(mace_env_var_val);
}
int64 MinVLogLevelFromEnv() {
const char* tf_env_var_val = getenv("MACE_CPP_MIN_VLOG_LEVEL");
return LogLevelStrToInt(tf_env_var_val);
int64_t MinVLogLevelFromEnv() {
const char* mace_env_var_val = getenv("MACE_CPP_MIN_VLOG_LEVEL");
return LogLevelStrToInt(mace_env_var_val);
}
} // namespace
LogMessage::~LogMessage() {
// Read the min log level once during the first call to logging.
static int64 min_log_level = MinLogLevelFromEnv();
static int64_t min_log_level = MinLogLevelFromEnv();
if (severity_ >= min_log_level) GenerateLogMessage();
}
int64 LogMessage::MinVLogLevel() {
static int64 min_vlog_level = MinVLogLevelFromEnv();
int64_t LogMessage::MinVLogLevel() {
static int64_t min_vlog_level = MinVLogLevelFromEnv();
return min_vlog_level;
}
......
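
As the logging.cc hunk above shows, both thresholds are read once from the environment (MACE_CPP_MIN_LOG_LEVEL and MACE_CPP_MIN_VLOG_LEVEL) and cached in function-local statics. A minimal sketch, not part of this commit, of setting them programmatically from a test driver; it assumes the usual INFO=0, WARNING=1, ERROR=2 severity numbering and a POSIX setenv:

#include <cstdlib>

int main() {
  // Suppress messages below severity 2 (INFO and WARNING); must run before the
  // first LOG() call, because the level is cached in a function-local static.
  setenv("MACE_CPP_MIN_LOG_LEVEL", "2", /*overwrite=*/1);
  // Show VLOG(1) but not VLOG(2) and above, per MinVLogLevel() semantics.
  setenv("MACE_CPP_MIN_VLOG_LEVEL", "1", /*overwrite=*/1);
  return 0;
}

Equivalent shell usage would export the same variables before launching the binary.
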
......@@ -9,8 +9,6 @@
#include <limits>
#include <string>
#include "mace/core/integral_types.h"
#undef ERROR
namespace mace {
......@@ -62,7 +60,7 @@ class LogMessage : public std::basic_ostringstream<char> {
// Returns the minimum log level for VLOG statements.
// E.g., if MinVLogLevel() is 2, then VLOG(2) statements will produce output,
// but VLOG(3) will not. Defaults to 0.
static int64 MinVLogLevel();
static int64_t MinVLogLevel();
protected:
void GenerateLogMessage();
......
......@@ -6,8 +6,8 @@
namespace mace {
std::map<int32, OperatorRegistry*>* gDeviceTypeRegistry() {
static std::map<int32, OperatorRegistry*> g_device_type_registry;
std::map<int32_t, OperatorRegistry*>* gDeviceTypeRegistry() {
static std::map<int32_t, OperatorRegistry*> g_device_type_registry;
return &g_device_type_registry;
}
......
......@@ -44,7 +44,7 @@ class OperatorBase {
*operator_def_, name, default_value);
}
inline const Tensor *Input(TIndex idx) {
inline const Tensor *Input(index_t idx) {
MACE_CHECK(idx < inputs_.size());
return inputs_[idx];
}
......
......@@ -17,8 +17,8 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
DeviceType type) {
unique_ptr<Tensor> tensor(new Tensor(GetDeviceAllocator(type),
proto.data_type()));
vector<TIndex> dims;
for (const TIndex d : proto.dims()) {
vector<index_t> dims;
for (const index_t d : proto.dims()) {
dims.push_back(d);
}
tensor->Resize(dims);
......@@ -33,31 +33,31 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
proto.double_data().size());
break;
case DT_INT32:
tensor->template Copy<int32>(proto.int32_data().data(),
tensor->template Copy<int32_t>(proto.int32_data().data(),
proto.int32_data().size());
break;
case DT_UINT8:
tensor->CopyWithCast<int32, uint8>(proto.int32_data().data(),
tensor->CopyWithCast<int32_t, uint8_t>(proto.int32_data().data(),
proto.int32_data().size());
break;
case DT_INT16:
tensor->CopyWithCast<int32, int16>(proto.int32_data().data(),
tensor->CopyWithCast<int32_t, int16_t>(proto.int32_data().data(),
proto.int32_data().size());
break;
case DT_INT8:
tensor->CopyWithCast<int32, int8>(proto.int32_data().data(),
tensor->CopyWithCast<int32_t, int8_t>(proto.int32_data().data(),
proto.int32_data().size());
break;
case DT_INT64:
tensor->Copy<int64>(proto.int64_data().data(),
tensor->Copy<int64_t>(proto.int64_data().data(),
proto.int64_data().size());
break;
case DT_UINT16:
tensor->CopyWithCast<int32, uint16>(proto.int32_data().data(),
tensor->CopyWithCast<int32_t, uint16_t>(proto.int32_data().data(),
proto.int32_data().size());
break;
case DT_BOOL:
tensor->CopyWithCast<int32, bool>(proto.int32_data().data(),
tensor->CopyWithCast<int32_t, bool>(proto.int32_data().data(),
proto.int32_data().size());
break;
case DT_STRING: {
......
......@@ -25,13 +25,13 @@ namespace mace {
switch (TYPE_ENUM) { \
CASE(float, SINGLE_ARG(STMTS)) \
CASE(double, SINGLE_ARG(STMTS)) \
CASE(int32, SINGLE_ARG(STMTS)) \
CASE(uint8, SINGLE_ARG(STMTS)) \
CASE(uint16, SINGLE_ARG(STMTS)) \
CASE(int16, SINGLE_ARG(STMTS)) \
CASE(int8, SINGLE_ARG(STMTS)) \
CASE(int32_t, SINGLE_ARG(STMTS)) \
CASE(uint8_t, SINGLE_ARG(STMTS)) \
CASE(uint16_t, SINGLE_ARG(STMTS)) \
CASE(int16_t, SINGLE_ARG(STMTS)) \
CASE(int8_t, SINGLE_ARG(STMTS)) \
CASE(string, SINGLE_ARG(STMTS)) \
CASE(int64, SINGLE_ARG(STMTS)) \
CASE(int64_t, SINGLE_ARG(STMTS)) \
CASE(bool, SINGLE_ARG(STMTS)) \
case DT_INVALID: \
INVALID; \
......@@ -64,17 +64,17 @@ class Tensor {
inline DataType dtype() const { return dtype_; }
inline const vector<TIndex>& shape() const { return shape_; }
inline const vector<index_t>& shape() const { return shape_; }
inline TIndex dim_size() const { return shape_.size(); }
inline index_t dim_size() const { return shape_.size(); }
inline TIndex dim(TIndex index) const {
inline index_t dim(index_t index) const {
MACE_CHECK(index < shape_.size(), "Exceeding ndim limit");
MACE_CHECK(index >= 0, "Cannot have negative dimension index");
return shape_[index];
}
inline TIndex size() const { return size_; }
inline index_t size() const { return size_; }
inline const void* raw_data() const {
MACE_CHECK(data_.get() || size_ == 0);
......@@ -108,9 +108,9 @@ class Tensor {
return static_cast<T*>(raw_mutable_data());
}
inline void Resize(const vector<TIndex>& shape) {
inline void Resize(const vector<index_t>& shape) {
shape_ = shape;
TIndex size = NumElements();
index_t size = NumElements();
if (size_ != size) {
size_ = size;
data_.reset();
......@@ -126,14 +126,14 @@ class Tensor {
}
template <typename T>
inline void Copy(const T* src, TIndex size) {
inline void Copy(const T* src, index_t size) {
MACE_CHECK(size == size_, "copy src and dst with different size.");
CopyBytes(static_cast<const void*>(src), sizeof(T) * size);
}
template <typename SrcType, typename DstType>
inline void CopyWithCast(const SrcType* src, size_t size) {
MACE_CHECK(static_cast<TIndex>(size) == size_, "copy src and dst with different size.");
MACE_CHECK(static_cast<index_t>(size) == size_, "copy src and dst with different size.");
unique_ptr<DstType[]> buffer(new DstType[size]);
for (size_t i = 0; i < size; ++i) {
buffer[i] = static_cast<DstType>(src[i]);
......@@ -161,15 +161,15 @@ class Tensor {
}
private:
inline int64 NumElements() const {
return std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<int64>());
inline int64_t NumElements() const {
return std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<int64_t>());
}
Allocator* alloc_;
TIndex size_;
index_t size_;
DataType dtype_;
std::shared_ptr<void> data_;
vector<TIndex> shape_;
vector<index_t> shape_;
};
} // namespace tensor
......
......@@ -16,10 +16,10 @@ namespace mace {
namespace testing {
inline int64 NowMicros() {
inline int64_t NowMicros() {
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64>(tv.tv_sec) * 1000000 + tv.tv_usec;
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
}
} // namespace testing
......
......@@ -16,10 +16,10 @@ namespace testing {
static std::vector<Benchmark*>* all_benchmarks = nullptr;
static std::string label;
static int64 bytes_processed;
static int64 items_processed;
static int64 accum_time = 0;
static int64 start_time = 0;
static int64_t bytes_processed;
static int64_t items_processed;
static int64_t accum_time = 0;
static int64_t start_time = 0;
Benchmark::Benchmark(const char* name, void (*fn)(int))
: name_(name), num_args_(0), fn0_(fn) {
......@@ -112,10 +112,10 @@ void Benchmark::Register() {
}
void Benchmark::Run(int arg1, int arg2, int* run_count, double* run_seconds) {
static const int64 kMinIters = 100;
static const int64 kMaxIters = 1000000000;
static const int64_t kMinIters = 100;
static const int64_t kMaxIters = 1000000000;
static const double kMinTime = 0.5;
int64 iters = kMinIters;
int64_t iters = kMinIters;
while (true) {
accum_time = 0;
start_time = NowMicros();
......@@ -142,13 +142,13 @@ void Benchmark::Run(int arg1, int arg2, int* run_count, double* run_seconds) {
double multiplier = 1.4 * kMinTime / std::max(seconds, 1e-9);
multiplier = std::min(10.0, multiplier);
if (multiplier <= 1.0) multiplier *= 2.0;
iters = std::max<int64>(multiplier * iters, iters + 1);
iters = std::max<int64_t>(multiplier * iters, iters + 1);
iters = std::min(iters, kMaxIters);
}
}
void BytesProcessed(int64 n) { bytes_processed = n; }
void ItemsProcessed(int64 n) { items_processed = n; }
void BytesProcessed(int64_t n) { bytes_processed = n; }
void ItemsProcessed(int64_t n) { items_processed = n; }
void StartTiming() {
if (start_time == 0) start_time = NowMicros();
}
......
......@@ -42,8 +42,8 @@ class Benchmark {
};
void RunBenchmarks();
void BytesProcessed(int64);
void ItemsProcessed(int64);
void BytesProcessed(int64_t);
void ItemsProcessed(int64_t);
void StartTiming();
void StopTiming();
......
......@@ -42,16 +42,16 @@ struct EnumToDataType {}; // Specializations below
MATCH_TYPE_AND_ENUM(float, DT_FLOAT);
MATCH_TYPE_AND_ENUM(double, DT_DOUBLE);
MATCH_TYPE_AND_ENUM(int32, DT_INT32);
MATCH_TYPE_AND_ENUM(uint16, DT_UINT16);
MATCH_TYPE_AND_ENUM(uint8, DT_UINT8);
MATCH_TYPE_AND_ENUM(int16, DT_INT16);
MATCH_TYPE_AND_ENUM(int8, DT_INT8);
MATCH_TYPE_AND_ENUM(int32_t, DT_INT32);
MATCH_TYPE_AND_ENUM(uint16_t, DT_UINT16);
MATCH_TYPE_AND_ENUM(uint8_t, DT_UINT8);
MATCH_TYPE_AND_ENUM(int16_t, DT_INT16);
MATCH_TYPE_AND_ENUM(int8_t, DT_INT8);
MATCH_TYPE_AND_ENUM(string, DT_STRING);
MATCH_TYPE_AND_ENUM(int64, DT_INT64);
MATCH_TYPE_AND_ENUM(int64_t, DT_INT64);
MATCH_TYPE_AND_ENUM(bool, DT_BOOL);
static const int32 kint32max = ((int32)0x7FFFFFFF);
static const int32_t kint32_tmax = ((int32_t)0x7FFFFFFF);
} // namespace mace
......
......@@ -7,10 +7,6 @@ cc_binary(
"helloworld.cc",
],
copts = ["-std=c++11"],
linkopts = if_android([
"-pie",
"-llog",
]),
deps = [
"//mace/core",
"//mace/ops",
......@@ -21,10 +17,6 @@ cc_test(
name = "benchmark_example",
srcs = ["benchmark_example.cc"],
copts = ["-std=c++11"],
linkopts = if_android([
"-pie",
"-llog",
]),
linkstatic = 1,
deps = [
"//mace/core",
......
......@@ -6,7 +6,7 @@
static void foo(int iters) {
static const int N = 32;
const int64 tot = static_cast<int64>(iters) * N;
const int64_t tot = static_cast<int64_t>(iters) * N;
mace::testing::ItemsProcessed(tot);
mace::testing::BytesProcessed(tot * (sizeof(float)));
......@@ -26,7 +26,7 @@ BENCHMARK(foo);
static void bar(int iters, int n) {
const int64 tot = static_cast<int64>(iters) * n;
const int64_t tot = static_cast<int64_t>(iters) * n;
mace::testing::ItemsProcessed(tot);
mace::testing::BytesProcessed(tot * (sizeof(float)));
......
......@@ -18,6 +18,9 @@ cc_library(
"//mace/core:core",
],
copts = ['-std=c++11'],
linkopts = ["-fopenmp"] + if_android([
"-lm",
]),
)
cc_test(
......@@ -29,11 +32,9 @@ cc_test(
"//mace/core:core",
],
copts = ['-std=c++11'],
linkopts = ["-fopenmp"] + if_android([
linkopts = if_android([
"-pie",
"-llog",
"-lm",
]),
]),
linkstatic = 1,
testonly = 1,
)
......@@ -47,11 +48,6 @@ cc_test(
"//mace/core:test_benchmark_main",
],
copts = ['-std=c++11'],
linkopts = ["-fopenmp"] + if_android([
"-pie",
"-llog",
"-lm",
]),
linkstatic = 1,
testonly = 1,
)
......@@ -15,7 +15,7 @@ void AddNFuntion(const vector<const Tensor*>& input_tensor, Tensor *output_tenso
int n = input_tensor.size();
MACE_CHECK(n > 1);
MACE_CHECK_NOTNULL(input_tensor[0]);
int64 size = input_tensor[0]->size();
int64_t size = input_tensor[0]->size();
vector<const T*> inputs(n);
for (int i = 0; i < n; ++i) {
inputs[i] = input_tensor[i]->data<T>();
......@@ -24,7 +24,7 @@ void AddNFuntion(const vector<const Tensor*>& input_tensor, Tensor *output_tenso
T* output = output_tensor->mutable_data<T>();
for (int i = 0; i < n; ++i) {
for (int64 j = 0; j < size; ++j) {
for (int64_t j = 0; j < size; ++j) {
output[j] += inputs[i][j];
}
}
......
......@@ -30,9 +30,9 @@ struct BatchNormFunctor : public BatchNormFunctorBase<D, T> {
const T* offset,
const T* mean,
const T* var,
const TIndex n,
const TIndex channel,
const TIndex sample_size,
const index_t n,
const index_t channel,
const index_t sample_size,
T* output) {
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
......@@ -42,15 +42,15 @@ struct BatchNormFunctor : public BatchNormFunctorBase<D, T> {
// new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset;
T new_scale, new_offset;
for (TIndex c = 0; c < channel; ++c) {
for (index_t c = 0; c < channel; ++c) {
new_scale = scale[c] / std::sqrt(var[c] + this->variance_epsilon_);
new_offset = offset[c] - mean[c] * new_scale;
TIndex pos = c * sample_size;
index_t pos = c * sample_size;
for (TIndex i = 0; i < n; ++i) {
for (index_t i = 0; i < n; ++i) {
const T* input_sample_ptr = input + pos;
T* output_sample_ptr = output + pos;
for (TIndex j = 0; j < sample_size; ++j) {
for (index_t j = 0; j < sample_size; ++j) {
output_sample_ptr[j] = new_scale * input_sample_ptr[j] + new_offset;
}
pos += channel * sample_size;
......
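
For reference, the inference-time formula this functor implements (its comment is split across the hunk boundary above) folds the per-channel statistics into one scale and one offset; in the code's own names, with \(\varepsilon\) denoting variance_epsilon_:

\[
\text{new\_scale}_c = \frac{\text{scale}_c}{\sqrt{\text{var}_c + \varepsilon}}, \qquad
\text{new\_offset}_c = \text{offset}_c - \text{mean}_c \cdot \text{new\_scale}_c, \qquad
Y = \text{new\_scale}_c \cdot X + \text{new\_offset}_c .
\]
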
......@@ -11,7 +11,7 @@ using namespace mace;
using namespace mace::kernels;
static void AddNBenchmark(int iters, int n, int type) {
const int64 tot = static_cast<int64>(iters) * n * 3;
const int64_t tot = static_cast<int64_t>(iters) * n * 3;
mace::testing::ItemsProcessed(tot);
mace::testing::BytesProcessed(tot * (sizeof(float)));
......@@ -35,7 +35,7 @@ static void AddNBenchmark(int iters, int n, int type) {
float *input3 = input_tensor3.mutable_data<float>();
float *output = output_tensor.mutable_data<float>();
for (int64 i = 0; i < n; ++i) {
for (int64_t i = 0; i < n; ++i) {
input1[i] = nd(gen);
input2[i] = nd(gen);
input3[i] = nd(gen);
......
......@@ -11,7 +11,7 @@ using namespace mace;
using namespace mace::kernels;
static void ReluBenchmark(int iters, int n, int type) {
const int64 tot = static_cast<int64>(iters) * n;
const int64_t tot = static_cast<int64_t>(iters) * n;
mace::testing::ItemsProcessed(tot);
mace::testing::BytesProcessed(tot * (sizeof(float)));
......@@ -25,7 +25,7 @@ static void ReluBenchmark(int iters, int n, int type) {
output_tensor.ResizeLike(input_tensor);
float *input = input_tensor.mutable_data<float>();
float *output = output_tensor.mutable_data<float>();
for (int64 i = 0; i < n; ++i) {
for (int64_t i = 0; i < n; ++i) {
input[i] = nd(gen);
}
......
......@@ -14,7 +14,7 @@ void NeonAddNFuntion_float(const vector<const Tensor *> &input_tensor,
int n = input_tensor.size();
MACE_CHECK(n > 1);
MACE_CHECK_NOTNULL(input_tensor[0]);
int64 size = input_tensor[0]->size();
int64_t size = input_tensor[0]->size();
output_tensor->ResizeLike(input_tensor[0]);
float *output = output_tensor->mutable_data<float>();
vector<const float *> inputs(n);
......@@ -22,19 +22,19 @@ void NeonAddNFuntion_float(const vector<const Tensor *> &input_tensor,
inputs[i] = input_tensor[i]->data<float>();
}
int64 cost = size * n;
int64 groups = 1;
int64_t cost = size * n;
int64_t groups = 1;
if (cost > kCostPerGroup) {
groups = cost / kCostPerGroup;
}
int64 element_per_group = size / groups;
int64_t element_per_group = size / groups;
#pragma omp parallel for num_threads(1) // no significant performance improve
for (int64 i = 0; i < size; i += element_per_group) {
int64 count = std::min(element_per_group, size - i);
for (int64_t i = 0; i < size; i += element_per_group) {
int64_t count = std::min(element_per_group, size - i);
int nn = count >> 2;
int remain = count - (nn << 2);
for (int64 j = 0; j < n; ++j) {
for (int64_t j = 0; j < n; ++j) {
const float *inptr = inputs[j] + i;
float *outptr = output + i;
for (int k = 0; k < nn; ++k) {
......
......@@ -34,18 +34,18 @@ struct BatchNormFunctor<DeviceType::NEON, T> : public BatchNormFunctorBase<Devic
T new_scale, new_offset;
int count = sample_size >> 2;
int remain_count = sample_size - count;
for (TIndex c = 0; c < channel; ++c) {
for (index_t c = 0; c < channel; ++c) {
new_scale = scale[c] / std::sqrt(var[c] + this->variance_epsilon_);
new_offset = offset[c] - mean[c] * new_scale;
TIndex pos = c * sample_size;
index_t pos = c * sample_size;
float32x4_t new_scale_f = vdupq_n_f32(new_scale);
float32x4_t new_offset_f = vdupq_n_f32(new_offset);
for (TIndex i = 0; i < n; ++i) {
for (index_t i = 0; i < n; ++i) {
const float* input_sample_ptr = input + pos;
float* output_sample_ptr = output + pos;
for(TIndex j = 0; j < count; ++j) {
for(index_t j = 0; j < count; ++j) {
float32x4_t input_f = vld1q_f32(input_sample_ptr);
float32x4_t output_f = new_offset_f;
output_f = vfmaq_f32(output_f, input_f, new_scale_f);
......@@ -53,7 +53,7 @@ struct BatchNormFunctor<DeviceType::NEON, T> : public BatchNormFunctorBase<Devic
input_sample_ptr += 4;
output_sample_ptr += 4;
}
for(TIndex j = 0; j < remain_count; ++j) {
for(index_t j = 0; j < remain_count; ++j) {
*output_sample_ptr = new_scale * *input_sample_ptr + new_offset;
++output_sample_ptr;
++input_sample_ptr;
......
......@@ -10,14 +10,14 @@ namespace kernels {
void NeonReluFuntion_float(const Tensor *input_tensor,
Tensor *output_tensor) {
int64 size = input_tensor->size();
int64_t size = input_tensor->size();
output_tensor->ResizeLike(input_tensor);
const float *input = input_tensor->data<float>();
float *output = output_tensor->mutable_data<float>();
#pragma omp parallel for num_threads(1) // no significant performance improve
for (int64 i = 0; i < size; i += kCostPerGroup) {
int64 count = std::min(static_cast<int64>(kCostPerGroup), size - i);
for (int64_t i = 0; i < size; i += kCostPerGroup) {
int64_t count = std::min(static_cast<int64_t>(kCostPerGroup), size - i);
int nn = count >> 2;
int remain = count - (nn << 2);
const float *inptr = input + i;
......
......@@ -12,12 +12,12 @@ namespace kernels {
template<typename T>
void ReluFuntion(const Tensor *input_tensor, Tensor *output_tensor) {
int64 size = input_tensor->size();
int64_t size = input_tensor->size();
output_tensor->ResizeLike(input_tensor);
const T *input = input_tensor->data<T>();
T *output = output_tensor->mutable_data<T>();
for (int64 i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
output[i] = std::max(input[i], static_cast<T>(0));
}
}
......
......@@ -15,7 +15,7 @@ TEST(NeonTest, AddN) {
std::mt19937 gen(rd());
std::normal_distribution<float> nd(0, 1);
int64 count = 100000;
int64_t count = 100000;
Tensor input_tensor1(cpu_allocator(), DataType::DT_FLOAT);
input_tensor1.Resize({100, 1000});
Tensor input_tensor2(cpu_allocator(), DataType::DT_FLOAT);
......@@ -37,7 +37,7 @@ TEST(NeonTest, AddN) {
float *output = output_tensor.mutable_data<float>();
float *output_neon = output_tensor_neon.mutable_data<float>();
for (int64 i = 0; i < count; ++i) {
for (int64_t i = 0; i < count; ++i) {
input1[i] = nd(gen);
input2[i] = nd(gen);
input3[i] = nd(gen);
......@@ -48,7 +48,7 @@ TEST(NeonTest, AddN) {
ASSERT_EQ(count, output_tensor.size());
ASSERT_EQ(count, output_tensor_neon.size());
for (int64 i = 0; i < count; ++i) {
for (int64_t i = 0; i < count; ++i) {
ASSERT_FLOAT_EQ(output[i], output_neon[i]);
}
}
......
......@@ -15,7 +15,7 @@ TEST(NeonTest, Relu) {
std::mt19937 gen(rd());
std::normal_distribution<float> nd(0, 1);
int64 count = 100000;
int64_t count = 100000;
Tensor input_tensor(cpu_allocator(), DataType::DT_FLOAT);
input_tensor.Resize({100, 1000});
Tensor output_tensor(cpu_allocator(), DataType::DT_FLOAT);
......@@ -27,7 +27,7 @@ TEST(NeonTest, Relu) {
float *output = output_tensor.mutable_data<float>();
float *output_neon = output_tensor_neon.mutable_data<float>();
for (int64 i = 0; i < count; ++i) {
for (int64_t i = 0; i < count; ++i) {
input[i] = nd(gen);
}
......@@ -36,7 +36,7 @@ TEST(NeonTest, Relu) {
ASSERT_EQ(count, output_tensor.size());
ASSERT_EQ(count, output_tensor_neon.size());
for (int64 i = 0; i < count; ++i) {
for (int64_t i = 0; i < count; ++i) {
ASSERT_FLOAT_EQ(output[i], output_neon[i]);
}
}
......
......@@ -33,9 +33,9 @@ class BatchNormOp : public Operator<D, T> {
Tensor* output = this->Output(0);
output->ResizeLike(input);
const TIndex n = input->dim(0);
const TIndex channel = input->dim(1);
const TIndex sample_size = input->dim(2) * input->dim(3);
const index_t n = input->dim(0);
const index_t channel = input->dim(1);
const index_t sample_size = input->dim(2) * input->dim(3);
const float* input_ptr = input->data<float>();
const float* scale_ptr = scale->data<float>();
......
......@@ -43,7 +43,7 @@ class OpsTestBase : public ::testing::Test {
}
public:
template <typename T>
void AddInputFromArray(const char* name, const std::vector<TIndex>& shape, const std::vector<T>& data) {
void AddInputFromArray(const char* name, const std::vector<index_t>& shape, const std::vector<T>& data) {
Tensor* input = ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
input->Resize(shape);
float* input_data = input->mutable_data<float>();
......@@ -70,7 +70,7 @@ class OpsTestBase : public ::testing::Test {
};
template <typename T>
Tensor CreateTensor(const std::vector<TIndex>& shape, const std::vector<T>& data) {
Tensor CreateTensor(const std::vector<index_t>& shape, const std::vector<T>& data) {
Tensor res(cpu_allocator(), DataTypeToEnum<T>::v());
res.Resize(shape);
float* input_data = res.mutable_data<float>();
......@@ -90,7 +90,7 @@ inline std::string ShapeToString(const Tensor& x) {
std::stringstream stream;
for (int i = 0; i < x.dim_size(); i++) {
if (i > 0) stream<<",";
int64 dim = x.dim(i);
int64_t dim = x.dim(i);
if (dim < 0) {
stream<<"?";
} else {
......