Commit 03a12d1e authored by Liangliang He

Merge branch 'fix_warnings' into 'master'

fix warnings

See merge request !464
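Nearly every hunk below silences a -Wunused-parameter or -Wunused-variable warning (the build now runs with -Werror and -Wextra, per the tools change at the bottom) by passing the offending name through MACE_UNUSED. A minimal sketch of the idiom, assuming the conventional definition (mace/core/macros.h itself is not shown in this diff):

    #include <cstddef>
    #include <cstdio>

    // Conventional no-op expansion: evaluating the name counts as a "use",
    // so the compiler stops warning, and no code is generated.
    #define MACE_UNUSED(var) (void)(var)

    void PrintfCallback(const char *buffer, size_t length, void *user_data) {
      MACE_UNUSED(user_data);  // part of the callback signature, unused here
      fwrite(buffer, 1, length, stdout);
    }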
......@@ -5,11 +5,11 @@ workspace(name = "mace")
# This statement defines the @com_google_protobuf repo.
http_archive(
name = "com_google_protobuf",
sha256 = "40d39d97a7b514b3e34daef732f822eca0081960b269863f5b573db5548cb237",
strip_prefix = "protobuf-3.4.0rc3",
sha256 = "542703acadc3f690d998f4641e1b988f15ba57ebca05fdfb1cd9095bec007948",
strip_prefix = "protobuf-3.4.0",
urls = [
"https://cnbj1.fds.api.xiaomi.com/mace/third-party/protobuf/protobuf-3.4.0rc3.zip",
"https://github.com/google/protobuf/archive/v3.4.0rc3.zip"
"https://cnbj1.fds.api.xiaomi.com/mace/third-party/protobuf/protobuf-3.4.0.zip",
"https://github.com/google/protobuf/archive/v3.4.0.zip"
],
)
......@@ -38,21 +38,22 @@ new_http_archive(
new_http_archive(
name = "opencl_clhpp",
build_file = "mace/third_party/opencl-clhpp/opencl-clhpp.BUILD",
sha256 = "d4eb63372ad31f7efcae626852f75f7929ff28d1cabb5f50ef11035963a69b46",
strip_prefix = "OpenCL-CLHPP-2.0.10",
sha256 = "dab6f1834ec6e3843438cc0f97d63817902aadd04566418c1fcc7fb78987d4e7",
strip_prefix = "OpenCL-CLHPP-4c6f7d56271727e37fb19a9b47649dd175df2b12",
urls = [
"https://cnbj1.fds.api.xiaomi.com/mace/third-party/OpenCL-CLHPP/OpenCL-CLHPP-2.0.10.zip",
"https://github.com/KhronosGroup/OpenCL-CLHPP/archive/v2.0.10.zip"
"https://cnbj1.fds.api.xiaomi.com/mace/third-party/OpenCL-CLHPP/OpenCL-CLHPP-4c6f7d56271727e37fb19a9b47649dd175df2b12.zip",
"https://github.com/KhronosGroup/OpenCL-CLHPP/archive/4c6f7d56271727e37fb19a9b47649dd175df2b12.zip"
],
)
new_http_archive(
name = "half",
build_file = "mace/third_party/half/half.BUILD",
sha256 = "cdd70d3bf3fe091b688e7ab3f48471c881a197d2c186c95cca8bf156961fb41c",
sha256 = "0f514a1e877932b21dc5edc26a148ddc700b6af2facfed4c030ca72f74d0219e",
strip_prefix = "half-code-356-trunk",
urls = [
"https://cnbj1.fds.api.xiaomi.com/mace/third-party/half/half-1.12.0.zip",
"https://jaist.dl.sourceforge.net/project/half/half/1.12.0/half-1.12.0.zip"
"https://cnbj1.fds.api.xiaomi.com/mace/third-party/half/half-code-356-trunk.zip",
"https://sourceforge.net/code-snapshots/svn/h/ha/half/code/half-code-356-trunk.zip"
],
)
......
......@@ -58,9 +58,9 @@ std::string ShapeToString(const std::vector<OutputShape> &output_shape) {
std::stringstream stream;
stream << "[";
for (int i = 0; i < output_shape.size(); ++i) {
for (size_t i = 0; i < output_shape.size(); ++i) {
const std::vector<index_t> &dims = output_shape[i].dims();
for (int j = 0; j < dims.size(); ++j) {
for (size_t j = 0; j < dims.size(); ++j) {
stream << dims[j];
if (j != dims.size() - 1) {
stream << ",";
......@@ -83,7 +83,7 @@ std::string VectorToString(const std::vector<T> &vec) {
std::stringstream stream;
stream << "[";
for (int i = 0; i < vec.size(); ++i) {
for (size_t i = 0; i < vec.size(); ++i) {
stream << vec[i];
if (i != vec.size() - 1) {
stream << ",";
......
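The int to size_t loop-index changes in this file (and many below) fix -Wsign-compare: std::vector::size() returns size_t, and comparing it against a signed int warns under -Wextra. A minimal sketch:

    #include <cstddef>
    #include <vector>

    int CountNonZero(const std::vector<int> &v) {
      int n = 0;
      // for (int i = 0; i < v.size(); ++i)    // warns: signed/unsigned compare
      for (size_t i = 0; i < v.size(); ++i) {  // index type matches size()'s
        if (v[i] != 0) ++n;
      }
      return n;
    }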
......@@ -54,24 +54,24 @@ class TimeInfo {
sum_(0), square_sum(0)
{}
const int64_t round() const {
int64_t round() const {
return round_;
}
const T first() const {
T first() const {
return first_;
}
const T sum() const {
T sum() const {
return sum_;
}
const double avg() const {
double avg() const {
return round_ == 0 ? std::numeric_limits<double>::quiet_NaN() :
sum_ * 1.0f / round_;
}
const double std_deviation() const {
double std_deviation() const {
if (round_ == 0 || min_ == max_) {
return 0;
}
......@@ -111,12 +111,12 @@ class TimeInfo {
}
private:
int64_t round_;
T first_;
T curr_;
T min_;
T max_;
T sum_;
int64_t round_;
double square_sum;
};
......
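Two warning classes are fixed in TimeInfo: a const qualifier on a by-value return is meaningless to callers and trips -Wignored-qualifiers, and the int64_t round_ member is moved so the declaration order matches the constructor's initializer list, which silences -Wreorder (an assumption; the full initializer list is not shown here). The return-type fix, sketched:

    class Counter {
     public:
      // const int value() const;  // leading const is ignored for a by-value
      //                           // return and warns under -Wextra
      int value() const { return value_; }  // equivalent for every caller
     private:
      int value_ = 0;
    };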
......@@ -21,6 +21,7 @@
#include <vector>
#include <cstring>
#include "mace/core/macros.h"
#include "mace/core/registry.h"
#include "mace/core/types.h"
#include "mace/public/mace.h"
......@@ -83,6 +84,8 @@ class CPUAllocator : public Allocator {
void *NewImage(const std::vector<size_t> &shape,
const DataType dt) const override {
MACE_UNUSED(shape);
MACE_UNUSED(dt);
LOG(FATAL) << "Allocate CPU image";
return nullptr;
}
......@@ -96,14 +99,20 @@ class CPUAllocator : public Allocator {
free(data);
};
void *Map(void *buffer, size_t offset, size_t nbytes) const override {
MACE_UNUSED(nbytes);
return reinterpret_cast<char*>(buffer) + offset;
}
void *MapImage(void *buffer,
const std::vector<size_t> &image_shape,
std::vector<size_t> *mapped_image_pitch) const override {
MACE_UNUSED(image_shape);
MACE_UNUSED(mapped_image_pitch);
return buffer;
}
void Unmap(void *buffer, void *mapper_ptr) const override {}
void Unmap(void *buffer, void *mapper_ptr) const override {
MACE_UNUSED(buffer);
MACE_UNUSED(mapper_ptr);
}
bool OnHost() const override { return true; }
};
......
......@@ -21,6 +21,7 @@
#include <functional>
#include "mace/core/allocator.h"
#include "mace/core/macros.h"
#include "mace/core/types.h"
namespace mace {
......@@ -133,6 +134,7 @@ class Buffer : public BufferBase {
void *Map(index_t offset, index_t length, std::vector<size_t> *pitch) const {
MACE_CHECK_NOTNULL(buf_);
MACE_UNUSED(pitch);
return allocator_->Map(buf_, offset, length);
}
......@@ -232,6 +234,9 @@ class Image : public BufferBase {
std::vector<size_t> image_shape() const { return shape_; }
void *Map(index_t offset, index_t length, std::vector<size_t> *pitch) const {
MACE_UNUSED(offset);
MACE_UNUSED(length);
MACE_UNUSED(pitch);
MACE_NOT_IMPLEMENTED;
return nullptr;
}
......@@ -254,9 +259,17 @@ class Image : public BufferBase {
mapped_buf_ = nullptr;
}
void Resize(index_t size) { MACE_NOT_IMPLEMENTED; }
void Resize(index_t size) {
MACE_UNUSED(size);
MACE_NOT_IMPLEMENTED;
}
void Copy(void *src, index_t offset, index_t length) { MACE_NOT_IMPLEMENTED; }
void Copy(void *src, index_t offset, index_t length) {
MACE_UNUSED(src);
MACE_UNUSED(offset);
MACE_UNUSED(length);
MACE_NOT_IMPLEMENTED;
}
bool OnHost() const { return allocator_->OnHost(); }
......@@ -327,11 +340,17 @@ class BufferSlice : public BufferBase {
}
void *Map(index_t offset, index_t length, std::vector<size_t> *pitch) const {
MACE_UNUSED(offset);
MACE_UNUSED(length);
MACE_UNUSED(pitch);
MACE_NOT_IMPLEMENTED;
return nullptr;
}
void UnMap(void *mapped_ptr) const { MACE_NOT_IMPLEMENTED; }
void UnMap(void *mapped_ptr) const {
MACE_UNUSED(mapped_ptr);
MACE_NOT_IMPLEMENTED;
}
void Map(std::vector<size_t> *pitch) {
MACE_CHECK_NOTNULL(buffer_);
......@@ -350,7 +369,12 @@ class BufferSlice : public BufferBase {
" to ", size, " is illegal");
}
void Copy(void *src, index_t offset, index_t length) { MACE_NOT_IMPLEMENTED; }
void Copy(void *src, index_t offset, index_t length) {
MACE_UNUSED(src);
MACE_UNUSED(offset);
MACE_UNUSED(length);
MACE_NOT_IMPLEMENTED;
}
index_t offset() const { return offset_; }
......
......@@ -14,6 +14,7 @@
#include <utility>
#include "mace/core/macros.h"
#include "mace/core/net.h"
#include "mace/utils/memory_logging.h"
#include "mace/utils/timer.h"
......@@ -25,7 +26,10 @@ NetBase::NetBase(const std::shared_ptr<const OperatorRegistry> op_registry,
const std::shared_ptr<const NetDef> net_def,
Workspace *ws,
DeviceType type)
: name_(net_def->name()), op_registry_(op_registry) {}
: name_(net_def->name()), op_registry_(op_registry) {
MACE_UNUSED(ws);
MACE_UNUSED(type);
}
SerialNet::SerialNet(const std::shared_ptr<const OperatorRegistry> op_registry,
const std::shared_ptr<const NetDef> net_def,
......
......@@ -33,6 +33,7 @@ namespace mace {
namespace {
#ifndef MACE_ENABLE_OPENMP
int GetCPUCount() {
char path[32];
int cpu_count = 0;
......@@ -50,12 +51,14 @@ int GetCPUCount() {
cpu_count++;
}
}
#endif
int GetCPUMaxFreq(int cpu_id) {
char path[64];
snprintf(path, sizeof(path),
"/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq",
cpu_id);
FILE *fp = fopen(path, "rb");
if (!fp) {
LOG(WARNING) << "File: " << path << " not exists.";
......@@ -63,47 +66,14 @@ int GetCPUMaxFreq(int cpu_id) {
}
int freq = 0;
fscanf(fp, "%d", &freq);
int items_read = fscanf(fp, "%d", &freq);
if (items_read != 1) {
LOG(WARNING) << "Read file: " << path << " failed.";
}
fclose(fp);
return freq;
}
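fscanf is declared warn_unused_result in common libc builds, so with -Werror the previously ignored return value became a hard error; checking items_read resolves it. The same read-one-int-from-sysfs pattern as a self-contained sketch (ReadSysfsInt is a hypothetical helper, not part of this diff):

    #include <cstdio>

    static int ReadSysfsInt(const char *path) {
      FILE *fp = fopen(path, "rb");
      if (fp == nullptr) return -1;          // node missing or unreadable
      int value = -1;
      if (fscanf(fp, "%d", &value) != 1) {   // short read: report failure
        value = -1;
      }
      fclose(fp);
      return value;
    }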
void SortCPUIdsByMaxFreqAsc(std::vector<int> *cpu_ids, int *big_core_offset) {
MACE_CHECK_NOTNULL(cpu_ids);
int cpu_count = cpu_ids->size();
std::vector<int> cpu_max_freq;
cpu_max_freq.resize(cpu_count);
// set cpu max frequency
for (int i = 0; i < cpu_count; ++i) {
cpu_max_freq[i] = GetCPUMaxFreq(i);
(*cpu_ids)[i] = i;
}
// sort cpu ids by max frequency asc, bubble sort
for (int i = 0; i < cpu_count - 1; ++i) {
for (int j = i + 1; j < cpu_count; ++j) {
if (cpu_max_freq[i] > cpu_max_freq[j]) {
int tmp = (*cpu_ids)[i];
(*cpu_ids)[i] = (*cpu_ids)[j];
(*cpu_ids)[j] = tmp;
tmp = cpu_max_freq[i];
cpu_max_freq[i] = cpu_max_freq[j];
cpu_max_freq[j] = tmp;
}
}
}
*big_core_offset = 0;
for (int i = 1; i < cpu_count; ++i) {
if (cpu_max_freq[i] > cpu_max_freq[i - 1]) {
*big_core_offset = i;
break;
}
}
}
void SetThreadAffinity(cpu_set_t mask) {
#if defined(__ANDROID__)
pid_t pid = gettid();
......
......@@ -23,6 +23,7 @@
#include <utility>
#include "mace/public/mace_runtime.h"
#include "mace/core/macros.h"
#include "mace/core/file_storage.h"
#include "mace/core/runtime/opencl/opencl_extension.h"
#include "mace/public/mace.h"
......@@ -176,6 +177,8 @@ void OpenCLPrintfCallback(const char *buffer,
size_t length,
size_t final,
void *user_data) {
MACE_UNUSED(final);
MACE_UNUSED(user_data);
fwrite(buffer, 1, length, stdout);
}
......@@ -218,6 +221,22 @@ void GetAdrenoContextProperties(std::vector<cl_context_properties> *properties,
// The properties list should be terminated with 0
properties->push_back(0);
}
GPUType ParseGPUType(const std::string &device_name) {
constexpr const char *kQualcommAdrenoGPUStr = "QUALCOMM Adreno(TM)";
constexpr const char *kMaliGPUStr = "Mali";
constexpr const char *kPowerVRGPUStr = "PowerVR";
if (device_name == kQualcommAdrenoGPUStr) {
return GPUType::QUALCOMM_ADRENO;
} else if (device_name.find(kMaliGPUStr) != std::string::npos) {
return GPUType::MALI;
} else if (device_name.find(kPowerVRGPUStr) != std::string::npos) {
return GPUType::PowerVR;
} else {
return GPUType::UNKNOWN;
}
}
} // namespace
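ParseGPUType reads no object state, so the commit moves it from an OpenCLRuntime member (deleted further down, along with its header declaration) into this file-local anonymous namespace, giving the helper internal linkage. The pattern, sketched:

    #include <string>

    namespace {
    // Visible only inside this translation unit; the header no longer
    // needs to declare it.
    bool StartsWith(const std::string &s, const std::string &prefix) {
      return s.compare(0, prefix.size(), prefix) == 0;
    }
    }  // namespace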
void OpenCLProfilingTimer::StartTiming() {}
......@@ -389,11 +408,11 @@ cl::Device &OpenCLRuntime::device() { return *device_; }
cl::CommandQueue &OpenCLRuntime::command_queue() { return *command_queue_; }
const uint64_t OpenCLRuntime::device_global_mem_cache_size() const {
uint64_t OpenCLRuntime::device_global_mem_cache_size() const {
return device_gloabl_mem_cache_size_;
}
const uint32_t OpenCLRuntime::device_compute_units() const {
uint32_t OpenCLRuntime::device_compute_units() const {
return device_compute_units_;
}
......@@ -597,12 +616,12 @@ uint64_t OpenCLRuntime::GetKernelWaveSize(const cl::Kernel &kernel) {
return size;
}
const bool OpenCLRuntime::IsNonUniformWorkgroupsSupported() {
bool OpenCLRuntime::IsNonUniformWorkgroupsSupported() const {
return (gpu_type_ == GPUType::QUALCOMM_ADRENO &&
opencl_version_ == "2.0");
}
const GPUType OpenCLRuntime::gpu_type() const {
GPUType OpenCLRuntime::gpu_type() const {
return gpu_type_;
}
......@@ -610,36 +629,20 @@ const std::string OpenCLRuntime::platform_info() const {
return platform_info_;
}
const GPUType OpenCLRuntime::ParseGPUType(
const std::string &device_name) {
constexpr const char *kQualcommAdrenoGPUStr = "QUALCOMM Adreno(TM)";
constexpr const char *kMaliGPUStr = "Mali";
constexpr const char *kPowerVRGPUStr = "PowerVR";
if (device_name == kQualcommAdrenoGPUStr) {
return GPUType::QUALCOMM_ADRENO;
} else if (device_name.find(kMaliGPUStr) != std::string::npos) {
return GPUType::MALI;
} else if (device_name.find(kPowerVRGPUStr) != std::string::npos) {
return GPUType::PowerVR;
} else {
return GPUType::UNKNOWN;
}
}
const std::string OpenCLRuntime::ParseDeviceVersion(
const std::string &device_version) {
// OpenCL Device version string format:
// OpenCL<space><major_version.minor_version><space>\
// OpenCL<space><major_version.minor_version><space>
// <vendor-specific information>
auto words = Split(device_version, ' ');
return words[1];
}
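Given the documented format, Split(device_version, ' ') leaves the numeric version at index 1. For example, assuming Split tokenizes on the given delimiter as elsewhere in this codebase:

    // ParseDeviceVersion("OpenCL 2.0 Adreno(TM) ...") returns "2.0"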
const bool OpenCLRuntime::IsOutOfRangeCheckEnabled() const {
bool OpenCLRuntime::IsOutOfRangeCheckEnabled() const {
return out_of_range_check_;
}
const bool OpenCLRuntime::is_profiling_enabled() const {
bool OpenCLRuntime::is_profiling_enabled() const {
return is_profiling_enabled_;
}
......
......@@ -70,18 +70,18 @@ class OpenCLRuntime {
cl::Context &context();
cl::Device &device();
cl::CommandQueue &command_queue();
const GPUType gpu_type() const;
GPUType gpu_type() const;
const std::string platform_info() const;
const uint64_t device_global_mem_cache_size() const;
const uint32_t device_compute_units() const;
uint64_t device_global_mem_cache_size() const;
uint32_t device_compute_units() const;
void GetCallStats(const cl::Event &event, CallStats *stats);
uint64_t GetDeviceMaxWorkGroupSize();
uint64_t GetKernelMaxWorkGroupSize(const cl::Kernel &kernel);
uint64_t GetKernelWaveSize(const cl::Kernel &kernel);
const bool IsNonUniformWorkgroupsSupported();
const bool IsOutOfRangeCheckEnabled() const;
const bool is_profiling_enabled() const;
bool IsNonUniformWorkgroupsSupported() const;
bool IsOutOfRangeCheckEnabled() const;
bool is_profiling_enabled() const;
cl::Kernel BuildKernel(const std::string &program_name,
const std::string &kernel_name,
......@@ -112,7 +112,6 @@ class OpenCLRuntime {
const std::string &built_program_key,
const std::string &build_options_str,
cl::Program *program);
const GPUType ParseGPUType(const std::string &device_name);
const std::string ParseDeviceVersion(const std::string &device_version);
private:
......
......@@ -136,6 +136,7 @@ class ActivationFunctor<DeviceType::CPU, float> {
const Tensor *alpha,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
const float *input_ptr = input->data<float>();
float *output_ptr = output->mutable_data<float>();
if (activation_ == PRELU) {
......
......@@ -39,6 +39,7 @@ struct AddNFunctor {
void operator()(const std::vector<const Tensor *> &input_tensors,
Tensor *output_tensor,
StatsFuture *future) {
MACE_UNUSED(future);
output_tensor->ResizeLike(input_tensors[0]);
index_t size = output_tensor->size();
Tensor::MappingGuard output_map(output_tensor);
......
......@@ -42,22 +42,25 @@ void Conv2dNeonK3x3S1(const float *input,
for (index_t m = 0; m < out_channels; m += 2) {
if (m + 1 < out_channels) {
float *out_ptr0_base = output + b * out_batch_size + m * out_image_size;
#if defined(MACE_ENABLE_NEON)
float *out_ptr1_base =
output + b * out_batch_size + (m + 1) * out_image_size;
#endif
for (index_t c = 0; c < in_channels; ++c) {
float *out_ptr0 = out_ptr0_base;
float *out_ptr1 = out_ptr1_base;
const float *in_ptr0 = input + b * in_batch_size + c * in_image_size;
const float *filter_ptr0 = filter + m * in_channels * 9 + c * 9;
#if defined(MACE_ENABLE_NEON)
float *out_ptr1 = out_ptr1_base;
const float *in_ptr1 =
input + b * in_batch_size + c * in_image_size + 1 * in_width;
const float *in_ptr2 =
input + b * in_batch_size + c * in_image_size + 2 * in_width;
const float *in_ptr3 =
input + b * in_batch_size + c * in_image_size + 3 * in_width;
const float *filter_ptr0 = filter + m * in_channels * 9 + c * 9;
const float *filter_ptr1 = filter + (m + 1) * in_channels * 9 + c * 9;
#endif
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
// load filter (2 outch x 3 height x 3 width): vf_outch_height
float32x4_t vf00, vf01, vf02;
......@@ -321,12 +324,14 @@ void Conv2dNeonK3x3S1(const float *input,
const float
*in_ptr0 = input + b * in_batch_size + c * in_image_size;
#if defined(MACE_ENABLE_NEON)
const float *in_ptr1 =
input + b * in_batch_size + c * in_image_size + 1 * in_width;
const float *in_ptr2 =
input + b * in_batch_size + c * in_image_size + 2 * in_width;
const float *in_ptr3 =
input + b * in_batch_size + c * in_image_size + 3 * in_width;
#endif
const float *filter_ptr0 = filter + mm * in_channels * 9 + c * 9;
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
......
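This hunk and the similar convolution hunks below share one fix: pointers that only the NEON path dereferences are fenced with the matching #if, so the scalar fallback build no longer defines unused variables (-Wunused-variable, escalated by -Werror). Schematically, as a compilable sketch:

    #include <cstddef>

    void Compute(float *base, size_t image_size) {
      float *out_ptr0_base = base;               // both code paths use this
    #if defined(MACE_ENABLE_NEON)
      float *out_ptr1_base = base + image_size;  // vectorized path only
      // ... NEON loop consuming out_ptr0_base and out_ptr1_base ...
      (void)out_ptr1_base;
    #endif
      // the scalar fallback touches only out_ptr0_base
      (void)out_ptr0_base;
    }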
......@@ -121,23 +121,25 @@ void Conv2dNeonK5x5S1(const float *input,
for (index_t m = 0; m < out_channels; m += 4) {
if (m + 3 < out_channels) {
float *out_ptr0_base = output + b * out_batch_size + m * out_image_size;
#if defined(MACE_ENABLE_NEON) && !defined(__aarch64__)
float *out_ptr1_base =
output + b * out_batch_size + (m + 1) * out_image_size;
float *out_ptr2_base =
output + b * out_batch_size + (m + 2) * out_image_size;
float *out_ptr3_base =
output + b * out_batch_size + (m + 3) * out_image_size;
#endif
for (index_t c = 0; c < in_channels; ++c) {
const float *in_ptr_base =
input + b * in_batch_size + c * in_image_size;
const float *filter_ptr0 = filter + m * in_channels * 25 + c * 25;
#if defined(MACE_ENABLE_NEON) && !defined(__aarch64__)
const float *filter_ptr1 =
filter + (m + 1) * in_channels * 25 + c * 25;
const float *filter_ptr2 =
filter + (m + 2) * in_channels * 25 + c * 25;
const float *filter_ptr3 =
filter + (m + 3) * in_channels * 25 + c * 25;
#if defined(MACE_ENABLE_NEON) && !defined(__aarch64__)
for (index_t h = 0; h < out_height; ++h) {
for (index_t w = 0; w + 3 < out_width; w += 4) {
// input offset
......
......@@ -198,23 +198,25 @@ void Conv2dNeonK7x7S1(const float *input,
for (index_t m = 0; m < out_channels; m += 4) {
if (m + 3 < out_channels) {
float *out_ptr0_base = output + b * out_batch_size + m * out_image_size;
#if defined(MACE_ENABLE_NEON)
float *out_ptr1_base =
output + b * out_batch_size + (m + 1) * out_image_size;
float *out_ptr2_base =
output + b * out_batch_size + (m + 2) * out_image_size;
float *out_ptr3_base =
output + b * out_batch_size + (m + 3) * out_image_size;
#endif
for (index_t c = 0; c < in_channels; ++c) {
const float *in_ptr_base =
input + b * in_batch_size + c * in_image_size;
const float *filter_ptr0 = filter + m * in_channels * 49 + c * 49;
#if defined(MACE_ENABLE_NEON)
const float *filter_ptr1 =
filter + (m + 1) * in_channels * 49 + c * 49;
const float *filter_ptr2 =
filter + (m + 2) * in_channels * 49 + c * 49;
const float *filter_ptr3 =
filter + (m + 3) * in_channels * 49 + c * 49;
#if defined(MACE_ENABLE_NEON)
for (index_t h = 0; h < out_height; ++h) {
for (index_t w = 0; w + 3 < out_width; w += 4) {
// input offset
......@@ -352,23 +354,25 @@ void Conv2dNeonK7x7S2(const float *input,
for (index_t m = 0; m < out_channels; m += 4) {
if (m + 3 < out_channels) {
float *out_ptr0_base = output + b * out_batch_size + m * out_image_size;
#if defined(MACE_ENABLE_NEON)
float *out_ptr1_base =
output + b * out_batch_size + (m + 1) * out_image_size;
float *out_ptr2_base =
output + b * out_batch_size + (m + 2) * out_image_size;
float *out_ptr3_base =
output + b * out_batch_size + (m + 3) * out_image_size;
#endif
for (index_t c = 0; c < in_channels; ++c) {
const float *in_ptr_base =
input + b * in_batch_size + c * in_image_size;
const float *filter_ptr0 = filter + m * in_channels * 49 + c * 49;
#if defined(MACE_ENABLE_NEON)
const float *filter_ptr1 =
filter + (m + 1) * in_channels * 49 + c * 49;
const float *filter_ptr2 =
filter + (m + 2) * in_channels * 49 + c * 49;
const float *filter_ptr3 =
filter + (m + 3) * in_channels * 49 + c * 49;
#if defined(MACE_ENABLE_NEON)
for (index_t h = 0; h < out_height; ++h) {
for (index_t w = 0; w + 3 < out_width; w += 4) {
// input offset
......@@ -516,23 +520,25 @@ void Conv2dNeonK7x7S3(const float *input,
for (index_t m = 0; m < out_channels; m += 4) {
if (m + 3 < out_channels) {
float *out_ptr0_base = output + b * out_batch_size + m * out_image_size;
#if defined(MACE_ENABLE_NEON)
float *out_ptr1_base =
output + b * out_batch_size + (m + 1) * out_image_size;
float *out_ptr2_base =
output + b * out_batch_size + (m + 2) * out_image_size;
float *out_ptr3_base =
output + b * out_batch_size + (m + 3) * out_image_size;
#endif
for (index_t c = 0; c < in_channels; ++c) {
const float *in_ptr_base =
input + b * in_batch_size + c * in_image_size;
const float *filter_ptr0 = filter + m * in_channels * 49 + c * 49;
#if defined(MACE_ENABLE_NEON)
const float *filter_ptr1 =
filter + (m + 1) * in_channels * 49 + c * 49;
const float *filter_ptr2 =
filter + (m + 2) * in_channels * 49 + c * 49;
const float *filter_ptr3 =
filter + (m + 3) * in_channels * 49 + c * 49;
#if defined(MACE_ENABLE_NEON)
for (index_t h = 0; h < out_height; ++h) {
for (index_t w = 0; w + 3 < out_width; w += 4) {
// input offset
......
......@@ -17,6 +17,7 @@
#endif
#include "mace/kernels/arm/depthwise_conv2d_neon.h"
#include "mace/core/macros.h"
namespace mace {
namespace kernels {
......@@ -65,6 +66,10 @@ void DepthwiseConv2dNeonK3x3S1(const float *input,
const index_t valid_w_start,
const index_t valid_w_stop,
float *output) {
#if !defined(MACE_ENABLE_NEON)
MACE_UNUSED(valid_w_start);
MACE_UNUSED(valid_w_stop);
#endif
const index_t multiplier = out_channels / in_channels;
const index_t in_image_size = in_height * in_width;
const index_t out_image_size = out_height * out_width;
......@@ -305,6 +310,10 @@ void DepthwiseConv2dNeonK3x3S2(const float *input,
const index_t valid_w_start,
const index_t valid_w_stop,
float *output) {
#if !defined(MACE_ENABLE_NEON)
MACE_UNUSED(valid_w_start);
MACE_UNUSED(valid_w_stop);
#endif
const index_t multiplier = out_channels / in_channels;
const index_t in_image_size = in_height * in_width;
const index_t out_image_size = out_height * out_width;
......
......@@ -64,6 +64,7 @@ struct BatchNormFunctor<DeviceType::CPU, float> : BatchNormFunctorBase {
const float epsilon,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
// Y = scale / sqrt(var + epsilon) * X + (offset - scale * mean / sqrt(var + epsilon))
......
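Since scale / sqrt(var + epsilon) is constant per channel, inference folds the whole formula into one multiply-add per element. A standalone sketch mirroring the comment's formula:

    #include <cmath>
    #include <cstddef>
    #include <vector>

    void BatchNormInference(const std::vector<float> &x,
                            float scale, float offset, float mean,
                            float var, float epsilon,
                            std::vector<float> *y) {
      // Fold Y = scale/sqrt(var+eps) * X + (offset - scale*mean/sqrt(var+eps))
      // into Y = a * X + b, computed once per channel.
      const float a = scale / std::sqrt(var + epsilon);
      const float b = offset - a * mean;
      y->resize(x.size());
      for (size_t i = 0; i < x.size(); ++i) {
        (*y)[i] = a * x[i] + b;
      }
    }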
......@@ -38,6 +38,7 @@ struct BiasAddFunctor<DeviceType::CPU, float> {
const Tensor *bias,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
const index_t batch = input->dim(0);
const index_t channels = input->dim(1);
const index_t height = input->dim(2);
......
......@@ -39,6 +39,10 @@ struct BufferToImageFunctor : BufferToImageFunctorBase {
const BufferType type,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(input);
MACE_UNUSED(type);
MACE_UNUSED(output);
MACE_UNUSED(future);
MACE_NOT_IMPLEMENTED;
}
};
......
......@@ -31,6 +31,7 @@ struct ChannelShuffleFunctor {
void operator()(const Tensor *input,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
output->ResizeLike(input);
Tensor::MappingGuard logits_guard(input);
......@@ -56,7 +57,7 @@ struct ChannelShuffleFunctor {
index_t idx = c / groups_;
for (index_t hw = 0; hw < height * width; ++hw) {
output_base[c * image_size + hw] = input_base[
(c % groups_ * channels_per_group + c / groups_) * image_size + hw];
(g * channels_per_group + idx) * image_size + hw];
}
}
}
......
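The rewritten index is behavior-preserving: operator precedence already grouped the old expression as (c % groups_) * channels_per_group + c / groups_, and naming g (presumably c % groups_, defined just above the shown context) and idx simply makes the shuffle explicit. For channels = 6, groups = 2, the mapping reads:

    #include <cstdio>

    int main() {
      const int channels = 6, groups = 2;
      const int channels_per_group = channels / groups;
      for (int c = 0; c < channels; ++c) {
        const int g = c % groups, idx = c / groups;
        // prints out<-in pairs: 0<-0, 1<-3, 2<-1, 3<-4, 4<-2, 5<-5
        printf("out %d <- in %d\n", c, g * channels_per_group + idx);
      }
      return 0;
    }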
......@@ -43,8 +43,9 @@ struct ConcatFunctor : ConcatFunctorBase {
void operator()(const std::vector<const Tensor *> &input_list,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
const Tensor *input0 = input_list.front();
const int inputs_count = input_list.size();
const size_t inputs_count = input_list.size();
std::vector<index_t> output_shape(input0->shape());
index_t inner_size = 1;
......@@ -53,7 +54,7 @@ struct ConcatFunctor : ConcatFunctorBase {
}
std::vector<index_t> outer_sizes(inputs_count, 0);
outer_sizes[0] = input0->size() / inner_size;
for (int i = 1; i < inputs_count; ++i) {
for (size_t i = 1; i < inputs_count; ++i) {
const Tensor *input = input_list[i];
MACE_CHECK(input->dim_size() == input0->dim_size(),
"Ranks of all input tensors must be same.");
......
......@@ -103,8 +103,6 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
const index_t in_batch_size = in_channels * in_image_size;
const index_t out_batch_size = out_channels * out_image_size;
const index_t filter_size = filter_height * filter_width;
const index_t in_tile_size =
3 * stride_w + (filter_width - 1) * dilation_w + 1;
#pragma omp parallel for collapse(2)
for (index_t b = 0; b < batch; ++b) {
......@@ -267,6 +265,7 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
const Tensor *bias,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
MACE_CHECK_NOTNULL(input);
MACE_CHECK_NOTNULL(filter);
MACE_CHECK_NOTNULL(output);
......@@ -345,7 +344,6 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
Tensor::MappingGuard bias_guard(bias);
Tensor::MappingGuard output_guard(output);
auto input_data = input->data<float>();
auto filter_data = filter->data<float>();
auto bias_data = bias == nullptr ? nullptr : bias->data<float>();
auto output_data = output->mutable_data<float>();
......@@ -719,7 +717,10 @@ struct Conv2dFunctor<DeviceType::GPU, T> : Conv2dFunctorBase {
paddings,
dilations,
activation,
relux_max_limit) {}
relux_max_limit) {
MACE_UNUSED(is_filter_transformed);
MACE_UNUSED(scratch);
}
void operator()(const Tensor *input,
const Tensor *filter,
......
......@@ -33,6 +33,7 @@ struct DepthToSpaceOpFunctor {
explicit DepthToSpaceOpFunctor(const int block_size, bool d2s)
: block_size_(block_size), d2s_(d2s) {}
void operator()(const Tensor *input, Tensor *output, StatsFuture *future) {
MACE_UNUSED(future);
const int batch_size = input->dim(0);
const int input_depth = input->dim(1);
const int input_height = input->dim(2);
......
......@@ -133,6 +133,7 @@ struct DepthwiseConv2dFunctor<DeviceType::CPU, float>
const Tensor *bias,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
MACE_CHECK_NOTNULL(input);
MACE_CHECK_NOTNULL(filter);
MACE_CHECK_NOTNULL(output);
......
......@@ -470,6 +470,7 @@ struct EltwiseFunctor<DeviceType::CPU, float>: EltwiseFunctorBase {
const Tensor *input1,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
bool swapped = false;
if (input1 != nullptr) {
MACE_CHECK(input0->dim_size() == input1->dim_size()
......
......@@ -59,6 +59,7 @@ struct FullyConnectedFunctor<DeviceType::CPU, float>: FullyConnectedBase {
const Tensor *bias,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
std::vector<index_t> output_shape = {input->dim(0), weight->dim(0), 1, 1};
output->Resize(output_shape);
const index_t N = output->dim(0);
......
......@@ -135,7 +135,9 @@ inline void GemmTile(const float *A,
const index_t stride_k,
const index_t stride_w,
float *C) {
#if defined(MACE_ENABLE_NEON)
index_t h, w, k;
#endif
#if defined(MACE_ENABLE_NEON) && defined(__aarch64__)
for (h = 0; h + 7 < height; h += 8) {
......@@ -443,6 +445,7 @@ inline void GemmTile(const float *A,
#else
#if defined(MACE_ENABLE_NEON) // armv7
w = (width >> 2) << 2;
for (h = 0; h + 3 < height; h += 4) {
for (k = 0; k + 3 < K; k += 4) {
const float *a_ptr = A + (h * stride_k + k);
......@@ -523,8 +526,6 @@ inline void GemmTile(const float *A,
c_ptr2 += 4;
c_ptr3 += 4;
}
w = (width >> 2) << 2;
}
if (w < width) {
const float *a_ptr = A + (h * stride_k + k);
......
......@@ -42,6 +42,7 @@ struct LocalResponseNormFunctor<DeviceType::CPU, float> {
float beta,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
const index_t batch = input->dim(0);
const index_t channels = input->dim(1);
const index_t height = input->dim(2);
......
......@@ -42,6 +42,7 @@ struct MatMulFunctor {
const Tensor *B,
Tensor *C,
StatsFuture *future) {
MACE_UNUSED(future);
std::vector<index_t> c_shape = {A->dim(0), A->dim(1), B->dim(2), 1};
C->Resize(c_shape);
......@@ -59,14 +60,6 @@ struct MatMulFunctor {
// It is better to use large block size if it fits for fast cache.
// Assume l1 cache size is 32k, we load three blocks at a time (A, B, C),
// the block size should be sqrt(32k / sizeof(T) / 3).
const index_t block_size = 48;
const index_t block_tile_height = RoundUpDiv(height, block_size);
const index_t block_tile_width = RoundUpDiv(width, block_size);
const index_t block_tile_k = RoundUpDiv(K, block_size);
const index_t remain_height = height % block_size;
const index_t remain_width = width % block_size;
const index_t remain_k = K % block_size;
constexpr index_t register_tile_size = 4;
memset(c_ptr_base, 0, batch * height * width * sizeof(T));
Gemm(a_ptr_base, b_ptr_base, batch, height, K, width, c_ptr_base);
......
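The deleted block-size computations were dead stores (never read after the tiling moved into Gemm), but the comment's arithmetic still checks out: with a 32 KB L1 cache and T = float, sqrt(32768 / 4 / 3) ≈ 52, and the former block_size of 48 was that bound rounded down to a multiple of the register tile size 4.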
......@@ -36,7 +36,7 @@ void AddNFunctor<DeviceType::GPU, T>::operator()(
auto runtime = OpenCLRuntime::Global();
for (int i = 1; i < size; ++i) {
for (size_t i = 1; i < size; ++i) {
MACE_CHECK_NOTNULL(input_tensors[i]);
MACE_CHECK(batch == input_tensors[i]->dim(0));
MACE_CHECK(height == input_tensors[i]->dim(1));
......
......@@ -136,7 +136,6 @@ static void ConcatN(cl::Kernel *kernel,
const index_t batch = output->dim(0);
const index_t height = output->dim(1);
const index_t width = output->dim(2);
const index_t channel = output->dim(3);
auto runtime = OpenCLRuntime::Global();
......
......@@ -72,6 +72,8 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
StatsFuture *future,
uint32_t *kwg_size,
std::unique_ptr<BufferBase> *kernel_error) {
MACE_UNUSED(padding);
MACE_UNUSED(dilations);
const index_t batch = output->dim(0);
const index_t height = output->dim(1);
const index_t width = output->dim(2);
......
......@@ -135,7 +135,6 @@ static void DepthwiseConv2d(cl::Kernel *kernel,
static_cast<uint32_t>(runtime->GetKernelMaxWorkGroupSize(*kernel));
}
if (!IsVecEqual(*prev_input_shape, input->shape())) {
const index_t input_batch = input->dim(0);
const index_t input_height = input->dim(1);
const index_t input_width = input->dim(2);
......
......@@ -25,6 +25,7 @@ void EltwiseFunctor<DeviceType::GPU, T>::operator()(const Tensor *input0,
const Tensor *input1,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
bool swapped = false;
if (input1 != nullptr) {
MACE_CHECK(input0->dim_size() == input1->dim_size()
......
......@@ -206,17 +206,6 @@ std::string DtToUpstreamCLCMDDt(const DataType dt) {
}
}
std::vector<uint32_t> Default2DLocalWS(const uint32_t *gws,
const uint32_t kwg_size) {
std::vector<uint32_t> lws(3, 0);
uint64_t cache_size =
OpenCLRuntime::Global()->device_global_mem_cache_size();
uint32_t base = cache_size / kBaseGPUMemCacheSize;
lws[0] = std::min<uint32_t>(base, kwg_size);
lws[1] = kwg_size / lws[1];
return lws;
}
std::vector<uint32_t> Default3DLocalWS(const uint32_t *gws,
const uint32_t kwg_size) {
std::vector<uint32_t> lws(4, 0);
......
......@@ -19,6 +19,7 @@
#include <vector>
#include "mace/core/future.h"
#include "mace/core/macros.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/types.h"
......@@ -95,6 +96,7 @@ bool IsVecEqual(const std::vector<T> &input0,
template <typename T>
void AppendToStream(std::stringstream *ss, const std::string &delimiter, T v) {
MACE_UNUSED(delimiter);
(*ss) << v;
}
......@@ -114,8 +116,6 @@ std::string Concat(Args... args) {
return ss.str();
}
std::vector<uint32_t> Default2DLocalWS(const uint32_t *gws,
const uint32_t kwg_size);
std::vector<uint32_t> Default3DLocalWS(const uint32_t *gws,
const uint32_t kwg_size);
} // namespace kernels
......
......@@ -25,6 +25,7 @@ void MatMulFunctor<DeviceType::GPU, T>::operator()(const Tensor *A,
const Tensor *B,
Tensor *C,
StatsFuture *future) {
MACE_UNUSED(future);
std::vector<index_t> c_shape = {A->dim(0), A->dim(1), B->dim(2), 1};
std::vector<size_t> c_image_shape;
CalImage2DShape(c_shape, BufferType::IN_OUT_HEIGHT, &c_image_shape);
......
......@@ -25,9 +25,9 @@ namespace mace {
namespace kernels {
namespace {
const bool BufferToImageOpImpl(Tensor *buffer,
Tensor *image,
const std::vector<size_t> &image_shape) {
bool BufferToImageOpImpl(Tensor *buffer,
Tensor *image,
const std::vector<size_t> &image_shape) {
std::unique_ptr<BufferBase> kernel_error;
uint32_t gws[2] = {static_cast<uint32_t>(image_shape[0]),
static_cast<uint32_t>(image_shape[1])};
......@@ -150,7 +150,7 @@ TEST(OutOfRangeCheckTest, RandomTest) {
ASSERT_FALSE(BufferToImageOpImpl(buffer, image, image_shape));
std::vector<size_t> overflow_image_shape = image_shape;
for (int i = 0; i < overflow_image_shape.size(); ++i) {
for (size_t i = 0; i < overflow_image_shape.size(); ++i) {
overflow_image_shape[i] += 1;
}
ASSERT_TRUE(BufferToImageOpImpl(buffer, image, overflow_image_shape));
......
......@@ -25,7 +25,8 @@ void PadFunctor<DeviceType::GPU, T>::operator()(
const Tensor *input,
Tensor *output,
StatsFuture *future) {
MACE_CHECK(this->paddings_.size() == (input->dim_size() * 2));
MACE_CHECK(
this->paddings_.size() == static_cast<size_t>((input->dim_size() * 2)));
MACE_CHECK((this->paddings_[0] == 0) && (this->paddings_[1] == 0)
&& (this->paddings_[6] == 0) && (this->paddings_[7] == 0))
<< "Mace only support height/width dimension now";
......
......@@ -75,7 +75,7 @@ void SliceFunctor<DeviceType::GPU, T>::operator()(
const std::vector<uint32_t> lws = Default3DLocalWS(gws, kwg_size_);
cl::Event event;
CallStats call_stats{INT64_MAX, 0};
for (int i = 0; i < outputs_count; ++i) {
for (size_t i = 0; i < outputs_count; ++i) {
uint32_t idx = 0;
if (runtime->IsOutOfRangeCheckEnabled()) {
kernel_.setArg(idx++,
......
......@@ -47,7 +47,9 @@ struct PadFunctor : public PadFunctorBase {
void operator()(const Tensor *input,
Tensor *output,
StatsFuture *future) {
MACE_CHECK(this->paddings_.size() == (input->dim_size() * 2));
MACE_UNUSED(future);
MACE_CHECK(
this->paddings_.size() == static_cast<size_t>(input->dim_size()) * 2);
auto input_shape = input->shape();
output->Resize({input_shape[0] + this->paddings_[0] + this->paddings_[1],
input_shape[1] + this->paddings_[2] + this->paddings_[3],
......
......@@ -173,6 +173,7 @@ struct PoolingFunctor<DeviceType::CPU, float>: PoolingFunctorBase {
void operator()(const Tensor *input_tensor,
Tensor *output_tensor,
StatsFuture *future) {
MACE_UNUSED(future);
std::vector<index_t> output_shape(4);
std::vector<index_t> filter_shape = {
input_tensor->dim(1), input_tensor->dim(1), kernels_[0], kernels_[1]};
......
......@@ -92,7 +92,7 @@ inline std::vector<int> nms(const float *bboxes_ptr,
for (int i = 0; i < num_bboxes; ++i) {
if (suppressed[i] == 1) continue;
keep.push_back(i);
if (keep.size() >= post_nms_top_n) break;
if (keep.size() >= static_cast<size_t>(post_nms_top_n)) break;
int coord_idx = i << 2;
const float x1 = bboxes_ptr[coord_idx];
const float y1 = bboxes_ptr[coord_idx + 1];
......@@ -141,10 +141,11 @@ struct ProposalFunctor {
const Tensor *img_info_tensor,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
MACE_CHECK(rpn_cls_prob->dim(1) == rpn_bbox_pred->dim(1) &&
rpn_cls_prob->dim(2) == rpn_bbox_pred->dim(2));
MACE_CHECK((rpn_cls_prob->dim(3) / 2 == rpn_bbox_pred->dim(3) / 4) &&
(rpn_cls_prob->dim(3) / 2 == anchors_.size()));
(static_cast<size_t>(rpn_cls_prob->dim(3) / 2) == anchors_.size()));
const float *img_info = img_info_tensor->data<float>();
const int im_height = static_cast<int>(img_info[0] - 1);
const int im_width = static_cast<int>(img_info[1] - 1);
......
......@@ -38,6 +38,7 @@ struct PSROIAlignFunctor {
const Tensor *rois,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
const int height = static_cast<int>(input->dim(1));
const int width = static_cast<int>(input->dim(2));
const int channels = static_cast<int>(input->dim(3));
......
......@@ -81,6 +81,7 @@ struct QuantizeFunctor<CPU, uint8_t> {
Tensor *out_min,
Tensor *out_max,
StatsFuture *future) {
MACE_UNUSED(future);
const float *input_data = input->data<float>();
const float in_min_data = in_min->data<float>()[0];
const float in_max_data = in_max->data<float>()[0];
......@@ -109,6 +110,7 @@ struct DequantizeFunctor<CPU, uint8_t> {
const Tensor *in_max,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
const uint8_t *input_data = input->data<uint8_t>();
const float in_min_data = in_min->data<float>()[0];
const float in_max_data = in_max->data<float>()[0];
......@@ -137,6 +139,7 @@ struct RequantizeFunctor<CPU, uint8_t> {
Tensor *out_min,
Tensor *out_max,
StatsFuture *future) {
MACE_UNUSED(future);
const int *input_data = input->data<int>();
const float in_min_data = in_min->data<float>()[0];
const float in_max_data = in_max->data<float>()[0];
......
......@@ -35,6 +35,7 @@ struct ReshapeFunctor {
const std::vector<index_t> &out_shape,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
output->ResizeWithBuffer(out_shape, input->UnderlyingBuffer());
}
};
......
......@@ -137,6 +137,7 @@ struct ResizeBilinearFunctor<DeviceType::CPU, float>
: ResizeBilinearFunctorBase(size, align_corners) {}
void operator()(const Tensor *input, Tensor *output, StatsFuture *future) {
MACE_UNUSED(future);
const index_t batch = input->dim(0);
const index_t channels = input->dim(1);
const index_t in_height = input->dim(2);
......
......@@ -44,6 +44,7 @@ struct SliceFunctor : SliceFunctorBase {
void operator()(const Tensor *input,
const std::vector<Tensor *> &output_list,
StatsFuture *future) {
MACE_UNUSED(future);
const index_t input_channels = input->dim(axis_);
const size_t outputs_count = output_list.size();
const index_t output_channels = input_channels / outputs_count;
......
......@@ -39,6 +39,7 @@ struct SoftmaxFunctor;
template<>
struct SoftmaxFunctor<DeviceType::CPU, float> {
void operator()(const Tensor *input, Tensor *output, StatsFuture *future) {
MACE_UNUSED(future);
const index_t batch = input->dim(0);
const index_t class_count = input->dim(1);
const index_t class_size = input->dim(2) * input->dim(3);
......
......@@ -53,6 +53,10 @@ struct SpaceToBatchFunctor : SpaceToBatchFunctorBase {
const std::vector<index_t> &output_shape,
Tensor *batch_tensor,
StatsFuture *future) {
MACE_UNUSED(space_tensor);
MACE_UNUSED(output_shape);
MACE_UNUSED(batch_tensor);
MACE_UNUSED(future);
MACE_NOT_IMPLEMENTED;
}
};
......
......@@ -30,6 +30,7 @@ struct TransposeFunctor {
explicit TransposeFunctor(const std::vector<int> &dims) : dims_(dims) {}
void operator()(const Tensor *input, Tensor *output, StatsFuture *future) {
MACE_UNUSED(future);
Tensor::MappingGuard input_guard(input);
Tensor::MappingGuard output_guard(output);
const std::vector<index_t> &input_shape = input->shape();
......
......@@ -51,6 +51,9 @@ struct WinogradTransformFunctor : WinogradTransformFunctorBase {
: WinogradTransformFunctorBase(padding_type, paddings) {}
void operator()(const Tensor *input, Tensor *output, StatsFuture *future) {
MACE_UNUSED(input);
MACE_UNUSED(output);
MACE_UNUSED(future);
MACE_NOT_IMPLEMENTED;
}
};
......@@ -105,6 +108,9 @@ struct WinogradInverseTransformFunctor : WinogradInverseTransformFunctorBase {
const Tensor *bias,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(input);
MACE_UNUSED(bias);
MACE_UNUSED(output);
MACE_NOT_IMPLEMENTED;
}
};
......
......@@ -29,6 +29,8 @@ void Register_BatchToSpaceND(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
BatchToSpaceNDOp<DeviceType::GPU, half>);
#else
MACE_UNUSED(op_registry);
#endif // MACE_ENABLE_OPENCL
}
......
......@@ -38,7 +38,7 @@ class BatchToSpaceNDOp : public Operator<D, T> {
Tensor *space_tensor = this->Output(OUTPUT);
std::vector<index_t> output_shape(4, 0);
CalculateOutputShape(batch_tensor, space_tensor, output_shape.data());
CalculateOutputShape(batch_tensor, output_shape.data());
functor_(space_tensor, output_shape, const_cast<Tensor *>(batch_tensor),
future);
return true;
......@@ -46,7 +46,6 @@ class BatchToSpaceNDOp : public Operator<D, T> {
private:
inline void CalculateOutputShape(const Tensor *input_tensor,
Tensor *output,
index_t *output_shape) {
auto crops = OperatorBase::GetRepeatedArgument<int>("crops", {0, 0, 0, 0});
auto block_shape =
......
......@@ -45,7 +45,6 @@ class ChannelShuffleOp : public Operator<D, T> {
MACE_CHECK(channels % group_ == 0,
"input channels must be an integral multiple of group. ",
input->dim(3));
int channels_per_group = channels / group_;
functor_(input, output, future);
return true;
......
......@@ -30,7 +30,7 @@ void CalculateScaleOffset(const std::vector<float> &gamma,
std::vector<float> *scale,
std::vector<float> *offset) {
size_t size = gamma.size();
for (int i = 0; i < size; ++i) {
for (size_t i = 0; i < size; ++i) {
(*scale)[i] = gamma[i] / std::sqrt(var[i] + epsilon);
(*offset)[i] = beta[i] - mean[i] * (*scale)[i];
}
......
......@@ -45,7 +45,7 @@ TEST_F(ProposalOpTest, CPUSimple) {
.Finalize(net.NewOperatorDef());
std::vector<float> scores(height * width * 18);
for (int i = 0 ; i < scores.size(); ++i) {
for (size_t i = 0 ; i < scores.size(); ++i) {
scores[i] = i;
}
......
......@@ -95,8 +95,6 @@ TEST_F(QuantizeTest, TestQuantizeTrend) {
net.RunOp();
auto output = net.GetTensor("Output");
auto output_min = net.GetTensor("OutputMin");
auto output_max = net.GetTensor("OutputMax");
const uint8_t *output_data = net.GetTensor("Output")->data<uint8_t>();
for (int i = 1; i < output->size(); ++i) {
......
......@@ -30,6 +30,8 @@ void Register_SpaceToBatchND(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
SpaceToBatchNDOp<DeviceType::GPU, half>);
#else
MACE_UNUSED(op_registry);
#endif // MACE_ENABLE_OPENCL
}
......
......@@ -39,7 +39,7 @@ class SpaceToBatchNDOp : public Operator<D, T> {
Tensor *batch_tensor = this->Output(OUTPUT);
std::vector<index_t> output_shape(4, 0);
CalculateOutputShape(space_tensor, batch_tensor, output_shape.data());
CalculateOutputShape(space_tensor, output_shape.data());
functor_(const_cast<Tensor *>(space_tensor), output_shape, batch_tensor,
future);
return true;
......@@ -47,7 +47,6 @@ class SpaceToBatchNDOp : public Operator<D, T> {
private:
inline void CalculateOutputShape(const Tensor *input_tensor,
Tensor *output,
index_t *output_shape) {
auto paddings =
OperatorBase::GetRepeatedArgument<int>("paddings", {0, 0, 0, 0});
......
......@@ -35,11 +35,11 @@ class TransposeOp : public Operator<D, T> {
const Tensor *input = this->Input(INPUT);
Tensor *output = this->Output(OUTPUT);
const std::vector<index_t> &input_shape = input->shape();
MACE_CHECK(input_shape.size() == 4 && dims_.size() == 4
|| input_shape.size() == 2 && dims_.size() == 2,
MACE_CHECK((input_shape.size() == 4 && dims_.size() == 4)
|| (input_shape.size() == 2 && dims_.size() == 2),
"rank should be 2 or 4");
std::vector<index_t> output_shape;
for (int i = 0; i < dims_.size(); ++i) {
for (size_t i = 0; i < dims_.size(); ++i) {
output_shape.push_back(input_shape[dims_[i]]);
}
output->Resize(output_shape);
......
......@@ -30,6 +30,8 @@ void Register_WinogradInverseTransform(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
WinogradInverseTransformOp<DeviceType::GPU, half>);
#else
MACE_UNUSED(op_registry);
#endif // MACE_ENABLE_OPENCL
}
......
......@@ -30,6 +30,8 @@ void Register_WinogradTransform(OperatorRegistry *op_registry) {
.TypeConstraint<half>("T")
.Build(),
WinogradTransformOp<DeviceType::GPU, half>);
#else
MACE_UNUSED(op_registry);
#endif // MACE_ENABLE_OPENCL
}
......
......@@ -4,13 +4,15 @@ load("//mace:mace.bzl", "if_openmp_enabled", "if_android")
cc_binary(
name = "mace_run",
srcs = ["mace_run.cc"],
copts = if_openmp_enabled(["-fopenmp"]) + if_android([
"-DMACE_ENABLE_OPENCL",
]),
linkopts = if_openmp_enabled(["-fopenmp"]),
linkstatic = 1,
copts = if_android(["-DMACE_ENABLE_OPENCL"]),
deps = [
"//external:gflags_nothreads",
"//mace/codegen:generated_models",
"//mace/codegen:generated_mace_engine_factory",
"//mace/core:core",
"//mace/codegen:generated_models",
"//mace/core",
],
)
......@@ -385,7 +385,7 @@ int Main(int argc, char **argv) {
ParseShape(output_shapes[i], &output_shape_vec[i]);
}
bool ret;
bool ret = false;
#pragma omp parallel for
for (int i = 0; i < FLAGS_restart_round; ++i) {
VLOG(0) << "restart round " << i;
......@@ -395,9 +395,8 @@ int Main(int argc, char **argv) {
}
if (ret) {
return 0;
} else {
return -1;
}
return -1;
}
} // namespace validation
......
......@@ -12,20 +12,18 @@ load("//mace:mace.bzl", "if_android")
cc_library(
name = "utils",
srcs = [
"command_line_flags.cc",
"logging.cc",
"string_util.cc",
],
hdrs = [
"command_line_flags.h",
"env_time.h",
"logging.h",
"memory_logging.h",
"rwlock.h",
"string_util.h",
"timer.h",
"tuner.h",
"utils.h",
"rwlock.h",
],
linkopts = if_android([
"-llog",
......@@ -35,24 +33,6 @@ cc_library(
],
)
cc_test(
name = "utils_test",
testonly = 1,
srcs = [
"utils_test.cc",
],
linkopts = if_android([
"-pie",
"-lm",
]),
linkstatic = 1,
deps = [
":utils",
"@gtest//:gtest",
"@gtest//:gtest_main",
],
)
cc_library(
name = "utils_dev",
srcs = [
......
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/utils/command_line_flags.h"
#include <cstring>
#include <iomanip>
#include "mace/utils/logging.h"
namespace mace {
namespace utils {
bool StringConsume(const std::string &x, std::string *arg) {
MACE_CHECK_NOTNULL(arg);
if ((arg->size() >= x.size()) &&
(memcmp(arg->data(), x.data(), x.size()) == 0)) {
*arg = arg->substr(x.size());
return true;
}
return false;
}
bool ParseStringFlag(std::string arg,
std::string flag,
std::string *dst,
bool *value_parsing_ok) {
*value_parsing_ok = true;
if (StringConsume("--", &arg) && StringConsume(flag, &arg) &&
StringConsume("=", &arg)) {
*dst = arg;
return true;
}
return false;
}
bool ParseInt32Flag(std::string arg,
std::string flag,
int32_t *dst,
bool *value_parsing_ok) {
*value_parsing_ok = true;
if (StringConsume("--", &arg) && StringConsume(flag, &arg) &&
StringConsume("=", &arg)) {
char extra;
if (sscanf(arg.data(), "%d%c", dst, &extra) != 1) {
LOG(ERROR) << "Couldn't interpret value " << arg << " for flag " << flag
<< ".";
*value_parsing_ok = false;
}
return true;
}
return false;
}
bool ParseInt64Flag(std::string arg,
std::string flag,
int64_t *dst,
bool *value_parsing_ok) {
*value_parsing_ok = true;
if (StringConsume("--", &arg) && StringConsume(flag, &arg) &&
StringConsume("=", &arg)) {
char extra;
if (sscanf(arg.data(), "%lld%c", dst, &extra) != 1) {
LOG(ERROR) << "Couldn't interpret value " << arg << " for flag " << flag
<< ".";
*value_parsing_ok = false;
}
return true;
}
return false;
}
bool ParseBoolFlag(std::string arg, std::string flag,
bool *dst, bool *value_parsing_ok) {
*value_parsing_ok = true;
if (StringConsume("--", &arg) && StringConsume(flag, &arg)) {
if (arg.empty()) {
*dst = true;
return true;
}
if (arg == "=true") {
*dst = true;
return true;
} else if (arg == "=false") {
*dst = false;
return true;
} else {
LOG(ERROR) << "Couldn't interpret value " << arg << " for flag " << flag
<< ".";
*value_parsing_ok = false;
return true;
}
}
return false;
}
bool ParseFloatFlag(std::string arg,
std::string flag,
float *dst,
bool *value_parsing_ok) {
*value_parsing_ok = true;
if (StringConsume("--", &arg) && StringConsume(flag, &arg) &&
StringConsume("=", &arg)) {
char extra;
if (sscanf(arg.data(), "%f%c", dst, &extra) != 1) {
LOG(ERROR) << "Couldn't interpret value " << arg << " for flag " << flag
<< ".";
*value_parsing_ok = false;
}
return true;
}
return false;
}
} // namespace utils
Flag::Flag(const char *name, int *dst, const std::string &usage_text)
: name_(name), type_(TYPE_INT), int_value_(dst), usage_text_(usage_text) {}
Flag::Flag(const char *name, int64_t *dst, const std::string &usage_text)
: name_(name),
type_(TYPE_INT64),
int64_value_(dst),
usage_text_(usage_text) {}
Flag::Flag(const char *name, bool *dst, const std::string &usage_text)
: name_(name),
type_(TYPE_BOOL),
bool_value_(dst),
usage_text_(usage_text) {}
Flag::Flag(const char *name, std::string *dst, const std::string &usage_text)
: name_(name),
type_(TYPE_STRING),
string_value_(dst),
usage_text_(usage_text) {}
Flag::Flag(const char *name, float *dst, const std::string &usage_text)
: name_(name),
type_(TYPE_FLOAT),
float_value_(dst),
usage_text_(usage_text) {}
bool Flag::Parse(std::string arg, bool *value_parsing_ok) const {
bool result = false;
if (type_ == TYPE_INT) {
result = utils::ParseInt32Flag(arg, name_, int_value_, value_parsing_ok);
} else if (type_ == TYPE_INT64) {
result = utils::ParseInt64Flag(arg, name_, int64_value_, value_parsing_ok);
} else if (type_ == TYPE_BOOL) {
result = utils::ParseBoolFlag(arg, name_, bool_value_, value_parsing_ok);
} else if (type_ == TYPE_STRING) {
result = utils::ParseStringFlag(arg, name_,
string_value_, value_parsing_ok);
} else if (type_ == TYPE_FLOAT) {
result = utils::ParseFloatFlag(arg, name_, float_value_, value_parsing_ok);
}
return result;
}
/*static*/ bool Flags::Parse(int *argc,
char **argv,
const std::vector<Flag> &flag_list) {
bool result = true;
std::vector<char *> unknown_flags;
for (int i = 1; i < *argc; ++i) {
if (std::string(argv[i]) == "--") {
while (i < *argc) {
unknown_flags.push_back(argv[i]);
++i;
}
break;
}
bool was_found = false;
for (const Flag &flag : flag_list) {
bool value_parsing_ok;
was_found = flag.Parse(argv[i], &value_parsing_ok);
if (!value_parsing_ok) {
result = false;
}
if (was_found) {
break;
}
}
if (!was_found) {
unknown_flags.push_back(argv[i]);
}
}
// Passthrough any extra flags.
int dst = 1; // Skip argv[0]
for (char *f : unknown_flags) {
argv[dst++] = f;
}
argv[dst++] = nullptr;
*argc = unknown_flags.size() + 1;
return result && (*argc < 2 || strcmp(argv[1], "--help") != 0);
}
std::string Flags::Usage(const std::string &cmdline,
const std::vector<Flag> &flag_list) {
std::stringstream usage_text;
usage_text << "usage: " << cmdline << std::endl;
if (!flag_list.empty()) {
usage_text << "Flags: " << std::endl;
}
for (const Flag &flag : flag_list) {
usage_text << "\t" << std::left << std::setw(30) << flag.name_;
usage_text << flag.usage_text_ << std::endl;
}
return usage_text.str();
}
} // namespace mace
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_UTILS_COMMAND_LINE_FLAGS_H_
#define MACE_UTILS_COMMAND_LINE_FLAGS_H_
#include <string>
#include <vector>
namespace mace {
class Flag {
public:
Flag(const char *name, int *dst1, const std::string &usage_text);
Flag(const char *name, int64_t *dst1, const std::string &usage_text);
Flag(const char *name, bool *dst, const std::string &usage_text);
Flag(const char *name, std::string *dst, const std::string &usage_text);
Flag(const char *name, float *dst, const std::string &usage_text);
private:
friend class Flags;
bool Parse(std::string arg, bool *value_parsing_ok) const;
std::string name_;
enum { TYPE_INT, TYPE_INT64, TYPE_BOOL, TYPE_STRING, TYPE_FLOAT } type_;
int *int_value_;
int64_t *int64_value_;
bool *bool_value_;
std::string *string_value_;
float *float_value_;
std::string usage_text_;
};
class Flags {
public:
// Parse the command line represented by argv[0, ..., (*argc)-1] to find flag
// instances matching flags in flaglist[]. Update the variables associated
// with matching flags, and remove the matching arguments from (*argc, argv).
// Return true iff all recognized flag values were parsed correctly, and the
// first remaining argument is not "--help".
static bool Parse(int *argc, char **argv, const std::vector<Flag> &flag_list);
// Return a usage message with command line cmdline, and the
// usage_text strings in flag_list[].
static std::string Usage(const std::string &cmdline,
const std::vector<Flag> &flag_list);
};
} // namespace mace
#endif // MACE_UTILS_COMMAND_LINE_FLAGS_H_
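For reference, a minimal usage sketch of this flag API (flag names and defaults are illustrative, not taken from this diff):

    #include <cstdio>
    #include <string>
    #include <vector>
    // #include "mace/utils/command_line_flags.h"

    int main(int argc, char **argv) {
      int round = 1;
      std::string model_file;
      std::vector<mace::Flag> flag_list = {
          mace::Flag("round", &round, "number of rounds to run"),
          mace::Flag("model_file", &model_file, "path to the model"),
      };
      if (!mace::Flags::Parse(&argc, argv, flag_list)) {
        // Bad flag value or --help: print usage and exit non-zero.
        printf("%s", mace::Flags::Usage(argv[0], flag_list).c_str());
        return -1;
      }
      return 0;
    }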
......@@ -58,7 +58,7 @@ std::string MakeString(const std::vector<T> &args) {
std::stringstream ss;
ss << "[";
const size_t size = args.size();
for (int i = 0; i < size; ++i) {
for (size_t i = 0; i < size; ++i) {
ss << args[i];
if (i < size - 1) {
ss << ", ";
......
......@@ -146,7 +146,7 @@ class Tuner {
for (iter = 0; iter < num_runs; ++iter) {
res = func(params, timer, tuning_result);
total_time_us += timer->AccumulatedMicros();
if (iter >= 1 && total_time_us > 100000 || total_time_us > 200000) {
if ((iter >= 1 && total_time_us > 100000) || total_time_us > 200000) {
++iter;
break;
}
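The added parentheses are behavior-preserving, since && already binds tighter than ||; they exist only to satisfy -Wparentheses, which -Werror promotes to an error. As a one-line sanity check:

    static_assert((true && false || true) == ((true && false) || true),
                  "&& binds tighter than ||");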
......@@ -165,7 +165,7 @@ class Tuner {
std::vector<param_type> *)> &func,
Timer *timer,
std::vector<param_type> *opt_params) {
RetType res;
RetType res = 0;
double opt_time = std::numeric_limits<double>::max();
auto params = param_generator();
std::vector<param_type> tuning_result;
......
......@@ -22,6 +22,7 @@ namespace mace {
bool GetTuningParams(
const char *path,
std::unordered_map<std::string, std::vector<unsigned int>> *param_table) {
(void)(path);
extern const std::map<std::string, std::vector<unsigned int>>
kTuningParamsData;
for (auto it = kTuningParamsData.begin(); it != kTuningParamsData.end();
......
......@@ -54,7 +54,7 @@ TEST_F(TunerTest, SimpleRun) {
}
TEST_F(TunerTest, SimpleTune) {
int expect = 3;
unsigned int expect = 3;
auto TunerFunc = [&](const std::vector<unsigned int> &params, Timer *timer,
std::vector<uint32_t> *tuning_result) -> int {
int res = 0;
......
......@@ -277,7 +277,7 @@ def bazel_build(target,
stdout_buff = []
process_output = make_output_processor(stdout_buff)
if abi == "host":
p = sh.bazel(
bazel_args = (
"build",
"-c",
"opt",
......@@ -287,12 +287,17 @@ def bazel_build(target,
target,
"--copt=-std=c++11",
"--copt=-D_GLIBCXX_USE_C99_MATH_TR1",
"--copt=-Werror=return-type",
"--copt=-Werror",
"--copt=-Wextra",
"--copt=-Wno-missing-field-initializers",
"--copt=-O3",
"--define",
"openmp=%s" % str(enable_openmp).lower(),
"--define",
"production=%s" % str(production_mode).lower(),
)
p = sh.bazel(
*bazel_args,
_out=process_output,
_bg=True,
_err_to_out=True)
......@@ -311,7 +316,9 @@ def bazel_build(target,
"--cpu=%s" % abi,
"--copt=-std=c++11",
"--copt=-D_GLIBCXX_USE_C99_MATH_TR1",
"--copt=-Werror=return-type",
"--copt=-Werror",
"--copt=-Wextra",
"--copt=-Wno-missing-field-initializers",
"--copt=-DMACE_OBFUSCATE_LITERALS",
"--copt=-O3",
"--define",
......