Commit 80d1c9dd authored by liuqi

Support int32 input data type.

1. Support int32 input data type.
2. Support GatherV2 op
3. Add transpose to ExpandDim op.
Parent ad4953cb
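Taken together, the changes below let a model consume and produce int32 tensors end to end. A minimal sketch of how a caller might feed an int32 input through the new `std::shared_ptr<void>`-based MaceTensor constructor — the input name "ids" and its shape are illustrative only, not part of this commit:

```cpp
// Minimal sketch, assuming a converted model with one int32 input named
// "ids" of shape {1, 128}; the name and shape are hypothetical.
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "mace/public/mace.h"

void FillInt32Input(std::map<std::string, mace::MaceTensor> *inputs) {
  const std::vector<int64_t> shape{1, 128};
  const int64_t size = 1 * 128;
  // MaceTensor now holds a std::shared_ptr<void>, so an int32 buffer can be
  // handed over directly instead of being laundered through a float buffer.
  auto buffer = std::shared_ptr<int32_t>(new int32_t[size],
                                         std::default_delete<int32_t[]>());
  for (int64_t i = 0; i < size; ++i) {
    buffer.get()[i] = static_cast<int32_t>(i);  // fill with dummy ids
  }
  (*inputs)["ids"] = mace::MaceTensor(shape, buffer, mace::DataFormat::NHWC);
}
```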
...@@ -114,7 +114,7 @@ jobs: ...@@ -114,7 +114,7 @@ jobs:
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_test" --run_target=False --target_abis=armeabi-v7a || exit 1 - python tools/bazel_adb_run.py --target="//mace/test:mace_api_test" --run_target=False --target_abis=armeabi-v7a || exit 1
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_mt_test" --run_target=False --target_abis=armeabi-v7a || exit 1 - python tools/bazel_adb_run.py --target="//mace/test:mace_api_mt_test" --run_target=False --target_abis=armeabi-v7a || exit 1
- echo 'Extra Test' - echo 'Extra Test'
- python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=False --target_abis=armeabi-v7a || exit 1 - python tools/bazel_adb_run.py --target="//mace/utils:utils_test" --run_target=False --target_abis=armeabi-v7a || exit 1
env: TYPE=Extra-Test-ARMEABI-v7a env: TYPE=Extra-Test-ARMEABI-v7a
os: linux os: linux
dist: xenial dist: xenial
......
...@@ -332,18 +332,17 @@ int Main(int argc, char **argv) { ...@@ -332,18 +332,17 @@ int Main(int argc, char **argv) {
std::map<std::string, mace::MaceTensor> inputs; std::map<std::string, mace::MaceTensor> inputs;
std::map<std::string, mace::MaceTensor> outputs; std::map<std::string, mace::MaceTensor> outputs;
for (size_t i = 0; i < input_count; ++i) { for (size_t i = 0; i < input_count; ++i) {
// Allocate input and output // only support float and int32, use char for generalization
int64_t input_size = int64_t input_size =
std::accumulate(input_shape_vec[i].begin(), input_shape_vec[i].end(), 1, std::accumulate(input_shape_vec[i].begin(), input_shape_vec[i].end(), 4,
std::multiplies<int64_t>()); std::multiplies<int64_t>());
auto buffer_in = std::shared_ptr<float>(new float[input_size], auto buffer_in = std::shared_ptr<char>(new char[input_size],
std::default_delete<float[]>()); std::default_delete<char[]>());
// load input // load input
std::ifstream in_file(FLAGS_input_file + "_" + FormatName(input_names[i]), std::ifstream in_file(FLAGS_input_file + "_" + FormatName(input_names[i]),
std::ios::in | std::ios::binary); std::ios::in | std::ios::binary);
if (in_file.is_open()) { if (in_file.is_open()) {
in_file.read(reinterpret_cast<char *>(buffer_in.get()), in_file.read(buffer_in.get(), input_size);
input_size * sizeof(float));
in_file.close(); in_file.close();
} else { } else {
LOG(INFO) << "Open input file failed"; LOG(INFO) << "Open input file failed";
...@@ -354,12 +353,13 @@ int Main(int argc, char **argv) { ...@@ -354,12 +353,13 @@ int Main(int argc, char **argv) {
} }
for (size_t i = 0; i < output_count; ++i) { for (size_t i = 0; i < output_count; ++i) {
// only support float and int32, use char for generalization
int64_t output_size = int64_t output_size =
std::accumulate(output_shape_vec[i].begin(), std::accumulate(output_shape_vec[i].begin(),
output_shape_vec[i].end(), 1, output_shape_vec[i].end(), 4,
std::multiplies<int64_t>()); std::multiplies<int64_t>());
auto buffer_out = std::shared_ptr<float>(new float[output_size], auto buffer_out = std::shared_ptr<char>(new char[output_size],
std::default_delete<float[]>()); std::default_delete<char[]>());
outputs[output_names[i]] = mace::MaceTensor(output_shape_vec[i], outputs[output_names[i]] = mace::MaceTensor(output_shape_vec[i],
buffer_out, buffer_out,
output_data_formats[i]); output_data_formats[i]);
......
...@@ -33,7 +33,7 @@ namespace mace { ...@@ -33,7 +33,7 @@ namespace mace {
bool MemoryOptimizer::IsMemoryReuseOp(const std::string &op_type) { bool MemoryOptimizer::IsMemoryReuseOp(const std::string &op_type) {
static const std::unordered_set<std::string> kReuseOp = { static const std::unordered_set<std::string> kReuseOp = {
"Reshape", "Identity", "Squeeze", "ExpandDims" "Reshape", "Identity", "Squeeze"
}; };
return kReuseOp.count(op_type) == 1; return kReuseOp.count(op_type) == 1;
} }
......
...@@ -267,6 +267,7 @@ bool RunModel(const std::vector<std::string> &input_names, ...@@ -267,6 +267,7 @@ bool RunModel(const std::vector<std::string> &input_names,
std::accumulate(input_shapes[i].begin(), input_shapes[i].end(), 1, std::accumulate(input_shapes[i].begin(), input_shapes[i].end(), 1,
std::multiplies<int64_t>()); std::multiplies<int64_t>());
inputs_size[input_names[i]] = input_size; inputs_size[input_names[i]] = input_size;
// Only support float and int32 data type
auto buffer_in = std::shared_ptr<float>(new float[input_size], auto buffer_in = std::shared_ptr<float>(new float[input_size],
std::default_delete<float[]>()); std::default_delete<float[]>());
inputs[input_names[i]] = mace::MaceTensor(input_shapes[i], buffer_in, inputs[input_names[i]] = mace::MaceTensor(input_shapes[i], buffer_in,
...@@ -277,6 +278,7 @@ bool RunModel(const std::vector<std::string> &input_names, ...@@ -277,6 +278,7 @@ bool RunModel(const std::vector<std::string> &input_names,
int64_t output_size = int64_t output_size =
std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1, std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1,
std::multiplies<int64_t>()); std::multiplies<int64_t>());
// Only support float and int32 data type
auto buffer_out = std::shared_ptr<float>(new float[output_size], auto buffer_out = std::shared_ptr<float>(new float[output_size],
std::default_delete<float[]>()); std::default_delete<float[]>());
outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out, outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out,
......
...@@ -284,13 +284,13 @@ MaceStatus MaceEngineConfig::SetCPUThreadPolicy( ...@@ -284,13 +284,13 @@ MaceStatus MaceEngineConfig::SetCPUThreadPolicy(
class MaceTensor::Impl { class MaceTensor::Impl {
public: public:
std::vector<int64_t> shape; std::vector<int64_t> shape;
std::shared_ptr<float> data; std::shared_ptr<void> data;
DataFormat format; DataFormat format;
int64_t buffer_size; int64_t buffer_size;
}; };
MaceTensor::MaceTensor(const std::vector<int64_t> &shape, MaceTensor::MaceTensor(const std::vector<int64_t> &shape,
std::shared_ptr<float> data, std::shared_ptr<void> data,
const DataFormat format) { const DataFormat format) {
MACE_CHECK_NOTNULL(data.get()); MACE_CHECK_NOTNULL(data.get());
MACE_CHECK(format == DataFormat::DF_NONE || format == DataFormat::NHWC MACE_CHECK(format == DataFormat::DF_NONE || format == DataFormat::NHWC
...@@ -345,9 +345,21 @@ MaceTensor::~MaceTensor() = default; ...@@ -345,9 +345,21 @@ MaceTensor::~MaceTensor() = default;
const std::vector<int64_t> &MaceTensor::shape() const { return impl_->shape; } const std::vector<int64_t> &MaceTensor::shape() const { return impl_->shape; }
const std::shared_ptr<float> MaceTensor::data() const { return impl_->data; } const std::shared_ptr<float> MaceTensor::data() const {
return std::static_pointer_cast<float>(impl_->data);
}
std::shared_ptr<float> MaceTensor::data() {
return std::static_pointer_cast<float>(impl_->data);
}
std::shared_ptr<void> MaceTensor::raw_data() const {
return impl_->data;
}
std::shared_ptr<float> MaceTensor::data() { return impl_->data; } std::shared_ptr<void> MaceTensor::raw_mutable_data() {
return impl_->data;
}
DataFormat MaceTensor::data_format() const { DataFormat MaceTensor::data_format() const {
return impl_->format; return impl_->format;
...@@ -466,8 +478,9 @@ MaceStatus MaceEngine::Impl::Init( ...@@ -466,8 +478,9 @@ MaceStatus MaceEngine::Impl::Init(
<< "' does not belong to model's inputs: " << "' does not belong to model's inputs: "
<< MakeString(MapKeys(input_info_map_)); << MakeString(MapKeys(input_info_map_));
} }
DataType input_dt = input_info_map_[input_name].data_type();
Tensor *input_tensor = Tensor *input_tensor =
ws_->CreateTensor(input_name, device_->allocator(), DT_FLOAT); ws_->CreateTensor(input_name, device_->allocator(), input_dt);
// Resize to possible largest shape to avoid resize during running. // Resize to possible largest shape to avoid resize during running.
std::vector<index_t> shape(input_info_map_[input_name].dims_size()); std::vector<index_t> shape(input_info_map_[input_name].dims_size());
for (int i = 0; i < input_info_map_[input_name].dims_size(); ++i) { for (int i = 0; i < input_info_map_[input_name].dims_size(); ++i) {
...@@ -485,8 +498,9 @@ MaceStatus MaceEngine::Impl::Init( ...@@ -485,8 +498,9 @@ MaceStatus MaceEngine::Impl::Init(
<< MakeString(MapKeys(output_info_map_)); << MakeString(MapKeys(output_info_map_));
} }
#if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA) #if defined(MACE_ENABLE_HEXAGON) || defined(MACE_ENABLE_HTA)
DataType output_dt = output_info_map_[output_name].data_type();
Tensor *output_tensor = Tensor *output_tensor =
ws_->CreateTensor(output_name, device_->allocator(), DT_FLOAT); ws_->CreateTensor(output_name, device_->allocator(), output_dt);
output_tensor->set_data_format(NHWC); output_tensor->set_data_format(NHWC);
#endif #endif
} }
...@@ -572,54 +586,71 @@ MaceStatus MaceEngine::Impl::TransposeInput( ...@@ -572,54 +586,71 @@ MaceStatus MaceEngine::Impl::TransposeInput(
Tensor *input_tensor) { Tensor *input_tensor) {
bool has_data_format = input_tensor->data_format() != DataFormat::DF_NONE; bool has_data_format = input_tensor->data_format() != DataFormat::DF_NONE;
DataFormat data_format = DataFormat::DF_NONE; DataFormat data_format = DataFormat::DF_NONE;
DataType input_dt = input_tensor->dtype();
if (has_data_format) { if (has_data_format) {
std::vector<int> dst_dims;
if (device_->device_type() == DeviceType::CPU && if (device_->device_type() == DeviceType::CPU &&
input.second.shape().size() == 4 && input.second.shape().size() == 4 &&
input.second.data_format() == NHWC && input.second.data_format() == NHWC &&
!is_quantized_model_) { !is_quantized_model_) {
VLOG(1) << "Transform input " << input.first << " from NHWC to NCHW"; VLOG(1) << "Transform input " << input.first << " from NHWC to NCHW";
input_tensor->set_data_format(DataFormat::NCHW); input_tensor->set_data_format(DataFormat::NCHW);
std::vector<int> dst_dims = {0, 3, 1, 2}; dst_dims = {0, 3, 1, 2};
std::vector<index_t> output_shape =
TransposeShape<int64_t, index_t>(input.second.shape(), dst_dims);
MACE_RETURN_IF_ERROR(input_tensor->Resize(output_shape));
Tensor::MappingGuard input_guard(input_tensor);
float *input_data = input_tensor->mutable_data<float>();
return ops::Transpose(input.second.data().get(),
input.second.shape(),
dst_dims,
input_data);
} else if ( } else if (
(is_quantized_model_ || device_->device_type() == DeviceType::GPU) && (is_quantized_model_ || device_->device_type() == DeviceType::GPU) &&
input.second.shape().size() == 4 && input.second.shape().size() == 4 &&
input.second.data_format() == DataFormat::NCHW) { input.second.data_format() == DataFormat::NCHW) {
VLOG(1) << "Transform input " << input.first << " from NCHW to NHWC"; VLOG(1) << "Transform input " << input.first << " from NCHW to NHWC";
std::vector<int> dst_dims = {0, 2, 3, 1};
input_tensor->set_data_format(DataFormat::NHWC); input_tensor->set_data_format(DataFormat::NHWC);
dst_dims = {0, 2, 3, 1};
}
if (!dst_dims.empty()) {
std::vector<index_t> output_shape = std::vector<index_t> output_shape =
TransposeShape<int64_t, index_t>(input.second.shape(), dst_dims); TransposeShape<int64_t, index_t>(input.second.shape(), dst_dims);
MACE_RETURN_IF_ERROR(input_tensor->Resize(output_shape)); MACE_RETURN_IF_ERROR(input_tensor->Resize(output_shape));
Tensor::MappingGuard input_guard(input_tensor); Tensor::MappingGuard input_guard(input_tensor);
float *input_data = input_tensor->mutable_data<float>(); if (input_dt == DataType::DT_FLOAT) {
return ops::Transpose(input.second.data().get(), auto input_data = input_tensor->mutable_data<float>();
input.second.shape(), return ops::Transpose(input.second.data<float>().get(),
dst_dims, input.second.shape(),
input_data); dst_dims,
input_data,
input_dt);
} else if (input_dt == DataType::DT_INT32) {
auto input_data = input_tensor->mutable_data<int>();
return ops::Transpose(input.second.data<int>().get(),
input.second.shape(),
dst_dims,
input_data,
input_dt);
} else {
LOG(FATAL) << "MACE do not support the input data type: " << input_dt;
}
} }
data_format = input.second.data_format(); data_format = input.second.data_format();
} }
input_tensor->set_data_format(data_format); input_tensor->set_data_format(data_format);
MACE_RETURN_IF_ERROR(input_tensor->Resize(input.second.shape())); MACE_RETURN_IF_ERROR(input_tensor->Resize(input.second.shape()));
Tensor::MappingGuard input_guard(input_tensor); Tensor::MappingGuard input_guard(input_tensor);
float *input_data = input_tensor->mutable_data<float>(); if (input_dt == DataType::DT_FLOAT) {
memcpy(input_data, input.second.data().get(), auto input_data = input_tensor->mutable_data<float>();
input_tensor->size() * sizeof(float)); memcpy(input_data, input.second.data().get(),
input_tensor->size() * sizeof(float));
} else if (input_dt == DataType::DT_INT32) {
auto input_data = input_tensor->mutable_data<int>();
memcpy(input_data, input.second.data().get(),
input_tensor->size() * sizeof(int));
} else {
LOG(FATAL) << "MACE do not support the input data type: " << input_dt;
}
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
MaceStatus MaceEngine::Impl::TransposeOutput( MaceStatus MaceEngine::Impl::TransposeOutput(
const mace::Tensor *output_tensor, const mace::Tensor *output_tensor,
std::pair<const std::string, mace::MaceTensor> *output) { std::pair<const std::string, mace::MaceTensor> *output) {
DataType output_dt = output_tensor->dtype();
// save output // save output
if (output_tensor != nullptr && output->second.data() != nullptr) { if (output_tensor != nullptr && output->second.data() != nullptr) {
if (output_tensor->data_format() != DataFormat::DF_NONE && if (output_tensor->data_format() != DataFormat::DF_NONE &&
...@@ -655,11 +686,23 @@ MaceStatus MaceEngine::Impl::TransposeOutput( ...@@ -655,11 +686,23 @@ MaceStatus MaceEngine::Impl::TransposeOutput(
<< output->second.impl_->buffer_size; << output->second.impl_->buffer_size;
output->second.impl_->shape = shape; output->second.impl_->shape = shape;
Tensor::MappingGuard output_guard(output_tensor); Tensor::MappingGuard output_guard(output_tensor);
const float *output_data = output_tensor->data<float>(); if (output_dt == DataType::DT_FLOAT) {
return ops::Transpose(output_data, auto output_data = output_tensor->data<float>();
output_tensor->shape(), return ops::Transpose(output_data,
dst_dims, output_tensor->shape(),
output->second.data().get()); dst_dims,
output->second.data<float>().get());
} else if (output_dt == DataType::DT_INT32) {
auto output_data = output_tensor->data<int>();
return ops::Transpose(output_data,
output_tensor->shape(),
dst_dims,
output->second.data<int>().get(),
output_dt);
} else {
LOG(FATAL) << "MACE do not support the output data type: " << output_dt;
return MaceStatus::MACE_INVALID_ARGS;
}
} else { } else {
Tensor::MappingGuard output_guard(output_tensor); Tensor::MappingGuard output_guard(output_tensor);
auto shape = output_tensor->shape(); auto shape = output_tensor->shape();
...@@ -670,8 +713,17 @@ MaceStatus MaceEngine::Impl::TransposeOutput( ...@@ -670,8 +713,17 @@ MaceStatus MaceEngine::Impl::TransposeOutput(
<< MakeString<int64_t>(shape) << " vs buffer size " << MakeString<int64_t>(shape) << " vs buffer size "
<< output->second.impl_->buffer_size; << output->second.impl_->buffer_size;
output->second.impl_->shape = shape; output->second.impl_->shape = shape;
std::memcpy(output->second.data().get(), output_tensor->data<float>(), if (output_dt == DataType::DT_FLOAT) {
output_size * sizeof(float)); std::memcpy(output->second.data<float>().get(),
output_tensor->data<float>(),
output_size * sizeof(float));
} else if (output_dt == DataType::DT_INT32) {
std::memcpy(output->second.data<int>().get(),
output_tensor->data<int>(),
output_size * sizeof(int));
} else {
LOG(FATAL) << "MACE do not support the output data type: " << output_dt;
}
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
} else { } else {
......
...@@ -14,19 +14,14 @@ ...@@ -14,19 +14,14 @@
#include "mace/ops/common/transpose.h" #include "mace/ops/common/transpose.h"
#include <algorithm>
#if defined(MACE_ENABLE_NEON) #if defined(MACE_ENABLE_NEON)
#include <arm_neon.h> #include <arm_neon.h>
#endif #endif
#include "mace/core/types.h"
#include "mace/utils/logging.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace { namespace transpose {
void TransposeNHWCToNCHWC3(const float *input, void TransposeNHWCToNCHWC3(const float *input,
float *output, float *output,
const index_t height, const index_t height,
...@@ -100,119 +95,44 @@ void TransposeNCHWToNHWCC2(const float *input, ...@@ -100,119 +95,44 @@ void TransposeNCHWToNHWCC2(const float *input,
#endif #endif
} }
} }
} // namespace
MaceStatus Transpose(const float *input, void TransposeNHWCToNCHWC3(const int *input,
const std::vector<int64_t> &input_shape, int *output,
const std::vector<int> &dst_dims, const index_t height,
float *output) { const index_t width) {
MACE_CHECK((input_shape.size() == 2 && dst_dims.size() == 2) || index_t image_size = height * width;
(input_shape.size() == 4 && dst_dims.size() == 4),
"Only support 2D or 4D transpose");
std::vector<index_t> output_shape; #pragma omp parallel for
for (size_t i = 0; i < dst_dims.size(); ++i) { for (index_t h = 0; h < height; ++h) {
output_shape.push_back(input_shape[dst_dims[i]]); index_t in_offset = h * width * 3;
} index_t out_offset = h * width;
if (input_shape.size() == 2) { for (index_t w = 0; w < width; ++w) {
MACE_CHECK(dst_dims[0] == 1 && dst_dims[1] == 0, "no need transform"); for (index_t c = 0; c < 3; ++c) {
index_t height = input_shape[0]; output[out_offset + c * image_size + w] = input[in_offset + w * 3 + c];
index_t width = input_shape[1];
index_t stride_i = height;
index_t stride_j = width;
index_t tile_size = height > 512 || width > 512 ? 64 : 32;
#pragma omp parallel for collapse(2)
for (index_t i = 0; i < height; i += tile_size) {
for (index_t j = 0; j < width; j += tile_size) {
index_t end_i = std::min(i + tile_size, height);
index_t end_j = std::min(j + tile_size, width);
for (index_t tile_i = i; tile_i < end_i; ++tile_i) {
for (index_t tile_j = j; tile_j < end_j; ++tile_j) {
output[tile_j * stride_i + tile_i] =
input[tile_i * stride_j + tile_j];
}
}
} }
} }
} else if (input_shape.size() == 4) { }
std::vector<int> transpose_order_from_NHWC_to_NCHW{0, 3, 1, 2}; }
std::vector<int> transpose_order_from_NCHW_to_NHWC{0, 2, 3, 1};
index_t batch_size = input_shape[1] * input_shape[2] * input_shape[3];
if (dst_dims == transpose_order_from_NHWC_to_NCHW && input_shape[3] == 3) {
for (index_t b = 0; b < input_shape[0]; ++b) {
TransposeNHWCToNCHWC3(input + b * batch_size,
output + b * batch_size,
input_shape[1],
input_shape[2]);
}
} else if (dst_dims == transpose_order_from_NCHW_to_NHWC
&& input_shape[1] == 2) {
for (index_t b = 0; b < input_shape[0]; ++b) {
TransposeNCHWToNHWCC2(input + b * batch_size,
output + b * batch_size,
input_shape[2],
input_shape[3]);
}
} else if (dst_dims == std::vector<int>{0, 2, 1, 3}) {
index_t height = input_shape[1];
index_t width = input_shape[2];
index_t channel = input_shape[3];
index_t channel_raw_size = channel * sizeof(float);
index_t stride_i = height;
index_t stride_j = width;
index_t tile_size = std::max(static_cast<index_t>(1),
static_cast<index_t>(std::sqrt(
8 * 1024 / channel)));
#pragma omp parallel for collapse(2)
for (index_t i = 0; i < height; i += tile_size) {
for (index_t j = 0; j < width; j += tile_size) {
index_t end_i = std::min(i + tile_size, height);
index_t end_j = std::min(j + tile_size, width);
for (index_t tile_i = i; tile_i < end_i; ++tile_i) {
for (index_t tile_j = j; tile_j < end_j; ++tile_j) {
memcpy(output + (tile_j * stride_i + tile_i) * channel,
input + (tile_i * stride_j + tile_j) * channel,
channel_raw_size);
}
}
}
}
} else {
std::vector<index_t>
in_stride{input_shape[1] * input_shape[2] * input_shape[3],
input_shape[2] * input_shape[3], input_shape[3], 1};
std::vector<index_t>
out_stride{output_shape[1] * output_shape[2] * output_shape[3],
output_shape[2] * output_shape[3], output_shape[3], 1};
std::vector<index_t> idim(4, 0); void TransposeNCHWToNHWCC2(const int *input,
std::vector<index_t> odim(4, 0); int *output,
for (odim[0] = 0; odim[0] < output_shape[0]; ++odim[0]) { const index_t height,
for (odim[1] = 0; odim[1] < output_shape[1]; ++odim[1]) { const index_t width) {
for (odim[2] = 0; odim[2] < output_shape[2]; ++odim[2]) { index_t image_size = height * width;
for (odim[3] = 0; odim[3] < output_shape[3]; ++odim[3]) { #pragma omp parallel for
idim[dst_dims[0]] = odim[0]; for (index_t h = 0; h < height; ++h) {
idim[dst_dims[1]] = odim[1]; index_t in_offset = h * width;
idim[dst_dims[2]] = odim[2]; index_t out_offset = h * width * 2;
idim[dst_dims[3]] = odim[3];
output[odim[0] * out_stride[0] + odim[1] * out_stride[1] for (index_t w = 0; w < width; ++w) {
+ odim[2] * out_stride[2] + odim[3]] = for (index_t c = 0; c < 2; ++c) {
input[idim[0] * in_stride[0] + idim[1] * in_stride[1] output[out_offset + w * 2 + c] = input[in_offset + c * image_size + w];
+ idim[2] * in_stride[2] + idim[3]];
}
}
}
} }
} }
} else {
MACE_NOT_IMPLEMENTED;
} }
return MaceStatus::MACE_SUCCESS;
} }
} // namespace transpose
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
...@@ -15,17 +15,154 @@ ...@@ -15,17 +15,154 @@
#ifndef MACE_OPS_COMMON_TRANSPOSE_H_ #ifndef MACE_OPS_COMMON_TRANSPOSE_H_
#define MACE_OPS_COMMON_TRANSPOSE_H_ #define MACE_OPS_COMMON_TRANSPOSE_H_
#include <algorithm>
#include <vector> #include <vector>
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/core/tensor.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace transpose {
MaceStatus Transpose(const float *input, void TransposeNHWCToNCHWC3(const float *input,
float *output,
const index_t height,
const index_t width);
void TransposeNHWCToNCHWC3(const int *input,
int *output,
const index_t height,
const index_t width);
void TransposeNCHWToNHWCC2(const float *input,
float *output,
const index_t height,
const index_t width);
void TransposeNCHWToNHWCC2(const int *input,
int *output,
const index_t height,
const index_t width);
} // namespace transpose
template <typename T>
MaceStatus Transpose(const T *input,
const std::vector<int64_t> &input_shape, const std::vector<int64_t> &input_shape,
const std::vector<int> &dst_dims, const std::vector<int> &dst_dims,
float *output); T *output,
DataType data_type = DataType::DT_FLOAT) {
MACE_CHECK((input_shape.size() == 2 && dst_dims.size() == 2) ||
(input_shape.size() == 4 && dst_dims.size() == 4),
"Only support 2D or 4D transpose");
std::vector<index_t> output_shape;
for (size_t i = 0; i < dst_dims.size(); ++i) {
output_shape.push_back(input_shape[dst_dims[i]]);
}
if (input_shape.size() == 2) {
MACE_CHECK(dst_dims[0] == 1 && dst_dims[1] == 0, "no need transform");
index_t height = input_shape[0];
index_t width = input_shape[1];
index_t stride_i = height;
index_t stride_j = width;
index_t tile_size = height > 512 || width > 512 ? 64 : 32;
#pragma omp parallel for collapse(2)
for (index_t i = 0; i < height; i += tile_size) {
for (index_t j = 0; j < width; j += tile_size) {
index_t end_i = std::min(i + tile_size, height);
index_t end_j = std::min(j + tile_size, width);
for (index_t tile_i = i; tile_i < end_i; ++tile_i) {
for (index_t tile_j = j; tile_j < end_j; ++tile_j) {
output[tile_j * stride_i + tile_i] =
input[tile_i * stride_j + tile_j];
}
}
}
}
} else if (input_shape.size() == 4) {
std::vector<int> transpose_order_from_NHWC_to_NCHW{0, 3, 1, 2};
std::vector<int> transpose_order_from_NCHW_to_NHWC{0, 2, 3, 1};
index_t batch_size = input_shape[1] * input_shape[2] * input_shape[3];
bool supported_dt = (data_type == DataType::DT_FLOAT ||
data_type == DataType::DT_INT32);
if (dst_dims == transpose_order_from_NHWC_to_NCHW && input_shape[3] == 3 &&
supported_dt) {
for (index_t b = 0; b < input_shape[0]; ++b) {
transpose::TransposeNHWCToNCHWC3(input + b * batch_size,
output + b * batch_size,
input_shape[1],
input_shape[2]);
}
} else if (dst_dims == transpose_order_from_NCHW_to_NHWC
&& input_shape[1] == 2 && supported_dt) {
for (index_t b = 0; b < input_shape[0]; ++b) {
transpose::TransposeNCHWToNHWCC2(input + b * batch_size,
output + b * batch_size,
input_shape[2],
input_shape[3]);
}
} else if (dst_dims == std::vector<int>{0, 2, 1, 3}) {
index_t height = input_shape[1];
index_t width = input_shape[2];
index_t channel = input_shape[3];
index_t channel_raw_size = channel * sizeof(T);
index_t stride_i = height;
index_t stride_j = width;
index_t tile_size = std::max(static_cast<index_t>(1),
static_cast<index_t>(std::sqrt(
8 * 1024 / channel)));
#pragma omp parallel for collapse(2)
for (index_t i = 0; i < height; i += tile_size) {
for (index_t j = 0; j < width; j += tile_size) {
index_t end_i = std::min(i + tile_size, height);
index_t end_j = std::min(j + tile_size, width);
for (index_t tile_i = i; tile_i < end_i; ++tile_i) {
for (index_t tile_j = j; tile_j < end_j; ++tile_j) {
memcpy(output + (tile_j * stride_i + tile_i) * channel,
input + (tile_i * stride_j + tile_j) * channel,
channel_raw_size);
}
}
}
}
} else {
std::vector<index_t>
in_stride{input_shape[1] * input_shape[2] * input_shape[3],
input_shape[2] * input_shape[3], input_shape[3], 1};
std::vector<index_t>
out_stride{output_shape[1] * output_shape[2] * output_shape[3],
output_shape[2] * output_shape[3], output_shape[3], 1};
std::vector<index_t> idim(4, 0);
std::vector<index_t> odim(4, 0);
for (odim[0] = 0; odim[0] < output_shape[0]; ++odim[0]) {
for (odim[1] = 0; odim[1] < output_shape[1]; ++odim[1]) {
for (odim[2] = 0; odim[2] < output_shape[2]; ++odim[2]) {
for (odim[3] = 0; odim[3] < output_shape[3]; ++odim[3]) {
idim[dst_dims[0]] = odim[0];
idim[dst_dims[1]] = odim[1];
idim[dst_dims[2]] = odim[2];
idim[dst_dims[3]] = odim[3];
output[odim[0] * out_stride[0] + odim[1] * out_stride[1]
+ odim[2] * out_stride[2] + odim[3]] =
input[idim[0] * in_stride[0] + idim[1] * in_stride[1]
+ idim[2] * in_stride[2] + idim[3]];
}
}
}
}
}
} else {
MACE_NOT_IMPLEMENTED;
}
return MaceStatus::MACE_SUCCESS;
}
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
......
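Since `Transpose` is now a header-only template in `mace/ops/common/transpose.h`, float and int32 buffers share one implementation and the fast NHWC/NCHW paths dispatch to the typed helpers in `transpose::`. A hedged usage sketch — the shape and buffer names are illustrative, not taken from the commit:

```cpp
// Sketch: transpose a {1, 4, 5, 3} NHWC int32 buffer into NCHW layout.
#include <vector>
#include "mace/ops/common/transpose.h"

mace::MaceStatus ToNCHW(const int *src, int *dst) {
  const std::vector<int64_t> shape{1, 4, 5, 3};
  const std::vector<int> perm{0, 3, 1, 2};  // NHWC -> NCHW
  // T is deduced as int from the pointers; the trailing data_type argument
  // defaults to DT_FLOAT, so it is passed explicitly to hit the int32 path.
  return mace::ops::Transpose(src, shape, perm, dst,
                              mace::DataType::DT_INT32);
}
```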
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/ops/common/transpose.h"
#include "mace/utils/math.h"
namespace mace { namespace mace {
namespace ops { namespace ops {
...@@ -33,21 +35,35 @@ class ExpandDimsOp<DeviceType::CPU, T> : public Operation { ...@@ -33,21 +35,35 @@ class ExpandDimsOp<DeviceType::CPU, T> : public Operation {
const Tensor *input = this->Input(0); const Tensor *input = this->Input(0);
Tensor *output = this->Output(0); Tensor *output = this->Output(0);
index_t input_dims_size = input->dim_size(); index_t input_dims_size = input->dim_size();
if ( axis_ < 0 ) { if (axis_ < 0) {
axis_ += input_dims_size + 1; axis_ += input_dims_size + 1;
} }
MACE_CHECK(axis_ >= 0 && axis_ <= input_dims_size, MACE_CHECK(axis_ >= 0 && axis_ <= input_dims_size,
"axis is out of bound: ", axis_); "axis is out of bound: ", axis_);
const std::vector<index_t> input_shape = input->shape(); const std::vector<index_t> input_shape = input->shape();
std::vector<index_t> output_shape; std::vector<index_t> output_shape(input_shape);
output_shape.insert(output_shape.end(), input_shape.begin(), output_shape.insert(output_shape.begin() + axis_, 1);
input_shape.begin() + axis_);
output_shape.insert(output_shape.end(), 1);
output_shape.insert(output_shape.end(), input_shape.begin() + axis_,
input_shape.end());
output->ReuseTensorBuffer(*input); bool has_data_format = Operation::GetOptionalArg<int>(
output->Reshape(output_shape); "has_data_format", 0) == 1;
if (has_data_format && output_shape.size() == 4) {
// only tensorflow support expand dim, so the default format is NHWC
// transform NHWC to NCHW
auto t_output_shape = TransposeShape<int64_t, int64_t>(output_shape,
{0, 3, 1, 2});
output->Resize(t_output_shape);
Tensor::MappingGuard input_guard(input);
Tensor::MappingGuard output_guard(output);
auto input_data = input->data<T>();
auto output_data = output->mutable_data<T>();
Transpose(input_data, output_shape, {0, 3, 1, 2}, output_data);
} else {
output->Resize(output_shape);
Tensor::MappingGuard input_guard(input);
auto input_data = input->data<T>();
output->Copy<T>(input_data, input->size());
}
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
...@@ -62,11 +78,6 @@ void RegisterExpandDims(OpRegistryBase *op_registry) { ...@@ -62,11 +78,6 @@ void RegisterExpandDims(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "ExpandDims", ExpandDimsOp, MACE_REGISTER_OP(op_registry, "ExpandDims", ExpandDimsOp,
DeviceType::CPU, int32_t); DeviceType::CPU, int32_t);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "ExpandDims", ExpandDimsOp,
DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
} }
} // namespace ops } // namespace ops
......
...@@ -326,7 +326,7 @@ class MACE_API MaceTensor { ...@@ -326,7 +326,7 @@ class MACE_API MaceTensor {
// of shared_ptr and manage the life cycle of the buffer by yourself. // of shared_ptr and manage the life cycle of the buffer by yourself.
// For example, std::shared_ptr<float>(raw_buffer, [](float *){}); // For example, std::shared_ptr<float>(raw_buffer, [](float *){});
MaceTensor(const std::vector<int64_t> &shape, MaceTensor(const std::vector<int64_t> &shape,
std::shared_ptr<float> data, std::shared_ptr<void> data,
const DataFormat format = DataFormat::NHWC); const DataFormat format = DataFormat::NHWC);
MaceTensor(); MaceTensor();
MaceTensor(const MaceTensor &other); MaceTensor(const MaceTensor &other);
...@@ -339,8 +339,20 @@ class MACE_API MaceTensor { ...@@ -339,8 +339,20 @@ class MACE_API MaceTensor {
const std::vector<int64_t> &shape() const; const std::vector<int64_t> &shape() const;
const std::shared_ptr<float> data() const; const std::shared_ptr<float> data() const;
std::shared_ptr<float> data(); std::shared_ptr<float> data();
template <typename T>
const std::shared_ptr<T> data() const {
return std::static_pointer_cast<T>(raw_data());
}
template <typename T>
std::shared_ptr<T> data() {
return std::static_pointer_cast<T>(raw_mutable_data());
}
DataFormat data_format() const; DataFormat data_format() const;
private:
std::shared_ptr<void> raw_data() const;
std::shared_ptr<void> raw_mutable_data();
private: private:
class Impl; class Impl;
std::unique_ptr<Impl> impl_; std::unique_ptr<Impl> impl_;
......
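On the output side, the new templated `data<T>()` accessor replaces the float-only getters for non-float tensors. A hedged read-back sketch, assuming an int32 output tensor named "indices" (the name is hypothetical) after `MaceEngine::Run` has filled `outputs`:

```cpp
// Sketch: read an int32 output after running the engine; error handling
// and the actual post-processing are omitted.
#include <map>
#include <string>
#include "mace/public/mace.h"

void DumpInt32Output(const std::map<std::string, mace::MaceTensor> &outputs) {
  const mace::MaceTensor &t = outputs.at("indices");
  // data<T>() static-casts the underlying shared_ptr<void>; the caller must
  // know the tensor's data type (int32 here) from the converted model.
  const int32_t *vals = t.data<int32_t>().get();
  int64_t count = 1;
  for (int64_t d : t.shape()) count *= d;
  for (int64_t i = 0; i < count; ++i) {
    // e.g. print or post-process vals[i]
  }
}
```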
...@@ -47,6 +47,11 @@ data_format_map = { ...@@ -47,6 +47,11 @@ data_format_map = {
'OIHW': cvt.DataFormat.OIHW, 'OIHW': cvt.DataFormat.OIHW,
} }
data_type_map = {
'float32': mace_pb2.DT_FLOAT,
'int32': mace_pb2.DT_INT32,
}
def parse_data_type(data_type, device_type): def parse_data_type(data_type, device_type):
if device_type == cvt.DeviceType.CPU.value or \ if device_type == cvt.DeviceType.CPU.value or \
...@@ -141,6 +146,7 @@ def main(unused_args): ...@@ -141,6 +146,7 @@ def main(unused_args):
option.data_type = parse_data_type(FLAGS.data_type, option.device) option.data_type = parse_data_type(FLAGS.data_type, option.device)
input_node_names = FLAGS.input_node.split(',') input_node_names = FLAGS.input_node.split(',')
input_data_types = FLAGS.input_data_types.split(',')
input_node_shapes = FLAGS.input_shape.split(':') input_node_shapes = FLAGS.input_shape.split(':')
input_node_formats = FLAGS.input_data_formats.split(",") input_node_formats = FLAGS.input_data_formats.split(",")
if FLAGS.input_range: if FLAGS.input_range:
...@@ -152,10 +158,8 @@ def main(unused_args): ...@@ -152,10 +158,8 @@ def main(unused_args):
for i in six.moves.range(len(input_node_names)): for i in six.moves.range(len(input_node_names)):
input_node = cvt.NodeInfo() input_node = cvt.NodeInfo()
input_node.name = input_node_names[i] input_node.name = input_node_names[i]
if len(input_node_formats) == 1: input_node.data_type = data_type_map[input_data_types[i]]
input_node.data_format = data_format_map[input_node_formats[0]] input_node.data_format = data_format_map[input_node_formats[i]]
else:
input_node.data_format = data_format_map[input_node_formats[i]]
input_node.shape = parse_int_array_from_str(input_node_shapes[i]) input_node.shape = parse_int_array_from_str(input_node_shapes[i])
if input_node.data_format == cvt.DataFormat.NCHW and\ if input_node.data_format == cvt.DataFormat.NCHW and\
len(input_node.shape) == 4: len(input_node.shape) == 4:
...@@ -166,6 +170,7 @@ def main(unused_args): ...@@ -166,6 +170,7 @@ def main(unused_args):
option.add_input_node(input_node) option.add_input_node(input_node)
output_node_names = FLAGS.output_node.split(',') output_node_names = FLAGS.output_node.split(',')
output_data_types = FLAGS.output_data_types.split(',')
output_node_shapes = FLAGS.output_shape.split(':') output_node_shapes = FLAGS.output_shape.split(':')
output_node_formats = FLAGS.output_data_formats.split(",") output_node_formats = FLAGS.output_data_formats.split(",")
if len(output_node_names) != len(output_node_shapes): if len(output_node_names) != len(output_node_shapes):
...@@ -173,10 +178,8 @@ def main(unused_args): ...@@ -173,10 +178,8 @@ def main(unused_args):
for i in six.moves.range(len(output_node_names)): for i in six.moves.range(len(output_node_names)):
output_node = cvt.NodeInfo() output_node = cvt.NodeInfo()
output_node.name = output_node_names[i] output_node.name = output_node_names[i]
if len(output_node_formats) == 1: output_node.data_type = data_type_map[output_data_types[i]]
output_node.data_format = data_format_map[output_node_formats[0]] output_node.data_format = data_format_map[output_node_formats[i]]
else:
output_node.data_format = data_format_map[output_node_formats[i]]
output_node.shape = parse_int_array_from_str(output_node_shapes[i]) output_node.shape = parse_int_array_from_str(output_node_shapes[i])
if output_node.data_format == cvt.DataFormat.NCHW and\ if output_node.data_format == cvt.DataFormat.NCHW and\
len(output_node.shape) == 4: len(output_node.shape) == 4:
...@@ -290,6 +293,11 @@ def parse_args(): ...@@ -290,6 +293,11 @@ def parse_args():
type=str, type=str,
default="input_node", default="input_node",
help="e.g., input_node") help="e.g., input_node")
parser.add_argument(
"--input_data_types",
type=str,
default="float32",
help="e.g., float32|int32")
parser.add_argument( parser.add_argument(
"--input_data_formats", "--input_data_formats",
type=str, type=str,
...@@ -297,6 +305,11 @@ def parse_args(): ...@@ -297,6 +305,11 @@ def parse_args():
help="e.g., NHWC,NONE") help="e.g., NHWC,NONE")
parser.add_argument( parser.add_argument(
"--output_node", type=str, default="softmax", help="e.g., softmax") "--output_node", type=str, default="softmax", help="e.g., softmax")
parser.add_argument(
"--output_data_types",
type=str,
default="float32",
help="e.g., float32|int32")
parser.add_argument( parser.add_argument(
"--output_data_formats", "--output_data_formats",
type=str, type=str,
......
...@@ -298,6 +298,7 @@ class NodeInfo(object): ...@@ -298,6 +298,7 @@ class NodeInfo(object):
def __init__(self): def __init__(self):
self._name = None self._name = None
self._data_type = mace_pb2.DT_FLOAT
self._shape = [] self._shape = []
self._data_format = DataFormat.NHWC self._data_format = DataFormat.NHWC
self._range = [-1.0, 1.0] self._range = [-1.0, 1.0]
...@@ -306,6 +307,10 @@ class NodeInfo(object): ...@@ -306,6 +307,10 @@ class NodeInfo(object):
def name(self): def name(self):
return self._name return self._name
@property
def data_type(self):
return self._data_type
@property @property
def shape(self): def shape(self):
return self._shape return self._shape
...@@ -322,6 +327,10 @@ class NodeInfo(object): ...@@ -322,6 +327,10 @@ class NodeInfo(object):
def name(self, name): def name(self, name):
self._name = name self._name = name
@data_type.setter
def data_type(self, data_type):
self._data_type = data_type
@shape.setter @shape.setter
def shape(self, shape): def shape(self, shape):
self._shape = shape self._shape = shape
......
...@@ -102,6 +102,7 @@ TFSupportedOps = [ ...@@ -102,6 +102,7 @@ TFSupportedOps = [
'Mean', 'Mean',
'Const', 'Const',
'Gather', 'Gather',
'GatherV2',
'StridedSlice', 'StridedSlice',
'Slice', 'Slice',
'ReverseV2', 'ReverseV2',
...@@ -241,6 +242,7 @@ class TensorflowConverter(base_converter.ConverterInterface): ...@@ -241,6 +242,7 @@ class TensorflowConverter(base_converter.ConverterInterface):
TFOpType.Mean.name: self.convert_mean, TFOpType.Mean.name: self.convert_mean,
TFOpType.Const.name: self.convert_nop, TFOpType.Const.name: self.convert_nop,
TFOpType.Gather.name: self.convert_gather, TFOpType.Gather.name: self.convert_gather,
TFOpType.GatherV2.name: self.convert_gather,
TFOpType.StridedSlice.name: self.convert_stridedslice, TFOpType.StridedSlice.name: self.convert_stridedslice,
TFOpType.Slice.name: self.convert_slice, TFOpType.Slice.name: self.convert_slice,
TFOpType.ReverseV2.name: self.convert_reverse, TFOpType.ReverseV2.name: self.convert_reverse,
...@@ -838,16 +840,11 @@ class TensorflowConverter(base_converter.ConverterInterface): ...@@ -838,16 +840,11 @@ class TensorflowConverter(base_converter.ConverterInterface):
op = self.convert_general_op(tf_op) op = self.convert_general_op(tf_op)
op.type = MaceOp.ExpandDims.name op.type = MaceOp.ExpandDims.name
axis_value = tf_op.inputs[1].eval().astype(np.int32)
axis_arg = op.arg.add() axis_arg = op.arg.add()
axis_arg.name = MaceKeyword.mace_axis_str axis_arg.name = MaceKeyword.mace_axis_str
try:
axis_value = tf_op.get_attr('dim')
except ValueError:
try:
axis_value = tf_op.get_attr('axis')
except ValueError:
axis_value = 0
axis_arg.i = axis_value axis_arg.i = axis_value
del op.input[1]
def convert_squeeze(self, tf_op): def convert_squeeze(self, tf_op):
op = self.convert_general_op(tf_op) op = self.convert_general_op(tf_op)
......
...@@ -323,7 +323,7 @@ class Transformer(base_converter.ConverterInterface): ...@@ -323,7 +323,7 @@ class Transformer(base_converter.ConverterInterface):
input_info.name = input_node.name input_info.name = input_node.name
input_info.data_format = input_node.data_format.value input_info.data_format = input_node.data_format.value
input_info.dims.extend(input_node.shape) input_info.dims.extend(input_node.shape)
input_info.data_type = mace_pb2.DT_FLOAT input_info.data_type = input_node.data_type
output_nodes = self._option.check_nodes.values() output_nodes = self._option.check_nodes.values()
for output_node in output_nodes: for output_node in output_nodes:
...@@ -332,7 +332,7 @@ class Transformer(base_converter.ConverterInterface): ...@@ -332,7 +332,7 @@ class Transformer(base_converter.ConverterInterface):
output_info.data_format = output_node.data_format.value output_info.data_format = output_node.data_format.value
output_info.dims.extend( output_info.dims.extend(
self._producer[output_node.name].output_shape[0].dims) self._producer[output_node.name].output_shape[0].dims)
output_info.data_type = mace_pb2.DT_FLOAT output_info.data_type = output_node.data_type
return False return False
......
...@@ -317,17 +317,18 @@ bool RunModel(const std::string &model_name, ...@@ -317,17 +317,18 @@ bool RunModel(const std::string &model_name,
std::map<std::string, mace::MaceTensor> outputs; std::map<std::string, mace::MaceTensor> outputs;
for (size_t i = 0; i < input_count; ++i) { for (size_t i = 0; i < input_count; ++i) {
// Allocate input and output // Allocate input and output
// only support float and int32, use char for generalization
// sizeof(int) == 4, sizeof(float) == 4
int64_t input_size = int64_t input_size =
std::accumulate(input_shapes[i].begin(), input_shapes[i].end(), 1, std::accumulate(input_shapes[i].begin(), input_shapes[i].end(), 4,
std::multiplies<int64_t>()); std::multiplies<int64_t>());
auto buffer_in = std::shared_ptr<float>(new float[input_size], auto buffer_in = std::shared_ptr<char>(new char[input_size],
std::default_delete<float[]>()); std::default_delete<char[]>());
// load input // load input
std::ifstream in_file(FLAGS_input_file + "_" + FormatName(input_names[i]), std::ifstream in_file(FLAGS_input_file + "_" + FormatName(input_names[i]),
std::ios::in | std::ios::binary); std::ios::in | std::ios::binary);
if (in_file.is_open()) { if (in_file.is_open()) {
in_file.read(reinterpret_cast<char *>(buffer_in.get()), in_file.read(buffer_in.get(), input_size);
input_size * sizeof(float));
in_file.close(); in_file.close();
} else { } else {
LOG(INFO) << "Open input file failed"; LOG(INFO) << "Open input file failed";
...@@ -338,11 +339,12 @@ bool RunModel(const std::string &model_name, ...@@ -338,11 +339,12 @@ bool RunModel(const std::string &model_name,
} }
for (size_t i = 0; i < output_count; ++i) { for (size_t i = 0; i < output_count; ++i) {
// only support float and int32, use char for generalization
int64_t output_size = int64_t output_size =
std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1, std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 4,
std::multiplies<int64_t>()); std::multiplies<int64_t>());
auto buffer_out = std::shared_ptr<float>(new float[output_size], auto buffer_out = std::shared_ptr<char>(new char[output_size],
std::default_delete<float[]>()); std::default_delete<char[]>());
outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out, outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out,
output_data_formats[i]); output_data_formats[i]);
} }
...@@ -454,12 +456,12 @@ bool RunModel(const std::string &model_name, ...@@ -454,12 +456,12 @@ bool RunModel(const std::string &model_name,
std::string output_name = std::string output_name =
FLAGS_output_file + "_" + FormatName(output_names[i]); FLAGS_output_file + "_" + FormatName(output_names[i]);
std::ofstream out_file(output_name, std::ios::binary); std::ofstream out_file(output_name, std::ios::binary);
// only support float and int32
int64_t output_size = int64_t output_size =
std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1, std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 4,
std::multiplies<int64_t>()); std::multiplies<int64_t>());
out_file.write( out_file.write(
reinterpret_cast<char *>(outputs[output_names[i]].data().get()), outputs[output_names[i]].data<char>().get(), output_size);
output_size * sizeof(float));
out_file.flush(); out_file.flush();
out_file.close(); out_file.close();
LOG(INFO) << "Write output file " << output_name << " with size " LOG(INFO) << "Write output file " << output_name << " with size "
...@@ -524,6 +526,7 @@ int Main(int argc, char **argv) { ...@@ -524,6 +526,7 @@ int Main(int argc, char **argv) {
// get cpu capability // get cpu capability
Capability cpu_capability = GetCapability(DeviceType::CPU); Capability cpu_capability = GetCapability(DeviceType::CPU);
float cpu_float32_performance = cpu_capability.float32_performance.exec_time;
bool ret = false; bool ret = false;
for (int i = 0; i < FLAGS_restart_round; ++i) { for (int i = 0; i < FLAGS_restart_round; ++i) {
...@@ -531,7 +534,7 @@ int Main(int argc, char **argv) { ...@@ -531,7 +534,7 @@ int Main(int argc, char **argv) {
ret = RunModel(FLAGS_model_name, ret = RunModel(FLAGS_model_name,
input_names, input_shape_vec, input_data_formats, input_names, input_shape_vec, input_data_formats,
output_names, output_shape_vec, output_data_formats, output_names, output_shape_vec, output_data_formats,
cpu_capability.float32_performance.exec_time); cpu_float32_performance);
} }
if (ret) { if (ret) {
return 0; return 0;
......
...@@ -397,6 +397,7 @@ class YAMLKeyword(object): ...@@ -397,6 +397,7 @@ class YAMLKeyword(object):
runtime = 'runtime' runtime = 'runtime'
data_type = 'data_type' data_type = 'data_type'
input_data_types = 'input_data_types' input_data_types = 'input_data_types'
output_data_types = 'output_data_types'
input_data_formats = 'input_data_formats' input_data_formats = 'input_data_formats'
output_data_formats = 'output_data_formats' output_data_formats = 'output_data_formats'
limit_opencl_kernel_time = 'limit_opencl_kernel_time' limit_opencl_kernel_time = 'limit_opencl_kernel_time'
......
...@@ -65,13 +65,13 @@ RuntimeTypeStrs = [ ...@@ -65,13 +65,13 @@ RuntimeTypeStrs = [
"cpu+gpu" "cpu+gpu"
] ]
InputDataTypeStrs = [ InOutDataTypeStrs = [
"int32", "int32",
"float32", "float32",
] ]
InputDataType = Enum('InputDataType', InOutDataType = Enum('InputDataType',
[(ele, ele) for ele in InputDataTypeStrs], [(ele, ele) for ele in InOutDataTypeStrs],
type=str) type=str)
FPDataTypeStrs = [ FPDataTypeStrs = [
...@@ -410,17 +410,23 @@ def format_model_config(flags): ...@@ -410,17 +410,23 @@ def format_model_config(flags):
else: else:
subgraph[key] = [] subgraph[key] = []
input_data_types = subgraph.get(YAMLKeyword.input_data_types, "") for key in [YAMLKeyword.input_data_types,
if input_data_types: YAMLKeyword.output_data_types]:
if not isinstance(input_data_types, list): if key == YAMLKeyword.input_data_types:
subgraph[YAMLKeyword.input_data_types] = [input_data_types] count = input_size
for input_data_type in subgraph[YAMLKeyword.input_data_types]: else:
mace_check(input_data_type in InputDataTypeStrs, count = output_size
ModuleName.YAML_CONFIG, data_types = subgraph.get(key, "")
"'input_data_types' must be in " if data_types:
+ str(InputDataTypeStrs)) if not isinstance(data_types, list):
else: subgraph[key] = [data_types] * count
subgraph[YAMLKeyword.input_data_types] = [] for data_type in subgraph[key]:
mace_check(data_type in InOutDataTypeStrs,
ModuleName.YAML_CONFIG,
key + " must be in "
+ str(InOutDataTypeStrs))
else:
subgraph[key] = [InOutDataType.float32] * count
input_data_formats = subgraph.get(YAMLKeyword.input_data_formats, input_data_formats = subgraph.get(YAMLKeyword.input_data_formats,
[]) [])
...@@ -722,8 +728,10 @@ def convert_model(configs, cl_mem_type): ...@@ -722,8 +728,10 @@ def convert_model(configs, cl_mem_type):
model_config[YAMLKeyword.model_sha256_checksum], model_config[YAMLKeyword.model_sha256_checksum],
model_config[YAMLKeyword.weight_sha256_checksum], model_config[YAMLKeyword.weight_sha256_checksum],
",".join(subgraphs[0][YAMLKeyword.input_tensors]), ",".join(subgraphs[0][YAMLKeyword.input_tensors]),
",".join(subgraphs[0][YAMLKeyword.input_data_types]),
",".join(subgraphs[0][YAMLKeyword.input_data_formats]), ",".join(subgraphs[0][YAMLKeyword.input_data_formats]),
",".join(subgraphs[0][YAMLKeyword.output_tensors]), ",".join(subgraphs[0][YAMLKeyword.output_tensors]),
",".join(subgraphs[0][YAMLKeyword.output_data_types]),
",".join(subgraphs[0][YAMLKeyword.output_data_formats]), ",".join(subgraphs[0][YAMLKeyword.output_data_formats]),
",".join(subgraphs[0][YAMLKeyword.check_tensors]), ",".join(subgraphs[0][YAMLKeyword.check_tensors]),
runtime, runtime,
......
...@@ -480,8 +480,10 @@ def gen_model_code(model_codegen_dir, ...@@ -480,8 +480,10 @@ def gen_model_code(model_codegen_dir,
model_sha256_checksum, model_sha256_checksum,
weight_sha256_checksum, weight_sha256_checksum,
input_nodes, input_nodes,
input_data_types,
input_data_formats, input_data_formats,
output_nodes, output_nodes,
output_data_types,
output_data_formats, output_data_formats,
check_nodes, check_nodes,
runtime, runtime,
...@@ -515,8 +517,10 @@ def gen_model_code(model_codegen_dir, ...@@ -515,8 +517,10 @@ def gen_model_code(model_codegen_dir,
"--model_checksum=%s" % model_sha256_checksum, "--model_checksum=%s" % model_sha256_checksum,
"--weight_checksum=%s" % weight_sha256_checksum, "--weight_checksum=%s" % weight_sha256_checksum,
"--input_node=%s" % input_nodes, "--input_node=%s" % input_nodes,
"--input_data_types=%s" % input_data_types,
"--input_data_formats=%s" % input_data_formats, "--input_data_formats=%s" % input_data_formats,
"--output_node=%s" % output_nodes, "--output_node=%s" % output_nodes,
"--output_data_types=%s" % output_data_types,
"--output_data_formats=%s" % output_data_formats, "--output_data_formats=%s" % output_data_formats,
"--check_node=%s" % check_nodes, "--check_node=%s" % check_nodes,
"--runtime=%s" % runtime, "--runtime=%s" % runtime,
......