提交 e7a89f6f 编写于 作者: L liuqi

Add an Init API to MaceEngine so it can return an error code.

上级 9e2ab0d1
...@@ -110,7 +110,8 @@ RUN apt-get install -y --no-install-recommends \ ...@@ -110,7 +110,8 @@ RUN apt-get install -y --no-install-recommends \
# Install tools # Install tools
RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com setuptools RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com setuptools
RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com tensorflow==1.6.0 \ RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com tensorflow==1.7.0 \
numpy>=1.14.0 \
scipy \ scipy \
jinja2 \ jinja2 \
pyyaml \ pyyaml \
......
...@@ -33,11 +33,13 @@ How to build ...@@ -33,11 +33,13 @@ How to build
+=====================+=================+===================================================================================================+ +=====================+=================+===================================================================================================+
| bazel | >= 0.5.4 | - | | bazel | >= 0.5.4 | - |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+ +---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| android-ndk | r12c | - | | android-ndk | r15c,r16b | - |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+ +---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| adb | >= 1.0.32 | apt install -y android-tools-adb | | adb | >= 1.0.32 | apt install -y android-tools-adb |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+ +---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| tensorflow | 1.4.0 | pip install tensorflow==1.4.0 | | tensorflow | 1.7.0 | pip install tensorflow==1.7.0 |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| numpy | >= 1.14.0 | pip install numpy |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+ +---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| scipy | >= 1.0.0 | pip install scipy | | scipy | >= 1.0.0 | pip install scipy |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+ +---------------------+-----------------+---------------------------------------------------------------------------------------------------+
...@@ -45,6 +47,10 @@ How to build ...@@ -45,6 +47,10 @@ How to build
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+ +---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| PyYaml | >= 3.12 | pip install pyyaml | | PyYaml | >= 3.12 | pip install pyyaml |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+ +---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| sh | >= 1.12.14 | pip install sh |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| filelock | >= 3.0.0 | pip install filelock |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
| docker(for caffe) | >= 17.09.0-ce | `install doc <https://docs.docker.com/install/linux/docker-ce/ubuntu/#set-up-the-repository>`__ | | docker(for caffe) | >= 17.09.0-ce | `install doc <https://docs.docker.com/install/linux/docker-ce/ubuntu/#set-up-the-repository>`__ |
+---------------------+-----------------+---------------------------------------------------------------------------------------------------+ +---------------------+-----------------+---------------------------------------------------------------------------------------------------+
...@@ -229,29 +235,47 @@ Caffe目前只支持最新版本,旧版本请使用Caffe的工具进行升级 ...@@ -229,29 +235,47 @@ Caffe目前只支持最新版本,旧版本请使用Caffe的工具进行升级
// 引入头文件 // 引入头文件
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/public/{MODEL_TAG}.h" #include "mace/public/mace_engine_factory.h"
// 0. 设置内部存储 // 0. 设置内部存储(设置一次即可)
const std::string file_path ="/path/to/store/internel/files"; const std::string file_path ="/path/to/store/internel/files";
std::shared_ptr<KVStorageFactory> storage_factory( std::shared_ptr<KVStorageFactory> storage_factory(
new FileStorageFactory(file_path)); new FileStorageFactory(file_path));
ConfigKVStorageFactory(storage_factory); ConfigKVStorageFactory(storage_factory);
//1. 从文件或代码中Load模型数据,也可通过自定义的方式来Load (例如可自己实现压缩加密等) //1. 声明设备类型(必须与build时指定的runtime一致)
// 如果使用的是数据嵌入的方式,将参数设为nullptr。 DeviceType device_type = DeviceType::GPU;
unsigned char *model_data = mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str());
//2. 创建net对象 //2. 定义输入输出名称数组
NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data);
//3. 声明设备类型(必须与build时指定的runtime一致)
DeviceType device_type = DeviceType::OPENCL;
//4. 定义输入输出名称数组
std::vector<std::string> input_names = {...}; std::vector<std::string> input_names = {...};
std::vector<std::string> output_names = {...}; std::vector<std::string> output_names = {...};
//5. 创建输入输出对象 //3. 创建MaceEngine对象
std::shared_ptr<mace::MaceEngine> engine;
MaceStatus create_engine_status;
// Create Engine
if (model_data_file.empty()) {
create_engine_status =
CreateMaceEngine(model_name.c_str(),
nullptr,
input_names,
output_names,
device_type,
&engine);
} else {
create_engine_status =
CreateMaceEngine(model_name.c_str(),
model_data_file.c_str(),
input_names,
output_names,
device_type,
&engine);
}
if (create_engine_status != MaceStatus::MACE_SUCCESS) {
// do something
}
//4. 创建输入输出对象
std::map<std::string, mace::MaceTensor> inputs; std::map<std::string, mace::MaceTensor> inputs;
std::map<std::string, mace::MaceTensor> outputs; std::map<std::string, mace::MaceTensor> outputs;
for (size_t i = 0; i < input_count; ++i) { for (size_t i = 0; i < input_count; ++i) {
...@@ -276,14 +300,6 @@ Caffe目前只支持最新版本,旧版本请使用Caffe的工具进行升级 ...@@ -276,14 +300,6 @@ Caffe目前只支持最新版本,旧版本请使用Caffe的工具进行升级
outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out); outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out);
} }
//6. 创建MaceEngine对象 //5. 执行模型,得到结果
mace::MaceEngine engine(&net_def, device_type, input_names, output_names);
//7. 如果设备类型是OPENCL或HEXAGON,可以在此释放model_data
if (device_type == DeviceType::OPENCL || device_type == DeviceType::HEXAGON) {
mace::MACE_MODEL_TAG::UnloadModelData(model_data);
}
//8. 执行模型,得到结果
engine.Run(inputs, &outputs); engine.Run(inputs, &outputs);
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#define MACE_CORE_ALLOCATOR_H_ #define MACE_CORE_ALLOCATOR_H_
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#include <map> #include <map>
#include <limits> #include <limits>
#include <vector> #include <vector>
...@@ -42,9 +43,10 @@ class Allocator { ...@@ -42,9 +43,10 @@ class Allocator {
public: public:
Allocator() {} Allocator() {}
virtual ~Allocator() noexcept {} virtual ~Allocator() noexcept {}
virtual void *New(size_t nbytes) const = 0; virtual MaceStatus New(size_t nbytes, void **result) const = 0;
virtual void *NewImage(const std::vector<size_t> &image_shape, virtual MaceStatus NewImage(const std::vector<size_t> &image_shape,
const DataType dt) const = 0; const DataType dt,
void **result) const = 0;
virtual void Delete(void *data) const = 0; virtual void Delete(void *data) const = 0;
virtual void DeleteImage(void *data) const = 0; virtual void DeleteImage(void *data) const = 0;
virtual void *Map(void *buffer, size_t offset, size_t nbytes) const = 0; virtual void *Map(void *buffer, size_t offset, size_t nbytes) const = 0;
...@@ -53,44 +55,54 @@ class Allocator { ...@@ -53,44 +55,54 @@ class Allocator {
std::vector<size_t> *mapped_image_pitch) const = 0; std::vector<size_t> *mapped_image_pitch) const = 0;
virtual void Unmap(void *buffer, void *mapper_ptr) const = 0; virtual void Unmap(void *buffer, void *mapper_ptr) const = 0;
virtual bool OnHost() const = 0; virtual bool OnHost() const = 0;
template <typename T>
T *New(size_t num_elements) {
if (num_elements > (std::numeric_limits<size_t>::max() / sizeof(T))) {
return nullptr;
}
void *p = New(sizeof(T) * num_elements);
T *typed_p = reinterpret_cast<T *>(p);
return typed_p;
}
}; };
class CPUAllocator : public Allocator { class CPUAllocator : public Allocator {
public: public:
~CPUAllocator() override {} ~CPUAllocator() override {}
void *New(size_t nbytes) const override { MaceStatus New(size_t nbytes, void **result) const override {
VLOG(3) << "Allocate CPU buffer: " << nbytes; VLOG(3) << "Allocate CPU buffer: " << nbytes;
if (nbytes == 0) {
return MaceStatus::MACE_SUCCESS;
}
void *data = nullptr; void *data = nullptr;
#if defined(__ANDROID__) || defined(__hexagon__) #if defined(__ANDROID__) || defined(__hexagon__)
data = memalign(kMaceAlignment, nbytes); data = memalign(kMaceAlignment, nbytes);
if (data == NULL) {
LOG(WARNING) << "Allocate CPU Buffer with "
<< nbytes << " bytes failed because of"
<< strerror(errno);
*result = nullptr;
return MaceStatus::MACE_OUT_OF_RESOURCES;
}
#else #else
MACE_CHECK(posix_memalign(&data, kMaceAlignment, nbytes) == 0); int ret = posix_memalign(&data, kMaceAlignment, nbytes);
if (ret != 0) {
LOG(WARNING) << "Allocate CPU Buffer with "
<< nbytes << " bytes failed because of"
<< strerror(errno);
*result = nullptr;
return MaceStatus::MACE_OUT_OF_RESOURCES;
}
#endif #endif
MACE_CHECK_NOTNULL(data);
// TODO(heliangliang) This should be avoided sometimes // TODO(heliangliang) This should be avoided sometimes
memset(data, 0, nbytes); memset(data, 0, nbytes);
return data; *result = data;
return MaceStatus::MACE_SUCCESS;
} }
void *NewImage(const std::vector<size_t> &shape, MaceStatus NewImage(const std::vector<size_t> &shape,
const DataType dt) const override { const DataType dt,
void **result) const override {
MACE_UNUSED(shape); MACE_UNUSED(shape);
MACE_UNUSED(dt); MACE_UNUSED(dt);
MACE_UNUSED(result);
LOG(FATAL) << "Allocate CPU image"; LOG(FATAL) << "Allocate CPU image";
return nullptr; return MaceStatus::MACE_SUCCESS;
} }
void Delete(void *data) const override { void Delete(void *data) const override {
MACE_CHECK_NOTNULL(data);
VLOG(3) << "Free CPU buffer"; VLOG(3) << "Free CPU buffer";
free(data); free(data);
} }
......
...@@ -38,6 +38,11 @@ class BufferBase { ...@@ -38,6 +38,11 @@ class BufferBase {
virtual void *raw_mutable_data() = 0; virtual void *raw_mutable_data() = 0;
virtual MaceStatus Allocate(index_t nbytes) = 0;
virtual MaceStatus Allocate(const std::vector<size_t> &shape,
DataType data_type) = 0;
virtual void *Map(index_t offset, virtual void *Map(index_t offset,
index_t length, index_t length,
std::vector<size_t> *pitch) const = 0; std::vector<size_t> *pitch) const = 0;
...@@ -48,7 +53,7 @@ class BufferBase { ...@@ -48,7 +53,7 @@ class BufferBase {
virtual void UnMap() = 0; virtual void UnMap() = 0;
virtual void Resize(index_t size) = 0; virtual MaceStatus Resize(index_t nbytes) = 0;
virtual void Copy(void *src, index_t offset, index_t length) = 0; virtual void Copy(void *src, index_t offset, index_t length) = 0;
...@@ -83,14 +88,6 @@ class Buffer : public BufferBase { ...@@ -83,14 +88,6 @@ class Buffer : public BufferBase {
mapped_buf_(nullptr), mapped_buf_(nullptr),
is_data_owner_(true) {} is_data_owner_(true) {}
Buffer(Allocator *allocator, index_t size)
: BufferBase(size),
allocator_(allocator),
mapped_buf_(nullptr),
is_data_owner_(true) {
buf_ = allocator->New(size);
}
Buffer(Allocator *allocator, void *data, index_t size) Buffer(Allocator *allocator, void *data, index_t size)
: BufferBase(size), : BufferBase(size),
allocator_(allocator), allocator_(allocator),
...@@ -132,6 +129,31 @@ class Buffer : public BufferBase { ...@@ -132,6 +129,31 @@ class Buffer : public BufferBase {
} }
} }
MaceStatus Allocate(index_t nbytes) {
if (nbytes <= 0) {
return MaceStatus::MACE_SUCCESS;
}
MACE_CHECK(is_data_owner_,
"data is not owned by this buffer, cannot reallocate");
if (mapped_buf_ != nullptr) {
UnMap();
}
if (buf_ != nullptr) {
allocator_->Delete(buf_);
}
size_ = nbytes;
return allocator_->New(nbytes, &buf_);
}
MaceStatus Allocate(const std::vector<size_t> &shape,
DataType data_type) {
if (shape.empty()) return MaceStatus::MACE_SUCCESS;
index_t nbytes = std::accumulate(shape.begin(), shape.end(),
1, std::multiplies<size_t>())
* GetEnumTypeSize(data_type);
return this->Allocate(nbytes);
}
void *Map(index_t offset, index_t length, std::vector<size_t> *pitch) const { void *Map(index_t offset, index_t length, std::vector<size_t> *pitch) const {
MACE_CHECK_NOTNULL(buf_); MACE_CHECK_NOTNULL(buf_);
MACE_UNUSED(pitch); MACE_UNUSED(pitch);
...@@ -154,16 +176,17 @@ class Buffer : public BufferBase { ...@@ -154,16 +176,17 @@ class Buffer : public BufferBase {
mapped_buf_ = nullptr; mapped_buf_ = nullptr;
} }
void Resize(index_t size) { MaceStatus Resize(index_t nbytes) {
MACE_CHECK(is_data_owner_, MACE_CHECK(is_data_owner_,
"data is not owned by this buffer, cannot resize"); "data is not owned by this buffer, cannot resize");
if (size != size_) { if (nbytes != size_) {
if (buf_ != nullptr) { if (buf_ != nullptr) {
allocator_->Delete(buf_); allocator_->Delete(buf_);
} }
size_ = size; size_ = nbytes;
buf_ = allocator_->New(size); return allocator_->New(nbytes, &buf_);
} }
return MaceStatus::MACE_SUCCESS;
} }
void Copy(void *src, index_t offset, index_t length) { void Copy(void *src, index_t offset, index_t length) {
...@@ -195,18 +218,6 @@ class Image : public BufferBase { ...@@ -195,18 +218,6 @@ class Image : public BufferBase {
buf_(nullptr), buf_(nullptr),
mapped_buf_(nullptr) {} mapped_buf_(nullptr) {}
Image(std::vector<size_t> shape, DataType data_type)
: BufferBase(
std::accumulate(
shape.begin(), shape.end(), 1, std::multiplies<index_t>()) *
GetEnumTypeSize(data_type)),
allocator_(GetDeviceAllocator(GPU)),
mapped_buf_(nullptr) {
shape_ = shape;
data_type_ = data_type;
buf_ = allocator_->NewImage(shape, data_type);
}
virtual ~Image() { virtual ~Image() {
if (mapped_buf_ != nullptr) { if (mapped_buf_ != nullptr) {
UnMap(); UnMap();
...@@ -233,6 +244,29 @@ class Image : public BufferBase { ...@@ -233,6 +244,29 @@ class Image : public BufferBase {
std::vector<size_t> image_shape() const { return shape_; } std::vector<size_t> image_shape() const { return shape_; }
MaceStatus Allocate(index_t nbytes) {
MACE_UNUSED(nbytes);
LOG(FATAL) << "Image should not call this allocate function";
return MaceStatus::MACE_SUCCESS;
}
MaceStatus Allocate(const std::vector<size_t> &shape,
DataType data_type) {
index_t size = std::accumulate(
shape.begin(), shape.end(), 1, std::multiplies<index_t>()) *
GetEnumTypeSize(data_type);
if (mapped_buf_ != nullptr) {
UnMap();
}
if (buf_ != nullptr) {
allocator_->DeleteImage(buf_);
}
size_ = size;
shape_ = shape;
data_type_ = data_type;
return allocator_->NewImage(shape, data_type, &buf_);
}
void *Map(index_t offset, index_t length, std::vector<size_t> *pitch) const { void *Map(index_t offset, index_t length, std::vector<size_t> *pitch) const {
MACE_UNUSED(offset); MACE_UNUSED(offset);
MACE_UNUSED(length); MACE_UNUSED(length);
...@@ -259,9 +293,10 @@ class Image : public BufferBase { ...@@ -259,9 +293,10 @@ class Image : public BufferBase {
mapped_buf_ = nullptr; mapped_buf_ = nullptr;
} }
void Resize(index_t size) { MaceStatus Resize(index_t size) {
MACE_UNUSED(size); MACE_UNUSED(size);
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
return MaceStatus::MACE_SUCCESS;
} }
void Copy(void *src, index_t offset, index_t length) { void Copy(void *src, index_t offset, index_t length) {
...@@ -339,6 +374,20 @@ class BufferSlice : public BufferBase { ...@@ -339,6 +374,20 @@ class BufferSlice : public BufferBase {
} }
} }
MaceStatus Allocate(index_t size) {
MACE_UNUSED(size);
LOG(FATAL) << "BufferSlice should not call allocate function";
return MaceStatus::MACE_SUCCESS;
}
MaceStatus Allocate(const std::vector<size_t> &shape,
DataType data_type) {
MACE_UNUSED(shape);
MACE_UNUSED(data_type);
LOG(FATAL) << "BufferSlice should not call allocate function";
return MaceStatus::MACE_SUCCESS;
}
void *Map(index_t offset, index_t length, std::vector<size_t> *pitch) const { void *Map(index_t offset, index_t length, std::vector<size_t> *pitch) const {
MACE_UNUSED(offset); MACE_UNUSED(offset);
MACE_UNUSED(length); MACE_UNUSED(length);
...@@ -364,9 +413,10 @@ class BufferSlice : public BufferBase { ...@@ -364,9 +413,10 @@ class BufferSlice : public BufferBase {
mapped_buf_ = nullptr; mapped_buf_ = nullptr;
} }
void Resize(index_t size) { MaceStatus Resize(index_t size) {
MACE_CHECK(size == size_, "resize buffer slice from ", size_, MACE_CHECK(size == size_, "resize buffer slice from ", size_,
" to ", size, " is illegal"); " to ", size, " is illegal");
return MaceStatus::MACE_SUCCESS;
} }
void Copy(void *src, index_t offset, index_t length) { void Copy(void *src, index_t offset, index_t length) {
...@@ -396,20 +446,17 @@ class ScratchBuffer: public Buffer { ...@@ -396,20 +446,17 @@ class ScratchBuffer: public Buffer {
: Buffer(allocator), : Buffer(allocator),
offset_(0) {} offset_(0) {}
ScratchBuffer(Allocator *allocator, index_t size)
: Buffer(allocator, size),
offset_(0) {}
ScratchBuffer(Allocator *allocator, void *data, index_t size) ScratchBuffer(Allocator *allocator, void *data, index_t size)
: Buffer(allocator, data, size), : Buffer(allocator, data, size),
offset_(0) {} offset_(0) {}
virtual ~ScratchBuffer() {} virtual ~ScratchBuffer() {}
void GrowSize(index_t size) { MaceStatus GrowSize(index_t size) {
if (size > size_) { if (size > size_) {
Resize(size); return Resize(size);
} }
return MaceStatus::MACE_SUCCESS;
} }
BufferSlice Scratch(index_t size) { BufferSlice Scratch(index_t size) {
......
...@@ -82,12 +82,14 @@ std::shared_ptr<float> MaceTensor::data() { return impl_->data; } ...@@ -82,12 +82,14 @@ std::shared_ptr<float> MaceTensor::data() { return impl_->data; }
// Mace Engine // Mace Engine
class MaceEngine::Impl { class MaceEngine::Impl {
public: public:
explicit Impl(const NetDef *net_def, explicit Impl(DeviceType device_type);
DeviceType device_type,
const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes);
~Impl(); ~Impl();
MaceStatus Init(const NetDef *net_def,
const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes);
MaceStatus Run(const std::map<std::string, MaceTensor> &inputs, MaceStatus Run(const std::map<std::string, MaceTensor> &inputs,
std::map<std::string, MaceTensor> *outputs, std::map<std::string, MaceTensor> *outputs,
RunMetadata *run_metadata); RunMetadata *run_metadata);
...@@ -104,10 +106,8 @@ class MaceEngine::Impl { ...@@ -104,10 +106,8 @@ class MaceEngine::Impl {
DISABLE_COPY_AND_ASSIGN(Impl); DISABLE_COPY_AND_ASSIGN(Impl);
}; };
MaceEngine::Impl::Impl(const NetDef *net_def,
DeviceType device_type, MaceEngine::Impl::Impl(DeviceType device_type)
const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes)
: op_registry_(new OperatorRegistry()), : op_registry_(new OperatorRegistry()),
device_type_(device_type), device_type_(device_type),
ws_(new Workspace()), ws_(new Workspace()),
...@@ -115,7 +115,12 @@ MaceEngine::Impl::Impl(const NetDef *net_def, ...@@ -115,7 +115,12 @@ MaceEngine::Impl::Impl(const NetDef *net_def,
#ifdef MACE_ENABLE_HEXAGON #ifdef MACE_ENABLE_HEXAGON
, hexagon_controller_(nullptr) , hexagon_controller_(nullptr)
#endif #endif
{ {}
MaceStatus MaceEngine::Impl::Init(
const NetDef *net_def,
const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes) {
LOG(INFO) << "MACE version: " << MaceVersion(); LOG(INFO) << "MACE version: " << MaceVersion();
// Set storage path for internal usage // Set storage path for internal usage
for (auto input_name : input_nodes) { for (auto input_name : input_nodes) {
...@@ -127,7 +132,7 @@ MaceEngine::Impl::Impl(const NetDef *net_def, ...@@ -127,7 +132,7 @@ MaceEngine::Impl::Impl(const NetDef *net_def,
GetDeviceAllocator(device_type_), DT_FLOAT); GetDeviceAllocator(device_type_), DT_FLOAT);
} }
#ifdef MACE_ENABLE_HEXAGON #ifdef MACE_ENABLE_HEXAGON
if (device_type == HEXAGON) { if (device_type_ == HEXAGON) {
hexagon_controller_.reset(new HexagonControlWrapper()); hexagon_controller_.reset(new HexagonControlWrapper());
MACE_CHECK(hexagon_controller_->Config(), "hexagon config error"); MACE_CHECK(hexagon_controller_->Config(), "hexagon config error");
MACE_CHECK(hexagon_controller_->Init(), "hexagon init error"); MACE_CHECK(hexagon_controller_->Init(), "hexagon init error");
...@@ -143,18 +148,22 @@ MaceEngine::Impl::Impl(const NetDef *net_def, ...@@ -143,18 +148,22 @@ MaceEngine::Impl::Impl(const NetDef *net_def,
} }
} else { } else {
#endif #endif
ws_->LoadModelTensor(*net_def, device_type); MaceStatus status = ws_->LoadModelTensor(*net_def, device_type_);
if (status != MaceStatus::MACE_SUCCESS) {
return status;
}
// Init model // Init model
auto net = CreateNet(op_registry_, *net_def, ws_.get(), device_type, auto net = CreateNet(op_registry_, *net_def, ws_.get(), device_type_,
NetMode::INIT); NetMode::INIT);
if (!net->Run()) { if (!net->Run()) {
LOG(FATAL) << "Net init run failed"; LOG(FATAL) << "Net init run failed";
} }
net_ = CreateNet(op_registry_, *net_def, ws_.get(), device_type); net_ = CreateNet(op_registry_, *net_def, ws_.get(), device_type_);
#ifdef MACE_ENABLE_HEXAGON #ifdef MACE_ENABLE_HEXAGON
} }
#endif #endif
return MaceStatus::MACE_SUCCESS;
} }
MaceEngine::Impl::~Impl() { MaceEngine::Impl::~Impl() {
...@@ -244,16 +253,17 @@ MaceStatus MaceEngine::Impl::Run( ...@@ -244,16 +253,17 @@ MaceStatus MaceEngine::Impl::Run(
return MACE_SUCCESS; return MACE_SUCCESS;
} }
MaceEngine::MaceEngine(const NetDef *net_def, MaceEngine::MaceEngine(DeviceType device_type):
DeviceType device_type, impl_(new MaceEngine::Impl(device_type)) {}
const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes) {
impl_ = std::unique_ptr<MaceEngine::Impl>(
new MaceEngine::Impl(net_def, device_type, input_nodes, output_nodes));
}
MaceEngine::~MaceEngine() = default; MaceEngine::~MaceEngine() = default;
MaceStatus MaceEngine::Init(const NetDef *net_def,
const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes) {
return impl_->Init(net_def, input_nodes, output_nodes);
}
MaceStatus MaceEngine::Run(const std::map<std::string, MaceTensor> &inputs, MaceStatus MaceEngine::Run(const std::map<std::string, MaceTensor> &inputs,
std::map<std::string, MaceTensor> *outputs, std::map<std::string, MaceTensor> *outputs,
RunMetadata *run_metadata) { RunMetadata *run_metadata) {
......
...@@ -44,18 +44,30 @@ static cl_channel_type DataTypeToCLChannelType(const DataType t) { ...@@ -44,18 +44,30 @@ static cl_channel_type DataTypeToCLChannelType(const DataType t) {
OpenCLAllocator::OpenCLAllocator() {} OpenCLAllocator::OpenCLAllocator() {}
OpenCLAllocator::~OpenCLAllocator() {} OpenCLAllocator::~OpenCLAllocator() {}
void *OpenCLAllocator::New(size_t nbytes) const { MaceStatus OpenCLAllocator::New(size_t nbytes, void **result) const {
if (nbytes == 0) {
return MaceStatus::MACE_SUCCESS;
}
VLOG(3) << "Allocate OpenCL buffer: " << nbytes; VLOG(3) << "Allocate OpenCL buffer: " << nbytes;
cl_int error; cl_int error;
cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Global()->context(), cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Global()->context(),
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
nbytes, nullptr, &error); nbytes, nullptr, &error);
MACE_CHECK_CL_SUCCESS(error); if (error != CL_SUCCESS) {
return static_cast<void *>(buffer); LOG(WARNING) << "Allocate OpenCL Buffer with "
<< nbytes << " bytes failed because of"
<< OpenCLErrorToString(error);
*result = nullptr;
return MaceStatus::MACE_OUT_OF_RESOURCES;
} else {
*result = buffer;
return MaceStatus::MACE_SUCCESS;
}
} }
void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape, MaceStatus OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
const DataType dt) const { const DataType dt,
void **result) const {
MACE_CHECK(image_shape.size() == 2) << "Image shape's size must equal 2"; MACE_CHECK(image_shape.size() == 2) << "Image shape's size must equal 2";
VLOG(3) << "Allocate OpenCL image: " << image_shape[0] << ", " VLOG(3) << "Allocate OpenCL image: " << image_shape[0] << ", "
<< image_shape[1]; << image_shape[1];
...@@ -67,11 +79,17 @@ void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape, ...@@ -67,11 +79,17 @@ void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
new cl::Image2D(OpenCLRuntime::Global()->context(), new cl::Image2D(OpenCLRuntime::Global()->context(),
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, img_format, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, img_format,
image_shape[0], image_shape[1], 0, nullptr, &error); image_shape[0], image_shape[1], 0, nullptr, &error);
MACE_CHECK_CL_SUCCESS(error) << " with image shape: [" if (error != CL_SUCCESS) {
<< image_shape[0] << ", " << image_shape[1] LOG(WARNING) << "Allocate OpenCL image with shape: ["
<< "]"; << image_shape[0] << ", " << image_shape[1]
<< "] failed because of"
return cl_image; << OpenCLErrorToString(error);
*result = nullptr;
return MaceStatus::MACE_OUT_OF_RESOURCES;
} else {
*result = cl_image;
return MaceStatus::MACE_SUCCESS;
}
} }
void OpenCLAllocator::Delete(void *buffer) const { void OpenCLAllocator::Delete(void *buffer) const {
......
...@@ -27,15 +27,16 @@ class OpenCLAllocator : public Allocator { ...@@ -27,15 +27,16 @@ class OpenCLAllocator : public Allocator {
~OpenCLAllocator() override; ~OpenCLAllocator() override;
void *New(size_t nbytes) const override; MaceStatus New(size_t nbytes, void **result) const override;
/* /*
* Use Image2D with RGBA (128-bit) format to represent the image. * Use Image2D with RGBA (128-bit) format to represent the image.
* *
* @ shape : [depth, ..., height, width ]. * @ shape : [depth, ..., height, width ].
*/ */
void *NewImage(const std::vector<size_t> &image_shape, MaceStatus NewImage(const std::vector<size_t> &image_shape,
const DataType dt) const override; const DataType dt,
void **result) const override;
void Delete(void *buffer) const override; void Delete(void *buffer) const override;
......
...@@ -216,16 +216,19 @@ class Tensor { ...@@ -216,16 +216,19 @@ class Tensor {
MACE_CHECK(raw_size() <= buffer_->size()); MACE_CHECK(raw_size() <= buffer_->size());
} }
inline void Resize(const std::vector<index_t> &shape) { inline MaceStatus Resize(const std::vector<index_t> &shape) {
shape_ = shape; shape_ = shape;
image_shape_.clear(); image_shape_.clear();
if (buffer_ != nullptr) { if (buffer_ != nullptr) {
MACE_CHECK(!has_opencl_image(), "Cannot resize image, use ResizeImage."); MACE_CHECK(!has_opencl_image(), "Cannot resize image, use ResizeImage.");
if (raw_size() + EXTRA_BUFFER_PAD_SIZE > buffer_->size()) if (raw_size() + EXTRA_BUFFER_PAD_SIZE > buffer_->size()) {
buffer_->Resize(raw_size() + EXTRA_BUFFER_PAD_SIZE); return buffer_->Resize(raw_size() + EXTRA_BUFFER_PAD_SIZE);
}
return MaceStatus::MACE_SUCCESS;
} else { } else {
MACE_CHECK(is_buffer_owner_); MACE_CHECK(is_buffer_owner_);
buffer_ = new Buffer(allocator_, raw_size() + EXTRA_BUFFER_PAD_SIZE); buffer_ = new Buffer(allocator_);
return buffer_->Allocate(raw_size() + EXTRA_BUFFER_PAD_SIZE);
} }
} }
...@@ -241,13 +244,14 @@ class Tensor { ...@@ -241,13 +244,14 @@ class Tensor {
is_buffer_owner_ = false; is_buffer_owner_ = false;
} }
inline void ResizeImage(const std::vector<index_t> &shape, inline MaceStatus ResizeImage(const std::vector<index_t> &shape,
const std::vector<size_t> &image_shape) { const std::vector<size_t> &image_shape) {
shape_ = shape; shape_ = shape;
image_shape_ = image_shape; image_shape_ = image_shape;
if (buffer_ == nullptr) { if (buffer_ == nullptr) {
MACE_CHECK(is_buffer_owner_); MACE_CHECK(is_buffer_owner_);
buffer_ = new Image(image_shape, dtype_); buffer_ = new Image();
return buffer_->Allocate(image_shape, dtype_);
} else { } else {
MACE_CHECK(has_opencl_image(), "Cannot ResizeImage buffer, use Resize."); MACE_CHECK(has_opencl_image(), "Cannot ResizeImage buffer, use Resize.");
Image *image = dynamic_cast<Image *>(buffer_); Image *image = dynamic_cast<Image *>(buffer_);
...@@ -257,24 +261,27 @@ class Tensor { ...@@ -257,24 +261,27 @@ class Tensor {
"): current physical image shape: ", image->image_shape()[0], "): current physical image shape: ", image->image_shape()[0],
", ", image->image_shape()[1], " < logical image shape: ", ", ", image->image_shape()[1], " < logical image shape: ",
image_shape[0], ", ", image_shape[1]); image_shape[0], ", ", image_shape[1]);
return MaceStatus::MACE_SUCCESS;
} }
} }
inline void ResizeLike(const Tensor &other) { ResizeLike(&other); } inline MaceStatus ResizeLike(const Tensor &other) {
return ResizeLike(&other);
}
inline void ResizeLike(const Tensor *other) { inline MaceStatus ResizeLike(const Tensor *other) {
if (other->has_opencl_image()) { if (other->has_opencl_image()) {
if (is_buffer_owner_ && buffer_ != nullptr && !has_opencl_image()) { if (is_buffer_owner_ && buffer_ != nullptr && !has_opencl_image()) {
delete buffer_; delete buffer_;
buffer_ = nullptr; buffer_ = nullptr;
} }
ResizeImage(other->shape(), other->image_shape_); return ResizeImage(other->shape(), other->image_shape_);
} else { } else {
if (is_buffer_owner_ && buffer_ != nullptr && has_opencl_image()) { if (is_buffer_owner_ && buffer_ != nullptr && has_opencl_image()) {
delete buffer_; delete buffer_;
buffer_ = nullptr; buffer_ = nullptr;
} }
Resize(other->shape()); return Resize(other->shape());
} }
} }
......
...@@ -60,7 +60,7 @@ std::vector<std::string> Workspace::Tensors() const { ...@@ -60,7 +60,7 @@ std::vector<std::string> Workspace::Tensors() const {
return names; return names;
} }
void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
MACE_LATENCY_LOGGER(1, "Load model tensors"); MACE_LATENCY_LOGGER(1, "Load model tensors");
index_t model_data_size = 0; index_t model_data_size = 0;
unsigned char *model_data_ptr = nullptr; unsigned char *model_data_ptr = nullptr;
...@@ -89,7 +89,11 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { ...@@ -89,7 +89,11 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
model_data_size)); model_data_size));
} else { } else {
tensor_buffer_ = std::unique_ptr<Buffer>( tensor_buffer_ = std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(type), model_data_size)); new Buffer(GetDeviceAllocator(type)));
MaceStatus status = tensor_buffer_->Allocate(model_data_size);
if (status != MaceStatus::MACE_SUCCESS) {
return status;
}
tensor_buffer_->Map(nullptr); tensor_buffer_->Map(nullptr);
tensor_buffer_->Copy(model_data_ptr, 0, model_data_size); tensor_buffer_->Copy(model_data_ptr, 0, model_data_size);
tensor_buffer_->UnMap(); tensor_buffer_->UnMap();
...@@ -120,14 +124,16 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { ...@@ -120,14 +124,16 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
} }
if (type == DeviceType::CPU || type == DeviceType::GPU) { if (type == DeviceType::CPU || type == DeviceType::GPU) {
CreateOutputTensorBuffer(net_def, type); MaceStatus status = CreateOutputTensorBuffer(net_def, type);
if (status != MaceStatus::MACE_SUCCESS) return status;
} }
return MaceStatus::MACE_SUCCESS;
} }
void Workspace::CreateOutputTensorBuffer(const NetDef &net_def, MaceStatus Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
DeviceType device_type) { DeviceType device_type) {
if (!net_def.has_mem_arena() || net_def.mem_arena().mem_block_size() == 0) { if (!net_def.has_mem_arena() || net_def.mem_arena().mem_block_size() == 0) {
return; return MaceStatus::MACE_SUCCESS;
} }
DataType dtype = DataType::DT_INVALID; DataType dtype = DataType::DT_INVALID;
...@@ -157,14 +163,24 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def, ...@@ -157,14 +163,24 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
// TODO(liuqi): refactor based on PB // TODO(liuqi): refactor based on PB
if (mem_block.mem_id() >= 20000) { if (mem_block.mem_id() >= 20000) {
std::unique_ptr<BufferBase> image_buf( std::unique_ptr<BufferBase> image_buf(
new Image({mem_block.x(), mem_block.y()}, dtype)); new Image());
MaceStatus status = image_buf->Allocate(
{mem_block.x(), mem_block.y()}, dtype);
if (status != MaceStatus::MACE_SUCCESS) {
return status;
}
preallocated_allocator_.SetBuffer(mem_block.mem_id(), preallocated_allocator_.SetBuffer(mem_block.mem_id(),
std::move(image_buf)); std::move(image_buf));
} }
} else { } else {
if (mem_block.mem_id() < 20000) { if (mem_block.mem_id() < 20000) {
std::unique_ptr<BufferBase> tensor_buf( std::unique_ptr<BufferBase> tensor_buf(
new Buffer(GetDeviceAllocator(device_type), mem_block.x())); new Buffer(GetDeviceAllocator(device_type)));
MaceStatus status = tensor_buf->Allocate(
mem_block.x() * GetEnumTypeSize(dtype));
if (status != MaceStatus::MACE_SUCCESS) {
return status;
}
preallocated_allocator_.SetBuffer(mem_block.mem_id(), preallocated_allocator_.SetBuffer(mem_block.mem_id(),
std::move(tensor_buf)); std::move(tensor_buf));
} }
...@@ -201,6 +217,7 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def, ...@@ -201,6 +217,7 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
} }
} }
} }
return MaceStatus::MACE_SUCCESS;
} }
ScratchBuffer *Workspace::GetScratchBuffer(DeviceType device_type) { ScratchBuffer *Workspace::GetScratchBuffer(DeviceType device_type) {
......
...@@ -47,12 +47,13 @@ class Workspace { ...@@ -47,12 +47,13 @@ class Workspace {
std::vector<std::string> Tensors() const; std::vector<std::string> Tensors() const;
void LoadModelTensor(const NetDef &net_def, DeviceType type); MaceStatus LoadModelTensor(const NetDef &net_def, DeviceType type);
ScratchBuffer *GetScratchBuffer(DeviceType device_type); ScratchBuffer *GetScratchBuffer(DeviceType device_type);
private: private:
void CreateOutputTensorBuffer(const NetDef &net_def, DeviceType device_type); MaceStatus CreateOutputTensorBuffer(const NetDef &net_def,
DeviceType device_type);
TensorMap tensor_map_; TensorMap tensor_map_;
......
...@@ -99,7 +99,6 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase { ...@@ -99,7 +99,6 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
#pragma omp parallel for collapse(2) #pragma omp parallel for collapse(2)
for (index_t b = 0; b < in_shape[0]; b++) { for (index_t b = 0; b < in_shape[0]; b++) {
for (index_t m = 0; m < filter_shape[0]; m += 4) { for (index_t m = 0; m < filter_shape[0]; m += 4) {
const index_t in_height = in_shape[2];
const index_t in_width = in_shape[3]; const index_t in_width = in_shape[3];
const index_t out_height = out_shape[2]; const index_t out_height = out_shape[2];
const index_t out_width = out_shape[3]; const index_t out_width = out_shape[3];
...@@ -322,8 +321,6 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase { ...@@ -322,8 +321,6 @@ struct Conv2dFunctor<DeviceType::CPU, float> : Conv2dFunctorBase {
index_t dilation_h = dilations_[0]; index_t dilation_h = dilations_[0];
index_t dilation_w = dilations_[1]; index_t dilation_w = dilations_[1];
const index_t filter_hw[2] = {filter_h, filter_w};
MACE_CHECK(batch == input_batch, "Input/Output batch size mismatch"); MACE_CHECK(batch == input_batch, "Input/Output batch size mismatch");
index_t padded_input_height = input_height + paddings[0]; index_t padded_input_height = input_height + paddings[0];
......
...@@ -45,7 +45,8 @@ void ActivationFunctor<DeviceType::GPU, T>::operator()(const Tensor *input, ...@@ -45,7 +45,8 @@ void ActivationFunctor<DeviceType::GPU, T>::operator()(const Tensor *input,
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
......
...@@ -58,7 +58,8 @@ void AddNFunctor<DeviceType::GPU, T>::operator()( ...@@ -58,7 +58,8 @@ void AddNFunctor<DeviceType::GPU, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
......
...@@ -56,7 +56,8 @@ void BatchNormFunctor<DeviceType::GPU, T>::operator()(const Tensor *input, ...@@ -56,7 +56,8 @@ void BatchNormFunctor<DeviceType::GPU, T>::operator()(const Tensor *input,
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
......
...@@ -49,7 +49,8 @@ void BiasAddFunctor<DeviceType::GPU, T>::operator()(const Tensor *input, ...@@ -49,7 +49,8 @@ void BiasAddFunctor<DeviceType::GPU, T>::operator()(const Tensor *input,
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
......
...@@ -93,7 +93,8 @@ void BufferToImageFunctor<DeviceType::GPU, T>::operator()( ...@@ -93,7 +93,8 @@ void BufferToImageFunctor<DeviceType::GPU, T>::operator()(
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
if (!kernel_error_) { if (!kernel_error_) {
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
......
...@@ -56,7 +56,8 @@ void ChannelShuffleFunctor<DeviceType::GPU, T>::operator()( ...@@ -56,7 +56,8 @@ void ChannelShuffleFunctor<DeviceType::GPU, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
......
...@@ -67,7 +67,8 @@ static void Concat2(cl::Kernel *kernel, ...@@ -67,7 +67,8 @@ static void Concat2(cl::Kernel *kernel,
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
*kernel_error = std::move(std::unique_ptr<Buffer>( *kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
(*kernel_error)->Allocate(1);
(*kernel_error)->Map(nullptr); (*kernel_error)->Map(nullptr);
*((*kernel_error)->mutable_data<char>()) = 0; *((*kernel_error)->mutable_data<char>()) = 0;
(*kernel_error)->UnMap(); (*kernel_error)->UnMap();
...@@ -148,7 +149,8 @@ static void ConcatN(cl::Kernel *kernel, ...@@ -148,7 +149,8 @@ static void ConcatN(cl::Kernel *kernel,
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
*kernel_error = std::move(std::unique_ptr<Buffer>( *kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
(*kernel_error)->Allocate(1);
(*kernel_error)->Map(nullptr); (*kernel_error)->Map(nullptr);
*((*kernel_error)->mutable_data<char>()) = 0; *((*kernel_error)->mutable_data<char>()) = 0;
(*kernel_error)->UnMap(); (*kernel_error)->UnMap();
......
...@@ -100,7 +100,8 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, ...@@ -100,7 +100,8 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel,
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
*kernel_error = std::move(std::unique_ptr<Buffer>( *kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
(*kernel_error)->Allocate(1);
(*kernel_error)->Map(nullptr); (*kernel_error)->Map(nullptr);
*((*kernel_error)->mutable_data<char>()) = 0; *((*kernel_error)->mutable_data<char>()) = 0;
(*kernel_error)->UnMap(); (*kernel_error)->UnMap();
......
...@@ -86,7 +86,8 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel, ...@@ -86,7 +86,8 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
*kernel_error = std::move(std::unique_ptr<Buffer>( *kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
(*kernel_error)->Allocate(1);
(*kernel_error)->Map(nullptr); (*kernel_error)->Map(nullptr);
*((*kernel_error)->mutable_data<char>()) = 0; *((*kernel_error)->mutable_data<char>()) = 0;
(*kernel_error)->UnMap(); (*kernel_error)->UnMap();
......
...@@ -94,7 +94,8 @@ extern void Conv2dOpencl(cl::Kernel *kernel, ...@@ -94,7 +94,8 @@ extern void Conv2dOpencl(cl::Kernel *kernel,
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
*kernel_error = std::move(std::unique_ptr<Buffer>( *kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
(*kernel_error)->Allocate(1);
(*kernel_error)->Map(nullptr); (*kernel_error)->Map(nullptr);
*((*kernel_error)->mutable_data<char>()) = 0; *((*kernel_error)->mutable_data<char>()) = 0;
(*kernel_error)->UnMap(); (*kernel_error)->UnMap();
......
...@@ -65,7 +65,8 @@ void Deconv2dOpencl(cl::Kernel *kernel, ...@@ -65,7 +65,8 @@ void Deconv2dOpencl(cl::Kernel *kernel,
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
*kernel_error = std::move(std::unique_ptr<Buffer>( *kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
(*kernel_error)->Allocate(1);
(*kernel_error)->Map(nullptr); (*kernel_error)->Map(nullptr);
*((*kernel_error)->mutable_data<char>()) = 0; *((*kernel_error)->mutable_data<char>()) = 0;
(*kernel_error)->UnMap(); (*kernel_error)->UnMap();
......
...@@ -86,7 +86,8 @@ void DepthToSpaceOpFunctor<DeviceType::GPU, T>::operator()( ...@@ -86,7 +86,8 @@ void DepthToSpaceOpFunctor<DeviceType::GPU, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
......
...@@ -97,7 +97,8 @@ static void DepthwiseConv2d(cl::Kernel *kernel, ...@@ -97,7 +97,8 @@ static void DepthwiseConv2d(cl::Kernel *kernel,
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
*kernel_error = std::move(std::unique_ptr<Buffer>( *kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
(*kernel_error)->Allocate(1);
(*kernel_error)->Map(nullptr); (*kernel_error)->Map(nullptr);
*((*kernel_error)->mutable_data<char>()) = 0; *((*kernel_error)->mutable_data<char>()) = 0;
(*kernel_error)->UnMap(); (*kernel_error)->UnMap();
......
...@@ -97,7 +97,8 @@ void EltwiseFunctor<DeviceType::GPU, T>::operator()(const Tensor *input0, ...@@ -97,7 +97,8 @@ void EltwiseFunctor<DeviceType::GPU, T>::operator()(const Tensor *input0,
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
......
...@@ -74,7 +74,8 @@ void FCWXKernel(cl::Kernel *kernel, ...@@ -74,7 +74,8 @@ void FCWXKernel(cl::Kernel *kernel,
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
*kernel_error = std::move(std::unique_ptr<Buffer>( *kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
(*kernel_error)->Allocate(1);
(*kernel_error)->Map(nullptr); (*kernel_error)->Map(nullptr);
*((*kernel_error)->mutable_data<char>()) = 0; *((*kernel_error)->mutable_data<char>()) = 0;
(*kernel_error)->UnMap(); (*kernel_error)->UnMap();
...@@ -200,7 +201,8 @@ void FCWTXKernel(cl::Kernel *kernel, ...@@ -200,7 +201,8 @@ void FCWTXKernel(cl::Kernel *kernel,
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
*kernel_error = std::move(std::unique_ptr<Buffer>( *kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
(*kernel_error)->Allocate(1);
(*kernel_error)->Map(nullptr); (*kernel_error)->Map(nullptr);
*((*kernel_error)->mutable_data<char>()) = 0; *((*kernel_error)->mutable_data<char>()) = 0;
(*kernel_error)->UnMap(); (*kernel_error)->UnMap();
......
...@@ -86,7 +86,8 @@ void ImageToBufferFunctor<DeviceType::GPU, T>::operator()( ...@@ -86,7 +86,8 @@ void ImageToBufferFunctor<DeviceType::GPU, T>::operator()(
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
if (!kernel_error_) { if (!kernel_error_) {
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
......
...@@ -54,7 +54,8 @@ void MatMulFunctor<DeviceType::GPU, T>::operator()(const Tensor *A, ...@@ -54,7 +54,8 @@ void MatMulFunctor<DeviceType::GPU, T>::operator()(const Tensor *A,
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
......
...@@ -57,7 +57,8 @@ bool BufferToImageOpImpl(Tensor *buffer, ...@@ -57,7 +57,8 @@ bool BufferToImageOpImpl(Tensor *buffer,
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error = std::move(std::unique_ptr<Buffer>( kernel_error = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error->Allocate(1);
kernel_error->Map(nullptr); kernel_error->Map(nullptr);
*(kernel_error->mutable_data<char>()) = 0; *(kernel_error->mutable_data<char>()) = 0;
kernel_error->UnMap(); kernel_error->UnMap();
......
...@@ -60,7 +60,8 @@ void PadFunctor<DeviceType::GPU, T>::operator()( ...@@ -60,7 +60,8 @@ void PadFunctor<DeviceType::GPU, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
......
...@@ -72,7 +72,8 @@ void PoolingFunctor<DeviceType::GPU, T>::operator()(const Tensor *input, ...@@ -72,7 +72,8 @@ void PoolingFunctor<DeviceType::GPU, T>::operator()(const Tensor *input,
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
......
...@@ -78,7 +78,8 @@ void ResizeBilinearFunctor<DeviceType::GPU, T>::operator()( ...@@ -78,7 +78,8 @@ void ResizeBilinearFunctor<DeviceType::GPU, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
......
...@@ -51,7 +51,8 @@ void SliceFunctor<DeviceType::GPU, T>::operator()( ...@@ -51,7 +51,8 @@ void SliceFunctor<DeviceType::GPU, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
......
...@@ -70,7 +70,8 @@ void SoftmaxFunctor<DeviceType::GPU, T>::operator()(const Tensor *logits, ...@@ -70,7 +70,8 @@ void SoftmaxFunctor<DeviceType::GPU, T>::operator()(const Tensor *logits,
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
......
...@@ -70,7 +70,8 @@ void SpaceToBatchFunctor<DeviceType::GPU, T>::operator()( ...@@ -70,7 +70,8 @@ void SpaceToBatchFunctor<DeviceType::GPU, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
......
...@@ -39,7 +39,8 @@ void WinogradTransformFunctor<DeviceType::GPU, T>::operator()( ...@@ -39,7 +39,8 @@ void WinogradTransformFunctor<DeviceType::GPU, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
...@@ -138,7 +139,8 @@ void WinogradInverseTransformFunctor<DeviceType::GPU, T>::operator()( ...@@ -138,7 +139,8 @@ void WinogradInverseTransformFunctor<DeviceType::GPU, T>::operator()(
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
built_options.emplace("-DOUT_OF_RANGE_CHECK"); built_options.emplace("-DOUT_OF_RANGE_CHECK");
kernel_error_ = std::move(std::unique_ptr<Buffer>( kernel_error_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); new Buffer(GetDeviceAllocator(DeviceType::GPU))));
kernel_error_->Allocate(1);
kernel_error_->Map(nullptr); kernel_error_->Map(nullptr);
*(kernel_error_->mutable_data<char>()) = 0; *(kernel_error_->mutable_data<char>()) = 0;
kernel_error_->UnMap(); kernel_error_->UnMap();
......
...@@ -30,7 +30,11 @@ const char *MaceVersion(); ...@@ -30,7 +30,11 @@ const char *MaceVersion();
enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3 }; enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3 };
enum MaceStatus { MACE_SUCCESS = 0, MACE_INVALID_ARGS = 1 }; enum MaceStatus {
MACE_SUCCESS = 0,
MACE_INVALID_ARGS = 1,
MACE_OUT_OF_RESOURCES = 2
};
// MACE input/output tensor // MACE input/output tensor
class MaceTensor { class MaceTensor {
...@@ -61,12 +65,13 @@ class RunMetadata; ...@@ -61,12 +65,13 @@ class RunMetadata;
class MaceEngine { class MaceEngine {
public: public:
explicit MaceEngine(const NetDef *net_def, explicit MaceEngine(DeviceType device_type);
DeviceType device_type,
const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes);
~MaceEngine(); ~MaceEngine();
MaceStatus Init(const NetDef *net_def,
const std::vector<std::string> &input_nodes,
const std::vector<std::string> &output_nodes);
MaceStatus Run(const std::map<std::string, MaceTensor> &inputs, MaceStatus Run(const std::map<std::string, MaceTensor> &inputs,
std::map<std::string, MaceTensor> *outputs); std::map<std::string, MaceTensor> *outputs);
......
...@@ -61,24 +61,25 @@ MaceStatus CreateMaceEngine( ...@@ -61,24 +61,25 @@ MaceStatus CreateMaceEngine(
} }
const unsigned char * model_data = nullptr; const unsigned char * model_data = nullptr;
NetDef net_def; NetDef net_def;
MaceStatus status = MaceStatus::MACE_SUCCESS;
switch (model_name_map[model_name]) { switch (model_name_map[model_name]) {
{% for i in range(model_tags |length) %} {% for i in range(model_tags |length) %}
case {{ i }}: case {{ i }}:
model_data = model_data =
mace::{{model_tags[i]}}::LoadModelData(model_data_file); mace::{{model_tags[i]}}::LoadModelData(model_data_file);
net_def = mace::{{model_tags[i]}}::CreateNet(model_data); net_def = mace::{{model_tags[i]}}::CreateNet(model_data);
engine->reset( engine->reset(new mace::MaceEngine(device_type));
new mace::MaceEngine(&net_def, device_type, input_nodes, output_nodes)); status = (*engine)->Init(&net_def, input_nodes, output_nodes);
if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) { if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) {
mace::{{model_tags[i]}}::UnloadModelData(model_data); mace::{{model_tags[i]}}::UnloadModelData(model_data);
} }
break; break;
{% endfor %} {% endfor %}
default: default:
return MaceStatus::MACE_INVALID_ARGS; status = MaceStatus::MACE_INVALID_ARGS;
} }
return MaceStatus::MACE_SUCCESS; return status;
} }
} // namespace mace } // namespace mace
...@@ -304,7 +304,9 @@ void MaceRunFunc(const int in_out_size) { ...@@ -304,7 +304,9 @@ void MaceRunFunc(const int in_out_size) {
new FileStorageFactory(file_path)); new FileStorageFactory(file_path));
mace::SetKVStorageFactory(storage_factory); mace::SetKVStorageFactory(storage_factory);
MaceEngine engine(&net_def, device, input_names, output_names); MaceEngine engine(device);
MaceStatus status = engine.Init(&net_def, input_names, output_names);
ASSERT_EQ(status, MaceStatus::MACE_SUCCESS);
std::map<std::string, mace::MaceTensor> inputs; std::map<std::string, mace::MaceTensor> inputs;
std::map<std::string, mace::MaceTensor> outputs; std::map<std::string, mace::MaceTensor> outputs;
......
...@@ -308,7 +308,9 @@ void MaceRun(const int in_out_size, ...@@ -308,7 +308,9 @@ void MaceRun(const int in_out_size,
&net_def); &net_def);
} }
MaceEngine engine(&net_def, device, input_names, output_names); MaceEngine engine(device);
MaceStatus status = engine.Init(&net_def, input_names, output_names);
ASSERT_EQ(status, MaceStatus::MACE_SUCCESS);
std::map<std::string, mace::MaceTensor> inputs; std::map<std::string, mace::MaceTensor> inputs;
std::map<std::string, mace::MaceTensor> outputs; std::map<std::string, mace::MaceTensor> outputs;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册