Commit e7dc96c1 authored by Yanzhan Yang, committed by Jiaying Zhao

refine wrap to support GPU test=develop (#1892)

Parent 26450c49
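
For context before the diff, here is a minimal usage sketch of the refactored wrapper after this change, assembled from the test program further down in this commit; the model paths, the CL kernel path, the input name "image", and the fetch name "save_infer_model/scale_0" are simply the values that test uses, not fixed parts of the API:

```cpp
#include <iostream>
#include <vector>

#include "io/paddle_mobile_wrap.h"

int main() {
  // The device is now selected at runtime instead of through a template
  // parameter on Net.
  paddle_mobile::wrap::Net *net =
      new paddle_mobile::wrap::Net(paddle_mobile::wrap::kGPU_CL);
  // Directory that holds the OpenCL kernel files (GPU_CL only).
  net->SetCLPath("/data/local/tmp/bin");
  net->Load("./checked_model/model", "./checked_model/params", false, false, 1,
            true);

  // Feed a zero-filled 1x3x416x416 tensor, run inference, and fetch a result.
  std::vector<int64_t> shape{1, 3, 416, 416};
  std::vector<float> data(1 * 3 * 416 * 416, 0.0f);
  paddle_mobile::wrap::Tensor input(data.data(),
                                    paddle_mobile::wrap::make_ddim(shape));
  net->Feed("image", input);
  net->Predict();
  auto output = net->Fetch("save_infer_model/scale_0");
  std::cout << output->data()[0] << std::endl;

  delete net;
  return 0;
}
```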
@@ -49,129 +49,211 @@ DDim make_ddim(const std::vector<int64_t> &dims) {
}
// tensor class
Tensor::Tensor(float *data, DDim ddim) {
this->data_ = data;
this->ddim_ = ddim;
}
template <typename T>
float *Tensor::data() const {
return this->data_;
}
float *Tensor::data() const { return this->data_; }
DDim Tensor::dims() const { return this->ddim_; }
// net class
template <typename Device>
void Net<Device>::SetThreadNum(int threads) {
void Net::SetThreadNum(int threads) {
if (this->device_ == kCPU) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::CPU> *)this->engine_;
if (engine != nullptr) {
engine->SetThreadNum(threads);
}
}
}
void Net::SetCLPath(std::string path) {
if (this->device_ == kGPU_CL) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> *)this->engine_;
engine->SetCLPath(path);
}
}
template <typename Device>
PMStatus Net<Device>::Load(const std::string &dirname, const bool optimize,
bool Net::Load(const std::string &dirname, const bool optimize,
const bool quantification, const int batch_size,
const bool lod_mode) {
if (this->device_ == kCPU) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::CPU> *)this->engine_;
if (engine != nullptr) {
paddle_mobile::PMStatus status =
engine->Load(dirname, false, false, 1, true);
return status == paddle_mobile::PMSuccess ? PMSuccess : PMUnKownError;
engine->Load(dirname, optimize, quantification, batch_size, lod_mode);
return status == paddle_mobile::PMSuccess;
}
} else if (this->device_ == kGPU_CL) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> *)this->engine_;
if (engine != nullptr) {
paddle_mobile::PMStatus status =
engine->Load(dirname, optimize, quantification, batch_size, lod_mode);
return status == paddle_mobile::PMSuccess;
}
return PMUnKownError;
}
return false;
}
template <typename Device>
PMStatus Net<Device>::Load(const std::string &model_path,
const std::string &para_path, const bool optimize,
const bool quantification, const int batch_size,
const bool lod_mode) {
bool Net::Load(const std::string &model_path, const std::string &para_path,
const bool optimize, const bool quantification,
const int batch_size, const bool lod_mode) {
if (this->device_ == kCPU) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::CPU> *)this->engine_;
if (engine != nullptr) {
paddle_mobile::PMStatus status =
engine->Load(model_path, para_path, false, false, 1, true);
return status == paddle_mobile::PMSuccess ? PMSuccess : PMUnKownError;
engine->Load(model_path, para_path, optimize, quantification,
batch_size, lod_mode);
return status == paddle_mobile::PMSuccess;
}
return PMUnKownError;
} else if (this->device_ == kGPU_CL) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> *)this->engine_;
if (engine != nullptr) {
paddle_mobile::PMStatus status =
engine->Load(model_path, para_path, optimize, quantification,
batch_size, lod_mode);
return status == paddle_mobile::PMSuccess;
}
}
return false;
}
template <typename Device>
bool Net<Device>::LoadCombinedMemory(size_t model_len, const uint8_t *model_buf,
bool Net::LoadCombinedMemory(size_t model_len, const uint8_t *model_buf,
size_t combined_params_len,
uint8_t *combined_params_buf,
bool optimize, bool quantification,
int batch_size, bool lod_mode) {
uint8_t *combined_params_buf, bool optimize,
bool quantification, int batch_size,
bool lod_mode) {
if (this->device_ == kCPU) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::CPU> *)this->engine_;
if (engine != nullptr) {
bool status =
engine->LoadCombinedMemory(model_len, model_buf, combined_params_len,
combined_params_buf, false, false, 1, true);
bool status = engine->LoadCombinedMemory(
model_len, model_buf, combined_params_len, combined_params_buf,
optimize, quantification, batch_size, lod_mode);
return status;
}
return false;
}
template <typename Device>
PMStatus Net<Device>::Predict(const Tensor &input) {
} else if (this->device_ == kGPU_CL) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::CPU> *)this->engine_;
(paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> *)this->engine_;
if (engine != nullptr) {
auto input_data = input.data<float>();
auto input_dims = input.dims();
std::vector<int64_t> input_dims_as_vector = input_dims.dims;
paddle_mobile::framework::Tensor input_inner(
input_data, paddle_mobile::framework::make_ddim(input_dims_as_vector));
paddle_mobile::PMStatus status = engine->Predict(input_inner);
return status == paddle_mobile::PMSuccess ? PMSuccess : PMUnKownError;
bool status = engine->LoadCombinedMemory(
model_len, model_buf, combined_params_len, combined_params_buf,
optimize, quantification, batch_size, lod_mode);
return status;
}
}
return PMUnKownError;
return false;
}
template <typename Device>
std::vector<float> Net<Device>::Predict(const std::vector<float> &input,
std::vector<float> Net::Predict(const std::vector<float> &input,
const std::vector<int64_t> &dims) {
if (this->device_ == kCPU) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::CPU> *)this->engine_;
if (engine != nullptr) {
auto result = engine->Predict(input, dims);
return result;
}
} else if (this->device_ == kGPU_CL) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> *)this->engine_;
if (engine != nullptr) {
auto result = engine->Predict(input, dims);
return result;
}
}
return std::vector<float>();
}
template <typename Device>
PMStatus Net<Device>::Predict() {
bool Net::Predict() {
if (this->device_ == kCPU) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::CPU> *)this->engine_;
if (engine != nullptr) {
paddle_mobile::PMStatus status = engine->Predict();
return status == paddle_mobile::PMSuccess ? PMSuccess : PMUnKownError;
return status == paddle_mobile::PMSuccess;
}
} else if (this->device_ == kGPU_CL) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> *)this->engine_;
if (engine != nullptr) {
paddle_mobile::PMStatus status = engine->Predict();
return status == paddle_mobile::PMSuccess;
}
}
return false;
}
bool Net::Predict(const Tensor &input) {
if (this->device_ == kCPU) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::CPU> *)this->engine_;
if (engine != nullptr) {
auto input_data = input.data();
auto input_dims = input.dims();
std::vector<int64_t> input_dims_as_vector = input_dims.dims;
paddle_mobile::framework::Tensor input_inner(
input_data,
paddle_mobile::framework::make_ddim(input_dims_as_vector));
paddle_mobile::PMStatus status = engine->Predict(input_inner);
return status == paddle_mobile::PMSuccess;
}
return PMUnKownError;
} else if (this->device_ == kGPU_CL) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> *)this->engine_;
if (engine != nullptr) {
auto input_data = input.data();
auto input_dims = input.dims();
std::vector<int64_t> input_dims_as_vector = input_dims.dims;
paddle_mobile::framework::Tensor input_inner(
input_data,
paddle_mobile::framework::make_ddim(input_dims_as_vector));
paddle_mobile::PMStatus status = engine->Predict(input_inner);
return status == paddle_mobile::PMSuccess;
}
}
return false;
}
template <typename Device>
void Net<Device>::Feed(const std::string &var_name, const Tensor &input) {
void Net::Feed(const std::string &var_name, const Tensor &input) {
if (this->device_ == kCPU) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::CPU> *)this->engine_;
if (engine != nullptr) {
auto input_data = input.data<float>();
auto input_data = input.data();
auto input_dims = input.dims();
std::vector<int64_t> input_dims_as_vector = input_dims.dims;
paddle_mobile::framework::Tensor input_inner(
input_data,
paddle_mobile::framework::make_ddim(input_dims_as_vector));
engine->Feed(var_name, input_inner);
}
} else if (this->device_ == kGPU_CL) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> *)this->engine_;
if (engine != nullptr) {
auto input_data = input.data();
auto input_dims = input.dims();
std::vector<int64_t> input_dims_as_vector = input_dims.dims;
paddle_mobile::framework::Tensor input_inner(
input_data, paddle_mobile::framework::make_ddim(input_dims_as_vector));
input_data,
paddle_mobile::framework::make_ddim(input_dims_as_vector));
engine->Feed(var_name, input_inner);
}
}
}
template <typename Device>
std::shared_ptr<Tensor> Net<Device>::Fetch(const std::string &var_name) {
std::shared_ptr<Tensor> Net::Fetch(const std::string &var_name) {
if (this->device_ == kCPU) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::CPU> *)this->engine_;
if (engine != nullptr) {
@@ -186,30 +268,55 @@ std::shared_ptr<Tensor> Net<Device>::Fetch(const std::string &var_name) {
std::shared_ptr<Tensor> ptr(new Tensor(output_data, ddim));
return ptr;
}
} else if (this->device_ == kGPU_CL) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> *)this->engine_;
if (engine != nullptr) {
auto output_inner = engine->Fetch(var_name);
auto ddim_inner = output_inner->dims();
std::vector<int64_t> ddim_as_vector;
for (int i = 0; i < ddim_inner.size(); i++) {
ddim_as_vector.push_back(ddim_inner[i]);
}
auto ddim = make_ddim(ddim_as_vector);
auto output_data = output_inner->data<float>();
std::shared_ptr<Tensor> ptr(new Tensor(output_data, ddim));
return ptr;
}
}
return nullptr;
}
template <typename Device>
Net<Device>::Net() {
Net::Net(DeviceTypeEnum device) {
if (this->engine_ == nullptr) {
PaddleMobileConfigInternal config;
this->engine_ = new paddle_mobile::PaddleMobile<paddle_mobile::CPU>(config);
this->device_ = device;
if (this->device_ == kCPU) {
this->engine_ =
new paddle_mobile::PaddleMobile<paddle_mobile::CPU>(config);
} else if (this->device_ == kGPU_CL) {
this->engine_ =
new paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL>(config);
}
}
}
template <typename Device>
Net<Device>::~Net() {
Net::~Net() {
if (this->engine_ != nullptr) {
if (this->device_ == kCPU) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::CPU> *)this->engine_;
delete engine;
this->engine_ = nullptr;
} else if (this->device_ == kGPU_CL) {
auto engine =
(paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> *)this->engine_;
delete engine;
this->engine_ = nullptr;
}
}
}
template class Net<CPU>;
template float *Tensor::data<float>() const;
#endif
} // namespace wrap
@@ -28,84 +28,67 @@ namespace wrap {
#ifndef PADDLE_MOBILE_FPGA
// device type
enum DeviceTypeEnum {
kINVALID = -1,
__attribute__((__visibility__("default"))) enum DeviceTypeEnum {
kCPU = 0,
kFPGA = 1,
kGPU_MALI = 2,
kGPU_CL = 3
kGPU_CL = 1
};
template <DeviceTypeEnum T>
struct DeviceType {};
typedef DeviceType<kCPU> CPU;
typedef DeviceType<kFPGA> FPGA;
typedef DeviceType<kGPU_MALI> GPU_MALI;
typedef DeviceType<kGPU_CL> GPU_CL;
// ddim class
class DDim {
public:
int size();
int64_t &operator[](int idx);
int64_t operator[](int idx) const;
__attribute__((__visibility__("default"))) int size();
__attribute__((__visibility__("default"))) int64_t &operator[](int idx);
__attribute__((__visibility__("default"))) int64_t operator[](int idx) const;
std::vector<int64_t> dims;
__attribute__((__visibility__("default"))) std::vector<int64_t> dims;
};
DDim make_ddim(const std::vector<int64_t> &dims);
__attribute__((__visibility__("default"))) DDim make_ddim(
const std::vector<int64_t> &dims);
// tensor class
class Tensor {
public:
Tensor(float *data, DDim ddim);
__attribute__((__visibility__("default"))) Tensor(float *data, DDim ddim);
template <typename T>
float *data() const;
DDim dims() const;
__attribute__((__visibility__("default"))) float *data() const;
__attribute__((__visibility__("default"))) DDim dims() const;
private:
float *data_;
DDim ddim_;
};
// pm status
enum PMStatus {
PMSuccess = 0xFF, /*!< No errors */
PMNotInitialized = 0x01, /*!< Data not initialized. */
PMInvalidValue = 0x02, /*!< Incorrect variable value. */
PMMemAllocFailed = 0x03, /*!< Memory allocation error. */
PMUnKownError = 0x04, /*!< Unknown error. */
PMOutOfAuthority = 0x05, /*!< Try to modified data not your own*/
PMOutOfMem = 0x06, /*!< OOM error*/
PMUnImplError = 0x07, /*!< Unimplement error. */
PMWrongDevice = 0x08 /*!< un-correct device. */
};
// net class
template <typename Device>
class Net {
public:
Net();
~Net();
void SetThreadNum(int thread_num);
PMStatus Load(const std::string &dirname, const bool optimize = false,
__attribute__((__visibility__("default"))) Net(DeviceTypeEnum device);
__attribute__((__visibility__("default"))) ~Net();
__attribute__((__visibility__("default"))) void SetThreadNum(int thread_num);
__attribute__((__visibility__("default"))) void SetCLPath(std::string path);
__attribute__((__visibility__("default"))) bool Load(
const std::string &dirname, const bool optimize = false,
const bool quantification = false, const int batch_size = 1,
const bool lod_mode = false);
PMStatus Load(const std::string &model_path, const std::string &para_path,
__attribute__((__visibility__("default"))) bool Load(
const std::string &model_path, const std::string &para_path,
const bool optimize = false, const bool quantification = false,
const int batch_size = 1, const bool lod_mode = false);
bool LoadCombinedMemory(size_t model_len, const uint8_t *model_buf,
size_t combined_params_len,
__attribute__((__visibility__("default"))) bool LoadCombinedMemory(
size_t model_len, const uint8_t *model_buf, size_t combined_params_len,
uint8_t *combined_params_buf, bool optimize = false,
bool quantification = false, int batch_size = 1,
bool lod_mode = false);
PMStatus Predict(const Tensor &input);
std::vector<float> Predict(const std::vector<float> &input,
const std::vector<int64_t> &dims);
PMStatus Predict();
void Feed(const std::string &var_name, const Tensor &input);
std::shared_ptr<Tensor> Fetch(const std::string &var_name);
bool quantification = false, int batch_size = 1, bool lod_mode = false);
__attribute__((__visibility__("default"))) std::vector<float> Predict(
const std::vector<float> &input, const std::vector<int64_t> &dims);
__attribute__((__visibility__("default"))) bool Predict();
__attribute__((__visibility__("default"))) bool Predict(const Tensor &input);
__attribute__((__visibility__("default"))) void Feed(
const std::string &var_name, const Tensor &input);
__attribute__((__visibility__("default"))) std::shared_ptr<Tensor> Fetch(
const std::string &var_name);
private:
void *engine_ = nullptr;
DeviceTypeEnum device_;
};
#endif
@@ -191,7 +191,8 @@ void PaddleMobile__Framework__protobuf_c_buffer_simple_append(
if (allocator == NULL) allocator = &protobuf_c__allocator;
while (new_alloced < new_len) new_alloced += new_alloced;
new_data = PaddleMobile__Framework__do_alloc(allocator, new_alloced);
new_data =
(uint8_t *)PaddleMobile__Framework__do_alloc(allocator, new_alloced);
if (!new_data) return;
memcpy(new_data, simp->data, simp->len);
if (simp->must_free_data)
@@ -905,7 +906,7 @@ static size_t PaddleMobile__Framework__parse_tag_and_wiretype(
unsigned shift = 4;
unsigned rv;
*wiretype_out = data[0] & 7;
*wiretype_out = (PaddleMobile__Framework__ProtobufCWireType)(data[0] & 7);
if ((data[0] & 0x80) == 0) {
*tag_out = tag;
return 1;
@@ -1013,7 +1014,7 @@ static protobuf_c_boolean PaddleMobile__Framework__merge_messages(
fields[i].type);
uint8_t *new_field;
new_field = PaddleMobile__Framework__do_alloc(
new_field = (uint8_t *)PaddleMobile__Framework__do_alloc(
allocator, (*n_earlier + *n_latter) * el_size);
if (!new_field) return FALSE;
@@ -1102,7 +1103,7 @@ static protobuf_c_boolean PaddleMobile__Framework__merge_messages(
case PROTOBUF_C_TYPE_STRING: {
char *e_str = *(char **)earlier_elem;
char *l_str = *(char **)latter_elem;
const char *d_str = def_val;
const char *d_str = (const char *)def_val;
need_to_merge = e_str != d_str && l_str == d_str;
break;
@@ -1286,7 +1287,7 @@ static protobuf_c_boolean PaddleMobile__Framework__parse_required_member(
unsigned len = scanned_member->len;
const uint8_t *data = scanned_member->data;
PaddleMobile__Framework__ProtobufCWireType wire_type =
scanned_member->wire_type;
(PaddleMobile__Framework__ProtobufCWireType)scanned_member->wire_type;
switch (scanned_member->field->type) {
case PROTOBUF_C_TYPE_ENUM:
@@ -1330,36 +1331,40 @@ static protobuf_c_boolean PaddleMobile__Framework__parse_required_member(
PaddleMobile__Framework__parse_boolean(len, data);
return TRUE;
case PROTOBUF_C_TYPE_STRING: {
char **pstr = member;
char **pstr = (char **)member;
unsigned pref_len = scanned_member->length_prefix_len;
if (wire_type != PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED) return FALSE;
if (maybe_clear && *pstr != NULL) {
const char *def = scanned_member->field->default_value;
const char *def = (const char *)scanned_member->field->default_value;
if (*pstr != NULL && *pstr != def)
PaddleMobile__Framework__do_free(allocator, *pstr);
}
*pstr = PaddleMobile__Framework__do_alloc(allocator, len - pref_len + 1);
*pstr = (char *)PaddleMobile__Framework__do_alloc(allocator,
len - pref_len + 1);
if (*pstr == NULL) return FALSE;
memcpy(*pstr, data + pref_len, len - pref_len);
(*pstr)[len - pref_len] = 0;
return TRUE;
}
case PROTOBUF_C_TYPE_BYTES: {
PaddleMobile__Framework__ProtobufCBinaryData *bd = member;
PaddleMobile__Framework__ProtobufCBinaryData *bd =
(PaddleMobile__Framework__ProtobufCBinaryData *)member;
const PaddleMobile__Framework__ProtobufCBinaryData *def_bd;
unsigned pref_len = scanned_member->length_prefix_len;
if (wire_type != PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED) return FALSE;
def_bd = scanned_member->field->default_value;
def_bd = (const PaddleMobile__Framework__ProtobufCBinaryData *)
scanned_member->field->default_value;
if (maybe_clear && bd->data != NULL &&
(def_bd == NULL || bd->data != def_bd->data)) {
PaddleMobile__Framework__do_free(allocator, bd->data);
}
if (len - pref_len > 0) {
bd->data = PaddleMobile__Framework__do_alloc(allocator, len - pref_len);
bd->data = (uint8_t *)PaddleMobile__Framework__do_alloc(allocator,
len - pref_len);
if (bd->data == NULL) return FALSE;
memcpy(bd->data, data + pref_len, len - pref_len);
} else {
@@ -1369,7 +1374,8 @@ static protobuf_c_boolean PaddleMobile__Framework__parse_required_member(
return TRUE;
}
case PROTOBUF_C_TYPE_MESSAGE: {
PaddleMobile__Framework__ProtobufCMessage **pmessage = member;
PaddleMobile__Framework__ProtobufCMessage **pmessage =
(PaddleMobile__Framework__ProtobufCMessage **)member;
PaddleMobile__Framework__ProtobufCMessage *subm;
const PaddleMobile__Framework__ProtobufCMessage *def_mess;
protobuf_c_boolean merge_successful = TRUE;
@@ -1377,10 +1383,12 @@ static protobuf_c_boolean PaddleMobile__Framework__parse_required_member(
if (wire_type != PROTOBUF_C_WIRE_TYPE_LENGTH_PREFIXED) return FALSE;
def_mess = scanned_member->field->default_value;
def_mess = (const PaddleMobile__Framework__ProtobufCMessage *)
scanned_member->field->default_value;
subm = PaddleMobile__Framework__protobuf_c_message_unpack(
scanned_member->field->descriptor, allocator, len - pref_len,
data + pref_len);
(const PaddleMobile__Framework__ProtobufCMessageDescriptor *)
scanned_member->field->descriptor,
allocator, len - pref_len, data + pref_len);
if (maybe_clear && *pmessage != NULL && *pmessage != def_mess) {
if (subm != NULL)
@@ -1418,15 +1426,17 @@ static protobuf_c_boolean PaddleMobile__Framework__parse_oneof_member(
switch (old_field->type) {
case PROTOBUF_C_TYPE_STRING: {
char **pstr = member;
const char *def = old_field->default_value;
char **pstr = (char **)member;
const char *def = (const char *)old_field->default_value;
if (*pstr != NULL && *pstr != def)
PaddleMobile__Framework__do_free(allocator, *pstr);
break;
}
case PROTOBUF_C_TYPE_BYTES: {
PaddleMobile__Framework__ProtobufCBinaryData *bd = member;
PaddleMobile__Framework__ProtobufCBinaryData *bd =
(PaddleMobile__Framework__ProtobufCBinaryData *)member;
const PaddleMobile__Framework__ProtobufCBinaryData *def_bd =
(const PaddleMobile__Framework__ProtobufCBinaryData *)
old_field->default_value;
if (bd->data != NULL && (def_bd == NULL || bd->data != def_bd->data)) {
PaddleMobile__Framework__do_free(allocator, bd->data);
@@ -1434,8 +1444,10 @@ static protobuf_c_boolean PaddleMobile__Framework__parse_oneof_member(
break;
}
case PROTOBUF_C_TYPE_MESSAGE: {
PaddleMobile__Framework__ProtobufCMessage **pmessage = member;
PaddleMobile__Framework__ProtobufCMessage **pmessage =
(PaddleMobile__Framework__ProtobufCMessage **)member;
const PaddleMobile__Framework__ProtobufCMessage *def_mess =
(const PaddleMobile__Framework__ProtobufCMessage *)
old_field->default_value;
if (*pmessage != NULL && *pmessage != def_mess)
PaddleMobile__Framework__protobuf_c_message_free_unpacked(*pmessage,
@@ -1651,10 +1663,11 @@ static protobuf_c_boolean PaddleMobile__Framework__parse_member(
PaddleMobile__Framework__ProtobufCMessageUnknownField *ufield =
message->unknown_fields + (message->n_unknown_fields++);
ufield->tag = scanned_member->tag;
ufield->wire_type = scanned_member->wire_type;
ufield->wire_type =
(PaddleMobile__Framework__ProtobufCWireType)scanned_member->wire_type;
ufield->len = scanned_member->len;
ufield->data =
PaddleMobile__Framework__do_alloc(allocator, scanned_member->len);
ufield->data = (uint8_t *)PaddleMobile__Framework__do_alloc(
allocator, scanned_member->len);
if (ufield->data == NULL) return FALSE;
memcpy(ufield->data, scanned_member->data, ufield->len);
return TRUE;
@@ -1810,13 +1823,14 @@ PaddleMobile__Framework__protobuf_c_message_unpack(
if (allocator == NULL) allocator = &protobuf_c__allocator;
rv = PaddleMobile__Framework__do_alloc(allocator, desc->sizeof_message);
rv = (PaddleMobile__Framework__ProtobufCMessage *)
PaddleMobile__Framework__do_alloc(allocator, desc->sizeof_message);
if (!rv) return (NULL);
scanned_member_slabs[0] = first_member_slab;
required_fields_bitmap_len = (desc->n_fields + 7) / 8;
if (required_fields_bitmap_len > sizeof(required_fields_bitmap_stack)) {
required_fields_bitmap = PaddleMobile__Framework__do_alloc(
required_fields_bitmap = (unsigned char *)PaddleMobile__Framework__do_alloc(
allocator, required_fields_bitmap_len);
if (!required_fields_bitmap) {
PaddleMobile__Framework__do_free(allocator, rv);
@@ -1944,7 +1958,7 @@ PaddleMobile__Framework__protobuf_c_message_unpack(
size = sizeof(ScannedMember)
<< (which_slab + FIRST_SCANNED_MEMBER_SLAB_SIZE_LOG2);
scanned_member_slabs[which_slab] =
PaddleMobile__Framework__do_alloc(allocator, size);
(ScannedMember *)PaddleMobile__Framework__do_alloc(allocator, size);
if (scanned_member_slabs[which_slab] == NULL)
goto error_cleanup_during_scan;
}
@@ -2012,10 +2026,13 @@ PaddleMobile__Framework__protobuf_c_message_unpack(
/* allocate space for unknown fields */
if (n_unknown) {
rv->unknown_fields = PaddleMobile__Framework__do_alloc(
rv->unknown_fields =
(PaddleMobile__Framework__ProtobufCMessageUnknownField *)
PaddleMobile__Framework__do_alloc(
allocator,
n_unknown *
sizeof(PaddleMobile__Framework__ProtobufCMessageUnknownField));
sizeof(
PaddleMobile__Framework__ProtobufCMessageUnknownField));
if (rv->unknown_fields == NULL) goto error_cleanup;
}
@@ -2118,7 +2135,9 @@ void PaddleMobile__Framework__protobuf_c_message_free_unpacked(
.data;
const PaddleMobile__Framework__ProtobufCBinaryData *default_bd;
default_bd = desc->fields[f].default_value;
default_bd =
(const PaddleMobile__Framework__ProtobufCBinaryData *)desc->fields[f]
.default_value;
if (data != NULL && (default_bd == NULL || default_bd->data != data)) {
PaddleMobile__Framework__do_free(allocator, data);
}
@@ -2166,7 +2185,8 @@ protobuf_c_boolean PaddleMobile__Framework__protobuf_c_message_check(
void *field = STRUCT_MEMBER_P(message, f->offset);
if (label == PROTOBUF_C_LABEL_REPEATED) {
size_t *quantity = STRUCT_MEMBER_P(message, f->quantifier_offset);
size_t *quantity =
(size_t *)STRUCT_MEMBER_P(message, f->quantifier_offset);
if (*quantity > 0 && *(void **)field == NULL) {
return FALSE;
@@ -2208,9 +2228,10 @@ protobuf_c_boolean PaddleMobile__Framework__protobuf_c_message_check(
char *string = *(char **)field;
if (label == PROTOBUF_C_LABEL_REQUIRED && string == NULL) return FALSE;
} else if (type == PROTOBUF_C_TYPE_BYTES) {
protobuf_c_boolean *has =
STRUCT_MEMBER_P(message, f->quantifier_offset);
PaddleMobile__Framework__ProtobufCBinaryData *bd = field;
protobuf_c_boolean *has = (protobuf_c_boolean *)STRUCT_MEMBER_P(
message, f->quantifier_offset);
PaddleMobile__Framework__ProtobufCBinaryData *bd =
(PaddleMobile__Framework__ProtobufCBinaryData *)field;
if (label == PROTOBUF_C_LABEL_REQUIRED || *has == TRUE) {
if (bd->len > 0 && bd->data == NULL) return FALSE;
}
@@ -12,27 +12,41 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <fstream>
#include <iostream>
#include <sstream>
#include "../test_helper.h"
#include "../test_include.h"
#include <vector>
#include "io/paddle_mobile_wrap.h"
int main(int argc, char *argv[]) {
#ifndef PADDLE_MOBILE_FPGA
paddle_mobile::wrap::Net<paddle_mobile::wrap::CPU> *net =
new paddle_mobile::wrap::Net<paddle_mobile::wrap::CPU>();
paddle_mobile::wrap::Net *net =
new paddle_mobile::wrap::Net(paddle_mobile::wrap::kGPU_CL);
net->SetCLPath("/data/local/tmp/bin");
net->Load("./checked_model/model", "./checked_model/params", false, false, 1,
true);
int size = 1 * 3 * 64 * 64;
int size = 1 * 3 * 416 * 416;
std::vector<int64_t> shape{1, 3, 416, 416};
float *data = new float[size];
for (int i = 0; i < size; i++) {
data[i] = 0.0;
}
std::vector<int64_t> shape{1, 3, 64, 64};
std::ifstream infile;
infile.open("input.txt");
for (int i = 0; i < size; i++) {
infile >> data[i];
}
infile.close();
// input as vector
// std::vector<float> data_as_vector(data, data + size);
// auto output = net->Predict(data_as_vector, shape);
// for (auto item : output) {
// std::cout << item << std::endl;
// }
// input as float pointer
paddle_mobile::wrap::Tensor input(data,
paddle_mobile::wrap::make_ddim(shape));
net->Feed("data", input);
net->Feed("image", input);
net->Predict();
auto output = net->Fetch("save_infer_model/scale_0");
int output_size = 1;
@@ -44,7 +58,8 @@ int main(int argc, char *argv[]) {
std::cout << std::endl;
std::cout << "output data: ";
for (int i = 0; i < output_size; i++) {
std::cout << output->data<float>()[i] << std::endl;
std::cout << output->data()[i] << std::endl;
}
#endif
return 0;
}
@@ -2,3 +2,4 @@
!run.py
!.gitignore
!/model-encrypt-tool
!test_wrap.py
# -*- coding: utf-8 -*-
import os
import sys
import math
import subprocess
import numpy as np
import paddle.fluid as fluid
model_path = "yolov2"
checked_model_path = "checked_model"
feed_path = "feeds"
output_path = "outputs"
diff_threshold = 0.05
is_lod = False
mobile_model_path = ""
fast_check = False
is_sample_step = False
sample_step = 1
sample_num = 20
need_encrypt = False
checked_encrypt_model_path = "checked_encrypt_model"
output_var_filter = []
output_key_filter = {}
check_shape = False
np.set_printoptions(linewidth=150)
mobile_exec_root = "/data/local/tmp/bin"
mobile_src_root = os.path.abspath("../../../")
if mobile_src_root.endswith("/"):
mobile_src_root = mobile_src_root[:-1]
dot = "•"
black = lambda x: "\033[30m" + str(x) + "\033[0m"
red = lambda x: "\033[31m" + str(x) + "\033[0m"
green = lambda x: "\033[32m" + str(x) + "\033[0m"
yellow = lambda x: "\033[33m" + str(x) + "\033[0m"
reset = lambda x: "\033[0m" + str(x)
def pp_tab(x, level=0):
header = ""
for i in range(0, level):
header += "\t"
print(header + str(x))
def pp_black(x, level=0):
pp_tab(black(x) + reset(""), level)
def pp_red(x, level=0):
pp_tab(red(x) + reset(""), level)
def pp_green(x, level=0):
pp_tab(green(x) + reset(""), level)
def pp_yellow(x, level=0):
pp_tab(yellow(x) + reset(""), level)
def sh(command):
pipe = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
return pipe.stdout.read().decode("utf-8")
def push(src, dest=""):
sh("adb push {} {}".format(src, mobile_exec_root + "/" + dest))
pp_yellow(dot + " start inspecting fluid model")
exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
# load the inference model
def load_model(model_path):
prog, feeds, fetches = fluid.io.load_inference_model(dirname=model_path, executor=exe, model_filename="model", params_filename="params")
return (prog, feeds, fetches)
prog, feeds, fetches = load_model(model_path)
# force all tensor shapes to be consistent between model and params, then re-save the model
def resave_model(feed_kv):
if len(mobile_model_path) > 0:
pp_green("has set mobile_model_path, stop checking model & params", 1)
sh("cp {}/* {}".format(mobile_model_path, checked_model_path))
return
ops = prog.current_block().ops
vars = prog.current_block().vars
# force all vars to be persistable
p_names = []
for name in vars:
name = str(name)
v = fluid.framework._get_var(name, prog)
if not v.persistable:
v.persistable = True
p_names.append(name)
outputs = run_model(feed_kv=feed_kv)
has_found_wrong_shape = False
# fix the shape of each var
for name in vars:
name = str(name)
v = vars[name]
if v.persistable:
v1 = fluid.global_scope().find_var(name)
try:
t1 = v1.get_tensor()
shape = t1.shape()
except:
continue
if v.desc.shape() != shape:
has_found_wrong_shape = True
v.desc.set_shape(shape)
# restore the persistable attribute of each var
for name in p_names:
v = fluid.framework._get_var(name, prog)
v.persistable = False
fluid.io.save_inference_model(dirname=checked_model_path, feeded_var_names=feeds, target_vars=fetches, executor=exe, main_program=prog, model_filename="model", params_filename="params")
if has_found_wrong_shape:
pp_red("has found wrong shape", 1)
else:
pp_green("has not found wrong shape", 1)
pp_green("new model is saved into directory 【{}】".format(checked_model_path), 1)
# encrypt model and params separately, using the same key for both
def encrypt_model():
if not need_encrypt:
return
pp_yellow(dot + dot + " encrypting model")
if not os.path.exists(checked_encrypt_model_path):
os.mkdir(checked_encrypt_model_path)
res = sh("model-encrypt-tool/enc_key_gen -l 20 -c 232")
lines = res.split("\n")
for line in lines:
if line.startswith("key:"):
line = line.replace('key:','')
sh("model-encrypt-tool/enc_model_gen -k '{}' -c 2 -i checked_model/model -o "
"checked_model/model.ml".format(line))
sh("model-encrypt-tool/enc_model_gen -k '{}' -c 2 -i checked_model/params -o checked_model/params.ml".format(line))
pp_green("model has been encrypted, key is : {}".format(line), 1)
sh("mv {} {}".format(checked_model_path + "/*.ml", checked_encrypt_model_path))
return
pp_red("model encrypt error", 1)
# generate feed key-value pairs
def gen_feed_kv():
feed_kv = {}
for feed_name in feeds:
feed_shape = get_feed_var_shape(feed_name)
data = np.random.random(feed_shape).astype("float32")
feed_kv[feed_name] = data
return feed_kv
# save feed key-value pairs
def save_feed_kv(feed_kv):
for feed_name in feed_kv:
feed_data = feed_kv[feed_name]
feed_list = feed_data.flatten().tolist()
if not os.path.exists(feed_path):
os.mkdir(feed_path)
file_name = feed_name.replace("/", "_")
out_file = open(feed_path + "/" + file_name, "w")
for feed_item in feed_list:
out_file.write("{}\n".format(feed_item))
out_file.close()
last_feed_var_name = None
last_feed_file_name = None
last_feed_var_lod = None
# load feed key-value pairs
def load_feed_kv():
if not os.path.exists(feed_path):
return None
global last_feed_var_name
global last_feed_file_name
global last_feed_var_lod
feed_kv = {}
pp_yellow(dot + dot + " checking feed info")
pp_green("feed data is saved into directory 【{}】".format(feed_path), 1)
for feed_name in feeds:
feed_shape = get_feed_var_shape(feed_name)
pp_tab("feed var name : {}; feed var shape : {}".format(feed_name, feed_shape), 1)
file_name = feed_name.replace("/", "_")
last_feed_var_name = feed_name
last_feed_file_name = file_name
feed_file_path = feed_path + "/" + file_name
if not os.path.exists(feed_file_path):
return None
data = np.loadtxt(feed_file_path)
expected_len = 1
for dim in feed_shape:
expected_len *= dim
if len(np.atleast_1d(data)) != expected_len:
return None
data = data.reshape(feed_shape).astype("float32")
if is_lod:
data_shape = [1]
for dim in feed_shape:
data_shape.append(dim)
data = data.reshape(data_shape).astype("float32")
tensor = fluid.LoDTensor()
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
data = data.reshape(feed_shape)
tensor.set(data, fluid.CPUPlace())
tensor.set_lod([lod])
last_feed_var_lod = lod
feed_kv[feed_name] = tensor
else:
feed_kv[feed_name] = data
return feed_kv
# run the model
def run_model(feed_kv=None):
if feed_kv is None:
feed_kv = gen_feed_kv()
outputs = exe.run(prog, feed=feed_kv, fetch_list=fetches, return_numpy=False)
results = []
for output in outputs:
results.append(np.array(output))
return results
# get the shape of a var
def get_var_shape(var_name):
vars = prog.current_block().vars
shape = vars[var_name].desc.shape()
for i in range(len(shape)):
dim = shape[i]
if dim == -1:
shape[i] = 1
return shape
# get the shape of a feed var
def get_feed_var_shape(var_name):
# to hard-code the input shape, uncomment the following line
# return [1, 3, 224, 224]
return get_var_shape(var_name)
persistable_cache = []
# make all vars persistable
def force_all_vars_to_persistable():
global persistable_cache
for var_name in vars.keys():
var_name = str(var_name)
v = fluid.framework._get_var(var_name, prog)
persistable = v.persistable
if not persistable:
persistable_cache.append(var_name)
v.persistable = True
# restore the persistable attributes
def restore_all_vars_persistable():
global persistable_cache
for var_name in vars.keys():
var_name = str(var_name)
v = fluid.framework._get_var(var_name, prog)
persistable = v.persistable
if var_name in persistable_cache:
v.persistable = False
persistable_cache = []
# get the data of a var
def get_var_data(var_name, feed_kv=None):
output = np.array(fluid.global_scope().var(var_name).get_tensor())
return output
output_var_cache = {}
def tensor_sample(tensor):
if is_sample_step:
step = sample_step
else:
step = math.floor(len(tensor) / sample_num)
step = max(step, 1)
step = int(step)
sample = []
for i in range(0, len(tensor), step):
sample.append(tensor[i])
return sample
op_cache = {}
# save the output data of every op
def save_all_op_output(feed_kv=None):
force_all_vars_to_persistable()
outputs = run_model(feed_kv=feed_kv)
if not os.path.exists(output_path):
os.mkdir(output_path)
ops = prog.current_block().ops
fetch_names = []
for fetch in fetches:
fetch_names.append(fetch.name)
feed_names = feeds
for fetch_name in fetch_names:
output_var_filter.append(fetch_name)
for i in range(len(ops)):
op = ops[i]
var_name = None
var_name_index = -1
for index in range(len(op.output_names)):
if op.output_names[index] in ["Y", "Out", "Output"]:
var_name_index = index
break
if var_name_index != -1:
var_name = op.output_arg_names[var_name_index]
else:
for name in op.output_arg_names:
var_name = name
if "tmp" in name:
break
if len(output_var_filter) > 0:
if var_name not in output_var_filter:
continue
# real_var_name = None
# if op.type == "fetch":
# for name in op.input_arg_names:
# real_var_name = name
# if "tmp" in name:
# break
# else:
# real_var_name = var_name
if fast_check:
if var_name not in fetch_names and var_name not in feed_names:
continue
try:
data = get_var_data(var_name, feed_kv=feed_kv).flatten().tolist()
sample = tensor_sample(data)
output_var_cache[var_name] = (sample)
op_cache[i] = (var_name, op)
file_name = var_name.replace("/", "_")
out_file = open(output_path + "/" + file_name, "w")
if var_name in feed_names:
for item in data:
out_file.write("{}\n".format(item))
else:
for item in sample:
out_file.write("{}\n".format(item))
out_file.close()
except:
pass
for i in range(len(ops)):
op = ops[i]
if op.type not in output_key_filter:
continue
var_name = None
var_name_index = -1
for index in range(len(op.output_names)):
if op.output_names[index] in output_key_filter[op.type]:
var_name_index = index
break
if var_name_index != -1:
var_name = op.output_arg_names[var_name_index]
else:
continue
if len(output_var_filter) > 0:
if var_name not in output_var_filter:
continue
# real_var_name = None
# if op.type == "fetch":
# for name in op.input_arg_names:
# real_var_name = name
# if "tmp" in name:
# break
# else:
# real_var_name = var_name
if fast_check:
if var_name not in fetch_names and var_name not in feed_names:
continue
try:
data = get_var_data(var_name, feed_kv=feed_kv).flatten().tolist()
sample = tensor_sample(data)
output_var_cache[var_name] = (sample)
op_cache[i] = (var_name, op)
file_name = var_name.replace("/", "_")
out_file = open(output_path + "/" + file_name, "w")
if var_name in feed_names:
for item in data:
out_file.write("{}\n".format(item))
else:
for item in sample:
out_file.write("{}\n".format(item))
out_file.close()
except:
pass
pp_green("all the op outputs are saved into directory 【{}】".format(output_path), 1)
restore_all_vars_persistable()
ops = prog.current_block().ops
vars = prog.current_block().vars
pp_yellow(dot + dot + " checking op list")
op_types = set()
for op in ops:
op_types.add(op.type)
pp_tab("op types : {}".format(op_types), 1)
def check_mobile_results(args, fuse, mem_opt):
args = "{} {} {}".format("1" if fuse else "0", "1" if mem_opt else "0", args)
res = sh("adb shell \"cd {} && export LD_LIBRARY_PATH=. && ./test-net {}\"".format(mobile_exec_root, args))
lines = res.split("\n")
for line in lines:
print(line)
for line in lines:
if line.startswith("auto-test-debug"):
print(line)
pp_yellow(dot + dot + " checking paddle mobile results for {} -- {} ".format(green("【fusion】" if fuse else "【non fusion】"), green("【memory-optimization】" if mem_opt else "【non-memory-optimization】")))
mobile_var_cache = {}
for line in lines:
parts = line.split(" ")
if len(parts) < 2:
continue
if "auto-test" != parts[0]:
continue
if parts[1] == "load-time-cost":
pp_green("load time cost : {}".format(parts[2]), 1)
elif parts[1] == "predict-time-cost":
pp_green("predict time cost : {}".format(parts[2]), 1)
elif parts[1] == "preprocess-time-cost":
pp_green("preprocess time cost : {}".format(parts[2]), 1)
elif parts[1] == "var":
var_name = parts[2]
values = list(map(lambda x: float(x), parts[3:]))
mobile_var_cache[var_name] = values
error_index = None
error_values1 = None
error_values2 = None
checked_names = []
fetch_names = []
for fetch in fetches:
fetch_names.append(fetch.name)
for index in op_cache:
op_output_var_name, op = op_cache[index]
if mem_opt:
found_in_fetch = False
for fetch in fetches:
if op_output_var_name == fetch.name:
found_in_fetch = True
break
if not found_in_fetch:
continue
if not op_output_var_name in output_var_cache:
continue
if not op_output_var_name in mobile_var_cache:
continue
values1 = output_var_cache[op_output_var_name]
values2 = mobile_var_cache[op_output_var_name]
shape = get_var_shape(op_output_var_name) if check_shape else []
if len(values1) + len(shape) != len(values2):
error_index = index
for i in range(len(shape)):
v1 = shape[i]
v2 = values2[i]
if v1 != v2:
error_index = index
break
if error_index == None:
for i in range(len(values1)):
v1 = values1[i]
v2 = values2[len(shape) + i]
if abs(v1 - v2) > diff_threshold:
error_index = index
break
checked_names.append(op_output_var_name)
if error_index != None:
error_values1 = values1
error_values2 = values2
break
if error_index == None:
for name in fetch_names:
if name not in checked_names:
error_index = -1
break
if error_index == None:
pp_green("outputs are all correct", 1)
elif error_index == -1:
pp_red("outputs are missing")
else:
error_values1 = np.array(error_values1)
error_values2 = np.array(error_values2)
# pp_red("mobile op is not correct, error occurs at {}th op, op's type is {}")
pp_red("corresponding fluid op is {}th op, op's type is {}, wrong var name is {}".format(
error_index,op_cache[error_index][1].type,op_output_var_name), 1)
pp_red("fluid results are : ", 1)
pp_red(str(error_values1).replace("\n", "\n" + "\t" * 1), 1)
pp_yellow("paddle mobile results are : ", 1)
pp_red(str(error_values2).replace("\n", "\n" + "\t" * 1), 1)
# print(output_var_cache)
# print(mobile_var_cache)
def main():
# load the feed key-value pairs
feed_kv = load_feed_kv()
if feed_kv == None:
feed_kv = gen_feed_kv()
save_feed_kv(feed_kv)
feed_kv = load_feed_kv()
# run inference
pp_yellow(dot + dot + " checking inference")
outputs = run_model(feed_kv=feed_kv)
pp_tab("fluid output : {}".format(outputs), 1)
# re-save the model
pp_yellow(dot + dot + " checking model correctness")
resave_model(feed_kv=feed_kv)
# output the encrypted model
encrypt_model()
# dump the output of every op
pp_yellow(dot + dot + " checking output result of every op")
save_all_op_output(feed_kv=feed_kv)
pp_yellow(dot + dot + " checking fetch info")
for fetch in fetches:
fetch_name = fetch.name
fetch_shape = get_var_shape(fetch_name)
pp_tab("fetch var name : {}; fetch var shape : {}".format(fetch_name, fetch_shape), 1)
# dump all op and var info
info_file = open("info.txt", "w")
for i in range(len(ops)):
op = ops[i]
info_file.write("{}th op: type - {}\n".format(i, op.type))
info_file.write("inputs:\n")
for var_name in op.input_arg_names:
try:
shape = get_var_shape(var_name)
shape_str = ", ".join(list(map(lambda x: str(x), shape)))
info_file.write("var {} : {}\n".format(var_name, shape_str))
except:
pass
info_file.write("outputs:\n")
for var_name in op.output_arg_names:
try:
shape = get_var_shape(var_name)
shape_str = ", ".join(list(map(lambda x: str(x), shape)))
info_file.write("var {} : {}\n".format(var_name, shape_str))
except:
pass
info_file.close()
# start checking paddle mobile correctness
print("")
print("==================================================")
print("")
pp_yellow(dot + " start inspecting paddle mobile correctness & performance")
push(checked_model_path)
push(feed_path + "/" + last_feed_file_name, "input.txt")
push(mobile_src_root + "/build/release/arm-v7a/build/libpaddle-mobile.so")
push(mobile_src_root + "/build/release/arm-v7a/build/cl_kernel")
push(mobile_src_root + "/test/build/test-wrap")
res = sh("adb shell 'cd {} && export LD_LIBRARY_PATH=. && ./test-wrap'".format(mobile_exec_root))
lines = res.split("\n")
for line in lines:
print(line)
if __name__ == "__main__":
main()