diff --git a/src/common/util.cpp b/src/common/util.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1ed19b6cd8e360e127b2c96cc5379dc7a222bb17
--- /dev/null
+++ b/src/common/util.cpp
@@ -0,0 +1,35 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "common/util.h"
+
+namespace paddle_mobile {
+
+char *ReadFileToBuff(std::string filename) {
+  FILE *file = fopen(filename.c_str(), "rb");
+  PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
+                        filename.c_str());
+  fseek(file, 0, SEEK_END);
+  int64_t size = ftell(file);
+  PADDLE_MOBILE_ENFORCE(size > 0, "file should not be empty");
+  rewind(file);
+  char *data = new char[size];
+  size_t bytes_read = fread(data, 1, size, file);
+  PADDLE_MOBILE_ENFORCE(bytes_read == size,
+                        "read binary file bytes do not match with fseek");
+  fclose(file);
+  return data;
+}
+
+}  // namespace paddle_mobile
diff --git a/src/common/util.h b/src/common/util.h
new file mode 100644
index 0000000000000000000000000000000000000000..3525edfe8475d3a8a68250f7f4c57a492ad56b14
--- /dev/null
+++ b/src/common/util.h
@@ -0,0 +1,24 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "common/enforce.h"
+#include <string>
+
+namespace paddle_mobile {
+
+char *ReadFileToBuff(std::string filename);
+
+}  // namespace paddle_mobile
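`ReadFileToBuff` returns a raw `new[]` buffer, so every caller owns the allocation and must release it with `delete[]` (see `InitMemory` below). Here is a minimal standalone sketch of the same read-whole-file idiom and its caller-side contract; `ReadAll` and `params.bin` are hypothetical stand-ins, with plain error returns in place of `PADDLE_MOBILE_ENFORCE`:

```cpp
// Sketch of the read-whole-file idiom behind ReadFileToBuff and the
// delete[] contract it imposes on callers.
#include <cstdint>
#include <cstdio>
#include <string>

char *ReadAll(const std::string &filename, int64_t *out_size) {
  FILE *file = std::fopen(filename.c_str(), "rb");
  if (file == nullptr) return nullptr;
  std::fseek(file, 0, SEEK_END);
  int64_t size = std::ftell(file);  // file size = offset at end of file
  std::rewind(file);
  char *data = new char[size];
  size_t bytes_read = std::fread(data, 1, size, file);
  std::fclose(file);
  if (static_cast<int64_t>(bytes_read) != size) {
    delete[] data;  // partial read: release and report failure
    return nullptr;
  }
  *out_size = size;
  return data;
}

int main() {
  int64_t size = 0;
  char *buf = ReadAll("params.bin", &size);  // hypothetical file name
  if (buf != nullptr) {
    // ... parse the buffer ...
    delete[] buf;  // the caller owns the new[] allocation
  }
  return 0;
}
```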
diff --git a/src/framework/program/program.h b/src/framework/program/program.h
index 192328a567e6d3bfad7a8a3b35e3bc64131a2cd2..696cf75b91ff88837cffd3304f5fe3cd491e77eb 100644
--- a/src/framework/program/program.h
+++ b/src/framework/program/program.h
@@ -33,8 +33,6 @@ class Program {
   bool quantification = false;
   size_t combined_params_len;
   const uint8_t *combined_params_buf;
-
- private:
 };
 
 }  // namespace framework
diff --git a/src/io/executor.cpp b/src/io/executor.cpp
index 72b9112e623b5bf1ceada5053d97fc87a52de0c0..19d273af6dce6d9233e889aff7ac1b354b033197 100644
--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -32,34 +32,15 @@ namespace paddle_mobile {
 
 using framework::Variable;
 
-char *Get_binary_data(std::string filename) {
-  FILE *file = fopen(filename.c_str(), "rb");
-  PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
-                        filename.c_str());
-  fseek(file, 0, SEEK_END);
-  int64_t size = ftell(file);
-  PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
-  rewind(file);
-  char *data = new char[size];
-  size_t bytes_read = fread(data, 1, size, file);
-  PADDLE_MOBILE_ENFORCE(bytes_read == size,
-                        "read binary file bytes do not match with fseek");
-  fclose(file);
-  return data;
-}
-
 template <typename Dtype, Precision P>
 Executor<Dtype, P>::Executor(const framework::Program<Dtype> p,
                              const bool use_optimize, const bool loddable)
     : program_(p), use_optimize_(use_optimize), loddable_(loddable) {
-  if (use_optimize_) {
-    to_predict_program_ = program_.optimizeProgram;
-  } else {
-    to_predict_program_ = program_.originProgram;
-  }
   Variable *variable_ptr = program_.scope->Var("batch_size");
   variable_ptr->SetValue<int>(1);
+  to_predict_program_ =
+      use_optimize_ ? program_.optimizeProgram : program_.originProgram;
   PADDLE_MOBILE_ENFORCE(to_predict_program_ != nullptr,
                         "to_predict_program_ == NULL!");
   const std::vector<std::shared_ptr<framework::BlockDesc>> &blocks =
@@ -75,8 +56,8 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p,
       auto op_base = framework::OpRegistry<Dtype>::CreateOp(
           op->Type(), op->GetInputs(), op->GetOutputs(), op->GetAttrMap(),
           program_.scope);
-      // use pre_infershape to pre resize , but if u use an lod mode tensor u
-      // need to resize in runtime
+      // Infer shapes here to pre-size output tensors; a LoD tensor must
+      // still be resized at runtime.
       if (!loddable_) {
         op_base->InferShape();
       }
@@ -96,75 +77,74 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p,
   }
 }
 
-// should use istream to keep offset for data
 template <typename Dtype>
-void LoadMemInternal(const void *data, framework::LoDTensor *tensor) {
-  const char *data_buf = static_cast<const char *>(data);
+void LoadMemInternal(void **data, framework::LoDTensor *tensor) {
+  char **data_buf = reinterpret_cast<char **>(data);
   int64_t size = tensor->numel();
   Dtype *tensor_data = tensor->mutable_data<Dtype>();
-  // stored as low precision, but compute with float
-  // TODO(hjchen2) must consider signed and unsigned
   if (0) {
+    // TODO: should be moved into operator init function
    float min_value;
    float max_value;
-    memcpy(&min_value, data_buf, sizeof(float));
-    memcpy(&max_value, data_buf + sizeof(float), sizeof(float));
-    data_buf += 2 * sizeof(float);
+    memcpy(&min_value, *data_buf, sizeof(float));
+    memcpy(&max_value, *data_buf + sizeof(float), sizeof(float));
+    *data_buf += 2 * sizeof(float);
     const float factor = (max_value - min_value) / 255.0;
-    const uint8_t *uint8_data = reinterpret_cast<const uint8_t *>(data_buf);
+    const uint8_t *uint8_data = reinterpret_cast<uint8_t *>(*data_buf);
     for (int k = 0; k < size; ++k) {
       tensor_data[k] = uint8_data[k] * factor + min_value;
     }
-    data_buf += size * sizeof(uint8_t);
+    *data_buf += size * sizeof(uint8_t);
   } else {
-    memcpy(tensor_data, data_buf, size * sizeof(Dtype));
-    data_buf += size * sizeof(Dtype);
+    memcpy(tensor_data, *data_buf, size * sizeof(Dtype));
+    *data_buf += size * sizeof(Dtype);
   }
 }
 
 template <typename Dtype, Precision P>
-void Executor<Dtype, P>::LoadMemory(const void *data,
-                                    const framework::VarDesc var_desc,
-                                    framework::LoDTensor *tensor) {
-  const char *data_buf = static_cast<const char *>(data);
+void Executor<Dtype, P>::LoadMemory(
+    void **data,
+    const std::shared_ptr<framework::VarDesc> var_desc,
+    framework::LoDTensor *tensor) {
+  char **data_buf = reinterpret_cast<char **>(data);
   // version
-  uint32_t version = *(reinterpret_cast<const uint32_t *>(data_buf));
-  data_buf += sizeof(uint32_t);
+  uint32_t version = *(reinterpret_cast<uint32_t *>(*data_buf));
+  *data_buf += sizeof(uint32_t);
   // lod information
-  uint64_t lod_level = *(reinterpret_cast<const uint64_t *>(data_buf));
-  data_buf += sizeof(uint64_t);
+  uint64_t lod_level = *(reinterpret_cast<uint64_t *>(*data_buf));
+  *data_buf += sizeof(uint64_t);
   auto *lod = tensor->mutable_lod();
   lod->resize(lod_level);
   for (uint64_t i = 0; i < lod_level; ++i) {
-    uint64_t size = *(reinterpret_cast<const uint64_t *>(data_buf));
-    data_buf += sizeof(uint64_t);
+    uint64_t size = *(reinterpret_cast<uint64_t *>(*data_buf));
+    *data_buf += sizeof(uint64_t);
     std::vector<size_t> tmp_dim(size / sizeof(size_t));
-    memcpy(tmp_dim.data(), data_buf, size);
+    memcpy(tmp_dim.data(), *data_buf, size);
     (*lod)[i] = std::move(tmp_dim);
-    data_buf += size;
+    *data_buf += size;
   }
   // tensor version
-  uint32_t tensor_version = *(reinterpret_cast<const uint32_t *>(data_buf));
-  data_buf += sizeof(uint32_t);
+  uint32_t tensor_version = *(reinterpret_cast<uint32_t *>(*data_buf));
+  *data_buf += sizeof(uint32_t);
   // tensor desc size
-  int32_t tensor_desc_size = *(reinterpret_cast<const int32_t *>(data_buf));
-  data_buf += sizeof(int32_t);
+  int32_t tensor_desc_size = *(reinterpret_cast<int32_t *>(*data_buf));
+  *data_buf += sizeof(int32_t);
   // skip tensor desc
-  data_buf += tensor_desc_size;
+  *data_buf += tensor_desc_size;
 
-  const framework::TensorDesc &tensor_desc = var_desc.Tensor_desc();
+  const framework::TensorDesc &tensor_desc = var_desc->Tensor_desc();
   tensor->Resize(framework::make_ddim(tensor_desc.Dims()));
   // parse tensor from stream
   switch (tensor_desc.DataType()) {
     case framework::VARTYPE_TYPE_FP32:
-      LoadMemInternal<float>(data_buf, tensor);
+      LoadMemInternal<float>(reinterpret_cast<void **>(data_buf), tensor);
       break;
     case framework::VARTYPE_TYPE_INT8:
-      LoadMemInternal<int8_t>(data_buf, tensor);
+      LoadMemInternal<int8_t>(reinterpret_cast<void **>(data_buf), tensor);
       break;
     case framework::VARTYPE_TYPE_INT32:
-      LoadMemInternal<int>(data_buf, tensor);
+      LoadMemInternal<int>(reinterpret_cast<void **>(data_buf), tensor);
       break;
     default:
       LOG(kLOG_ERROR) << "data type is not supported";
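The switch from `const void *data` to `void **data` is the heart of this refactor: instead of each caller re-deriving offsets, every load step advances the caller's cursor in place, so consecutive `LoadMemory` calls walk one combined parameter buffer. A standalone sketch of the pattern (all names here are made up for illustration):

```cpp
#include <cstdint>
#include <cstring>
#include <iostream>

// The callee advances the caller's cursor through the buffer, so each
// call continues where the previous one stopped -- this is what passing
// `void **data` (instead of `const void *data`) buys LoadMemory above.
template <typename T>
T ReadAndAdvance(char **cursor) {
  T value;
  std::memcpy(&value, *cursor, sizeof(T));
  *cursor += sizeof(T);  // the caller's pointer moves forward
  return value;
}

int main() {
  char buf[sizeof(uint32_t) + sizeof(uint64_t)];
  uint32_t version = 7;
  uint64_t lod_level = 2;
  std::memcpy(buf, &version, sizeof(version));
  std::memcpy(buf + sizeof(version), &lod_level, sizeof(lod_level));

  char *cursor = buf;
  std::cout << ReadAndAdvance<uint32_t>(&cursor) << "\n";  // prints 7
  std::cout << ReadAndAdvance<uint64_t>(&cursor) << "\n";  // prints 2
  return 0;
}
```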
@@ -181,11 +161,11 @@ void Executor<Dtype, P>::InitMemory() {
       if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
         continue;
       }
-      char *origin_data =
-          Get_binary_data(program_.model_path + "/" + var_desc->Name());
-      char *data = origin_data;
-      LoadMemory(data, *var_desc, tensor);
-      delete[] origin_data;
+      char *origin_data =
+          ReadFileToBuff(program_.model_path + "/" + var_desc->Name());
+      char *data = origin_data;  // LoadMemory advances this cursor
+      LoadMemory(reinterpret_cast<void **>(&data), var_desc, tensor);
+      delete[] origin_data;  // free the original, un-advanced pointer
     } else {
       if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
         varInputMemory(var_desc, var, tensor);
@@ -197,16 +177,17 @@ template <typename Dtype, Precision P>
 void Executor<Dtype, P>::InitCombineMemory() {
-  char *origin_data;
+  char *origin_data = nullptr;
+  bool self_alloc = false;
   if (program_.combined_params_buf && program_.combined_params_len) {
-    LOG(kLOG_INFO) << "use outter memory";
-    origin_data = (char *)program_.combined_params_buf;
+    origin_data = reinterpret_cast<char *>(
+        const_cast<uint8_t *>(program_.combined_params_buf));
   } else {
-    LOG(kLOG_INFO) << " begin init combine memory";
-    origin_data = Get_binary_data(program_.para_path);
+    self_alloc = true;
+    origin_data = ReadFileToBuff(program_.para_path);
   }
-  PADDLE_MOBILE_ENFORCE(origin_data != nullptr, "origin_data==nullptr!!!");
-  char *data = origin_data;
+  PADDLE_MOBILE_ENFORCE(origin_data != nullptr, "origin_data == nullptr");
+  // LoadMemory advances the cursor; origin_data keeps the address to free
+  char *data = origin_data;
   for (const auto &block : to_predict_program_->Blocks()) {
     for (const auto &var_desc : block->Vars()) {
       auto var = program_.scope->Var(var_desc->Name());
       auto tensor = var->template GetMutable<framework::LoDTensor>();
       if (var_desc->Persistable()) {
@@ -215,7 +196,7 @@ void Executor<Dtype, P>::InitCombineMemory() {
       if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
         continue;
       }
-      LoadMemory(data, *var_desc, tensor);
+      LoadMemory(reinterpret_cast<void **>(&data), var_desc, tensor);
     } else {
       if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
         varInputMemory(var_desc, var, tensor);
@@ -223,9 +204,10 @@ void Executor<Dtype, P>::InitCombineMemory() {
       }
     }
   }
-
-  delete[] origin_data;
-  LOG(kLOG_INFO) << " end init combine memory ";
+  if (self_alloc) {
+    delete[] origin_data;  // free the original pointer, not the moved cursor
+  }
+  LOG(kLOG_INFO) << "init combine memory finished";
 }
 
 template <typename Dtype, Precision P>
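`InitCombineMemory` mixes two ownership modes: a borrowed external buffer (`combined_params_buf`) and a self-allocated one (`ReadFileToBuff`). Only the latter may be freed, and only at its original address, because the load loop advances the cursor. A hedged standalone sketch of that pattern; `ParseParams` and the 1024-byte size are hypothetical:

```cpp
#include <cstddef>

// Sketch of the self_alloc pattern: free the buffer only if this
// function allocated it, and always free the original address.
void ParseParams(const char *external_buf, size_t external_len) {
  char *origin_data = nullptr;
  bool self_alloc = false;
  if (external_buf != nullptr && external_len > 0) {
    origin_data = const_cast<char *>(external_buf);  // borrowed memory
  } else {
    self_alloc = true;
    origin_data = new char[1024];  // stands in for ReadFileToBuff()
  }
  char *cursor = origin_data;  // loading advances this copy only
  // ... LoadMemory(&cursor, ...) once per persistable variable ...
  (void)cursor;
  if (self_alloc) {
    delete[] origin_data;  // never delete[] the advanced cursor: that is UB
  }
}

int main() {
  ParseParams(nullptr, 0);  // exercises the self-allocating branch
  return 0;
}
```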
@@ -233,33 +215,27 @@ bool Executor<Dtype, P>::varInputMemory(
     const std::shared_ptr<framework::VarDesc> &var_desc, Variable *var,
     framework::LoDTensor *tensor) const {
   auto type = var_desc->Tensor_desc().DataType();
-  bool is_mute_match = (type == framework::VARTYPE_TYPE_FP32) ||
-                       (type == framework::VARTYPE_TYPE_INT8) ||
-                       (type == framework::VARTYPE_TYPE_INT32) ||
-                       (type == framework::VARTYPE_TYPE_INT64);
-  PADDLE_MOBILE_ENFORCE(is_mute_match, "got unhandled data type : %d", type);
-
   switch (type) {
-    case framework::VARTYPE_TYPE_FP32: {
+    case framework::VARTYPE_TYPE_FP32:
       tensor->mutable_data<float>();
       break;
-    }
-    case framework::VARTYPE_TYPE_INT8: {
+    case framework::VARTYPE_TYPE_INT8:
       tensor->mutable_data<int8_t>();
       break;
-    }
-    case framework::VARTYPE_TYPE_INT32: {
+    case framework::VARTYPE_TYPE_INT32:
       tensor->mutable_data<int32_t>();
       break;
-    }
-    case framework::VARTYPE_TYPE_INT64: {
+    case framework::VARTYPE_TYPE_INT64:
       tensor->mutable_data<int64_t>();
       break;
-    }
-    default: {
+    default:
       break;
-    }
   }
+
+  bool is_mute_match = (type == framework::VARTYPE_TYPE_FP32) ||
+                       (type == framework::VARTYPE_TYPE_INT8) ||
+                       (type == framework::VARTYPE_TYPE_INT32) ||
+                       (type == framework::VARTYPE_TYPE_INT64);
+  PADDLE_MOBILE_ENFORCE(is_mute_match, "got unhandled data type : %d", type);
   return is_mute_match;
 }
 
@@ -299,17 +275,12 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::Predict(
       framework::GetVarValue<framework::LoDTensor>(out_keys[0], output_map,
                                                    *(program_.scope));
 #ifdef PADDLE_MOBILE_PROFILE
-  //  FILE *pf = fopen("profile.out", "w");
   std::unordered_map<std::string, uint64_t> _tp;
   for (int i = 0; i < profile.size(); i++) {
     const auto &pInfo = profile[i];
     uint64_t timeCost = pInfo.runEnd - pInfo.runBegin;
     _tp[ops[i]->Type()] += timeCost;
-    //    fprintf(pf, "%d\t%s\t%d\t%llu\t%llu\t%llu\n", i,
-    //    ops[i]->Type().c_str(),
-    //            pInfo.tid, pInfo.runBegin, pInfo.runEnd, timeCost);
   }
-  //  fclose(pf);
   printf("====================[ profile ]======================\n");
   using prof_t = std::pair<std::string, uint64_t>;
   std::vector<prof_t> _tv(_tp.begin(), _tp.end());
@@ -359,7 +330,6 @@ std::shared_ptr<framework::LoDTensor> Executor<Dtype, P>::PredictLod(
     if (loddable_) {
       ops[i]->InferShape();
     }
-    // to Run
     ops[i]->Run();
 #ifdef PADDLE_MOBILE_PROFILE
     clock_gettime(CLOCK_MONOTONIC, &ts);
@@ -375,17 +345,12 @@ std::shared_ptr<framework::LoDTensor> Executor<Dtype, P>::PredictLod(
       framework::GetVarValue<framework::LoDTensor>(out_keys[0], output_map,
                                                    *(program_.scope));
 #ifdef PADDLE_MOBILE_PROFILE
-  //  FILE *pf = fopen("profile.out", "w");
   std::unordered_map<std::string, uint64_t> _tp;
   for (int i = 0; i < profile.size(); i++) {
     const auto &pInfo = profile[i];
     uint64_t timeCost = pInfo.runEnd - pInfo.runBegin;
     _tp[ops[i]->Type()] += timeCost;
-    //    fprintf(pf, "%d\t%s\t%d\t%llu\t%llu\t%llu\n", i,
-    //    ops[i]->Type().c_str(),
-    //            pInfo.tid, pInfo.runBegin, pInfo.runEnd, timeCost);
   }
-  //  fclose(pf);
   printf("====================[ profile ]======================\n");
   using prof_t = std::pair<std::string, uint64_t>;
   std::vector<prof_t> _tv(_tp.begin(), _tp.end());
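Under `PADDLE_MOBILE_PROFILE`, both `Predict` and `PredictLod` now aggregate per-op-type run time into a hash map instead of dumping raw records to `profile.out`. A standalone sketch of that aggregation; the sample timings and the descending sort are assumptions, since the sorting code falls outside these hunks:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// Accumulate per-op-type costs in a hash map, then sort descending
// for the printed report, mirroring the profile block above.
int main() {
  std::unordered_map<std::string, uint64_t> _tp;
  _tp["conv2d"] += 1200;  // hypothetical timings in nanoseconds
  _tp["relu"] += 300;
  _tp["conv2d"] += 900;  // repeated ops of one type add up

  using prof_t = std::pair<std::string, uint64_t>;
  std::vector<prof_t> _tv(_tp.begin(), _tp.end());
  std::sort(_tv.begin(), _tv.end(),
            [](const prof_t &a, const prof_t &b) { return a.second > b.second; });
  for (const auto &p : _tv) {
    std::printf("%-10s %8llu\n", p.first.c_str(),
                static_cast<unsigned long long>(p.second));
  }
  return 0;
}
```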
diff --git a/src/io/executor.h b/src/io/executor.h
index c75979db578c99bd9c5b366588a2cb950bcfe54f..18f5d8123b3052d399f5110d81915913bcc88a8b 100644
--- a/src/io/executor.h
+++ b/src/io/executor.h
@@ -14,15 +14,16 @@ limitations under the License. */
 
 #pragma once
 
-#include <map>
-#include <memory>
-#include <string>
-#include <vector>
-
 #include "common/types.h"
+#include "common/util.h"
 #include "framework/lod_tensor.h"
 #include "framework/operator.h"
 #include "framework/program/program.h"
 #include "framework/tensor.h"
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
 
 namespace paddle_mobile {
@@ -37,15 +38,18 @@ class Executor {
 
   Executor(const framework::Program<Dtype> program,
            const bool use_optimize = true, const bool loddable = false);
-  // predict with tensor
-  // @param input input tensor to do prediction
+
+  // predict with tensor input
+  // @param t input tensor to do prediction
   // @return predicted tensor
   std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t);
-  // predict with lod tensor
-  // @param input input lod tensor to do prediction
+
+  // predict with lod tensor input
+  // @param t input lod tensor to do prediction
   // @return predicted lod tensor
   std::shared_ptr<framework::LoDTensor> PredictLod(
       const framework::LoDTensor &t);
+
   // predict with vector input and dims
   // @param input vector whose elements will be formed
   // @param input lod tensor to do prediction
@@ -57,21 +61,22 @@ class Executor {
 
  protected:
   Executor() = default;
+  std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t,
+                                             int block_id);
+  bool varInputMemory(const std::shared_ptr<framework::VarDesc> &var_desc,
+                      framework::Variable *var,
+                      framework::LoDTensor *tensor) const;
   void InitMemory();
-  void LoadMemory(const void *data,
-                  const framework::VarDesc var_desc,
-                  framework::LoDTensor *tensor);
   void InitCombineMemory();
+  void LoadMemory(void **data,
+                  const std::shared_ptr<framework::VarDesc> var_desc,
+                  framework::LoDTensor *tensor);
+
   framework::Program<Dtype> program_;
-  int batch_size_ = 1;
   std::shared_ptr<framework::ProgramDesc> to_predict_program_;
-  std::shared_ptr<framework::Tensor> Predict(const framework::Tensor &t,
-                                             int block_id);
   std::map<framework::BlockDesc,
            std::vector<std::shared_ptr<framework::OperatorBase<Dtype>>>>
       ops_of_block_;
-  bool use_optimize_ = false;
-  bool loddable_ = false;
 #ifdef PADDLE_MOBILE_PROFILE
   struct ProfInfo {
     int tid = 0;
@@ -79,10 +84,9 @@ class Executor {
     uint64_t runEnd = 0UL;
   };
 #endif
-
-  bool varInputMemory(const std::shared_ptr<framework::VarDesc> &var_desc,
-                      framework::Variable *var,
-                      framework::LoDTensor *tensor) const;
+  int batch_size_ = 1;
+  bool use_optimize_ = false;
+  bool loddable_ = false;
 };
 
 }  // namespace paddle_mobile
diff --git a/src/io/loader.h b/src/io/loader.h
index 505366793da50413c52d8970cb47d062608d6484..7a04da1230cb78ba61f5c2746e2c29348b293b2b 100644
--- a/src/io/loader.h
+++ b/src/io/loader.h
@@ -24,19 +24,11 @@ namespace paddle_mobile {
 template <typename Dtype = CPU, Precision P = Precision::FP32>
 class Loader {
  public:
-  /*
-   * @b load separate format fluid model
-   * @b 加载分开形式的 fluid 模型
-   * */
   const framework::Program<Dtype, P> Load(const std::string &dirname,
                                           bool optimize = false,
                                           bool quantification = false,
                                           bool can_add_split = false);
 
-  /*
-   * @b load combine format fluid mode
-   * @b 加载结合在一起格式的模型
-   * */
   const framework::Program<Dtype, P> Load(const std::string &model_path,
                                           const std::string &para_path,
                                           bool optimize = false,
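For orientation, a hypothetical end-to-end use of the `Loader`/`Executor` API as declared in this patch; the file paths, input shape, and data filling are illustrative assumptions, not taken from the diff:

```cpp
// Load a combined-format fluid model and run one prediction; paths and
// shape below are made-up examples, the calls match the declarations
// in src/io/loader.h and src/io/executor.h.
#include "io/executor.h"
#include "io/loader.h"

int main() {
  paddle_mobile::Loader<paddle_mobile::CPU> loader;
  // combined format: one model file plus one parameter file
  auto program = loader.Load("model/__model__", "model/params",
                             /*optimize=*/true);

  paddle_mobile::Executor<paddle_mobile::CPU> executor(
      program, /*use_optimize=*/true, /*loddable=*/false);

  paddle_mobile::framework::Tensor input;
  input.Resize(paddle_mobile::framework::make_ddim({1, 3, 224, 224}));
  input.mutable_data<float>();  // fill with real input data in practice

  // returns std::shared_ptr<framework::Tensor>
  auto output = executor.Predict(input);
  (void)output;
  return 0;
}
```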