diff --git a/src/framework/cl/cl_engine.h b/src/framework/cl/cl_engine.h
index f9f373b2a74087960b03c55ec922f95f187cfbc4..76d08513aa4301b9aa22b159a70a17b7b0619b92 100644
--- a/src/framework/cl/cl_engine.h
+++ b/src/framework/cl/cl_engine.h
@@ -114,6 +114,9 @@ class CLEngine {
 
   cl_device_id DeviceID(int index = 0) { return devices_[index]; }
 
+  std::string GetCLPath() { return cl_path_; }
+  void setClPath(std::string cl_path) { cl_path_ = cl_path; }
+
  private:
   CLEngine() { initialized_ = false; }
 
@@ -129,6 +132,7 @@ class CLEngine {
 
   cl_int status_;
 
+  std::string cl_path_;
   std::unique_ptr<_cl_program, CLProgramDeleter> program_;
 
   // bool SetClContext();
diff --git a/src/framework/cl/cl_scope.h b/src/framework/cl/cl_scope.h
index 0965b133e6d8270b7cd6e28c8ed9a33739b2e2a8..c7c06ca75f47cd65d2350dfa6930068aca73ced0 100644
--- a/src/framework/cl/cl_scope.h
+++ b/src/framework/cl/cl_scope.h
@@ -58,7 +58,8 @@ class CLScope {
     }
 
     auto program = CLEngine::Instance()->CreateProgramWith(
-        context_.get(), "./cl_kernel/" + file_name);
+        context_.get(),
+        CLEngine::Instance()->GetCLPath() + "/cl_kernel/" + file_name);
 
     DLOG << " --- begin build program -> " << file_name << " --- ";
     CLEngine::Instance()->BuildProgram(program.get());
diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp
index 1b441bc5bdfed264f525dda571278067d2ae99b7..81bfaf3a4d07f5a3ef82c19de57f1681dfc1f8c7 100644
--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -649,12 +649,14 @@ void Executor<GPU_CL, Precision::FP32>::InitMemory() {
 
 template <>
 void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() {
-  char *origin_data;
+  char *origin_data = nullptr;
+  bool self_alloc = false;
   if (program_.combined_params_buf && program_.combined_params_len) {
     LOG(kLOG_INFO) << "use outter memory";
     origin_data = reinterpret_cast<char *>(program_.combined_params_buf);
   } else {
     LOG(kLOG_INFO) << " begin init combine memory";
+    self_alloc = true;
     origin_data = ReadFileToBuff(program_.para_path);
   }
   PADDLE_MOBILE_ENFORCE(origin_data != nullptr, "origin_data==nullptr!!!");
@@ -701,7 +703,9 @@
       }
     }
   }
-  delete origin_data;
+  if (self_alloc) {
+    delete origin_data;
+  }
   LOG(kLOG_INFO) << " end init combine memory ";
 }
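Note on the executor.cpp hunk above: origin_data may now alias the caller-supplied combined_params_buf, so the buffer is released only when the executor allocated it itself, which is what the self_alloc flag records. A minimal standalone sketch of this ownership-flag pattern follows; the names ReadAllFake and Consume are hypothetical stand-ins, not paddle-mobile APIs.

#include <cstdio>
#include <iostream>

// Stand-in for ReadFileToBuff(): returns a heap buffer the caller owns.
static char *ReadAllFake(const char *path) {
  char *buf = new char[64];
  std::snprintf(buf, 64, "contents of %s", path);
  return buf;
}

static void Consume(char *external_buf, const char *path) {
  char *data = nullptr;
  bool self_alloc = false;  // did we allocate `data` ourselves?
  if (external_buf != nullptr) {
    data = external_buf;  // borrowed: the caller keeps ownership
  } else {
    self_alloc = true;  // owned: must be released below
    data = ReadAllFake(path);
  }
  std::cout << data << "\n";
  if (self_alloc) {
    delete[] data;  // free only what we allocated ourselves
  }
}

int main() {
  char external[] = "caller-owned buffer";
  Consume(external, nullptr);      // external memory: not freed here
  Consume(nullptr, "params.bin");  // self-allocated: freed here
  return 0;
}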
diff --git a/src/framework/loader.cpp b/src/framework/loader.cpp
index a434314730eb40b7e4017050a84a7d9742934396..eb07e9f6155370880f6fb8e302a8e396df17954d 100644
--- a/src/framework/loader.cpp
+++ b/src/framework/loader.cpp
@@ -82,6 +82,54 @@ void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram(
     }
   }
 }
+template <>
+const Program<GPU_CL, Precision::FP32>
+Loader<GPU_CL, Precision::FP32>::LoadCombinedMemory(
+    size_t read_size, const uint8_t *buf, size_t combined_params_len,
+    uint8_t *combined_params_buf, bool optimize, bool quantification) {
+  bool can_add_split = false;
+
+  PaddleMobile__Framework__Proto__ProgramDesc *c_program;
+  PADDLE_MOBILE_ENFORCE(buf != nullptr, "read from __model__ is null");
+
+  c_program = paddle_mobile__framework__proto__program_desc__unpack(
+      nullptr, read_size, buf);
+  //
+  PADDLE_MOBILE_ENFORCE(c_program != nullptr, "program is null");
+  //
+  DLOG << "n_ops: " << (*c_program->blocks)->n_ops;
+  //
+
+  auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);
+
+  Program<GPU_CL, Precision::FP32> program;
+  program.combined = true;
+  program.originProgram = originProgramDesc;
+  program.quantification = quantification;
+  program.combined_params_len = combined_params_len;
+  program.combined_params_buf = combined_params_buf;
+
+  auto scope = std::make_shared<Scope>();
+  program.scope = scope;
+  InitMemoryFromProgram(originProgramDesc, scope);
+  if (optimize) {
+    ProgramOptimize program_optimize;
+    program.optimizeProgram =
+        program_optimize.FusionOptimize(originProgramDesc, can_add_split);
+    if (!program.optimizeProgram) {
+      program.optimizeProgram = originProgramDesc;
+    }
+  }
+  if (optimize) {
+    program.optimizeProgram->Description("optimize: ");
+  } else {
+    originProgramDesc->Description("program: ");
+  }
+  paddle_mobile__framework__proto__program_desc__free_unpacked(c_program,
+                                                               nullptr);
+  return program;
+}
+
 #endif
 
 /**
diff --git a/src/io/api_paddle_mobile.cc b/src/io/api_paddle_mobile.cc
index 67f255315fa71acbf24f5071735020c0a435ce64..144cf127a44c78279ca1d95815646a4f01fed6bd 100644
--- a/src/io/api_paddle_mobile.cc
+++ b/src/io/api_paddle_mobile.cc
@@ -29,7 +29,9 @@ PaddleMobilePredictor<Dtype, P>::PaddleMobilePredictor(
 template <typename Dtype, Precision P>
 bool PaddleMobilePredictor<Dtype, P>::Init(const PaddleMobileConfig &config) {
   paddle_mobile_.reset(new PaddleMobile<Dtype, P>());
-
+#ifdef PADDLE_MOBILE_CL
+  paddle_mobile_->SetCLPath(config.cl_path);
+#endif
   if (config.memory_pack.from_memory) {
     DLOG << "load from memory!";
     paddle_mobile_->LoadCombinedMemory(config.memory_pack.model_size,
diff --git a/src/io/paddle_inference_api.h b/src/io/paddle_inference_api.h
index d37895d3aaa108edb1a8956ccbcb91cbe4b97725..3c9ffa00c7e749d1c9d77562b2db0b42ee605164 100644
--- a/src/io/paddle_inference_api.h
+++ b/src/io/paddle_inference_api.h
@@ -132,6 +132,7 @@ struct PaddleMobileConfig : public PaddlePredictor::Config {
   int thread_num = 1;
   std::string prog_file;
   std::string param_file;
+  std::string cl_path;
   struct PaddleModelMemoryPack memory_pack;
 };
 
diff --git a/src/io/paddle_mobile.cpp b/src/io/paddle_mobile.cpp
index 3cd7c38b2b102659739aefc66b4b25f61cc48bcf..921b72520f1905fcdc7b2a0d15ee4ec5d844cda7 100644
--- a/src/io/paddle_mobile.cpp
+++ b/src/io/paddle_mobile.cpp
@@ -158,6 +158,13 @@ void PaddleMobile<Dtype, P>::Predict_To(int end) {
 }
 #endif
 
+#ifdef PADDLE_MOBILE_CL
+template <typename Dtype, Precision P>
+void PaddleMobile<Dtype, P>::SetCLPath(std::string path) {
+  framework::CLEngine::Instance()->setClPath(path);
+}
+#endif
+
 template class PaddleMobile<CPU, Precision::FP32>;
 template class PaddleMobile<FPGA, Precision::FP32>;
 template class PaddleMobile<GPU_CL, Precision::FP32>;
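Note on the new API above: SetCLPath() forwards to framework::CLEngine::setClPath(), and CLScope then resolves kernel sources against "<cl_path>/cl_kernel/" instead of the previously hard-coded "./cl_kernel/". A hedged usage sketch follows; it assumes a PADDLE_MOBILE_CL build, the PaddleMobile<GPU_CL> template defaults, and hypothetical file paths.

// Usage sketch only; include path assumes the paddle-mobile src/ directory
// is on the compiler's include path.
#include "io/paddle_mobile.h"

int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> engine;
  // Point the engine at the directory that contains cl_kernel/;
  // CLScope will open "<cl_path>/cl_kernel/<file>" from here on.
  engine.SetCLPath("/data/local/tmp");  // hypothetical path
  if (engine.Load("/data/local/tmp/mobilenet", true)) {
    // ... feed an input tensor and call Predict() ...
  }
  return 0;
}

The same call appears in test/net/test_mobilenet_GPU.cpp below, which passes "." to keep the old working-directory behavior.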
diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h
index 0e86fa988fe8a07131d3ea19fe7c606c27d70c2c..1e8f81c51e02ea6bdbdea8694aa62c9c30e6e6a8 100644
--- a/src/io/paddle_mobile.h
+++ b/src/io/paddle_mobile.h
@@ -26,6 +26,9 @@ limitations under the License. */
 #include "framework/load_ops.h"
 #include "framework/loader.h"
 #include "framework/tensor.h"
+#ifdef PADDLE_MOBILE_CL
+#include "framework/cl/cl_engine.h"
+#endif
 
 namespace paddle_mobile {
 
@@ -68,6 +71,11 @@ class PaddleMobile {
   void Predict_To(int end);
 #endif
 
+#ifdef PADDLE_MOBILE_CL
+ public:
+  void SetCLPath(std::string cl_path);
+#endif
+
  private:
   std::shared_ptr<framework::Loader<Dtype, P>> loader_;
   std::shared_ptr<framework::Executor<Dtype, P>> executor_;
diff --git a/src/operators/math/gemm.cpp b/src/operators/math/gemm.cpp
index 44621ba99a92a3ed456b8d7d0959e3580662d910..605fa17c3c70ec3151cc1a2fb249edab336548a1 100644
--- a/src/operators/math/gemm.cpp
+++ b/src/operators/math/gemm.cpp
@@ -3307,8 +3307,13 @@ void Gemm::Sgemm_omp(int m, int n, int k, float alpha, const float *A, int lda,
       float *local_A = packedA + MC * KC * local_threads;
       float *local_C = packedC + MC * NC * local_threads;
       (*this.*procPackA)(mc, KC, mc % MR, &A(i, 0), lda, local_A);
-      InnerKernelWithBias(mc, n, alpha, local_A, packedB, beta, local_C,
-                          &C(i, 0), ldc, relu, bias + i);
+      if (bias == nullptr) {
+        InnerKernelWithBias(mc, n, alpha, local_A, packedB, beta, local_C,
+                            &C(i, 0), ldc, relu, nullptr);
+      } else {
+        InnerKernelWithBias(mc, n, alpha, local_A, packedB, beta, local_C,
+                            &C(i, 0), ldc, relu, bias + i);
+      }
     }
   } else {
 #pragma omp parallel for
diff --git a/test/framework/test_load_memory_inference_api.cpp b/test/framework/test_load_memory_inference_api.cpp
index 05d51910172547c6dab7adc8231663be55c916bf..5b2773f8f1a21c3b9253b34fc5c18cd64ece27e7 100644
--- a/test/framework/test_load_memory_inference_api.cpp
+++ b/test/framework/test_load_memory_inference_api.cpp
@@ -55,11 +55,11 @@ static char *Get_binary_data(std::string filename) {
 paddle_mobile::PaddleMobileConfig GetConfig() {
   paddle_mobile::PaddleMobileConfig config;
   config.precision = paddle_mobile::PaddleMobileConfig::FP32;
-  config.device = paddle_mobile::PaddleMobileConfig::kCPU;
+  config.device = paddle_mobile::PaddleMobileConfig::kGPU_CL;
   const std::shared_ptr<paddle_mobile::PaddleModelMemoryPack> &memory_pack =
       std::make_shared<paddle_mobile::PaddleModelMemoryPack>();
-  auto model_path = std::string(g_genet_combine) + "/model";
-  auto params_path = std::string(g_genet_combine) + "/params";
+  auto model_path = std::string(g_mobilenet_combined) + "/model";
+  auto params_path = std::string(g_mobilenet_combined) + "/params";
   memory_pack->model_size =
       ReadBuffer(model_path.c_str(), &memory_pack->model_buf);
   std::cout << "sizeBuf: " << memory_pack->model_size << std::endl;
diff --git a/test/net/test_mobilenet_GPU.cpp b/test/net/test_mobilenet_GPU.cpp
index a5276d6e521855ad81e6b9e2edb58c271ae713d9..07582e10dd5db8985f87bae215b8cf1808431565 100644
--- a/test/net/test_mobilenet_GPU.cpp
+++ b/test/net/test_mobilenet_GPU.cpp
@@ -22,7 +22,7 @@ int main() {
   auto time1 = paddle_mobile::time();
   //  auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
   //  std::string(g_mobilenet_detect) + "/params", true);
-
+  paddle_mobile.SetCLPath(".");
   auto isok = paddle_mobile.Load(std::string(g_mobilenet), true);
   if (isok) {
     auto time2 = paddle_mobile::time();
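For the high-level inference API, the new cl_path field travels from PaddleMobileConfig through PaddleMobilePredictor::Init() to CLEngine. A sketch mirroring GetConfig() from the updated test above; the file paths are hypothetical.

#include "io/paddle_inference_api.h"

paddle_mobile::PaddleMobileConfig MakeConfig() {
  paddle_mobile::PaddleMobileConfig config;
  config.precision = paddle_mobile::PaddleMobileConfig::FP32;
  config.device = paddle_mobile::PaddleMobileConfig::kGPU_CL;
  // New field: the directory containing cl_kernel/. Init() forwards it
  // to CLEngine::setClPath() under PADDLE_MOBILE_CL.
  config.cl_path = ".";
  config.prog_file = "/data/local/tmp/mobilenet/model";    // hypothetical
  config.param_file = "/data/local/tmp/mobilenet/params";  // hypothetical
  return config;
}

// The config is then handed to CreatePaddlePredictor(), as in the test,
// so GPU_CL users of the inference API no longer depend on the process
// working directory to locate their OpenCL kernel files.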