Unverified commit fd490e7b, authored by S suiyang, committed by GitHub

Merge branch 'develop' into develop

@@ -114,6 +114,9 @@ class CLEngine {
   cl_device_id DeviceID(int index = 0) { return devices_[index]; }

+  std::string GetCLPath() { return cl_path_; }
+  void setClPath(std::string cl_path) { cl_path_ = cl_path; }
+
  private:
   CLEngine() { initialized_ = false; }
@@ -129,6 +132,7 @@ class CLEngine {
   cl_int status_;

+  std::string cl_path_;
   std::unique_ptr<_cl_program, CLProgramDeleter> program_;

   // bool SetClContext();
......
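Note: these two hunks give the CLEngine singleton a configurable base directory for its OpenCL kernel sources, stored in the new cl_path_ member. A minimal usage sketch (the path value is a placeholder, not taken from this commit):

    // Point the engine at the directory that contains cl_kernel/ before any
    // kernel source is loaded; typically done once at startup.
    paddle_mobile::framework::CLEngine::Instance()->setClPath("/data/local/tmp");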
@@ -58,7 +58,8 @@ class CLScope {
   }

   auto program = CLEngine::Instance()->CreateProgramWith(
-      context_.get(), "./cl_kernel/" + file_name);
+      context_.get(),
+      CLEngine::Instance()->GetCLPath() + "/cl_kernel/" + file_name);

   DLOG << " --- begin build program -> " << file_name << " --- ";
   CLEngine::Instance()->BuildProgram(program.get());
......
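Note: the old code loaded kernel sources from the relative path "./cl_kernel/", which resolves against the process working directory; on Android that is usually not where the .cl files live, so program creation failed. Prefixing the configured engine path makes the lookup explicit. A sketch of the resulting resolution, with KernelPath as a hypothetical helper that is not part of the repo:

    // Hypothetical helper showing how the full kernel path is now assembled.
    std::string KernelPath(const std::string &file_name) {
      // e.g. "/data/local/tmp" + "/cl_kernel/" + "some_kernel.cl"
      return CLEngine::Instance()->GetCLPath() + "/cl_kernel/" + file_name;
    }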
@@ -649,12 +649,14 @@ void Executor<GPU_CL, Precision::FP32>::InitMemory() {

 template <>
 void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() {
-  char *origin_data;
+  char *origin_data = nullptr;
+  bool self_alloc = false;
   if (program_.combined_params_buf && program_.combined_params_len) {
     LOG(kLOG_INFO) << "use outter memory";
     origin_data = reinterpret_cast<char *>(program_.combined_params_buf);
   } else {
     LOG(kLOG_INFO) << " begin init combine memory";
+    self_alloc = true;
     origin_data = ReadFileToBuff(program_.para_path);
   }
   PADDLE_MOBILE_ENFORCE(origin_data != nullptr, "origin_data==nullptr!!!");
@@ -701,7 +703,9 @@ void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() {
       }
     }
   }
-  delete origin_data;
+  if (self_alloc) {
+    delete origin_data;
+  }
   LOG(kLOG_INFO) << " end init combine memory ";
 }
......
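Note: these two hunks fix an ownership bug. origin_data may point either at caller-owned memory (combined_params_buf) or at a buffer this function read from disk; unconditionally deleting it freed memory the executor did not own. The self_alloc flag records which case applies. A standalone sketch of the pattern; it assumes the buffer is allocated with new[], in which case the matching release is delete[] (the commit itself uses plain delete):

    #include <cstddef>

    // Consume a parameter buffer that is either borrowed or self-allocated.
    void UseParams(char *external_buf, std::size_t len) {
      char *buf = nullptr;
      bool self_alloc = false;
      if (external_buf != nullptr) {
        buf = external_buf;   // borrowed: the caller keeps ownership
      } else {
        self_alloc = true;
        buf = new char[len];  // owned: this function must release it
      }
      // ... use buf ...
      if (self_alloc) {
        delete[] buf;         // free only what this function allocated
      }
    }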
@@ -82,6 +82,54 @@ void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram(
     }
   }
 }
+
+template <>
+const Program<GPU_CL, Precision::FP32>
+Loader<GPU_CL, Precision::FP32>::LoadCombinedMemory(
+    size_t read_size, const uint8_t *buf, size_t combined_params_len,
+    uint8_t *combined_params_buf, bool optimize, bool quantification) {
+  bool can_add_split = false;
+
+  PaddleMobile__Framework__Proto__ProgramDesc *c_program;
+  PADDLE_MOBILE_ENFORCE(buf != nullptr, "read from __model__ is null");
+
+  c_program = paddle_mobile__framework__proto__program_desc__unpack(
+      nullptr, read_size, buf);
+  //
+  PADDLE_MOBILE_ENFORCE(c_program != nullptr, "program is null");
+  //
+  DLOG << "n_ops: " << (*c_program->blocks)->n_ops;
+  //
+  auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);
+
+  Program<GPU_CL, Precision::FP32> program;
+  program.combined = true;
+  program.originProgram = originProgramDesc;
+  program.quantification = quantification;
+  program.combined_params_len = combined_params_len;
+  program.combined_params_buf = combined_params_buf;
+
+  auto scope = std::make_shared<Scope>();
+  program.scope = scope;
+  InitMemoryFromProgram(originProgramDesc, scope);
+  if (optimize) {
+    ProgramOptimize program_optimize;
+    program.optimizeProgram =
+        program_optimize.FusionOptimize(originProgramDesc, can_add_split);
+    if (!program.optimizeProgram) {
+      program.optimizeProgram = originProgramDesc;
+    }
+  }
+  if (optimize) {
+    program.optimizeProgram->Description("optimize: ");
+  } else {
+    originProgramDesc->Description("program: ");
+  }
+  paddle_mobile__framework__proto__program_desc__free_unpacked(c_program,
+                                                               nullptr);
+  return program;
+}
 #endif

 /**
......
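Note: the new LoadCombinedMemory specialization unpacks the protobuf-c model descriptor straight from an in-memory buffer and pairs the unpack with free_unpacked before returning. It stores combined_params_buf by pointer rather than copying it, so the caller must keep the params buffer alive until the executor has finished initializing from it. A caller-side sketch; ReadWholeFile and the file names are placeholders, not part of the repo:

    // Hypothetical caller: both buffers must outlive the load/init sequence.
    std::vector<uint8_t> model_buf = ReadWholeFile("model");    // placeholder
    std::vector<uint8_t> params_buf = ReadWholeFile("params");  // placeholder
    auto program = loader.LoadCombinedMemory(
        model_buf.size(), model_buf.data(), params_buf.size(),
        params_buf.data(), /*optimize=*/true, /*quantification=*/false);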
@@ -29,7 +29,9 @@ PaddleMobilePredictor<Dtype, P>::PaddleMobilePredictor(
 template <typename Dtype, Precision P>
 bool PaddleMobilePredictor<Dtype, P>::Init(const PaddleMobileConfig &config) {
   paddle_mobile_.reset(new PaddleMobile<Dtype, P>());
+#ifdef PADDLE_MOBILE_CL
+  paddle_mobile_->SetCLPath(config.cl_path);
+#endif
   if (config.memory_pack.from_memory) {
     DLOG << "load from memory!";
     paddle_mobile_->LoadCombinedMemory(config.memory_pack.model_size,
......
@@ -132,6 +132,7 @@ struct PaddleMobileConfig : public PaddlePredictor::Config {
   int thread_num = 1;
   std::string prog_file;
   std::string param_file;
+  std::string cl_path;
   struct PaddleModelMemoryPack memory_pack;
 };
......
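Note: together these hunks thread the kernel directory through the public C++ API: PaddleMobileConfig gains a cl_path field, and Init forwards it to the engine before any model is loaded (guarded so CPU-only builds are unaffected). A minimal config sketch using only fields visible in this commit; the path value is a placeholder:

    paddle_mobile::PaddleMobileConfig config;
    config.precision = paddle_mobile::PaddleMobileConfig::FP32;
    config.device = paddle_mobile::PaddleMobileConfig::kGPU_CL;
    config.cl_path = "/data/local/tmp";  // directory that contains cl_kernel/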
@@ -158,6 +158,13 @@ void PaddleMobile<Dtype, P>::Predict_To(int end) {
 }
 #endif

+#ifdef PADDLE_MOBILE_CL
+template <typename Dtype, Precision P>
+void PaddleMobile<Dtype, P>::SetCLPath(std::string path) {
+  framework::CLEngine::Instance()->setClPath(path);
+}
+#endif
+
 template class PaddleMobile<CPU, Precision::FP32>;
 template class PaddleMobile<FPGA, Precision::FP32>;
 template class PaddleMobile<GPU_MALI, Precision::FP32>;
......
@@ -26,6 +26,9 @@ limitations under the License. */
 #include "framework/load_ops.h"
 #include "framework/loader.h"
 #include "framework/tensor.h"
+#ifdef PADDLE_MOBILE_CL
+#include "framework/cl/cl_engine.h"
+#endif

 namespace paddle_mobile {
@@ -68,6 +71,11 @@ class PaddleMobile {
   void Predict_To(int end);
 #endif

+#ifdef PADDLE_MOBILE_CL
+ public:
+  void SetCLPath(std::string cl_path);
+#endif
+
  private:
   std::shared_ptr<framework::Loader<Dtype, P>> loader_;
   std::shared_ptr<framework::Executor<Dtype, P>> executor_;
......
@@ -3307,8 +3307,13 @@ void Gemm::Sgemm_omp(int m, int n, int k, float alpha, const float *A, int lda,
       float *local_A = packedA + MC * KC * local_threads;
       float *local_C = packedC + MC * NC * local_threads;
       (*this.*procPackA)(mc, KC, mc % MR, &A(i, 0), lda, local_A);
-      InnerKernelWithBias(mc, n, alpha, local_A, packedB, beta, local_C,
-                          &C(i, 0), ldc, relu, bias + i);
+      if (bias == nullptr) {
+        InnerKernelWithBias(mc, n, alpha, local_A, packedB, beta, local_C,
+                            &C(i, 0), ldc, relu, nullptr);
+      } else {
+        InnerKernelWithBias(mc, n, alpha, local_A, packedB, beta, local_C,
+                            &C(i, 0), ldc, relu, bias + i);
+      }
     }
   } else {
 #pragma omp parallel for
......
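Note: when Sgemm_omp is called without a bias, bias is nullptr and the old code still computed bias + i. Pointer arithmetic on a null pointer is undefined behavior in C++ even if the result is never dereferenced, so the guard passes nullptr through untouched. A standalone illustration of the same rule:

    #include <cstddef>

    // Offset a possibly-null bias pointer without invoking UB.
    const float *BiasAt(const float *bias, std::ptrdiff_t i) {
      // bias + i is undefined behavior when bias == nullptr (except i == 0).
      return bias == nullptr ? nullptr : bias + i;
    }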
@@ -55,11 +55,11 @@ static char *Get_binary_data(std::string filename) {
 paddle_mobile::PaddleMobileConfig GetConfig() {
   paddle_mobile::PaddleMobileConfig config;
   config.precision = paddle_mobile::PaddleMobileConfig::FP32;
-  config.device = paddle_mobile::PaddleMobileConfig::kCPU;
+  config.device = paddle_mobile::PaddleMobileConfig::kGPU_CL;
   const std::shared_ptr<paddle_mobile::PaddleModelMemoryPack> &memory_pack =
       std::make_shared<paddle_mobile::PaddleModelMemoryPack>();
-  auto model_path = std::string(g_genet_combine) + "/model";
-  auto params_path = std::string(g_genet_combine) + "/params";
+  auto model_path = std::string(g_mobilenet_combined) + "/model";
+  auto params_path = std::string(g_mobilenet_combined) + "/params";
   memory_pack->model_size =
       ReadBuffer(model_path.c_str(), &memory_pack->model_buf);
   std::cout << "sizeBuf: " << memory_pack->model_size << std::endl;
......
@@ -22,7 +22,7 @@ int main() {
   auto time1 = paddle_mobile::time();
   // auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
   // std::string(g_mobilenet_detect) + "/params", true);
+  paddle_mobile.SetCLPath(".");
   auto isok = paddle_mobile.Load(std::string(g_mobilenet), true);
   if (isok) {
     auto time2 = paddle_mobile::time();
......
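Note: the test sets the CL path to ".", so the cl_kernel/ directory must sit in the working directory of the test binary. An end-to-end sketch under that same assumption; the template arguments are assumed to mirror the explicit instantiations in paddle_mobile.cpp:

    paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL,
                                paddle_mobile::Precision::FP32> pm;
    pm.SetCLPath(".");  // kernels are read from ./cl_kernel/*.cl
    if (pm.Load(std::string(g_mobilenet), /*optimize=*/true)) {
      // ... feed input, call Predict, read the output tensor ...
    }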