diff --git a/src/framework/cl/cl_tensor.h b/src/framework/cl/cl_tensor.h
index a0ed438f9773dbacaaa7446d594719c0cf12b32e..01fdc7970e772e945ae880cee5bdc2bec589ffab 100644
--- a/src/framework/cl/cl_tensor.h
+++ b/src/framework/cl/cl_tensor.h
@@ -26,7 +26,7 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace framework {
 
-class CLTensor : TensorBase {
+class CLTensor : public TensorBase {
  public:
   CLTensor(cl_context context, cl_command_queue command_queue)
       : context_(context), command_queue_(command_queue) {}
diff --git a/src/operators/kernel/cl/feed_kernel.cpp b/src/operators/kernel/cl/feed_kernel.cpp
index c8c94038aa3536431c1fc9dcf982e6714b6484bb..0522905fee91fd466b2c334677acce0d25cfac7e 100644
--- a/src/operators/kernel/cl/feed_kernel.cpp
+++ b/src/operators/kernel/cl/feed_kernel.cpp
@@ -31,8 +31,6 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
   auto kernel = this->cl_helper_.KernelAt(0);
   auto default_work_size = this->cl_helper_.DefaultWorkSize(*(param.Out()));
   cl_int status;
-  param.Out()->InitEmptyImage(cl_helper_.CLContext(),
-                              cl_helper_.CLCommandQueue(), param.Out()->dims());
   auto output = param.Out();
   const Tensor *input = &param.InputX()->at(col);
   //  DLOG << *input;
diff --git a/src/operators/kernel/cl/fetch_kernel.cpp b/src/operators/kernel/cl/fetch_kernel.cpp
index 4a477f081e89b6fe7b1dbd34ab80cacfea2c21fd..2ce3e928a12ce752236709211ce06a52e3fcd9c3 100644
--- a/src/operators/kernel/cl/fetch_kernel.cpp
+++ b/src/operators/kernel/cl/fetch_kernel.cpp
@@ -14,19 +14,13 @@ limitations under the License. */
 
 #include "operators/kernel/fetch_kernel.h"
 #include "framework/cl/cl_tensor.h"
-// #include "common/common.h"
-// #include <iostream>
 
 namespace paddle_mobile {
 namespace operators {
 
 template <>
 bool FetchKernel<GPU_CL, float>::Init(FetchParam<GPU_CL> *param) {
-  //  if (param->InputX()->dims().size() <= 2) {
-  //    this->cl_helper_.AddKernel("fetch_2d", "fetch_kernel.cl");
-  //  } else {
   this->cl_helper_.AddKernel("fetch", "fetch_kernel.cl");
-  //  }
   return true;
 }
@@ -40,25 +34,28 @@ void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) {
   auto *out = &param.Out()->at(col);
   out->Resize(param.InputX()->dims());
   out->mutable_data<float>();
-  const auto &dim = param.InputX()->dims();
+
+  DLOG << "fetch kernel out dims = " << out->dims();
+  DLOG << "fetch kernel out memory size = " << out->memory_size();
+
+  auto dim = param.InputX()->dims();
   size_t new_dims[] = {1, 1, 1, 1};
 
   for (int j = 0; j < dim.size(); ++j) {
     new_dims[4 - dim.size() + j] = dim[j];
   }
 
-  size_t C, in_height, in_width;
+  size_t in_ch, in_height, in_width;
 
-  C = new_dims[1];
+  in_ch = new_dims[1];
   in_height = new_dims[2];
-  //  if (dim.size() <= 2) {
-  //    in_width = param.InputX()->ImageWidth();
-  //  } else {
   in_width = new_dims[3];
-  //  }
+
+  int size_ch = in_height * in_width;
+  int size_block = size_ch * 4;
+  int size_batch = size_ch * in_ch;
 
-  CLTensor out_cl_tensor(this->cl_helper_.CLContext(),
-                         this->cl_helper_.CLCommandQueue());
+  framework::CLTensor out_cl_tensor(this->cl_helper_.CLContext(),
+                                    this->cl_helper_.CLCommandQueue());
   out_cl_tensor.Resize(out->dims());
   cl_mem outBuffer = out_cl_tensor.mutable_data<float>();
@@ -66,35 +63,28 @@
   clSetKernelArg(kernel, 1, sizeof(int), &in_width);
   clSetKernelArg(kernel, 2, sizeof(cl_mem), &input);
   clSetKernelArg(kernel, 3, sizeof(cl_mem), &outBuffer);
-  //  if (dim.size() > 2) {
-  int size_ch = in_height * in_width;
-  int size_block = size_ch * 4;
-  int size_batch = size_ch * C;
-  int out_c = new_dims[1];
   clSetKernelArg(kernel, 4, sizeof(int), &size_ch);
   clSetKernelArg(kernel, 5, sizeof(int), &size_block);
   clSetKernelArg(kernel, 6, sizeof(int), &size_batch);
-  clSetKernelArg(kernel, 7, sizeof(int), &out_c);
-  //  }
+  clSetKernelArg(kernel, 7, sizeof(int), &in_ch);
 
   //  cl_event wait_event = param.InpdutX()->GetClEvent();
   clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
                          default_work_size.data(), NULL, 0, NULL, NULL);
-  //  auto time1 = paddle_mobile::time();
-  //  printf(" before finish \n");
   //  clFlsh(this->cl_helper_.CLCommandQueue());
   clFinish(this->cl_helper_.CLCommandQueue());
   //  printf(" after finish \n");
-  //  auto time2 = paddle_mobile::time();
-  //
-  //
-  //  std::cout << " finish cost :" << paddle_mobile::time_diff(time1, time2)
-  //            << "ms" << std::endl;
+  DLOG << "fetch kernel out dims = " << out->dims();
+  DLOG << "fetch kernel out memory size = " << out->memory_size();
 
-  memcpy(out->data<float>(), out_cl_tensor.Data<float>(), out->memory_size());
+  DLOG << "fetch kernel out_cl_tensor dims = " << out_cl_tensor.dims();
+  DLOG << "fetch kernel out_cl_tensor memory size = "
+       << out_cl_tensor.memory_size();
+
+  memcpy(out->data<float>(), out_cl_tensor.Data<float>(),
+         sizeof(float) * out->numel());
 }
 
 template class FetchKernel<GPU_CL, float>;
diff --git a/test/net/test_super.cpp b/test/net/test_super.cpp
index dcae08887de02cda30d291801c2696206f0bf84d..6815a886a7e22178c52bc447a99faa910520d817 100644
--- a/test/net/test_super.cpp
+++ b/test/net/test_super.cpp
@@ -21,12 +21,14 @@ int main() {
   paddle_mobile::PaddleMobileConfigInternal config;
   config.load_when_predict = true;
-  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile(config);
-  //  paddle_mobile.SetThreadNum(4);
   auto time1 = paddle_mobile::time();
 #ifdef PADDLE_MOBILE_CL
+  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile(config);
   paddle_mobile.SetCLPath("/data/local/tmp/bin");
+#else
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile(config);
 #endif
+  //  paddle_mobile.SetThreadNum(4);
 
   auto isok = paddle_mobile.Load(std::string(g_super) + "/model",
                                  std::string(g_super) + "/params", true, false,
@@ -131,12 +133,12 @@ int main() {
       auto time5 = paddle_mobile::time();
       vec_result4 = paddle_mobile.Predict(input4, dims4);
       auto time6 = paddle_mobile::time();
-      std::cout << "224*224 predict cost, iteration " << i << ": "
+      std::cout << "300*300 predict cost, iteration " << i << ": "
                 << paddle_mobile::time_diff(time5, time6) << "ms" << std::endl;
     }
     auto time4 = paddle_mobile::time();
-    std::cout << "224*224 predict cost :"
+    std::cout << "300*300 predict cost :"
               << paddle_mobile::time_diff(time3, time4) / max << "ms"
               << std::endl;
 
     //  biggest = std::max_element(std::begin(vec_result4),
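Note for reviewers: the fetch_kernel.cpp hunks above right-align an arbitrary-rank dim vector into {N, C, H, W} and derive the three scalar kernel arguments from it. A minimal host-side sketch of that math follows; it is not part of the patch, assumes dim.size() <= 4 as the kernel code does, and the names FetchArgs and ComputeFetchArgs are hypothetical, used only for illustration.

// Standalone C++ sketch mirroring the `new_dims` padding and the
// size_ch / size_block / size_batch derivation in FetchKernel::Compute.
#include <cstddef>
#include <vector>

struct FetchArgs {
  size_t in_ch;      // C after padding dims to NCHW
  size_t in_height;  // H
  size_t in_width;   // W
  int size_ch;       // elements in one channel plane (H * W)
  int size_block;    // elements covered by one 4-channel image block
  int size_batch;    // elements in one batch item (C * H * W)
};

FetchArgs ComputeFetchArgs(const std::vector<size_t> &dim) {
  // Right-align the real dims into {N, C, H, W}, padding with 1s on the
  // left, exactly like the `new_dims` loop in the patch. A 2-D {n, c}
  // input therefore degenerates to H = W = 1.
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < dim.size(); ++j) {
    new_dims[4 - dim.size() + j] = dim[j];
  }
  FetchArgs args;
  args.in_ch = new_dims[1];
  args.in_height = new_dims[2];
  args.in_width = new_dims[3];
  args.size_ch = static_cast<int>(args.in_height * args.in_width);
  args.size_block = args.size_ch * 4;  // channels are packed 4 at a time
  args.size_batch = args.size_ch * static_cast<int>(args.in_ch);
  return args;
}

For example, a {1, 3, 224, 224} input yields size_ch = 50176, size_block = 200704, size_batch = 150528. The memcpy change in the same hunk (sizeof(float) * out->numel() instead of out->memory_size()) presumably pins the copy length to exactly the tensor payload, so a larger-than-needed underlying allocation can no longer inflate the read from the CL buffer.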